Expose reference external sequence API

* Expose the reference external sequences API for zstdmt.
  Allows external sequences of any length, which get split when necessary.
* Reset the LDM window when the context is reset.
* Store the maximum number of LDM sequences.
* Sequence generation now returns the number of last literals.
* Fix sequence generation to not throw out the last literals when blocks of
  more than 1 MB are encountered.
This commit is contained in:
Nick Terrell 2018-03-06 19:50:50 -08:00
parent 4c5cbac179
commit a9a6dcba63
5 changed files with 166 additions and 69 deletions

View File

@ -213,12 +213,6 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* s
/*-******************************************* /*-*******************************************
* Private declarations * Private declarations
*********************************************/ *********************************************/
/* A raw sequence as produced by the long distance matcher: a run of
 * `litLength` literal bytes followed by a match of `matchLength` bytes. */
typedef struct rawSeq_s {
U32 offset;      /* Distance between the current position and the match position */
U32 litLength;   /* Number of literal bytes preceding the match */
U32 matchLength; /* Length of the match */
} rawSeq;
typedef struct seqDef_s { typedef struct seqDef_s {
U32 offset; U32 offset;
U16 litLength; U16 litLength;

View File

@ -920,6 +920,8 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl
(U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag);
cctx->stage = ZSTDcs_init; cctx->stage = ZSTDcs_init;
cctx->dictID = 0; cctx->dictID = 0;
if (params.ldmParams.enableLdm)
ZSTD_window_clear(&cctx->ldmState.window);
ZSTD_invalidateMatchState(&cctx->blockState.matchState); ZSTD_invalidateMatchState(&cctx->blockState.matchState);
ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock);
XXH64_reset(&cctx->xxhState, 0); XXH64_reset(&cctx->xxhState, 0);
@ -1079,6 +1081,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
ptr = zc->ldmState.hashTable + ldmHSize; ptr = zc->ldmState.hashTable + ldmHSize;
zc->ldmSequences = (rawSeq*)ptr; zc->ldmSequences = (rawSeq*)ptr;
ptr = zc->ldmSequences + maxNbLdmSeq; ptr = zc->ldmSequences + maxNbLdmSeq;
zc->maxNbLdmSequences = maxNbLdmSeq;
memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window));
} }
@ -1103,6 +1106,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc,
memset(ptr, 0, ldmBucketSize); memset(ptr, 0, ldmBucketSize);
zc->ldmState.bucketOffsets = (BYTE*)ptr; zc->ldmState.bucketOffsets = (BYTE*)ptr;
ptr = zc->ldmState.bucketOffsets + ldmBucketSize; ptr = zc->ldmState.bucketOffsets + ldmBucketSize;
ZSTD_window_clear(&zc->ldmState.window);
} }
/* buffers */ /* buffers */
@ -1834,17 +1838,33 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc,
for (i = 0; i < ZSTD_REP_NUM; ++i) for (i = 0; i < ZSTD_REP_NUM; ++i)
zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i];
} }
if (zc->appliedParams.ldmParams.enableLdm) { if (zc->externSeqStore.pos < zc->externSeqStore.size) {
size_t const nbSeq = assert(!zc->appliedParams.ldmParams.enableLdm);
ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences, /* Updates ldmSeqStore.pos */
&zc->appliedParams.ldmParams,
src, srcSize, extDict);
lastLLSize = lastLLSize =
ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq, ZSTD_ldm_blockCompress(&zc->externSeqStore,
ms, &zc->seqStore, ms, &zc->seqStore,
zc->blockState.nextCBlock->rep, zc->blockState.nextCBlock->rep,
&zc->appliedParams.cParams, &zc->appliedParams.cParams,
src, srcSize, extDict); src, srcSize, extDict);
assert(zc->externSeqStore.pos <= zc->externSeqStore.size);
} else if (zc->appliedParams.ldmParams.enableLdm) {
rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0};
ldmSeqStore.seq = zc->ldmSequences;
ldmSeqStore.capacity = zc->maxNbLdmSequences;
/* Updates ldmSeqStore.size */
CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore,
&zc->appliedParams.ldmParams,
src, srcSize));
/* Updates ldmSeqStore.pos */
lastLLSize =
ZSTD_ldm_blockCompress(&ldmSeqStore,
ms, &zc->seqStore,
zc->blockState.nextCBlock->rep,
&zc->appliedParams.cParams,
src, srcSize, extDict);
assert(ldmSeqStore.pos == ldmSeqStore.size);
} else { /* not long range mode */ } else { /* not long range mode */
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict);
lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize);
@ -2005,6 +2025,19 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity)
} }
} }
/* ZSTD_referenceExternalSequences() :
 * Attaches a caller-provided array of `nbSeq` raw sequences to `cctx`.
 * The array is referenced, not copied: only the pointer is stored.
 * The read position is reset to 0, and both the size and the capacity of
 * the external sequence store are set to `nbSeq`.
 * @return : 0 on success,
 *           ERROR(stage_wrong) if `cctx` is not in the ZSTDcs_init stage,
 *           ERROR(parameter_unsupported) if long distance matching is enabled.
 */
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq)
{
    /* Only accepted while the context is still in its init stage. */
    if (cctx->stage != ZSTDcs_init) {
        return ERROR(stage_wrong);
    }
    /* Rejected when long distance matching is enabled. */
    if (cctx->appliedParams.ldmParams.enableLdm) {
        return ERROR(parameter_unsupported);
    }
    /* Start reading from the first sequence; the store is exactly full. */
    cctx->externSeqStore.pos = 0;
    cctx->externSeqStore.seq = seq;
    cctx->externSeqStore.capacity = nbSeq;
    cctx->externSeqStore.size = nbSeq;
    return 0;
}
static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx,
void* dst, size_t dstCapacity, void* dst, size_t dstCapacity,

View File

@ -152,6 +152,19 @@ typedef struct {
U32 windowLog; /* Window log for the LDM */ U32 windowLog; /* Window log for the LDM */
} ldmParams_t; } ldmParams_t;
/* A raw sequence: a run of `litLength` literal bytes followed by a match of
 * `matchLength` bytes. */
typedef struct {
U32 offset;      /* Distance between the current position and the match position; 0 is used as an end signal when splitting sequences across blocks */
U32 litLength;   /* Number of literal bytes preceding the match */
U32 matchLength; /* Length of the match */
} rawSeq;
/* A bounded array of raw sequences together with a read cursor.
 * Producers append at index `size` (never beyond `capacity`); consumers
 * advance `pos` as sequences are used up.
 * NOTE: the field order matters to code that initializes this struct
 * positionally, e.g. `{NULL, 0, 0, 0}`. */
typedef struct {
rawSeq* seq; /* The start of the sequences */
size_t pos; /* The position where reading stopped. <= size. */
size_t size; /* The number of sequences. <= capacity. */
size_t capacity; /* The capacity of the `seq` pointer */
} rawSeqStore_t;
struct ZSTD_CCtx_params_s { struct ZSTD_CCtx_params_s {
ZSTD_format_e format; ZSTD_format_e format;
ZSTD_compressionParameters cParams; ZSTD_compressionParameters cParams;
@ -191,9 +204,11 @@ struct ZSTD_CCtx_s {
ZSTD_customMem customMem; ZSTD_customMem customMem;
size_t staticSize; size_t staticSize;
seqStore_t seqStore; /* sequences storage ptrs */ seqStore_t seqStore; /* sequences storage ptrs */
ldmState_t ldmState; /* long distance matching state */ ldmState_t ldmState; /* long distance matching state */
rawSeq* ldmSequences; /* Storage for the ldm output sequences */ rawSeq* ldmSequences; /* Storage for the ldm output sequences */
size_t maxNbLdmSequences;
rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */
ZSTD_blockState_t blockState; ZSTD_blockState_t blockState;
U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */
@ -660,4 +675,17 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx,
size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity);
/* ZSTD_referenceExternalSequences() :
* Must be called before starting a compression operation.
* seqs must parse a prefix of the source.
* This cannot be used when long range matching is enabled.
* Zstd will use these sequences, and pass the literals to a secondary block
* compressor.
* @return : 0 on success, or an error code on failure.
* NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory
* access and data corruption.
*/
size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq);
#endif /* ZSTD_COMPRESS_H */ #endif /* ZSTD_COMPRESS_H */

View File

@ -295,12 +295,11 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor)
} }
static size_t ZSTD_ldm_generateSequences_internal( static size_t ZSTD_ldm_generateSequences_internal(
ldmState_t* ldmState, rawSeq* sequences, ldmState_t* ldmState, rawSeqStore_t* rawSeqStore,
ldmParams_t const* params, void const* src, size_t srcSize, ldmParams_t const* params, void const* src, size_t srcSize)
int const extDict)
{ {
rawSeq const* const sequencesStart = sequences;
/* LDM parameters */ /* LDM parameters */
int const extDict = ZSTD_window_hasExtDict(ldmState->window);
U32 const minMatchLength = params->minMatchLength; U32 const minMatchLength = params->minMatchLength;
U64 const hashPower = ldmState->hashPower; U64 const hashPower = ldmState->hashPower;
U32 const hBits = params->hashLog - params->bucketSizeLog; U32 const hBits = params->hashLog - params->bucketSizeLog;
@ -424,11 +423,15 @@ static size_t ZSTD_ldm_generateSequences_internal(
*/ */
U32 const matchIndex = bestEntry->offset; U32 const matchIndex = bestEntry->offset;
U32 const offset = current - matchIndex; U32 const offset = current - matchIndex;
rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size;
sequences->litLength = (U32)(ip - anchor); /* Out of sequence storage */
sequences->matchLength = (U32)mLength; if (rawSeqStore->size == rawSeqStore->capacity)
sequences->offset = offset; return ERROR(dstSize_tooSmall);
++sequences; seq->litLength = (U32)(ip - anchor);
seq->matchLength = (U32)mLength;
seq->offset = offset;
rawSeqStore->size++;
} }
/* Insert the current entry into the hash table */ /* Insert the current entry into the hash table */
@ -449,8 +452,7 @@ static size_t ZSTD_ldm_generateSequences_internal(
ip += mLength; ip += mLength;
anchor = ip; anchor = ip;
} }
/* Return the number of sequences generated */ return iend - anchor;
return sequences - sequencesStart;
} }
/*! ZSTD_ldm_reduceTable() : /*! ZSTD_ldm_reduceTable() :
@ -466,35 +468,44 @@ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size,
} }
size_t ZSTD_ldm_generateSequences( size_t ZSTD_ldm_generateSequences(
ldmState_t* ldmState, rawSeq* sequences, ldmState_t* ldmState, rawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize, ldmParams_t const* params, void const* src, size_t srcSize)
int const extDict)
{ {
U32 const maxDist = 1U << params->windowLog; U32 const maxDist = 1U << params->windowLog;
BYTE const* const istart = (BYTE const*)src; BYTE const* const istart = (BYTE const*)src;
size_t const kMaxChunkSize = 1 << 20; size_t const kMaxChunkSize = 1 << 20;
size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0);
size_t nbSeq = 0;
size_t chunk; size_t chunk;
size_t leftoverSize = 0;
assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize);
/* Check that ZSTD_window_update() has been called for this chunk prior /* Check that ZSTD_window_update() has been called for this chunk prior
* to passing it to this function. * to passing it to this function.
*/ */
assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize);
for (chunk = 0; chunk < nbChunks; ++chunk) { /* The input could be very large (in zstdmt), so it must be broken up into
* chunks to enforce the maximum distance and handle overflow correction.
*/
assert(sequences->pos <= sequences->size);
assert(sequences->size <= sequences->capacity);
for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) {
size_t const chunkStart = chunk * kMaxChunkSize; size_t const chunkStart = chunk * kMaxChunkSize;
size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize); size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize);
size_t const chunkSize = chunkEnd - chunkStart; size_t const chunkSize = chunkEnd - chunkStart;
size_t newLeftoverSize;
size_t const prevSize = sequences->size;
assert(chunkStart < srcSize); assert(chunkStart < srcSize);
/* 1. Perform overflow correction if necessary. */
if (ZSTD_window_needOverflowCorrection(ldmState->window)) { if (ZSTD_window_needOverflowCorrection(ldmState->window)) {
U32 const ldmHSize = 1U << params->hashLog; U32 const ldmHSize = 1U << params->hashLog;
U32 const correction = ZSTD_window_correctOverflow( U32 const correction = ZSTD_window_correctOverflow(
&ldmState->window, /* cycleLog */ 0, maxDist, src); &ldmState->window, /* cycleLog */ 0, maxDist, src);
ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction);
} }
/* kMaxChunkSize should be small enough that we don't lose too much of /* 2. We enforce the maximum offset allowed.
*
* kMaxChunkSize should be small enough that we don't lose too much of
* the window through early invalidation. * the window through early invalidation.
* TODO: * Test the chunk size. * TODO: * Test the chunk size.
* * Try invalidation after the sequence generation and test the * * Try invalidation after the sequence generation and test the
@ -502,14 +513,28 @@ size_t ZSTD_ldm_generateSequences(
*/ */
ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd, ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd,
maxDist); maxDist);
nbSeq += ZSTD_ldm_generateSequences_internal( /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */
ldmState, sequences + nbSeq, params, istart + chunkStart, chunkSize, newLeftoverSize = ZSTD_ldm_generateSequences_internal(
extDict); ldmState, sequences, params, istart + chunkStart,
chunkSize);
if (ZSTD_isError(newLeftoverSize))
return newLeftoverSize;
/* 4. We add the leftover literals from previous iterations to the first
* newly generated sequence, or add the `newLeftoverSize` if none are
* generated.
*/
/* Prepend the leftover literals from the last call */
if (prevSize < sequences->size) {
sequences->seq[prevSize].litLength += (U32)leftoverSize;
leftoverSize = newLeftoverSize;
} else {
assert(newLeftoverSize == chunkSize);
leftoverSize += chunkSize;
}
} }
return nbSeq; return 0;
} }
#if 0
/** /**
* If the sequence length is longer than remaining then the sequence is split * If the sequence length is longer than remaining then the sequence is split
* between this block and the next. * between this block and the next.
@ -517,11 +542,11 @@ size_t ZSTD_ldm_generateSequences(
* Returns the current sequence to handle, or if the rest of the block should * Returns the current sequence to handle, or if the rest of the block should
* be literals, it returns a sequence with offset == 0. * be literals, it returns a sequence with offset == 0.
*/ */
static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq, static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore,
size_t const seq, size_t const remaining, U32 const remaining, U32 const minMatch)
U32 const minMatch)
{ {
rawSeq sequence = sequences[seq]; size_t const pos = rawSeqStore->pos;
rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos];
assert(sequence.offset > 0); assert(sequence.offset > 0);
/* Handle partial sequences */ /* Handle partial sequences */
if (remaining <= sequence.litLength) { if (remaining <= sequence.litLength) {
@ -529,8 +554,7 @@ static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq,
* They will become the last literals of this block. * They will become the last literals of this block.
* The next block starts off with the remaining literals. * The next block starts off with the remaining literals.
*/ */
sequences[seq].litLength -= remaining; rawSeqStore->seq[pos].litLength -= remaining;
*nbSeq = seq;
sequence.offset = 0; sequence.offset = 0;
} else if (remaining < sequence.litLength + sequence.matchLength) { } else if (remaining < sequence.litLength + sequence.matchLength) {
/* Split the match up into two sequences. One in this block, and one /* Split the match up into two sequences. One in this block, and one
@ -543,31 +567,38 @@ static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq,
assert(remaining > sequence.litLength); assert(remaining > sequence.litLength);
assert(matchPrefix < sequence.matchLength); assert(matchPrefix < sequence.matchLength);
assert(matchPrefix + matchSuffix == sequence.matchLength); assert(matchPrefix + matchSuffix == sequence.matchLength);
/* Update the current sequence */ /* Update the first sequence */
sequence.matchLength = matchPrefix; sequence.matchLength = matchPrefix;
/* Update the next sequence when long enough, otherwise omit it. */ /* Update the second sequence */
if (matchSuffix >= minMatch) { if (matchSuffix >= minMatch) {
sequences[seq].litLength = 0; /* Update the second sequence, since the suffix is long enough */
sequences[seq].matchLength = matchSuffix; rawSeqStore->seq[pos].litLength = 0;
*nbSeq = seq; rawSeqStore->seq[pos].matchLength = matchSuffix;
} else { } else {
sequences[seq + 1].litLength += matchSuffix; /* Omit the second sequence since the match suffix is too short.
*nbSeq = seq + 1; * Add to the next sequences literals (if any).
*/
if (pos + 1 < rawSeqStore->size)
rawSeqStore->seq[pos + 1].litLength += matchSuffix;
rawSeqStore->pos++; /* Consume the sequence */
} }
if (sequence.matchLength < minMatch) { if (sequence.matchLength < minMatch) {
/* Skip the current sequence if it is too short */ /* Skip the current sequence if it is too short */
sequence.offset = 0; sequence.offset = 0;
} }
} else {
/* No partial sequence */
rawSeqStore->pos++; /* Consume the sequence */
} }
return sequence; return sequence;
} }
#endif
size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict) int const extDict)
{ {
unsigned const minMatch = cParams->searchLength;
ZSTD_blockCompressor const blockCompressor = ZSTD_blockCompressor const blockCompressor =
ZSTD_selectBlockCompressor(cParams->strategy, extDict); ZSTD_selectBlockCompressor(cParams->strategy, extDict);
BYTE const* const base = ms->window.base; BYTE const* const base = ms->window.base;
@ -576,15 +607,20 @@ size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
BYTE const* const iend = istart + srcSize; BYTE const* const iend = istart + srcSize;
/* Input positions */ /* Input positions */
BYTE const* ip = istart; BYTE const* ip = istart;
size_t seq;
/* Loop through each sequence and apply the block compressor to the lits */
for (seq = 0; seq < nbSeq; ++seq) {
rawSeq const sequence = sequences[seq];
int i;
assert(rawSeqStore->pos <= rawSeqStore->size);
assert(rawSeqStore->size <= rawSeqStore->capacity);
/* Loop through each sequence and apply the block compressor to the lits */
while (rawSeqStore->pos < rawSeqStore->size && ip < iend) {
/* maybeSplitSequence updates rawSeqStore->pos */
rawSeq const sequence = maybeSplitSequence(rawSeqStore,
(U32)(iend - ip), minMatch);
int i;
/* End signal */
if (sequence.offset == 0) if (sequence.offset == 0)
break; break;
assert(sequence.offset <= (1U << cParams->windowLog));
assert(ip + sequence.litLength + sequence.matchLength <= iend); assert(ip + sequence.litLength + sequence.matchLength <= iend);
/* Fill tables for block compressor */ /* Fill tables for block compressor */
@ -608,8 +644,10 @@ size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq,
ip += sequence.matchLength; ip += sequence.matchLength;
} }
} }
/* Fill the tables for the block compressor */
ZSTD_ldm_limitTableUpdate(ms, ip); ZSTD_ldm_limitTableUpdate(ms, ip);
ZSTD_ldm_fillFastTables(ms, cParams, ip); ZSTD_ldm_fillFastTables(ms, cParams, ip);
/* Compress the last literals */
{ {
size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams, size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams,
ip, iend - ip); ip, iend - ip);

View File

@ -28,18 +28,19 @@ extern "C" {
* ZSTD_ldm_generateSequences(): * ZSTD_ldm_generateSequences():
* *
* Generates the sequences using the long distance match finder. * Generates the sequences using the long distance match finder.
* The sequences completely parse a prefix of the source, but leave off the last * Generates long range matching sequences in `sequences`, which parse a prefix
* literals. Returns the number of sequences generated into `sequences`. The * of the source. `sequences` must be large enough to store every sequence,
* user must have called ZSTD_window_update() for all of the input they have, * which can be checked with `ZSTD_ldm_getMaxNbSeq()`.
* even if they pass it to ZSTD_ldm_generateSequences() in chunks. * @returns 0 or an error code.
* *
* NOTE: The source may be any size, assuming it doesn't overflow the hash table * NOTE: The user must have called ZSTD_window_update() for all of the input
* indices, and the output sequences table is large enough.. * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks.
* NOTE: This function returns an error if it runs out of space to store
* sequences.
*/ */
size_t ZSTD_ldm_generateSequences( size_t ZSTD_ldm_generateSequences(
ldmState_t* ldms, rawSeq* sequences, ldmState_t* ldms, rawSeqStore_t* sequences,
ldmParams_t const* params, void const* src, size_t srcSize, ldmParams_t const* params, void const* src, size_t srcSize);
int const extDict);
/** /**
* ZSTD_ldm_blockCompress(): * ZSTD_ldm_blockCompress():
@ -48,15 +49,18 @@ size_t ZSTD_ldm_generateSequences(
* block compressor. The literals section of every sequence is passed to the * block compressor. The literals section of every sequence is passed to the
* secondary block compressor, and those sequences are interspersed with the * secondary block compressor, and those sequences are interspersed with the
* predefined sequences. Returns the length of the last literals. * predefined sequences. Returns the length of the last literals.
* `nbSeq` is the number of sequences available in `sequences`. * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed.
* `rawSeqStore.seq` may also be updated to split the last sequence between two
* blocks.
* @return The length of the last literals.
* *
* NOTE: The source must be at most the maximum block size, but the predefined * NOTE: The source must be at most the maximum block size, but the predefined
* sequences can be any size, and may be longer than the block. In the case that * sequences can be any size, and may be longer than the block. In the case that
* they are longer than the block, the last sequences may need to be split into * they are longer than the block, the last sequences may need to be split into
* two. We handle that case correctly, and update `sequences` and `nbSeq` * two. We handle that case correctly, and update `rawSeqStore` appropriately.
* appropriately. * NOTE: This function does not return any errors.
*/ */
size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore,
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize,
int const extDict); int const extDict);