From a9a6dcba63805ba3d514bfd314571c2374de9ec2 Mon Sep 17 00:00:00 2001 From: Nick Terrell Date: Tue, 6 Mar 2018 19:50:50 -0800 Subject: [PATCH] Expose reference external sequence API * Expose the reference external sequences API for zstdmt. Allows external sequences of any length, which get split when necessary. * Reset the LDM window when the context is reset. * Store the maximum number of LDM sequences. * Sequence generation now returns the number of last literals. * Fix sequence generation to not throw out the last literals when blocks of more than 1 MB are encountered. --- lib/common/zstd_internal.h | 6 -- lib/compress/zstd_compress.c | 45 ++++++++-- lib/compress/zstd_compress_internal.h | 34 +++++++- lib/compress/zstd_ldm.c | 120 +++++++++++++++++--------- lib/compress/zstd_ldm.h | 30 ++++--- 5 files changed, 166 insertions(+), 69 deletions(-) diff --git a/lib/common/zstd_internal.h b/lib/common/zstd_internal.h index 1c6841a3..65c08a82 100644 --- a/lib/common/zstd_internal.h +++ b/lib/common/zstd_internal.h @@ -213,12 +213,6 @@ MEM_STATIC void ZSTD_wildcopy_e(void* dst, const void* src, void* dstEnd) /* s /*-******************************************* * Private declarations *********************************************/ -typedef struct rawSeq_s { - U32 offset; - U32 litLength; - U32 matchLength; -} rawSeq; - typedef struct seqDef_s { U32 offset; U16 litLength; diff --git a/lib/compress/zstd_compress.c b/lib/compress/zstd_compress.c index c461fa44..fdf6eceb 100644 --- a/lib/compress/zstd_compress.c +++ b/lib/compress/zstd_compress.c @@ -920,6 +920,8 @@ static size_t ZSTD_continueCCtx(ZSTD_CCtx* cctx, ZSTD_CCtx_params params, U64 pl (U32)pledgedSrcSize, cctx->appliedParams.fParams.contentSizeFlag); cctx->stage = ZSTDcs_init; cctx->dictID = 0; + if (params.ldmParams.enableLdm) + ZSTD_window_clear(&cctx->ldmState.window); ZSTD_invalidateMatchState(&cctx->blockState.matchState); ZSTD_reset_compressedBlockState(cctx->blockState.prevCBlock); 
XXH64_reset(&cctx->xxhState, 0); @@ -1079,6 +1081,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, ptr = zc->ldmState.hashTable + ldmHSize; zc->ldmSequences = (rawSeq*)ptr; ptr = zc->ldmSequences + maxNbLdmSeq; + zc->maxNbLdmSequences = maxNbLdmSeq; memset(&zc->ldmState.window, 0, sizeof(zc->ldmState.window)); } @@ -1103,6 +1106,7 @@ static size_t ZSTD_resetCCtx_internal(ZSTD_CCtx* zc, memset(ptr, 0, ldmBucketSize); zc->ldmState.bucketOffsets = (BYTE*)ptr; ptr = zc->ldmState.bucketOffsets + ldmBucketSize; + ZSTD_window_clear(&zc->ldmState.window); } /* buffers */ @@ -1834,17 +1838,33 @@ static size_t ZSTD_compressBlock_internal(ZSTD_CCtx* zc, for (i = 0; i < ZSTD_REP_NUM; ++i) zc->blockState.nextCBlock->rep[i] = zc->blockState.prevCBlock->rep[i]; } - if (zc->appliedParams.ldmParams.enableLdm) { - size_t const nbSeq = - ZSTD_ldm_generateSequences(&zc->ldmState, zc->ldmSequences, - &zc->appliedParams.ldmParams, - src, srcSize, extDict); + if (zc->externSeqStore.pos < zc->externSeqStore.size) { + assert(!zc->appliedParams.ldmParams.enableLdm); + /* Updates externSeqStore.pos */ lastLLSize = - ZSTD_ldm_blockCompress(zc->ldmSequences, nbSeq, + ZSTD_ldm_blockCompress(&zc->externSeqStore, ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize, extDict); + assert(zc->externSeqStore.pos <= zc->externSeqStore.size); + } else if (zc->appliedParams.ldmParams.enableLdm) { + rawSeqStore_t ldmSeqStore = {NULL, 0, 0, 0}; + + ldmSeqStore.seq = zc->ldmSequences; + ldmSeqStore.capacity = zc->maxNbLdmSequences; + /* Updates ldmSeqStore.size */ + CHECK_F(ZSTD_ldm_generateSequences(&zc->ldmState, &ldmSeqStore, + &zc->appliedParams.ldmParams, + src, srcSize)); + /* Updates ldmSeqStore.pos */ + lastLLSize = + ZSTD_ldm_blockCompress(&ldmSeqStore, + ms, &zc->seqStore, + zc->blockState.nextCBlock->rep, + &zc->appliedParams.cParams, + src, srcSize, extDict); + assert(ldmSeqStore.pos == ldmSeqStore.size); } else { /* not long range mode */ 
ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(zc->appliedParams.cParams.strategy, extDict); lastLLSize = blockCompressor(ms, &zc->seqStore, zc->blockState.nextCBlock->rep, &zc->appliedParams.cParams, src, srcSize); @@ -2005,6 +2025,19 @@ size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity) } } +size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq) +{ + if (cctx->stage != ZSTDcs_init) + return ERROR(stage_wrong); + if (cctx->appliedParams.ldmParams.enableLdm) + return ERROR(parameter_unsupported); + cctx->externSeqStore.seq = seq; + cctx->externSeqStore.size = nbSeq; + cctx->externSeqStore.capacity = nbSeq; + cctx->externSeqStore.pos = 0; + return 0; +} + static size_t ZSTD_compressContinue_internal (ZSTD_CCtx* cctx, void* dst, size_t dstCapacity, diff --git a/lib/compress/zstd_compress_internal.h b/lib/compress/zstd_compress_internal.h index 65e99cd9..af168c7c 100644 --- a/lib/compress/zstd_compress_internal.h +++ b/lib/compress/zstd_compress_internal.h @@ -152,6 +152,19 @@ typedef struct { U32 windowLog; /* Window log for the LDM */ } ldmParams_t; +typedef struct { + U32 offset; + U32 litLength; + U32 matchLength; +} rawSeq; + +typedef struct { + rawSeq* seq; /* The start of the sequences */ + size_t pos; /* The position where reading stopped. <= size. */ + size_t size; /* The number of sequences. <= capacity. 
*/ + size_t capacity; /* The capacity of the `seq` pointer */ +} rawSeqStore_t; + struct ZSTD_CCtx_params_s { ZSTD_format_e format; ZSTD_compressionParameters cParams; @@ -191,9 +204,11 @@ struct ZSTD_CCtx_s { ZSTD_customMem customMem; size_t staticSize; - seqStore_t seqStore; /* sequences storage ptrs */ - ldmState_t ldmState; /* long distance matching state */ - rawSeq* ldmSequences; /* Storage for the ldm output sequences */ + seqStore_t seqStore; /* sequences storage ptrs */ + ldmState_t ldmState; /* long distance matching state */ + rawSeq* ldmSequences; /* Storage for the ldm output sequences */ + size_t maxNbLdmSequences; + rawSeqStore_t externSeqStore; /* Mutable reference to external sequences */ ZSTD_blockState_t blockState; U32* entropyWorkspace; /* entropy workspace of HUF_WORKSPACE_SIZE bytes */ @@ -660,4 +675,17 @@ size_t ZSTD_compress_advanced_internal(ZSTD_CCtx* cctx, size_t ZSTD_writeLastEmptyBlock(void* dst, size_t dstCapacity); +/* ZSTD_referenceExternalSequences() : + * Must be called before starting a compression operation. + * seqs must parse a prefix of the source. + * This cannot be used when long range matching is enabled. + * Zstd will use these sequences, and pass the literals to a secondary block + * compressor. + * @return : An error code on failure. + * NOTE: seqs are not verified! Invalid sequences can cause out-of-bounds memory + * access and data corruption. 
+ */ +size_t ZSTD_referenceExternalSequences(ZSTD_CCtx* cctx, rawSeq* seq, size_t nbSeq); + + #endif /* ZSTD_COMPRESS_H */ diff --git a/lib/compress/zstd_ldm.c b/lib/compress/zstd_ldm.c index 22f9b4a8..d75cdf5a 100644 --- a/lib/compress/zstd_ldm.c +++ b/lib/compress/zstd_ldm.c @@ -295,12 +295,11 @@ static void ZSTD_ldm_limitTableUpdate(ZSTD_matchState_t* ms, const BYTE* anchor) } static size_t ZSTD_ldm_generateSequences_internal( - ldmState_t* ldmState, rawSeq* sequences, - ldmParams_t const* params, void const* src, size_t srcSize, - int const extDict) + ldmState_t* ldmState, rawSeqStore_t* rawSeqStore, + ldmParams_t const* params, void const* src, size_t srcSize) { - rawSeq const* const sequencesStart = sequences; /* LDM parameters */ + int const extDict = ZSTD_window_hasExtDict(ldmState->window); U32 const minMatchLength = params->minMatchLength; U64 const hashPower = ldmState->hashPower; U32 const hBits = params->hashLog - params->bucketSizeLog; @@ -424,11 +423,15 @@ static size_t ZSTD_ldm_generateSequences_internal( */ U32 const matchIndex = bestEntry->offset; U32 const offset = current - matchIndex; + rawSeq* const seq = rawSeqStore->seq + rawSeqStore->size; - sequences->litLength = (U32)(ip - anchor); - sequences->matchLength = (U32)mLength; - sequences->offset = offset; - ++sequences; + /* Out of sequence storage */ + if (rawSeqStore->size == rawSeqStore->capacity) + return ERROR(dstSize_tooSmall); + seq->litLength = (U32)(ip - anchor); + seq->matchLength = (U32)mLength; + seq->offset = offset; + rawSeqStore->size++; } /* Insert the current entry into the hash table */ @@ -449,8 +452,7 @@ static size_t ZSTD_ldm_generateSequences_internal( ip += mLength; anchor = ip; } - /* Return the number of sequences generated */ - return sequences - sequencesStart; + return iend - anchor; } /*! 
ZSTD_ldm_reduceTable() : @@ -466,35 +468,44 @@ static void ZSTD_ldm_reduceTable(ldmEntry_t* const table, U32 const size, } size_t ZSTD_ldm_generateSequences( - ldmState_t* ldmState, rawSeq* sequences, - ldmParams_t const* params, void const* src, size_t srcSize, - int const extDict) + ldmState_t* ldmState, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize) { U32 const maxDist = 1U << params->windowLog; BYTE const* const istart = (BYTE const*)src; size_t const kMaxChunkSize = 1 << 20; size_t const nbChunks = (srcSize / kMaxChunkSize) + ((srcSize % kMaxChunkSize) != 0); - size_t nbSeq = 0; size_t chunk; + size_t leftoverSize = 0; assert(ZSTD_CHUNKSIZE_MAX >= kMaxChunkSize); /* Check that ZSTD_window_update() has been called for this chunk prior * to passing it to this function. */ assert(ldmState->window.nextSrc >= (BYTE const*)src + srcSize); - for (chunk = 0; chunk < nbChunks; ++chunk) { + /* The input could be very large (in zstdmt), so it must be broken up into + * chunks to enforce the maximum distance and handle overflow correction. + */ + assert(sequences->pos <= sequences->size); + assert(sequences->size <= sequences->capacity); + for (chunk = 0; chunk < nbChunks && sequences->size < sequences->capacity; ++chunk) { size_t const chunkStart = chunk * kMaxChunkSize; size_t const chunkEnd = MIN(chunkStart + kMaxChunkSize, srcSize); size_t const chunkSize = chunkEnd - chunkStart; + size_t newLeftoverSize; + size_t const prevSize = sequences->size; assert(chunkStart < srcSize); + /* 1. Perform overflow correction if necessary. */ if (ZSTD_window_needOverflowCorrection(ldmState->window)) { U32 const ldmHSize = 1U << params->hashLog; U32 const correction = ZSTD_window_correctOverflow( &ldmState->window, /* cycleLog */ 0, maxDist, src); ZSTD_ldm_reduceTable(ldmState->hashTable, ldmHSize, correction); } - /* kMaxChunkSize should be small enough that we don't lose too much of + /* 2. We enforce the maximum offset allowed. 
+ * + * kMaxChunkSize should be small enough that we don't lose too much of * the window through early invalidation. * TODO: * Test the chunk size. * * Try invalidation after the sequence generation and test the @@ -502,14 +513,28 @@ size_t ZSTD_ldm_generateSequences( */ ZSTD_window_enforceMaxDist(&ldmState->window, istart + chunkEnd, maxDist); - nbSeq += ZSTD_ldm_generateSequences_internal( - ldmState, sequences + nbSeq, params, istart + chunkStart, chunkSize, - extDict); + /* 3. Generate the sequences for the chunk, and get newLeftoverSize. */ + newLeftoverSize = ZSTD_ldm_generateSequences_internal( + ldmState, sequences, params, istart + chunkStart, + chunkSize); + if (ZSTD_isError(newLeftoverSize)) + return newLeftoverSize; + /* 4. We add the leftover literals from previous iterations to the first + * newly generated sequence, or add the `newLeftoverSize` if none are + * generated. + */ + /* Prepend the leftover literals from the last call */ + if (prevSize < sequences->size) { + sequences->seq[prevSize].litLength += (U32)leftoverSize; + leftoverSize = newLeftoverSize; + } else { + assert(newLeftoverSize == chunkSize); + leftoverSize += chunkSize; + } } - return nbSeq; + return 0; } -#if 0 /** * If the sequence length is longer than remaining then the sequence is split * between this block and the next. @@ -517,11 +542,11 @@ size_t ZSTD_ldm_generateSequences( * Returns the current sequence to handle, or if the rest of the block should * be literals, it returns a sequence with offset == 0. 
*/ -static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq, - size_t const seq, size_t const remaining, - U32 const minMatch) +static rawSeq maybeSplitSequence(rawSeqStore_t* rawSeqStore, + U32 const remaining, U32 const minMatch) { - rawSeq sequence = sequences[seq]; + size_t const pos = rawSeqStore->pos; + rawSeq sequence = rawSeqStore->seq[rawSeqStore->pos]; assert(sequence.offset > 0); /* Handle partial sequences */ if (remaining <= sequence.litLength) { @@ -529,8 +554,7 @@ static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq, * They will become the last literals of this block. * The next block starts off with the remaining literals. */ - sequences[seq].litLength -= remaining; - *nbSeq = seq; + rawSeqStore->seq[pos].litLength -= remaining; sequence.offset = 0; } else if (remaining < sequence.litLength + sequence.matchLength) { /* Split the match up into two sequences. One in this block, and one @@ -543,31 +567,38 @@ static rawSeq maybeSplitSequence(rawSeq* sequences, size_t* nbSeq, assert(remaining > sequence.litLength); assert(matchPrefix < sequence.matchLength); assert(matchPrefix + matchSuffix == sequence.matchLength); - /* Update the current sequence */ + /* Update the first sequence */ sequence.matchLength = matchPrefix; - /* Update the next sequence when long enough, otherwise omit it. */ + /* Update the second sequence */ if (matchSuffix >= minMatch) { - sequences[seq].litLength = 0; - sequences[seq].matchLength = matchSuffix; - *nbSeq = seq; + /* Update the second sequence, since the suffix is long enough */ + rawSeqStore->seq[pos].litLength = 0; + rawSeqStore->seq[pos].matchLength = matchSuffix; } else { - sequences[seq + 1].litLength += matchSuffix; - *nbSeq = seq + 1; + /* Omit the second sequence since the match suffix is too short. + * Add to the next sequence's literals (if any). 
+ */ + if (pos + 1 < rawSeqStore->size) + rawSeqStore->seq[pos + 1].litLength += matchSuffix; + rawSeqStore->pos++; /* Consume the sequence */ } if (sequence.matchLength < minMatch) { /* Skip the current sequence if it is too short */ sequence.offset = 0; } + } else { + /* No partial sequence */ + rawSeqStore->pos++; /* Consume the sequence */ } return sequence; } -#endif -size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, int const extDict) { + unsigned const minMatch = cParams->searchLength; ZSTD_blockCompressor const blockCompressor = ZSTD_selectBlockCompressor(cParams->strategy, extDict); BYTE const* const base = ms->window.base; @@ -576,15 +607,20 @@ size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, BYTE const* const iend = istart + srcSize; /* Input positions */ BYTE const* ip = istart; - size_t seq; - /* Loop through each sequence and apply the block compressor to the lits */ - for (seq = 0; seq < nbSeq; ++seq) { - rawSeq const sequence = sequences[seq]; - int i; + assert(rawSeqStore->pos <= rawSeqStore->size); + assert(rawSeqStore->size <= rawSeqStore->capacity); + /* Loop through each sequence and apply the block compressor to the lits */ + while (rawSeqStore->pos < rawSeqStore->size && ip < iend) { + /* maybeSplitSequence updates rawSeqStore->pos */ + rawSeq const sequence = maybeSplitSequence(rawSeqStore, + (U32)(iend - ip), minMatch); + int i; + /* End signal */ if (sequence.offset == 0) break; + assert(sequence.offset <= (1U << cParams->windowLog)); assert(ip + sequence.litLength + sequence.matchLength <= iend); /* Fill tables for block compressor */ @@ -608,8 +644,10 @@ size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, ip += sequence.matchLength; } } + /* Fill the tables for the block compressor 
*/ ZSTD_ldm_limitTableUpdate(ms, ip); ZSTD_ldm_fillFastTables(ms, cParams, ip); + /* Compress the last literals */ { size_t const lastLiterals = blockCompressor(ms, seqStore, rep, cParams, ip, iend - ip); diff --git a/lib/compress/zstd_ldm.h b/lib/compress/zstd_ldm.h index d9219a18..9d2f7c39 100644 --- a/lib/compress/zstd_ldm.h +++ b/lib/compress/zstd_ldm.h @@ -28,18 +28,19 @@ extern "C" { * ZSTD_ldm_generateSequences(): * * Generates the sequences using the long distance match finder. - * The sequences completely parse a prefix of the source, but leave off the last - * literals. Returns the number of sequences generated into `sequences`. The - * user must have called ZSTD_window_update() for all of the input they have, - * even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * Generates long range matching sequences in `sequences`, which parse a prefix + * of the source. `sequences` must be large enough to store every sequence, + * which can be checked with `ZSTD_ldm_getMaxNbSeq()`. + * @returns 0 or an error code. * - * NOTE: The source may be any size, assuming it doesn't overflow the hash table - * indices, and the output sequences table is large enough.. + * NOTE: The user must have called ZSTD_window_update() for all of the input + * they have, even if they pass it to ZSTD_ldm_generateSequences() in chunks. + * NOTE: This function returns an error if it runs out of space to store + * sequences. */ size_t ZSTD_ldm_generateSequences( - ldmState_t* ldms, rawSeq* sequences, - ldmParams_t const* params, void const* src, size_t srcSize, - int const extDict); + ldmState_t* ldms, rawSeqStore_t* sequences, + ldmParams_t const* params, void const* src, size_t srcSize); /** * ZSTD_ldm_blockCompress(): @@ -48,15 +49,18 @@ size_t ZSTD_ldm_generateSequences( * block compressor. The literals section of every sequence is passed to the * secondary block compressor, and those sequences are interspersed with the * predefined sequences. 
Returns the length of the last literals. - * `nbSeq` is the number of sequences available in `sequences`. + * Updates `rawSeqStore.pos` to indicate how many sequences have been consumed. + * `rawSeqStore.seq` may also be updated to split the last sequence between two + * blocks. + * @return The length of the last literals. * * NOTE: The source must be at most the maximum block size, but the predefined * sequences can be any size, and may be longer than the block. In the case that * they are longer than the block, the last sequences may need to be split into - * two. We handle that case correctly, and update `sequences` and `nbSeq` - * appropriately. + * two. We handle that case correctly, and update `rawSeqStore` appropriately. + * NOTE: This function does not return any errors. */ -size_t ZSTD_ldm_blockCompress(rawSeq const* sequences, size_t nbSeq, +size_t ZSTD_ldm_blockCompress(rawSeqStore_t* rawSeqStore, ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], ZSTD_compressionParameters const* cParams, void const* src, size_t srcSize, int const extDict);