Merge pull request #27 from Cyan4973/dev

Dev
dev
Yann Collet 2015-03-01 19:30:35 +01:00
commit 765207c549
10 changed files with 634 additions and 483 deletions

114
lib/fse.c
View File

@ -87,6 +87,8 @@
#include "fse_static.h"
#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/****************************************************************
* Basic Types
*****************************************************************/
@ -109,6 +111,7 @@ typedef unsigned long long U64;
typedef signed long long S64;
#endif
#endif /* MEM_ACCESS_MODULE */
/****************************************************************
* Memory I/O
@ -560,6 +563,8 @@ int FSE_compareRankT(const void* r1, const void* r2)
return 2 * (R1->count < R2->count) - 1;
}
#if 0
static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned* count, U32 maxSymbolValue)
{
rank_t rank[FSE_MAX_SYMBOL_VALUE+2];
@ -601,6 +606,100 @@ static size_t FSE_adjustNormSlow(short* norm, int pointsToRemove, const unsigned
return 0;
}
#else
/* Secondary normalization method.
   To be used when primary method fails.

   Fills norm[0..maxSymbolValue] from count[0..maxSymbolValue], targeting a table of (1 << tableLog) cells :
   - a symbol with a zero count gets 0;
   - a symbol with count <= (total >> tableLog) gets -1, and is accounted for as one cell;
   - a symbol with count <= 1.5 * (total >> tableLog) gets 1;
   - every other symbol is provisionally marked -2, then receives a share of the remaining cells
     proportional to its count (62-bit fixed-point running sum).
   `total` is expected to be the sum of count[0..maxSymbolValue].
   @return : 0 on success,
             or an error code (testable with FSE_isError()) when the counts cannot be fitted into the table. */
static size_t FSE_normalizeM2(short* norm, U32 tableLog, const unsigned* count, size_t total, U32 maxSymbolValue)
{
    U32 const tableSize = (U32)1 << tableLog;
    U32 s;
    U32 distributed = 0;   /* cells already handed out (symbols set to -1 or 1) */
    U32 pending = 0;       /* symbols still marked -2, waiting for a proportional share */
    U32 ToDistribute;

    /* Init */
    U32 lowThreshold = (U32)(total >> tableLog);
    U32 lowOne = (U32)((total * 3) >> (tableLog + 1));

    /* First pass : settle absent and low-count symbols; `total` keeps only what is left to share */
    for (s=0; s<=maxSymbolValue; s++)
    {
        if (count[s] == 0)
        {
            norm[s]=0;
            continue;
        }
        if (count[s] <= lowThreshold)
        {
            norm[s] = -1;
            distributed++;
            total -= count[s];
            continue;
        }
        if (count[s] <= lowOne)
        {
            norm[s] = 1;
            distributed++;
            total -= count[s];
            continue;
        }
        norm[s]=-2;
        pending++;
    }

    /* more settled symbols than table cells : the subtraction below would wrap around */
    if (distributed > tableSize)
        return (size_t)-FSE_ERROR_GENERIC;
    ToDistribute = tableSize - distributed;

    /* ToDistribute is tested first, so that the division can never be a division by zero */
    if ((ToDistribute > 0) && ((total / ToDistribute) > lowOne))
    {
        /* risk of rounding to zero */
        lowOne = (U32)((total * 3) / (ToDistribute * 2));
        for (s=0; s<=maxSymbolValue; s++)
        {
            if ((norm[s] == -2) && (count[s] <= lowOne))
            {
                norm[s] = 1;
                distributed++;
                total -= count[s];
                pending--;
            }
        }
        if (distributed > tableSize)
            return (size_t)-FSE_ERROR_GENERIC;
        ToDistribute = tableSize - distributed;
    }

    if (pending == 0)
    {
        /* all values are pretty poor;
           probably incompressible data (should have already been detected);
           find max, then give all remaining points to max.
           This also covers inputs where some symbols have a zero count,
           which would otherwise reach the division by `total` below with total == 0 */
        U32 maxV = 0, maxC = 0;
        for (s=0; s<=maxSymbolValue; s++)
        {
            if (count[s] > maxC)
            {
                maxV = s;
                maxC = count[s];
            }
        }
        if (maxC == 0)
            return (size_t)-FSE_ERROR_GENERIC;   /* no symbol present : nothing can receive the cells */
        norm[maxV] += (short)ToDistribute;
        return 0;
    }

    /* some symbols still need a share : there must be cells left, and a non-zero weight to divide by */
    if ((ToDistribute == 0) || (total == 0))
        return (size_t)-FSE_ERROR_GENERIC;

    {
        U64 const vStepLog = 62 - tableLog;
        U64 const mid = (1ULL << (vStepLog-1)) - 1;
        U64 const rStep = ((((U64)1<<vStepLog) * ToDistribute) + mid) / total;   /* scale on remaining */
        U64 tmpTotal = mid;
        for (s=0; s<=maxSymbolValue; s++)
        {
            if (norm[s]==-2)
            {
                /* weight = number of cell boundaries crossed by this symbol's slice of the running sum */
                U64 end = tmpTotal + (count[s] * rStep);
                U32 sStart = (U32)(tmpTotal >> vStepLog);
                U32 sEnd = (U32)(end >> vStepLog);
                U32 weight = sEnd - sStart;
                if (weight < 1)
                    return (size_t)-FSE_ERROR_GENERIC;
                norm[s] = (short)weight;
                tmpTotal = end;
            }
        }
    }

    return 0;
}
#endif
size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
const unsigned* count, size_t total,
@ -655,10 +754,9 @@ size_t FSE_normalizeCount (short* normalizedCounter, unsigned tableLog,
}
if (-stillToDistribute >= (normalizedCounter[largest] >> 1))
{
/* corner case, need to converge towards normalization with caution */
size_t errorCode = FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
/* corner case, need another normalization method */
size_t errorCode = FSE_normalizeM2(normalizedCounter, tableLog, count, total, maxSymbolValue);
if (FSE_isError(errorCode)) return errorCode;
//FSE_adjustNormSlow(normalizedCounter, -stillToDistribute, count, maxSymbolValue);
}
else normalizedCounter[largest] += (short)stillToDistribute;
}
@ -868,12 +966,6 @@ size_t FSE_compress_usingCTable (void* dst, size_t dstSize,
}
static size_t FSE_compressRLE (BYTE *out, BYTE symbol)
{
*out=symbol;
return 1;
}
size_t FSE_compressBound(size_t size) { return FSE_COMPRESSBOUND(size); }
@ -900,8 +992,8 @@ size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize
/* Scan input and build symbol stats */
errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
if (FSE_isError(errorCode)) return errorCode;
if (errorCode == srcSize) return FSE_compressRLE (ostart, *istart);
if (errorCode < ((srcSize * 7) >> 10)) return 0; /* Heuristic : not compressible enough */
if (errorCode == srcSize) return 1;
if (errorCode < (srcSize >> 7)) return 0; /* Heuristic : not compressible enough */
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
errorCode = FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);

View File

@ -68,9 +68,6 @@
#include <stdio.h> /* debug : printf */
#include "zstd_static.h"
#if defined(__clang__) || defined(__GNUC__)
# ifdef __clang__
# pragma clang diagnostic ignored "-Wtypedef-redefinition"
# endif
# include "fse.c" /* due to GCC/Clang inlining limitations, including *.c runs noticeably faster */
#else
# include "fse_static.h"
@ -80,7 +77,6 @@
/********************************************************
* Compiler specifics
*********************************************************/
//#if (!(defined(_MSC_VER) && (_MSC_VER<=1500))) /* exclude Visual 2008 and below */
#ifdef __AVX2__
# include <immintrin.h> /* AVX2 intrinsics */
#endif
@ -100,10 +96,12 @@
#endif
#ifndef MEM_ACCESS_MODULE
#define MEM_ACCESS_MODULE
/********************************************************
* Basic Types
*********************************************************/
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99
#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
@ -120,19 +118,18 @@ typedef signed int S32;
typedef unsigned long long U64;
#endif
#endif /* MEM_ACCESS_MODULE */
/********************************************************
* Constants
*********************************************************/
static const U32 ZSTD_magicNumber = 0xFD2FB51C;
static const U32 ZSTD_magicNumber = 0xFD2FB51C; /* Initial (limited) frame format */
#define HASH_LOG (ZSTD_MEMORY_USAGE - 2)
#define HASH_TABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASH_TABLESIZE - 1)
#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
#define KNUTH 2654435761
#define BIT7 128
@ -142,10 +139,10 @@ static const U32 ZSTD_magicNumber = 0xFD2FB51C;
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<20)
#define GB *(1U<<30)
#define BLOCKSIZE (128 KB) // define, for static allocation
static const U32 g_maxDistance = 512 KB;
#define BLOCKSIZE (128 KB) /* define, for static allocation */
static const U32 g_maxDistance = 4 * BLOCKSIZE;
static const U32 g_maxLimit = 1 GB;
static const U32 g_searchStrength = 8;
@ -289,12 +286,36 @@ typedef struct
U32 origSize;
} blockProperties_t;
/* seqStore_t :
   set of output areas into which ZSTD_storeSeq() records sequences.
   Each area has a `xxxStart` pointer (its beginning) and a `xxx` pointer (current write position);
   ZSTD_resetSeqStore() rewinds every write position to its `xxxStart`. */
typedef struct {
/* allocation that all the areas below are carved from (see ZSTD_createCCtx() / ZSTD_freeCCtx()) */
void* buffer;
/* match offsets, one U32 per stored sequence */
U32* offsetStart;
U32* offset;
/* literal bytes copied from the source */
BYTE* litStart;
BYTE* lit;
/* literal lengths, one byte per stored sequence (capped at MaxLL) */
BYTE* litLengthStart;
BYTE* litLength;
/* match lengths, one byte per stored sequence (capped at MaxML) */
BYTE* matchLengthStart;
BYTE* matchLength;
/* extra bytes for the lengths that reach MaxLL / MaxML */
BYTE* dumpsStart;
BYTE* dumps;
} seqStore_t;
/* ZSTD_resetSeqStore :
   rewinds every write position of the sequence store to the beginning of its area.
   The `xxxStart` pointers and `buffer` are left untouched, and nothing is erased. */
void ZSTD_resetSeqStore(seqStore_t* ssPtr)
{
    seqStore_t* const store = ssPtr;

    store->dumps       = store->dumpsStart;
    store->matchLength = store->matchLengthStart;
    store->litLength   = store->litLengthStart;
    store->lit         = store->litStart;
    store->offset      = store->offsetStart;
}
typedef struct
{
const BYTE* base;
U32 current;
U32 nextUpdate;
BYTE* workplace;
seqStore_t seqStore;
#ifdef __AVX2__
__m256i hashTable[HASH_TABLESIZE>>3];
#else
@ -305,25 +326,28 @@ typedef struct
ZSTD_cctx_t ZSTD_createCCtx(void)
{
cctxi_t* srt = (cctxi_t *) malloc( sizeof(cctxi_t) );
srt->workplace = (BYTE*) malloc(WORKPLACESIZE);
return (ZSTD_cctx_t)srt;
cctxi_t* ctx = (cctxi_t*) malloc( sizeof(cctxi_t) );
ctx->seqStore.buffer = malloc(WORKPLACESIZE);
ctx->seqStore.offsetStart = (U32*) (ctx->seqStore.buffer);
ctx->seqStore.litStart = (BYTE*) (ctx->seqStore.offsetStart + (BLOCKSIZE>>2));
ctx->seqStore.litLengthStart = ctx->seqStore.litStart + BLOCKSIZE;
ctx->seqStore.matchLengthStart = ctx->seqStore.litLengthStart + (BLOCKSIZE>>2);
ctx->seqStore.dumpsStart = ctx->seqStore.matchLengthStart + (BLOCKSIZE>>2);
return (ZSTD_cctx_t)ctx;
}
void ZSTD_resetCCtx(ZSTD_cctx_t ctx)
void ZSTD_resetCCtx(ZSTD_cctx_t cctx)
{
cctxi_t* srt = (cctxi_t*)ctx;
srt->base = NULL;
memset(srt->hashTable, 0, HASH_TABLESIZE*4);
cctxi_t* ctx = (cctxi_t*)cctx;
ctx->base = NULL;
memset(ctx->hashTable, 0, HASH_TABLESIZE*4);
}
size_t ZSTD_freeCCtx(ZSTD_cctx_t ctx)
size_t ZSTD_freeCCtx(ZSTD_cctx_t cctx)
{
cctxi_t *srt = (cctxi_t *) (ctx);
free(srt->workplace);
free(srt);
cctxi_t* ctx = (cctxi_t*) (cctx);
free(ctx->seqStore.buffer);
free(ctx);
return 0;
}
@ -360,9 +384,9 @@ static unsigned ZSTD_highbit(U32 val)
unsigned long r;
_BitScanReverse(&r, val);
return (unsigned)r;
# elif defined(__GNUC__) && (GCC_VERSION >= 304) // GCC Intrinsic
# elif defined(__GNUC__) && (GCC_VERSION >= 304) /* GCC Intrinsic */
return 31 - __builtin_clz(val);
# else // Software version
# else /* Software version */
static const int DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
U32 v = val;
int r;
@ -481,13 +505,11 @@ static size_t ZSTD_compressRle (void* dst, size_t maxDstSize, const void* src, s
ostart[ZSTD_blockHeaderSize] = *(BYTE*)src;
// Build header
{
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_rle<<6);
}
/* Build header */
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_rle<<6);
return ZSTD_blockHeaderSize+1;
}
@ -500,13 +522,11 @@ static size_t ZSTD_noCompressBlock (void* dst, size_t maxDstSize, const void* sr
if (srcSize + ZSTD_blockHeaderSize > maxDstSize) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
memcpy(ostart + ZSTD_blockHeaderSize, src, srcSize);
// Build header
{
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
}
/* Build header */
ostart[0] = (BYTE)(srcSize>>16);
ostart[1] = (BYTE)(srcSize>>8);
ostart[2] = (BYTE)srcSize;
ostart[0] += (BYTE)(bt_raw<<6); /* is a raw (uncompressed) block */
return ZSTD_blockHeaderSize+srcSize;
}
@ -523,7 +543,7 @@ static size_t ZSTD_compressLiterals_usingCTable(void* dst, size_t dstSize,
FSE_CStream_t bitC;
FSE_CState_t CState1, CState2;
// init
/* init */
(void)dstSize; // objective : ensure it fits into dstBuffer (Todo)
FSE_initCStream(&bitC, dst);
FSE_initCState(&CState1, CTable);
@ -594,36 +614,38 @@ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
size_t errorCode;
const size_t minGain = ZSTD_minGain(srcSize);
// early out
/* early out */
if (dstSize < FSE_compressBound(srcSize)) return (size_t)-ZSTD_ERROR_maxDstSize_tooSmall;
// Scan input and build symbol stats
/* Scan input and build symbol stats */
errorCode = FSE_count (count, ip, srcSize, &maxSymbolValue);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
if (errorCode == srcSize) return 1;
if (errorCode < ((srcSize * 7) >> 10)) return 0;
//if (errorCode < ((srcSize * 7) >> 10)) return 0;
//if (errorCode < (srcSize >> 7)) return 0;
if (errorCode < (srcSize >> 6)) return 0; /* heuristic : probably not compressible enough */
tableLog = FSE_optimalTableLog(tableLog, srcSize, maxSymbolValue);
errorCode = (int)FSE_normalizeCount (norm, tableLog, count, srcSize, maxSymbolValue);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
// Write table description header
/* Write table description header */
errorCode = FSE_writeHeader (op, FSE_MAX_HEADERSIZE, norm, maxSymbolValue, tableLog);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
op += errorCode;
// Compress
/* Compress */
errorCode = FSE_buildCTable (&CTable, norm, maxSymbolValue, tableLog);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
errorCode = ZSTD_compressLiterals_usingCTable(op, oend - op, ip, srcSize, &CTable);
if (ZSTD_isError(errorCode)) return errorCode;
op += errorCode;
// check compressibility
/* check compressibility */
if ( (size_t)(op-ostart) >= srcSize-minGain)
return 0;
// Build header
/* Build header */
{
size_t totalSize;
totalSize = op - ostart - ZSTD_blockHeaderSize;
@ -637,14 +659,9 @@ static size_t ZSTD_compressLiterals (void* dst, size_t dstSize,
}
static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
const BYTE* op_lit_start, const BYTE* op_lit,
const BYTE* op_litLength_start, const BYTE* op_litLength,
const BYTE* op_matchLength_start,
const U32* op_offset_start,
const BYTE* op_dumps_start, const BYTE* op_dumps,
size_t srcSize, size_t lastLLSize
)
static size_t ZSTD_compressSequences(BYTE* dst, size_t maxDstSize,
const seqStore_t* seqStorePtr,
size_t lastLLSize, size_t srcSize)
{
FSE_CStream_t blockStream;
U32 count[256];
@ -652,14 +669,18 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
size_t mostFrequent;
U32 max = 255;
U32 tableLog = 11;
const size_t nbSeq = op_litLength - op_litLength_start;
U32 CTable_LitLength [FSE_CTABLE_SIZE_U32(LLFSELog, MaxLL )];
U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog, MaxOff)];
U32 CTable_OffsetBits [FSE_CTABLE_SIZE_U32(OffFSELog,MaxOff)];
U32 CTable_MatchLength[FSE_CTABLE_SIZE_U32(MLFSELog, MaxML )];
U32 LLtype, Offtype, MLtype;
const BYTE* const op_lit_start = seqStorePtr->litStart;
const BYTE* op_lit = seqStorePtr->lit;
const BYTE* const op_litLength_start = seqStorePtr->litLengthStart;
const BYTE* op_litLength = seqStorePtr->litLength;
const U32* op_offset = seqStorePtr->offset;
const BYTE* op_matchLength = seqStorePtr->matchLength;
const size_t nbSeq = op_litLength - op_litLength_start;
BYTE* op;
const U32* op_offset = op_offset_start + nbSeq;
const BYTE* op_matchLength = op_matchLength_start + nbSeq;
BYTE offsetBits_start[BLOCKSIZE / 4];
BYTE* offsetBitsPtr = offsetBits_start;
const size_t minGain = ZSTD_minGain(srcSize);
@ -699,7 +720,7 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
/* dumps */
{
size_t dumpsLength = op_dumps- op_dumps_start;
size_t dumpsLength = seqStorePtr->dumps - seqStorePtr->dumpsStart;
if (dumpsLength < 512)
{
op[0] = (BYTE)(dumpsLength >> 8);
@ -713,16 +734,16 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
op[2] = (BYTE)(dumpsLength);
op += 3;
}
memcpy(op, op_dumps_start, dumpsLength);
memcpy(op, seqStorePtr->dumpsStart, dumpsLength);
op += dumpsLength;
}
/* Encoding table of Literal Lengths */
max = MaxLL;
mostFrequent = FSE_countFast(count, op_litLength_start, nbSeq, &max);
mostFrequent = FSE_countFast(count, seqStorePtr->litLengthStart, nbSeq, &max);
if (mostFrequent == nbSeq)
{
*op++ = *op_litLength_start;
*op++ = *(seqStorePtr->litLengthStart);
FSE_buildCTable_rle(CTable_LitLength, (BYTE)max);
LLtype = bt_rle;
}
@ -744,6 +765,7 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
{
/* create OffsetBits */
size_t i;
const U32* const op_offset_start = seqStorePtr->offsetStart;
max = MaxOff;
for (i=0; i<nbSeq; i++)
{
@ -775,10 +797,10 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
/* Encoding Table of MatchLengths */
max = MaxML;
mostFrequent = FSE_countFast(count, op_matchLength_start, nbSeq, &max);
mostFrequent = FSE_countFast(count, seqStorePtr->matchLengthStart, nbSeq, &max);
if (mostFrequent == nbSeq)
{
*op++ = *op_matchLength_start;
*op++ = *seqStorePtr->matchLengthStart;
FSE_buildCTable_rle(CTable_MatchLength, (BYTE)max);
MLtype = bt_rle;
}
@ -839,57 +861,45 @@ static size_t ZSTD_compressEntropy(BYTE* dst, size_t maxDstSize,
}
static size_t ZSTD_storeSeq(BYTE* op_lit, BYTE* op_ll, U32* op_offset, BYTE* op_ml, BYTE* op_dumps,
size_t litLength, const BYTE* srcLit, size_t offset, size_t matchLength)
static void ZSTD_storeSeq(seqStore_t* seqStorePtr, size_t litLength, const BYTE* literals, size_t offset, size_t matchLength)
{
const BYTE* const dumpStart = op_dumps;
const BYTE* const l_end = op_lit + litLength;
BYTE* op_lit = seqStorePtr->lit;
BYTE* const l_end = op_lit + litLength;
/* copy Literals */
while (op_lit<l_end) COPY8(op_lit, srcLit);
while (op_lit<l_end) COPY8(op_lit, literals);
seqStorePtr->lit += litLength;
/* literal Length */
if (litLength >= MaxLL)
{
*op_ll++ = MaxLL;
*(seqStorePtr->litLength++) = MaxLL;
if (litLength<255 + MaxLL)
*op_dumps++ = (BYTE)(litLength - MaxLL);
*(seqStorePtr->dumps++) = (BYTE)(litLength - MaxLL);
else
{
*op_dumps++ = 255;
ZSTD_writeLE32(op_dumps, (U32)litLength); op_dumps += 3;
//litLength |= 0xFF000000;
//ZSTD_writeBE32(op_dumps, (U32)litLength);
//op_dumps += 4;
*(seqStorePtr->dumps++) = 255;
ZSTD_writeLE32(seqStorePtr->dumps, (U32)litLength); seqStorePtr->dumps += 3;
}
}
else *op_ll = (BYTE)litLength;
else *(seqStorePtr->litLength++) = (BYTE)litLength;
/* match offset */
*op_offset = (U32)offset;
/* match offset */
*(seqStorePtr->offset++) = (U32)offset;
/* match Length */
if (matchLength >= MaxML)
{
*op_ml++ = MaxML;
if (matchLength<255 + MaxML)
*op_dumps++ = (BYTE)(matchLength - MaxML);
*(seqStorePtr->matchLength++) = MaxML;
if (matchLength < 255+MaxML)
*(seqStorePtr->dumps++) = (BYTE)(matchLength - MaxML);
else
{
*op_dumps++ = 255;
ZSTD_writeLE32(op_dumps, (U32)matchLength); op_dumps+=3;
//*(U32*)op_dumps = (U32)matchLength; op_dumps += 3; /* store direct result */
//matchLength |= 0xFF000000;
//ZSTD_writeBE32(op_dumps, (U32)matchLength);
//op_dumps += 4;
*(seqStorePtr->dumps++) = 255;
ZSTD_writeLE32(seqStorePtr->dumps, (U32)matchLength); seqStorePtr->dumps+=3;
}
}
else *op_ml = (BYTE)matchLength;
return op_dumps - dumpStart;
else *(seqStorePtr->matchLength++) = (BYTE)matchLength;
}
@ -928,12 +938,12 @@ static int ZSTD_checkMatch(const BYTE* match, const BYTE* ip)
}
static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
static size_t ZSTD_compressBlock(void* cctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
cctxi_t* srt = (cctxi_t*) ctx;
U32* HashTable = (U32*)(srt->hashTable);
void* workplace = srt->workplace;
const BYTE* const base = srt->base;
cctxi_t* ctx = (cctxi_t*) cctx;
U32* HashTable = (U32*)(ctx->hashTable);
seqStore_t* seqStorePtr = &(ctx->seqStore);
const BYTE* const base = ctx->base;
const BYTE* const istart = (const BYTE*)src;
const BYTE* ip = istart + 1;
@ -941,15 +951,13 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
const BYTE* const iend = istart + srcSize;
const BYTE* const ilimit = iend - 16;
U32 *op_offset = (U32*)(workplace), *op_offset_start = op_offset;
BYTE *op_l = (BYTE*)workplace + srcSize + 4, *op_l_start = op_l;
BYTE *op_rl = op_l + srcSize + 4, *op_rl_start = op_rl;
BYTE *op_ml = op_rl + (srcSize >> 2) + 4, *op_ml_start = op_ml;
BYTE *op_dumps = op_ml + (srcSize >> 2) + 4, *op_dumps_start = op_dumps;
size_t prevOffset=0, offset=0;
size_t lastLLSize;
/* init */
ZSTD_resetSeqStore(seqStorePtr);
/* Main Search Loop */
while (ip < ilimit)
{
@ -969,8 +977,7 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
if (offsetCode == prevOffset) offsetCode = 0;
prevOffset = offset;
offset = ip-match;
op_dumps += ZSTD_storeSeq(op_l, op_rl++, op_offset++, op_ml++, op_dumps, litLength, anchor, offsetCode, matchLength);
op_l += litLength;
ZSTD_storeSeq(seqStorePtr, litLength, anchor, offsetCode, matchLength);
/* Fill Table */
ZSTD_addPtr(HashTable, ip+1, base);
@ -982,13 +989,12 @@ static size_t ZSTD_compressBlock(void* ctx, void* dst, size_t maxDstSize, const
/* Last Literals */
lastLLSize = iend - anchor;
memcpy(op_l, anchor, lastLLSize);
op_l += lastLLSize;
memcpy(seqStorePtr->lit, anchor, lastLLSize);
seqStorePtr->lit += lastLLSize;
/* Finale compression stage */
return ZSTD_compressEntropy((BYTE*)dst, maxDstSize,
op_l_start, op_l, op_rl_start, op_rl, op_ml_start, op_offset_start, op_dumps_start, op_dumps,
srcSize, lastLLSize);
return ZSTD_compressSequences((BYTE*)dst, maxDstSize,
seqStorePtr, lastLLSize, srcSize);
}
@ -1256,17 +1262,17 @@ FORCE_INLINE size_t ZSTD_decompressLiterals_usingDTable_generic(
FSE_initDState(&state2, &bitD, DTable);
op = oend;
// 2 symbols per loop
/* 2-4 symbols per loop */
while (!FSE_reloadDStream(&bitD) && (op>olimit+3))
{
*--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
if (LitFSELog*2+7 > sizeof(size_t)*8) // This test must be static
if (LitFSELog*2+7 > sizeof(size_t)*8) /* This test must be static */
FSE_reloadDStream(&bitD);
*--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
if (LitFSELog*4+7 < sizeof(size_t)*8) // This test must be static
if (LitFSELog*4+7 < sizeof(size_t)*8) /* This test must be static */
{
*--op = fast ? FSE_decodeSymbolFast(&state1, &bitD) : FSE_decodeSymbol(&state1, &bitD);
*--op = fast ? FSE_decodeSymbolFast(&state2, &bitD) : FSE_decodeSymbol(&state2, &bitD);
@ -1317,7 +1323,7 @@ static size_t ZSTD_decompressLiterals(void* ctx, void* dst, size_t maxDstSize,
U32 fastMode;
size_t errorCode;
if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; // too small input size
if (srcSize < 2) return (size_t)-ZSTD_ERROR_wrongLBlockSize; /* too small input size */
errorCode = FSE_readHeader (norm, &maxSymbolValue, &tableLog, ip, srcSize);
if (FSE_isError(errorCode)) return (size_t)-ZSTD_ERROR_GENERIC;
@ -1572,7 +1578,6 @@ _another_round:
if (add < 255) matchLength += add;
else
{
//matchLength = (*(U32*)dumps) & 0xFFFFFF;
matchLength = ZSTD_readLE32(dumps) & 0xFFFFFF;
dumps += 3;
}
@ -1703,24 +1708,24 @@ size_t ZSTD_decompress(void* dst, size_t maxDstSize, const void* src, size_t src
}
/******************************
/*******************************
* Streaming Decompression API
******************************/
*******************************/
typedef struct
{
U32 ctx[FSE_DTABLE_SIZE_U32(LLFSELog) + FSE_DTABLE_SIZE_U32(OffFSELog) + FSE_DTABLE_SIZE_U32(MLFSELog)];
size_t expected;
blockType_t bType;
U32 started;
U32 phase;
} dctx_t;
ZSTD_dctx_t ZSTD_createDCtx(void)
{
dctx_t* dctx = (dctx_t*)malloc(sizeof(dctx_t));
dctx->expected = 4 + ZSTD_blockHeaderSize; // Frame Header + Block Header
dctx->started = 0;
dctx->expected = ZSTD_frameHeaderSize;
dctx->phase = 0;
return (ZSTD_dctx_t)dctx;
}
@ -1731,7 +1736,7 @@ size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx)
}
size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_dctx_t dctx)
{
return ((dctx_t*)dctx)->expected;
}
@ -1739,63 +1744,67 @@ size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx)
size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize)
{
dctx_t* ctx = (dctx_t*)dctx;
size_t cSize = srcSize - ZSTD_blockHeaderSize;
size_t rSize;
// Sanity check
/* Sanity check */
if (srcSize != ctx->expected) return (size_t)-ZSTD_ERROR_wrongSrcSize;
// Decompress
if (!ctx->started)
/* Decompress : frame header */
if (ctx->phase == 0)
{
// Just check correct magic header
/* Check frame magic header */
U32 magicNumber = ZSTD_readBE32(src);
if (magicNumber != ZSTD_magicNumber) return (size_t)-ZSTD_ERROR_wrongMagicNumber;
rSize = 0;
ctx->phase = 1;
ctx->expected = ZSTD_blockHeaderSize;
return 0;
}
else
/* Decompress : block header */
if (ctx->phase == 1)
{
blockProperties_t bp;
size_t blockSize = ZSTD_getcBlockSize(src, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(blockSize)) return blockSize;
if (bp.blockType == bt_end)
{
ctx->expected = 0;
ctx->phase = 0;
}
else
{
ctx->expected = blockSize;
ctx->bType = bp.blockType;
ctx->phase = 2;
}
return 0;
}
/* Decompress : block content */
{
size_t rSize;
switch(ctx->bType)
{
case bt_compressed:
rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, cSize);
rSize = ZSTD_decompressBlock(ctx, dst, maxDstSize, src, srcSize);
break;
case bt_raw :
rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, cSize);
rSize = ZSTD_copyUncompressedBlock(dst, maxDstSize, src, srcSize);
break;
case bt_rle :
return (size_t)-ZSTD_ERROR_GENERIC; /* not yet handled */
break;
case bt_end :
case bt_end : /* should never happen (filtered at phase 1) */
rSize = 0;
break;
default:
return (size_t)-ZSTD_ERROR_GENERIC;
}
ctx->phase = 1;
ctx->expected = ZSTD_blockHeaderSize;
return rSize;
}
// Prepare next block
{
const BYTE* header = (const BYTE*)src;
blockProperties_t bp;
size_t blockSize;
header += cSize;
blockSize = ZSTD_getcBlockSize(header, ZSTD_blockHeaderSize, &bp);
if (ZSTD_isError(blockSize)) return blockSize;
if (bp.blockType == bt_end)
{
ctx->expected = 0;
ctx->started = 0;
}
else
{
ctx->expected = blockSize + ZSTD_blockHeaderSize;
ctx->bType = bp.blockType;
ctx->started = 1;
}
}
return rSize;
}

View File

@ -47,7 +47,7 @@ extern "C" {
**************************************/
#define ZSTD_VERSION_MAJOR 0 /* for breaking interface changes */
#define ZSTD_VERSION_MINOR 0 /* for new (non-breaking) interface capabilities */
#define ZSTD_VERSION_RELEASE 1 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_RELEASE 2 /* for tweaks, bug-fixes, or development */
#define ZSTD_VERSION_NUMBER (ZSTD_VERSION_MAJOR *100*100 + ZSTD_VERSION_MINOR *100 + ZSTD_VERSION_RELEASE)
unsigned ZSTD_versionNumber (void);

View File

@ -57,9 +57,16 @@ typedef void* ZSTD_dctx_t;
ZSTD_dctx_t ZSTD_createDCtx(void);
size_t ZSTD_freeDCtx(ZSTD_dctx_t dctx);
size_t ZSTD_getNextcBlockSize(ZSTD_dctx_t dctx);
size_t ZSTD_nextSrcSizeToDecompress(ZSTD_dctx_t dctx);
size_t ZSTD_decompressContinue(ZSTD_dctx_t dctx, void* dst, size_t maxDstSize, const void* src, size_t srcSize);
/*
Use the above functions alternately.
ZSTD_nextSrcSizeToDecompress() tells how many bytes to provide as input to ZSTD_decompressContinue().
This value is expected to be provided, precisely, as 'srcSize'.
Otherwise, decompression will fail (result is an error code, which can be tested using ZSTD_isError() )
ZSTD_decompressContinue() result is the number of bytes regenerated within 'dst'.
It can be zero, which is not an error; it just means ZSTD_decompressContinue() has decoded some header.
*/
/**************************************
* Error management
@ -77,4 +84,4 @@ typedef enum { ZSTD_LIST_ERRORS(ZSTD_GENERATE_ENUM) } ZSTD_errorCodes; /* expo
#if defined (__cplusplus)
}
#endif
#endif

View File

@ -30,7 +30,7 @@
# fullbench32: Same as fullbench, but forced to compile in 32-bits mode
# ##########################################################################
RELEASE?= r0
RELEASE?= r1
DESTDIR?=
PREFIX ?= /usr
@ -66,10 +66,10 @@ zstd: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c
zstd32: $(ZSTDDIR)/zstd.c xxhash.c bench.c fileio.c zstdcli.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
fullbench : $(ZSTDDIR)/zstd.c fullbench.c
fullbench : $(ZSTDDIR)/zstd.c datagen.c fullbench.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
fullbench32: $(ZSTDDIR)/zstd.c fullbench.c
fullbench32: $(ZSTDDIR)/zstd.c datagen.c fullbench.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
fuzzer : $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
@ -78,7 +78,7 @@ fuzzer : $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
fuzzer32: $(ZSTDDIR)/zstd.c xxhash.c fuzzer.c
$(CC) -m32 $(FLAGS) $^ -o $@$(EXT)
datagen : datagen.c
datagen : datagen.c datagencli.c
$(CC) $(FLAGS) $^ -o $@$(EXT)
clean:
@ -129,15 +129,11 @@ test-zstd32: zstd32 datagen
test-fullbench: fullbench datagen
./fullbench -i1
./datagen -P0 -g516K > tmp
./fullbench -i1 tmp
@rm tmp
./fullbench -i1 -P0
test-fullbench32: fullbench32 datagen
./fullbench32 -i1
./datagen -P0 -g516K > tmp
./fullbench32 -i1 tmp
@rm tmp
./fullbench32 -i1 -P0
test-fuzzer: fuzzer
./fuzzer

View File

@ -23,18 +23,12 @@
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
* Remove Visual warning messages
**************************************/
#define _CRT_SECURE_NO_WARNINGS /* fgets */
/**************************************
* Includes
**************************************/
#include <stdlib.h> /* malloc */
#include <stdio.h> /* fgets, sscanf */
#include <string.h> /* strcmp */
#include <stdio.h> /* FILE, fwrite */
#include <string.h> /* memcpy */
/**************************************
@ -62,60 +56,31 @@
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
# include <fcntl.h> /* _O_BINARY */
# include <io.h> /* _setmode, _isatty */
# ifdef __MINGW32__
int _fileno(FILE *stream); /* MINGW somehow forgets to include this windows declaration into <stdio.h> */
# endif
# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
#else
# include <unistd.h> /* isatty */
# define SET_BINARY_MODE(file)
# define IS_CONSOLE(stdStream) isatty(fileno(stdStream))
#endif
/**************************************
* Constants
**************************************/
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "r0"
#endif
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
#define CDG_SIZE_DEFAULT (64 KB)
#define CDG_SEED_DEFAULT 0
#define CDG_COMPRESSIBILITY_DEFAULT 50
#define PRIME1 2654435761U
#define PRIME2 2246822519U
/**************************************
* Macros
**************************************/
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
#define DISPLAYLEVEL(l, ...) if (displayLevel>=l) { DISPLAY(__VA_ARGS__); }
/**************************************
* Local Parameters
**************************************/
static unsigned no_prompt = 0;
static unsigned displayLevel = 2;
/*********************************************************
* Local Functions
*********************************************************/
#define CDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static unsigned int CDG_rand(U32* src)
#define RDG_rotl32(x,r) ((x << r) | (x >> (32 - r)))
static unsigned int RDG_rand(U32* src)
{
U32 rand32 = *src;
rand32 *= PRIME1;
rand32 ^= PRIME2;
rand32 = CDG_rotl32(rand32, 13);
rand32 = RDG_rotl32(rand32, 13);
*src = rand32;
return rand32;
}
@ -123,7 +88,7 @@ static unsigned int CDG_rand(U32* src)
#define LTSIZE 8192
#define LTMASK (LTSIZE-1)
static void* CDG_createLiteralDistrib(double ld)
static void* RDG_createLiteralDistrib(double ld)
{
BYTE* lt = malloc(LTSIZE);
U32 i = 0;
@ -150,208 +115,95 @@ static void* CDG_createLiteralDistrib(double ld)
return lt;
}
static char CDG_genChar(U32* seed, const void* ltctx)
static char RDG_genChar(U32* seed, const void* ltctx)
{
const BYTE* lt = ltctx;
U32 id = CDG_rand(seed) & LTMASK;
U32 id = RDG_rand(seed) & LTMASK;
return lt[id];
}
#define CDG_RAND15BITS ((CDG_rand(seed) >> 3) & 32767)
#define CDG_RANDLENGTH ( ((CDG_rand(seed) >> 7) & 7) ? (CDG_rand(seed) & 15) : (CDG_rand(seed) & 511) + 15)
#define CDG_DICTSIZE (32 KB)
static void CDG_generate(U64 size, U32* seed, double matchProba, double litProba)
#define RDG_DICTSIZE (32 KB)
#define RDG_RAND15BITS ((RDG_rand(seed) >> 3) & 32767)
#define RDG_RANDLENGTH ( ((RDG_rand(seed) >> 7) & 7) ? (RDG_rand(seed) & 15) : (RDG_rand(seed) & 511) + 15)
void RDG_genBlock(void* buffer, size_t buffSize, size_t prefixSize, double matchProba, void* litTable, unsigned* seedPtr)
{
BYTE fullbuff[CDG_DICTSIZE + 128 KB + 1];
BYTE* buff = fullbuff + CDG_DICTSIZE;
U64 total=0;
U32 P32 = (U32)(32768 * matchProba);
U32 pos=1;
U32 genBlockSize = 128 KB;
void* ldctx = CDG_createLiteralDistrib(litProba);
FILE* fout = stdout;
BYTE* buffPtr = ((BYTE*)buffer) - prefixSize;
const U32 matchProba32 = (U32)(32768 * matchProba);
size_t pos = prefixSize;
void* ldctx = litTable;
U32* seed = seedPtr;
/* init */
SET_BINARY_MODE(stdout);
fullbuff[0] = CDG_genChar(seed, ldctx);
while (pos<32 KB)
if (pos==0) buffPtr[0] = RDG_genChar(seed, ldctx), pos=1;
/* Generate compressible data */
while (pos < buffSize)
{
/* Select : Literal (char) or Match (within 32K) */
if (CDG_RAND15BITS < P32)
if (RDG_RAND15BITS < matchProba32)
{
/* Copy (within 64K) */
/* Copy (within 32K) */
int match;
U32 d;
int ref;
int length = CDG_RANDLENGTH + 4;
U32 offset = CDG_RAND15BITS + 1;
int length = RDG_RANDLENGTH + 4;
U32 offset = RDG_RAND15BITS + 1;
if (offset > pos) offset = pos;
ref = pos - offset;
if (pos + length > buffSize) length = buffSize - pos;
match = pos - offset;
d = pos + length;
while (pos < d) fullbuff[pos++] = fullbuff[ref++];
while (pos < d) buffPtr[pos++] = buffPtr[match++];
}
else
{
/* Literal (noise) */
U32 d = pos + CDG_RANDLENGTH;
while (pos < d) fullbuff[pos++] = CDG_genChar(seed, ldctx);
U32 d;
int length = RDG_RANDLENGTH;
if (pos + length > buffSize) length = buffSize - pos;
d = pos + length;
while (pos < d) buffPtr[pos++] = RDG_genChar(seed, ldctx);
}
}
}
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed)
{
void* ldctx;
if (litProba==0.0) litProba = matchProba / 3.8;
ldctx = RDG_createLiteralDistrib(litProba);
RDG_genBlock(buffer, size, 0, matchProba, ldctx, &seed);
free(ldctx);
}
#define RDG_BLOCKSIZE (128 KB)
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed)
{
BYTE fullbuff[RDG_DICTSIZE + RDG_BLOCKSIZE + 1];
BYTE* buff = fullbuff + RDG_DICTSIZE;
U64 total = 0;
U32 genBlockSize = RDG_BLOCKSIZE;
void* ldctx;
/* init */
if (litProba==0.0) litProba = matchProba / 3.8;
ldctx = RDG_createLiteralDistrib(litProba);
SET_BINARY_MODE(stdout);
/* Generate dict */
RDG_genBlock(fullbuff, RDG_DICTSIZE, 0, matchProba, ldctx, &seed);
/* Generate compressible data */
pos = 0;
while (total < size)
{
if (size-total < 128 KB) genBlockSize = (U32)(size-total);
RDG_genBlock(buff, RDG_BLOCKSIZE, RDG_DICTSIZE, matchProba, ldctx, &seed);
if (size-total < RDG_BLOCKSIZE) genBlockSize = (U32)(size-total);
total += genBlockSize;
buff[genBlockSize] = 0;
pos = 0;
while (pos<genBlockSize)
{
/* Select : Literal (char) or Match (within 32K) */
if (CDG_RAND15BITS < P32)
{
/* Copy (within 64K) */
int ref;
U32 d;
int length = CDG_RANDLENGTH + 4;
U32 offset = CDG_RAND15BITS + 1;
if (pos + length > genBlockSize ) length = genBlockSize - pos;
ref = pos - offset;
d = pos + length;
while (pos < d) buff[pos++] = buff[ref++];
}
else
{
/* Literal (noise) */
U32 d;
int length = CDG_RANDLENGTH;
if (pos + length > genBlockSize) length = genBlockSize - pos;
d = pos + length;
while (pos < d) buff[pos++] = CDG_genChar(seed, ldctx);
}
}
/* output generated data */
fwrite(buff, 1, genBlockSize, fout);
/* Regenerate prefix */
memcpy(fullbuff, buff + 96 KB, 32 KB);
}
}
/*********************************************************
* Command line
*********************************************************/
static int CDG_usage(char* programName)
{
DISPLAY( "Compressible data generator\n");
DISPLAY( "Usage :\n");
DISPLAY( " %s [size] [args]\n", programName);
DISPLAY( "\n");
DISPLAY( "Arguments :\n");
DISPLAY( " -g# : generate # data (default:%i)\n", CDG_SIZE_DEFAULT);
DISPLAY( " -s# : Select seed (default:%i)\n", CDG_SEED_DEFAULT);
DISPLAY( " -p# : Select compressibility in %% (default:%i%%)\n", CDG_COMPRESSIBILITY_DEFAULT);
DISPLAY( " -h : display help and exit\n");
return 0;
}
int main(int argc, char** argv)
{
int argNb;
double proba = (double)CDG_COMPRESSIBILITY_DEFAULT / 100;
double litProba = proba / 3.6;
U64 size = CDG_SIZE_DEFAULT;
U32 seed = CDG_SEED_DEFAULT;
char* programName;
/* Check command line */
programName = argv[0];
for(argNb=1; argNb<argc; argNb++)
{
char* argument = argv[argNb];
if(!argument) continue; /* Protection if argument empty */
/* Handle commands. Aggregated commands are allowed */
if (*argument=='-')
{
if (!strcmp(argument, "--no-prompt")) { no_prompt=1; continue; }
argument++;
while (*argument!=0)
{
switch(*argument)
{
case 'h':
return CDG_usage(programName);
case 'g':
argument++;
size=0;
while ((*argument>='0') && (*argument<='9'))
{
size *= 10;
size += *argument - '0';
argument++;
}
if (*argument=='K') { size <<= 10; argument++; }
if (*argument=='M') { size <<= 20; argument++; }
if (*argument=='G') { size <<= 30; argument++; }
if (*argument=='B') { argument++; }
break;
case 's':
argument++;
seed=0;
while ((*argument>='0') && (*argument<='9'))
{
seed *= 10;
seed += *argument - '0';
argument++;
}
break;
case 'P':
argument++;
proba=0.0;
while ((*argument>='0') && (*argument<='9'))
{
proba *= 10;
proba += *argument - '0';
argument++;
}
if (proba>100.) proba=100.;
proba /= 100.;
litProba = proba / 4.;
break;
case 'L':
argument++;
litProba=0.;
while ((*argument>='0') && (*argument<='9'))
{
litProba *= 10;
litProba += *argument - '0';
argument++;
}
if (litProba>100.) litProba=100.;
litProba /= 100.;
break;
case 'v':
displayLevel = 4;
argument++;
break;
default:
return CDG_usage(programName);
}
}
}
fwrite(buff, 1, genBlockSize, stdout);
/* update dict */
memcpy(fullbuff, buff + (RDG_BLOCKSIZE - RDG_DICTSIZE), RDG_DICTSIZE);
}
DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
DISPLAYLEVEL(3, "Seed = %u \n", seed);
if (proba!=CDG_COMPRESSIBILITY_DEFAULT) DISPLAYLEVEL(3, "Compressibility : %i%%\n", (U32)(proba*100));
CDG_generate(size, &seed, proba, litProba);
return 0;
free(ldctx);
}

40
programs/datagen.h Normal file
View File

@ -0,0 +1,40 @@
/*
datagen.h - compressible data generator header
Copyright (C) Yann Collet 2012-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
#ifndef DATAGEN_H
#define DATAGEN_H

#include <stddef.h>   /* size_t */

/* Public interface of the compressible data generator.
   The include guard makes the header safe to include several times from the same translation unit. */
void RDG_genOut(unsigned long long size, double matchProba, double litProba, unsigned seed);
void RDG_genBuffer(void* buffer, size_t size, double matchProba, double litProba, unsigned seed);

#endif /* DATAGEN_H */
/* RDG_genOut
   Generate 'size' bytes of compressible data into stdout.
   Compressibility can be controlled using 'matchProba'.
   'litProba' is optional, and affects the variability of bytes. If litProba==0.0, a default value is used.
   Generated data can be selected using 'seed'.
   If (matchProba, litProba and seed) are equal, the function always generates the same content.

   RDG_genBuffer
   Same as RDG_genOut, but generates data into the provided buffer.
*/

190
programs/datagencli.c Normal file
View File

@ -0,0 +1,190 @@
/*
datagencli.c
compressible data command line generator
Copyright (C) Yann Collet 2012-2015
GPL v2 License
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
You can contact the author at :
- ZSTD source repository : https://github.com/Cyan4973/zstd
- Public forum : https://groups.google.com/forum/#!forum/lz4c
*/
/**************************************
* Includes
**************************************/
#include <stdio.h> /* fprintf, stderr */
#include "datagen.h" /* RDG_genOut */
/**************************************
* Basic Types
**************************************/
/* Integer type aliases.
   With a C99 (or later) compiler they map onto the exact-width types of <stdint.h>.
   Otherwise they fall back to basic types; this assumes those basic types have
   the intended widths on the target platform. */
#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */
# include <stdint.h>
typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
#else
typedef unsigned char BYTE;
typedef unsigned short U16;
typedef unsigned int U32;
typedef signed int S32;
typedef unsigned long long U64;
#endif
/**************************************
* Constants
**************************************/
/* Version string shown in verbose mode; can be overridden at build time */
#ifndef ZSTD_VERSION
# define ZSTD_VERSION "r1"
#endif
/* Postfix size multipliers : `64 KB` expands to `64 *(1 <<10)` */
#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
/* Default values of command line parameters */
#define SIZE_DEFAULT (64 KB)
#define SEED_DEFAULT 0
/* Expressed as a percentage : main() divides it by 100 to obtain a ratio */
#define COMPRESSIBILITY_DEFAULT 50
/**************************************
* Macros
**************************************/
/* DISPLAY : printf-like output, sent to stderr */
#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
/* DISPLAYLEVEL : same as DISPLAY, but only when displayLevel is at least `l`.
   Wrapped into do { } while (0), so that the macro behaves as a single statement,
   and can be safely used as the body of an unbraced if / else. */
#define DISPLAYLEVEL(l, ...) do { if (displayLevel>=(l)) { DISPLAY(__VA_ARGS__); } } while (0)
/* Current verbosity; main() raises it to 4 when '-v' is provided */
static unsigned displayLevel = 2;
/*********************************************************
* Command line
*********************************************************/
/* usage :
   Print the command line help on stderr.
   programName : name of the executable, inserted into the usage line.
   @return : always 0, which allows callers to write `return usage(...)`. */
static int usage(char* programName)
{
    DISPLAY( "Compressible data generator\n"
             "Usage :\n"
             " %s [size] [args]\n"
             "\n"
             "Arguments :\n"
             " -g# : generate # data (default:%i)\n"
             " -s# : Select seed (default:%i)\n"
             " -P# : Select compressibility in %% (default:%i%%)\n"
             " -h : display help and exit\n",
             programName, SIZE_DEFAULT, SEED_DEFAULT, COMPRESSIBILITY_DEFAULT);
    return 0;
}
/* main :
   Entry point of the command line data generator.
   Supported commands (can be aggregated after a single '-') :
     -g# : size to generate, with optional K / M / G suffix (and optional trailing B)
     -s# : seed
     -P# : compressibility, in percent (capped at 100)
     -L# : literal distribution probability, in percent (capped at 100) (hidden)
     -v  : verbose
     -h  : display help and exit
   Arguments which do not start with '-' are ignored.
   Generated data is produced through RDG_genOut().
   @return : 0 (including when help is displayed after an unknown command) */
int main(int argc, char** argv)
{
    /* COMPRESSIBILITY_DEFAULT is a percentage, while proba is a ratio */
    const double defaultProba = (double)COMPRESSIBILITY_DEFAULT / 100;
    int argNb;
    double proba = defaultProba;
    double litProba = 0.0;   /* 0.0 is forwarded as-is when -L is not provided */
    U64 size = SIZE_DEFAULT;
    U32 seed = SEED_DEFAULT;
    char* programName;

    /* Check command line */
    programName = argv[0];
    for(argNb=1; argNb<argc; argNb++)
    {
        char* argument = argv[argNb];

        if(!argument) continue;   /* Protection if argument empty */

        /* Handle commands. Aggregated commands are allowed */
        if (*argument=='-')
        {
            argument++;
            while (*argument!=0)
            {
                switch(*argument)
                {
                case 'h':
                    return usage(programName);
                case 'g':
                    argument++;
                    size=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        size *= 10;
                        size += *argument - '0';
                        argument++;
                    }
                    if (*argument=='K') { size <<= 10; argument++; }
                    if (*argument=='M') { size <<= 20; argument++; }
                    if (*argument=='G') { size <<= 30; argument++; }
                    if (*argument=='B') { argument++; }
                    break;
                case 's':
                    argument++;
                    seed=0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        seed *= 10;
                        seed += *argument - '0';
                        argument++;
                    }
                    break;
                case 'P':
                    argument++;
                    proba=0.0;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        proba *= 10;
                        proba += *argument - '0';
                        argument++;
                    }
                    if (proba>100.) proba=100.;
                    proba /= 100.;   /* percentage => ratio */
                    break;
                case 'L':   /* hidden argument : Literal distribution probability */
                    argument++;
                    litProba=0.;
                    while ((*argument>='0') && (*argument<='9'))
                    {
                        litProba *= 10;
                        litProba += *argument - '0';
                        argument++;
                    }
                    if (litProba>100.) litProba=100.;
                    litProba /= 100.;   /* percentage => ratio */
                    break;
                case 'v':
                    displayLevel = 4;
                    argument++;
                    break;
                default:
                    return usage(programName);
                }
            }
        }
    }

    DISPLAYLEVEL(4, "Data Generator %s \n", ZSTD_VERSION);
    DISPLAYLEVEL(3, "Seed = %u \n", seed);
    /* Compare ratio with ratio : comparing against the raw percentage constant was always true */
    if (proba != defaultProba) { DISPLAYLEVEL(3, "Compressibility : %u%%\n", (U32)(proba*100)); }

    RDG_genOut(size, proba, litProba, seed);
    DISPLAYLEVEL(1, "\n");

    return 0;
}

View File

@ -327,10 +327,10 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha
/* Init */
FIO_getFileHandles(&finput, &foutput, input_filename, output_filename);
dctx = ZSTD_createDCtx();
toRead = ZSTD_getNextcBlockSize(dctx);
if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header");
/* check header */
toRead = ZSTD_nextSrcSizeToDecompress(dctx);
if (toRead > MAXHEADERSIZE) EXM_THROW(30, "Not enough memory to read header");
sizeCheck = fread(header, (size_t)1, toRead, finput);
if (sizeCheck != toRead) EXM_THROW(31, "Read error : cannot read header");
sizeCheck = ZSTD_decompressContinue(dctx, NULL, 0, header, toRead); // Decode frame header
@ -348,7 +348,7 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha
if (!inBuff || !outBuff) EXM_THROW(33, "Allocation error : not enough memory");
/* Main decompression Loop */
toRead = ZSTD_getNextcBlockSize(dctx);
toRead = ZSTD_nextSrcSizeToDecompress(dctx);
while (toRead)
{
size_t readSize, decodedSize;
@ -361,16 +361,19 @@ unsigned long long FIO_decompressFilename(const char* output_filename, const cha
/* Decode block */
decodedSize = ZSTD_decompressContinue(dctx, op, oend-op, inBuff, readSize);
/* Write block */
sizeCheck = fwrite(op, 1, decodedSize, foutput);
if (sizeCheck != decodedSize) EXM_THROW(35, "Write error : unable to write data block to destination file");
filesize += decodedSize;
if (decodedSize) /* not a header */
{
/* Write block */
sizeCheck = fwrite(op, 1, decodedSize, foutput);
if (sizeCheck != decodedSize) EXM_THROW(35, "Write error : unable to write data block to destination file");
filesize += decodedSize;
op += decodedSize;
if (op==oend) op = outBuff;
DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(filesize>>20) );
}
/* prepare for next Block */
op += decodedSize;
if (op==oend) op = outBuff;
toRead = ZSTD_getNextcBlockSize(dctx);
DISPLAYUPDATE(2, "\rDecoded : %u MB... ", (U32)(filesize>>20) );
toRead = ZSTD_nextSrcSizeToDecompress(dctx);
}
DISPLAYLEVEL(2, "\r%79s\r", "");

View File

@ -62,6 +62,7 @@
#include "zstd.h"
#include "fse_static.h"
#include "datagen.h"
/**************************************
@ -113,9 +114,7 @@
#define MAX_MEM (1984 MB)
#define DEFAULT_CHUNKSIZE (4<<20)
static double g_compressibilityDefault = 0.50;
static const U32 prime1 = 2654435761U;
static const U32 prime2 = 2246822519U;
#define COMPRESSIBILITY_DEFAULT 0.50
static const size_t sampleSize = 10000000;
@ -129,6 +128,7 @@ static const size_t sampleSize = 10000000;
* Benchmark Parameters
**************************************/
static int nbIterations = NBLOOPS;
static double g_compressibility = COMPRESSIBILITY_DEFAULT;
void BMK_SetNbIterations(int nbLoops)
{
@ -215,58 +215,6 @@ static U64 BMK_GetFileSize(char* infilename)
}
static U32 BMK_rotl32(unsigned val32, unsigned nbBits) { return((val32 << nbBits) | (val32 >> (32 - nbBits))); }
static U32 BMK_rand(U32* src)
{
U32 rand32 = *src;
rand32 *= prime1;
rand32 += prime2;
rand32 = BMK_rotl32(rand32, 13);
*src = rand32;
return rand32 >> 9;
}
#define BMK_RAND15BITS ( BMK_rand(&seed) & 0x7FFF)
#define BMK_RANDLENGTH ((BMK_rand(&seed) & 3) ? (BMK_rand(&seed) % 15) : (BMK_rand(&seed) % 510) + 15)
#define BMK_RANDCHAR (BYTE)((BMK_rand(&seed) & 63) + '0')
static void BMK_datagen(void* buffer, size_t bufferSize, double proba, U32 seed)
{
BYTE* BBuffer = (BYTE*)buffer;
unsigned pos = 0;
U32 P32 = (U32)(32768 * proba);
/* First Byte */
BBuffer[pos++] = BMK_RANDCHAR;
while (pos < bufferSize)
{
/* Select : Literal (noise) or copy (within 64K) */
if (BMK_RAND15BITS < P32)
{
/* Match */
size_t match, end;
unsigned length = BMK_RANDLENGTH + 4;
unsigned offset = BMK_RAND15BITS + 1;
if (offset > pos) offset = pos;
match = pos - offset;
end = pos + length;
if (end > bufferSize) end = bufferSize;
while (pos < end) BBuffer[pos++] = BBuffer[match++];
}
else
{
/* Literal */
size_t end;
unsigned length = BMK_RANDLENGTH;
end = pos + length;
if (end > bufferSize) end = bufferSize;
while (pos < end) BBuffer[pos++] = BMK_RANDCHAR;
}
}
}
/*********************************************************
* Benchmark wrappers
*********************************************************/
@ -329,8 +277,8 @@ size_t local_conditionalNull(void* dst, size_t dstSize, void* buff2, const void*
if (b==0) total = 0; // 825
//if (!b) total = 0; // 825
//total = b ? total : 0; // 622
//total *= !!b; // 465
//total &= -!b; // 622
//total *= !!b; // 465
}
return total;
}
@ -357,7 +305,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
double bestTime = 100000000.;
size_t errorCode = 0;
// Declaration
/* Selection */
switch(benchNb)
{
case 1:
@ -399,7 +347,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
case 11:
g_cSize = ZSTD_compress(buff2, dstBuffSize, src, srcSize);
break;
case 31: // ZSTD_decodeLiteralsBlock
case 31: /* ZSTD_decodeLiteralsBlock */
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -417,7 +365,7 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
srcSize = srcSize > 128 KB ? 128 KB : srcSize; // relative to block
break;
}
case 32: // ZSTD_decodeSeqHeaders
case 32: /* ZSTD_decodeSeqHeaders */
{
blockProperties_t bp;
const BYTE* ip = dstBuff;
@ -444,15 +392,14 @@ size_t benchMem(void* src, size_t srcSize, U32 benchNb)
/* test functions */
case 101: // conditionalNull
case 101: /* conditionalNull */
{
size_t i;
U32 seed = (U32)srcSize;
for (i=0; i<srcSize; i++)
buff2[i] = (BYTE)(BMK_rand(&seed) & 15);
buff2[i] = i & 15;
break;
}
case 102: //
case 102: /* local_decodeLiteralsForward */
{
blockProperties_t bp;
ZSTD_compress(dstBuff, dstBuffSize, src, srcSize);
@ -503,7 +450,7 @@ int benchSample(U32 benchNb)
{
char* origBuff;
size_t benchedSize = sampleSize;
const char* name = "Sample50";
const char* name = "Sample 10MiB";
/* Allocation */
origBuff = (char*) malloc((size_t)benchedSize);
@ -514,7 +461,7 @@ int benchSample(U32 benchNb)
}
/* Fill buffer */
BMK_datagen(origBuff, benchedSize, g_compressibilityDefault, 0);
RDG_genBuffer(origBuff, benchedSize, g_compressibility, 0.0, 0);
/* bench */
DISPLAY("\r%79s\r", "");
@ -609,6 +556,7 @@ int usage_advanced(void)
DISPLAY( "\nAdvanced options :\n");
DISPLAY( " -b# : test only function # \n");
DISPLAY( " -i# : iteration loops [1-9](default : %i)\n", NBLOOPS);
DISPLAY( " -P# : sample compressibility (default : %.1f%%)\n", COMPRESSIBILITY_DEFAULT * 100);
return 0;
}
@ -648,14 +596,14 @@ int main(int argc, char** argv)
switch(argument[0])
{
// Display help on usage
/* Display help on usage */
case 'h' :
case 'H': usage(exename); usage_advanced(); return 0;
// Pause at the end (hidden option)
/* Pause at the end (hidden option) */
case 'p': main_pause = 1; break;
// Select specific bench algorithm only
/* Select specific algorithm to bench */
case 'b':
benchNb = 0;
while ((argument[1]>= '0') && (argument[1]<= '9'))
@ -666,7 +614,7 @@ int main(int argc, char** argv)
}
break;
// Modify Nb Iterations
/* Modify Nb Iterations */
case 'i':
if ((argument[1] >='1') && (argument[1] <='9'))
{
@ -676,14 +624,28 @@ int main(int argc, char** argv)
}
break;
// Unknown command
/* Select compressibility of synthetic sample */
case 'P':
{
U32 proba32 = 0;
while ((argument[1]>= '0') && (argument[1]<= '9'))
{
proba32 *= 10;
proba32 += argument[1] - '0';
argument++;
}
g_compressibility = (double)proba32 / 100.;
}
break;
/* Unknown command */
default : badusage(exename); return 1;
}
}
continue;
}
// first provided filename is input
/* first provided filename is input */
if (!input_filename) { input_filename=argument; filenamesStart=i; continue; }
}