zstd/lib/fse.h

323 lines
16 KiB
C
Raw Normal View History

2015-01-23 16:58:16 -08:00
/* ******************************************************************
FSE : Finite State Entropy coder
header file
Copyright (C) 2013-2015, Yann Collet.
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
You can contact the author at :
- Source repository : https://github.com/Cyan4973/FiniteStateEntropy
- Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
#pragma once
#if defined (__cplusplus)
extern "C" {
#endif
/******************************************
* Includes
******************************************/
2015-07-04 16:56:41 -07:00
#include <stddef.h> /* size_t, ptrdiff_t */
2015-01-23 16:58:16 -08:00
/******************************************
* FSE simple functions
******************************************/
size_t FSE_compress(void* dst, size_t maxDstSize,
const void* src, size_t srcSize);
2015-07-04 16:56:41 -07:00
size_t FSE_decompress(void* dst, size_t maxDstSize,
2015-01-23 16:58:16 -08:00
const void* cSrc, size_t cSrcSize);
/*
FSE_compress():
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated, and sized to handle worst case situations.
Worst case size evaluation is provided by FSE_compressBound().
return : size of compressed data
2015-07-04 16:56:41 -07:00
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
2015-01-23 16:58:16 -08:00
FSE_decompress():
Decompress FSE data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'maxDstSize'.
return : size of regenerated data (<= maxDstSize)
or an error code, which can be tested using FSE_isError()
2015-07-04 16:56:41 -07:00
** Important ** : FSE_decompress() doesn't decompress non-compressible nor RLE data !!!
Why ? : making this distinction requires a header.
Header management is intentionally delegated to the user layer, which can better manage special cases.
*/
/******************************************
* Huff0 simple functions
******************************************/
size_t HUF_compress (void* dst, size_t dstSize, const void* src, size_t srcSize);
size_t HUF_decompress(void* dst, size_t maxDstSize,
const void* cSrc, size_t cSrcSize);
/*
HUF_compress():
Compress content of buffer 'src', of size 'srcSize', into destination buffer 'dst'.
'dst' buffer must be already allocated, and sized to handle worst case situations.
Worst case size evaluation is provided by FSE_compressBound().
return : size of compressed data
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
HUF_decompress():
Decompress Huff0 data from buffer 'cSrc', of size 'cSrcSize',
into already allocated destination buffer 'dst', of size 'maxDstSize'.
return : size of regenerated data (<= maxDstSize)
or an error code, which can be tested using FSE_isError()
** Important ** : HUF_decompress() doesn't decompress non-compressible nor RLE data !!!
2015-01-23 16:58:16 -08:00
*/
/******************************************
* Tool functions
******************************************/
size_t FSE_compressBound(size_t size); /* maximum compressed size */
/* Error Management */
unsigned FSE_isError(size_t code); /* tells if a return value is an error code */
const char* FSE_getErrorName(size_t code); /* provides error code string (useful for debugging) */
/******************************************
* FSE advanced functions
******************************************/
/*
FSE_compress2():
Same as FSE_compress(), but allows the selection of 'maxSymbolValue' and 'tableLog'
Both parameters can be defined as '0' to mean : use default value
return : size of compressed data
2015-07-04 16:56:41 -07:00
Special values : if return == 0, srcData is not compressible => Nothing is stored within cSrc !!!
if return == 1, srcData is a single byte symbol * srcSize times. Use RLE compression.
if FSE_isError(return), it's an error code.
2015-01-23 16:58:16 -08:00
*/
size_t FSE_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
size_t HUF_compress2 (void* dst, size_t dstSize, const void* src, size_t srcSize, unsigned maxSymbolValue, unsigned tableLog);
2015-01-23 16:58:16 -08:00
/******************************************
2015-07-04 16:56:41 -07:00
* FSE detailed API
2015-01-23 16:58:16 -08:00
******************************************/
/*
2015-07-04 16:56:41 -07:00
FSE_compress() does the following:
2015-01-23 16:58:16 -08:00
1. count symbol occurrence from source[] into table count[]
2. normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
3. save normalized counters to memory buffer using writeHeader()
4. build encoding table 'CTable' from normalized counters
2015-07-04 16:56:41 -07:00
5. encode the data stream using encoding table 'CTable'
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
FSE_decompress() does the following:
2015-01-23 16:58:16 -08:00
1. read normalized counters with readHeader()
2. build decoding table 'DTable' from normalized counters
2015-07-04 16:56:41 -07:00
3. decode the data stream using decoding table 'DTable'
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
The following API allows to trigger specific sub-functions for advanced tasks.
For example, it's possible to compress several blocks using the same 'CTable',
or to save and provide normalized distribution using one's own method.
2015-01-23 16:58:16 -08:00
*/
/* *** COMPRESSION *** */
2015-07-04 16:56:41 -07:00
/*
FSE_count():
Provides the precise count of each symbol within a table 'count'
'count' is a table of unsigned int, of minimum size (maxSymbolValuePtr[0]+1).
maxSymbolValuePtr[0] will be updated if detected smaller than initially expected
return : the count of the most frequent symbol (which is not identified)
if return == srcSize, there is only one symbol.
if FSE_isError(return), it's an error code. */
size_t FSE_count(unsigned* count, unsigned* maxSymbolValuePtr, const unsigned char* src, size_t srcSize);
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
/*
FSE_optimalTableLog():
dynamically downsize 'tableLog' when conditions are met.
It saves CPU time, by using smaller tables, while preserving or even improving compression ratio.
return : recommended tableLog (necessarily <= initial 'tableLog') */
2015-01-23 16:58:16 -08:00
unsigned FSE_optimalTableLog(unsigned tableLog, size_t srcSize, unsigned maxSymbolValue);
2015-07-04 16:56:41 -07:00
/*
FSE_normalizeCount():
normalize counters so that sum(count[]) == Power_of_2 (2^tableLog)
'normalizedCounter' is a table of short, of minimum size (maxSymbolValue+1).
return : tableLog,
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_normalizeCount(short* normalizedCounter, unsigned tableLog, const unsigned* count, size_t srcSize, unsigned maxSymbolValue);
/*
FSE_NCountWriteBound():
Provides the maximum possible size of an FSE normalized table, given 'maxSymbolValue' and 'tableLog'
Typically useful for allocation purpose. */
size_t FSE_NCountWriteBound(unsigned maxSymbolValue, unsigned tableLog);
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
/*
FSE_writeNCount():
Compactly save 'normalizedCounter' into 'buffer'.
return : size of the compressed table
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_writeNCount (void* buffer, size_t bufferSize, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
2015-01-23 16:58:16 -08:00
/*
2015-07-04 16:56:41 -07:00
Constructor and Destructor of type FSE_CTable
Not that its size depends on parameters 'tableLog' and 'maxSymbolValue' */
typedef unsigned FSE_CTable; /* don't allocate that. It's just a way to be more restrictive than void */
FSE_CTable* FSE_createCTable (unsigned tableLog, unsigned maxSymbolValue);
void FSE_freeCTable (FSE_CTable* ct);
/*
FSE_buildCTable():
Builds 'ct', which must be already allocated, using FSE_createCTable()
return : 0
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_buildCTable(FSE_CTable* ct, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
/*
FSE_compress_usingCTable():
Compress 'src' using 'ct' into 'dst' which must be already allocated
return : size of compressed data
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_compress_usingCTable (void* dst, size_t dstSize, const void* src, size_t srcSize, const FSE_CTable* ct);
/*
Tutorial :
----------
2015-01-23 16:58:16 -08:00
The first step is to count all symbols. FSE_count() provides one quick way to do this job.
2015-07-04 16:56:41 -07:00
Result will be saved into 'count', a table of unsigned int, which must be already allocated, and have 'maxSymbolValuePtr[0]+1' cells.
'src' is a table of bytes of size 'srcSize'. All values within 'src' MUST be <= maxSymbolValuePtr[0]
maxSymbolValuePtr[0] will be updated, with its real value (necessarily <= original value)
2015-01-23 16:58:16 -08:00
FSE_count() will return the number of occurrence of the most frequent symbol.
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
The next step is to normalize the frequencies.
FSE_normalizeCount() will ensure that sum of frequencies is == 2 ^'tableLog'.
It also guarantees a minimum of 1 to any Symbol which frequency is >= 1.
You can use input 'tableLog'==0 to mean "use default tableLog value".
If you are unsure of which tableLog value to use, you can optionally call FSE_optimalTableLog(),
which will provide the optimal valid tableLog given sourceSize, maxSymbolValue, and a user-defined maximum (0 means "default").
The result of FSE_normalizeCount() will be saved into a table,
called 'normalizedCounter', which is a table of signed short.
'normalizedCounter' must be already allocated, and have at least 'maxSymbolValue+1' cells.
The return value is tableLog if everything proceeded as expected.
It is 0 if there is a single symbol within distribution.
If there is an error(typically, invalid tableLog value), the function will return an ErrorCode (which can be tested using FSE_isError()).
'normalizedCounter' can be saved in a compact manner to a memory area using FSE_writeHeader().
'header' buffer must be already allocated.
For guaranteed success, buffer size must be at least FSE_headerBound().
The result of the function is the number of bytes written into 'header'.
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()) (for example, buffer size too small).
2015-07-04 16:56:41 -07:00
'normalizedCounter' can then be used to create the compression table 'CTable'.
2015-01-23 16:58:16 -08:00
The space required by 'CTable' must be already allocated. Its size is provided by FSE_sizeof_CTable().
'CTable' must be aligned of 4 bytes boundaries.
You can then use FSE_buildCTable() to fill 'CTable'.
In both cases, if there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
2015-07-04 16:56:41 -07:00
'CTable' can then be used to compress 'src', with FSE_compress_usingCTable().
Similar to FSE_count(), the convention is that 'src' is assumed to be a table of char of size 'srcSize'
The function returns the size of compressed data (without header).
If there is an error, the function will return an ErrorCode (which can be tested using FSE_isError()).
2015-01-23 16:58:16 -08:00
*/
/* *** DECOMPRESSION *** */
2015-07-04 16:56:41 -07:00
/*
FSE_readNCount():
Read compactly saved 'normalizedCounter' from 'rBuffer'.
return : size read from 'rBuffer'
or an errorCode, which can be tested using FSE_isError()
maxSymbolValuePtr[0] and tableLogPtr[0] will also be updated with their respective values */
size_t FSE_readNCount (short* normalizedCounter, unsigned* maxSymbolValuePtr, unsigned* tableLogPtr, const void* rBuffer, size_t rBuffSize);
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
/*
Constructor and Destructor of type FSE_DTable
Note that its size depends on parameters 'tableLog' */
typedef unsigned FSE_DTable; /* don't allocate that. It's just a way to be more restrictive than void */
FSE_DTable* FSE_createDTable(unsigned tableLog);
void FSE_freeDTable(FSE_DTable* dt);
2015-01-23 16:58:16 -08:00
2015-07-04 16:56:41 -07:00
/*
FSE_buildDTable():
Builds 'dt', which must be already allocated, using FSE_createDTable()
return : 1 if 'dt' is compatible with fast mode, 0 otherwise,
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_buildDTable (FSE_DTable* dt, const short* normalizedCounter, unsigned maxSymbolValue, unsigned tableLog);
2015-01-23 16:58:16 -08:00
/*
2015-07-04 16:56:41 -07:00
FSE_decompress_usingDTable():
Decompress compressed source 'cSrc' of size 'cSrcSize'
using 'dt' into 'dst' which must be already allocated.
Use fastMode==1 only if authorized by result of FSE_buildDTable().
return : size of regenerated data (necessarily <= maxDstSize)
or an errorCode, which can be tested using FSE_isError() */
size_t FSE_decompress_usingDTable(void* dst, size_t maxDstSize, const void* cSrc, size_t cSrcSize, const FSE_DTable* dt, size_t fastMode);
/*
Tutorial :
----------
(Note : these functions only decompress FSE-compressed blocks.
If block is uncompressed, use memcpy() instead
If block is a single repeated byte, use memset() instead )
2015-01-23 16:58:16 -08:00
The first step is to obtain the normalized frequencies of symbols.
This can be performed by reading a header with FSE_readHeader().
2015-07-04 16:56:41 -07:00
'normalizedCounter' must be already allocated, and have at least 'maxSymbolValuePtr[0]+1' cells of short.
2015-01-23 16:58:16 -08:00
In practice, that means it's necessary to know 'maxSymbolValue' beforehand,
or size the table to handle worst case situations (typically 256).
FSE_readHeader will provide 'tableLog' and 'maxSymbolValue' stored into the header.
The result of FSE_readHeader() is the number of bytes read from 'header'.
2015-07-04 16:56:41 -07:00
Note that 'headerSize' must be at least 4 bytes, even if useful information is less than that.
2015-01-23 16:58:16 -08:00
If there is an error, the function will return an error code, which can be tested using FSE_isError().
2015-07-04 16:56:41 -07:00
The next step is to create the decompression tables 'FSE_DTable' from 'normalizedCounter'.
2015-01-23 16:58:16 -08:00
This is performed by the function FSE_buildDTable().
2015-07-04 16:56:41 -07:00
The space required by 'FSE_DTable' must be already allocated using FSE_createDTable().
The function will return 1 if FSE_DTable is compatible with fastMode, 0 otherwise.
2015-01-23 16:58:16 -08:00
If there is an error, the function will return an error code, which can be tested using FSE_isError().
2015-07-04 16:56:41 -07:00
'FSE_DTable' can then be used to decompress 'cSrc', with FSE_decompress_usingDTable().
Only trigger fastMode if it was authorized by the result of FSE_buildDTable(), otherwise decompression will fail.
2015-01-23 16:58:16 -08:00
cSrcSize must be correct, otherwise decompression will fail.
FSE_decompress_usingDTable() result will tell how many bytes were regenerated.
If there is an error, the function will return an error code, which can be tested using FSE_isError().
*/
#if defined (__cplusplus)
}
#endif