zig/std/zlib/deflate.zig

523 lines
20 KiB
Zig

const z_stream = struct {
/// next input byte */
next_in: &const u8,
/// number of bytes available at next_in
avail_in: u16,
/// total number of input bytes read so far
total_in: u32,
/// next output byte will go here
next_out: u8,
/// remaining free space at next_out
avail_out: u16,
/// total number of bytes output so far
total_out: u32,
/// last error message, NULL if no error
msg: ?&const u8,
/// not visible by applications
state:
struct internal_state FAR *state; // not visible by applications */
alloc_func zalloc; // used to allocate the internal state */
free_func zfree; // used to free the internal state */
voidpf opaque; // private data object passed to zalloc and zfree */
int data_type; // best guess about the data type: binary or text
// for deflate, or the decoding state for inflate */
uint32_t adler; // Adler-32 or CRC-32 value of the uncompressed data */
uint32_t reserved; // reserved for future use */
};
typedef struct internal_state {
z_stream * strm; /* pointer back to this zlib stream */
int status; /* as the name implies */
uint8_t *pending_buf; /* output still pending */
ulg pending_buf_size; /* size of pending_buf */
uint8_t *pending_out; /* next pending byte to output to the stream */
ulg pending; /* nb of bytes in the pending buffer */
int wrap; /* bit 0 true for zlib, bit 1 true for gzip */
gz_headerp gzhead; /* gzip header information to write */
ulg gzindex; /* where in extra, name, or comment */
uint8_t method; /* can only be DEFLATED */
int last_flush; /* value of flush param for previous deflate call */
/* used by deflate.c: */
uint16_t w_size; /* LZ77 window size (32K by default) */
uint16_t w_bits; /* log2(w_size) (8..16) */
uint16_t w_mask; /* w_size - 1 */
uint8_t *window;
/* Sliding window. Input bytes are read into the second half of the window,
* and move to the first half later to keep a dictionary of at least wSize
* bytes. With this organization, matches are limited to a distance of
* wSize-MAX_MATCH bytes, but this ensures that IO is always
* performed with a length multiple of the block size. Also, it limits
* the window size to 64K, which is quite useful on MSDOS.
* To do: use the user input buffer as sliding window.
*/
ulg window_size;
/* Actual size of window: 2*wSize, except when the user input buffer
* is directly used as sliding window.
*/
Posf *prev;
/* Link to older string with same hash index. To limit the size of this
* array to 64K, this link is maintained only for the last 32K strings.
* An index in this array is thus a window index modulo 32K.
*/
Posf *head; /* Heads of the hash chains or NIL. */
uint16_t ins_h; /* hash index of string to be inserted */
uint16_t hash_size; /* number of elements in hash table */
uint16_t hash_bits; /* log2(hash_size) */
uint16_t hash_mask; /* hash_size-1 */
uint16_t hash_shift;
/* Number of bits by which ins_h must be shifted at each input
* step. It must be such that after MIN_MATCH steps, the oldest
* byte no longer takes part in the hash key, that is:
* hash_shift * MIN_MATCH >= hash_bits
*/
long block_start;
/* Window position at the beginning of the current output block. Gets
* negative when the window is moved backwards.
*/
uint16_t match_length; /* length of best match */
IPos prev_match; /* previous match */
int match_available; /* set if previous match exists */
uint16_t strstart; /* start of string to insert */
uint16_t match_start; /* start of matching string */
uint16_t lookahead; /* number of valid bytes ahead in window */
uint16_t prev_length;
/* Length of the best match at previous step. Matches not greater than this
* are discarded. This is used in the lazy match evaluation.
*/
uint16_t max_chain_length;
/* To speed up deflation, hash chains are never searched beyond this
* length. A higher limit improves compression ratio but degrades the
* speed.
*/
uint16_t max_lazy_match;
/* Attempt to find a better match only when the current match is strictly
* smaller than this value. This mechanism is used only for compression
* levels >= 4.
*/
# define max_insert_length max_lazy_match
/* Insert new strings in the hash table only if the match length is not
* greater than this length. This saves time but degrades compression.
* max_insert_length is used only for compression levels <= 3.
*/
int level; /* compression level (1..9) */
int strategy; /* favor or force Huffman coding*/
uint16_t good_match;
/* Use a faster search when the previous match is longer than this */
int nice_match; /* Stop searching when current match exceeds this */
/* used by trees.c: */
/* Didn't use ct_data typedef below to suppress compiler warning */
struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */
struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */
struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */
struct tree_desc_s l_desc; /* desc. for literal tree */
struct tree_desc_s d_desc; /* desc. for distance tree */
struct tree_desc_s bl_desc; /* desc. for bit length tree */
ush bl_count[MAX_BITS+1];
/* number of codes at each bit length for an optimal tree */
int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */
int heap_len; /* number of elements in the heap */
int heap_max; /* element of largest frequency */
/* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used.
* The same heap array is used to build all trees.
*/
uch depth[2*L_CODES+1];
/* Depth of each subtree used as tie breaker for trees of equal frequency
*/
uchf *l_buf; /* buffer for literals or lengths */
uint16_t lit_bufsize;
/* Size of match buffer for literals/lengths. There are 4 reasons for
* limiting lit_bufsize to 64K:
* - frequencies can be kept in 16 bit counters
* - if compression is not successful for the first block, all input
* data is still in the window so we can still emit a stored block even
* when input comes from standard input. (This can also be done for
* all blocks if lit_bufsize is not greater than 32K.)
* - if compression is not successful for a file smaller than 64K, we can
* even emit a stored file instead of a stored block (saving 5 bytes).
* This is applicable only for zip (not gzip or zlib).
* - creating new Huffman trees less frequently may not provide fast
* adaptation to changes in the input data statistics. (Take for
* example a binary file with poorly compressible code followed by
* a highly compressible string table.) Smaller buffer sizes give
* fast adaptation but have of course the overhead of transmitting
* trees more frequently.
* - I can't count above 4
*/
uint16_t last_lit; /* running index in l_buf */
ushf *d_buf;
/* Buffer for distances. To simplify the code, d_buf and l_buf have
* the same number of elements. To use different lengths, an extra flag
* array would be necessary.
*/
ulg opt_len; /* bit length of current block with optimal trees */
ulg static_len; /* bit length of current block with static trees */
uint16_t matches; /* number of string matches in current block */
uint16_t insert; /* bytes at end of window left to insert */
#ifdef ZLIB_DEBUG
ulg compressed_len; /* total bit length of compressed file mod 2^32 */
ulg bits_sent; /* bit length of compressed data sent mod 2^32 */
#endif
ush bi_buf;
/* Output buffer. bits are inserted starting at the bottom (least
* significant bits).
*/
int bi_valid;
/* Number of valid bits in bi_buf. All bits above the last valid bit
* are always zero.
*/
ulg high_water;
/* High water mark offset in window for initialized bytes -- bytes above
* this are set to zero in order to avoid memory check warnings when
* longest match routines access bytes past the input. This is then
* updated to the new high water mark.
*/
} FAR deflate_state;
fn deflate(strm: &z_stream, flush: int) -> %void {
}
int deflate (z_stream * strm, int flush) {
int old_flush; /* value of flush param for previous deflate call */
deflate_state *s;
if (deflateStateCheck(strm) || flush > Z_BLOCK || flush < 0) {
return Z_STREAM_ERROR;
}
s = strm->state;
if (strm->next_out == Z_NULL ||
(strm->avail_in != 0 && strm->next_in == Z_NULL) ||
(s->status == FINISH_STATE && flush != Z_FINISH)) {
ERR_RETURN(strm, Z_STREAM_ERROR);
}
if (strm->avail_out == 0) ERR_RETURN(strm, Z_BUF_ERROR);
old_flush = s->last_flush;
s->last_flush = flush;
/* Flush as much pending output as possible */
if (s->pending != 0) {
flush_pending(strm);
if (strm->avail_out == 0) {
/* Since avail_out is 0, deflate will be called again with
* more output space, but possibly with both pending and
* avail_in equal to zero. There won't be anything to do,
* but this is not an error situation so make sure we
* return OK instead of BUF_ERROR at next call of deflate:
*/
s->last_flush = -1;
return Z_OK;
}
/* Make sure there is something to do and avoid duplicate consecutive
* flushes. For repeated and useless calls with Z_FINISH, we keep
* returning Z_STREAM_END instead of Z_BUF_ERROR.
*/
} else if (strm->avail_in == 0 && RANK(flush) <= RANK(old_flush) &&
flush != Z_FINISH) {
ERR_RETURN(strm, Z_BUF_ERROR);
}
/* User must not provide more input after the first FINISH: */
if (s->status == FINISH_STATE && strm->avail_in != 0) {
ERR_RETURN(strm, Z_BUF_ERROR);
}
/* Write the header */
if (s->status == INIT_STATE) {
/* zlib header */
uint16_t header = (Z_DEFLATED + ((s->w_bits-8)<<4)) << 8;
uint16_t level_flags;
if (s->strategy >= Z_HUFFMAN_ONLY || s->level < 2)
level_flags = 0;
else if (s->level < 6)
level_flags = 1;
else if (s->level == 6)
level_flags = 2;
else
level_flags = 3;
header |= (level_flags << 6);
if (s->strstart != 0) header |= PRESET_DICT;
header += 31 - (header % 31);
putShortMSB(s, header);
/* Save the adler32 of the preset dictionary: */
if (s->strstart != 0) {
putShortMSB(s, (uint16_t)(strm->adler >> 16));
putShortMSB(s, (uint16_t)(strm->adler & 0xffff));
}
strm->adler = adler32(0L, Z_NULL, 0);
s->status = BUSY_STATE;
/* Compression must start with an empty pending buffer */
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
}
#ifdef GZIP
if (s->status == GZIP_STATE) {
/* gzip header */
strm->adler = crc32(0L, Z_NULL, 0);
put_byte(s, 31);
put_byte(s, 139);
put_byte(s, 8);
if (s->gzhead == Z_NULL) {
put_byte(s, 0);
put_byte(s, 0);
put_byte(s, 0);
put_byte(s, 0);
put_byte(s, 0);
put_byte(s, s->level == 9 ? 2 :
(s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
4 : 0));
put_byte(s, OS_CODE);
s->status = BUSY_STATE;
/* Compression must start with an empty pending buffer */
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
}
else {
put_byte(s, (s->gzhead->text ? 1 : 0) +
(s->gzhead->hcrc ? 2 : 0) +
(s->gzhead->extra == Z_NULL ? 0 : 4) +
(s->gzhead->name == Z_NULL ? 0 : 8) +
(s->gzhead->comment == Z_NULL ? 0 : 16)
);
put_byte(s, (uint8_t)(s->gzhead->time & 0xff));
put_byte(s, (uint8_t)((s->gzhead->time >> 8) & 0xff));
put_byte(s, (uint8_t)((s->gzhead->time >> 16) & 0xff));
put_byte(s, (uint8_t)((s->gzhead->time >> 24) & 0xff));
put_byte(s, s->level == 9 ? 2 :
(s->strategy >= Z_HUFFMAN_ONLY || s->level < 2 ?
4 : 0));
put_byte(s, s->gzhead->os & 0xff);
if (s->gzhead->extra != Z_NULL) {
put_byte(s, s->gzhead->extra_len & 0xff);
put_byte(s, (s->gzhead->extra_len >> 8) & 0xff);
}
if (s->gzhead->hcrc)
strm->adler = crc32(strm->adler, s->pending_buf,
s->pending);
s->gzindex = 0;
s->status = EXTRA_STATE;
}
}
if (s->status == EXTRA_STATE) {
if (s->gzhead->extra != Z_NULL) {
ulg beg = s->pending; /* start of bytes to update crc */
uint16_t left = (s->gzhead->extra_len & 0xffff) - s->gzindex;
while (s->pending + left > s->pending_buf_size) {
uint16_t copy = s->pending_buf_size - s->pending;
zmemcpy(s->pending_buf + s->pending,
s->gzhead->extra + s->gzindex, copy);
s->pending = s->pending_buf_size;
HCRC_UPDATE(beg);
s->gzindex += copy;
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
beg = 0;
left -= copy;
}
zmemcpy(s->pending_buf + s->pending,
s->gzhead->extra + s->gzindex, left);
s->pending += left;
HCRC_UPDATE(beg);
s->gzindex = 0;
}
s->status = NAME_STATE;
}
if (s->status == NAME_STATE) {
if (s->gzhead->name != Z_NULL) {
ulg beg = s->pending; /* start of bytes to update crc */
int val;
do {
if (s->pending == s->pending_buf_size) {
HCRC_UPDATE(beg);
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
beg = 0;
}
val = s->gzhead->name[s->gzindex++];
put_byte(s, val);
} while (val != 0);
HCRC_UPDATE(beg);
s->gzindex = 0;
}
s->status = COMMENT_STATE;
}
if (s->status == COMMENT_STATE) {
if (s->gzhead->comment != Z_NULL) {
ulg beg = s->pending; /* start of bytes to update crc */
int val;
do {
if (s->pending == s->pending_buf_size) {
HCRC_UPDATE(beg);
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
beg = 0;
}
val = s->gzhead->comment[s->gzindex++];
put_byte(s, val);
} while (val != 0);
HCRC_UPDATE(beg);
}
s->status = HCRC_STATE;
}
if (s->status == HCRC_STATE) {
if (s->gzhead->hcrc) {
if (s->pending + 2 > s->pending_buf_size) {
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
}
put_byte(s, (uint8_t)(strm->adler & 0xff));
put_byte(s, (uint8_t)((strm->adler >> 8) & 0xff));
strm->adler = crc32(0L, Z_NULL, 0);
}
s->status = BUSY_STATE;
/* Compression must start with an empty pending buffer */
flush_pending(strm);
if (s->pending != 0) {
s->last_flush = -1;
return Z_OK;
}
}
#endif
/* Start a new block or continue the current one.
*/
if (strm->avail_in != 0 || s->lookahead != 0 ||
(flush != Z_NO_FLUSH && s->status != FINISH_STATE)) {
block_state bstate;
bstate = s->level == 0 ? deflate_stored(s, flush) :
s->strategy == Z_HUFFMAN_ONLY ? deflate_huff(s, flush) :
s->strategy == Z_RLE ? deflate_rle(s, flush) :
(*(configuration_table[s->level].func))(s, flush);
if (bstate == finish_started || bstate == finish_done) {
s->status = FINISH_STATE;
}
if (bstate == need_more || bstate == finish_started) {
if (strm->avail_out == 0) {
s->last_flush = -1; /* avoid BUF_ERROR next call, see above */
}
return Z_OK;
/* If flush != Z_NO_FLUSH && avail_out == 0, the next call
* of deflate should use the same flush parameter to make sure
* that the flush is complete. So we don't have to output an
* empty block here, this will be done at next call. This also
* ensures that for a very small output buffer, we emit at most
* one empty block.
*/
}
if (bstate == block_done) {
if (flush == Z_PARTIAL_FLUSH) {
_tr_align(s);
} else if (flush != Z_BLOCK) { /* FULL_FLUSH or SYNC_FLUSH */
_tr_stored_block(s, (char*)0, 0L, 0);
/* For a full flush, this empty block will be recognized
* as a special marker by inflate_sync().
*/
if (flush == Z_FULL_FLUSH) {
CLEAR_HASH(s); /* forget history */
if (s->lookahead == 0) {
s->strstart = 0;
s->block_start = 0L;
s->insert = 0;
}
}
}
flush_pending(strm);
if (strm->avail_out == 0) {
s->last_flush = -1; /* avoid BUF_ERROR at next call, see above */
return Z_OK;
}
}
}
if (flush != Z_FINISH) return Z_OK;
if (s->wrap <= 0) return Z_STREAM_END;
/* Write the trailer */
#ifdef GZIP
if (s->wrap == 2) {
put_byte(s, (uint8_t)(strm->adler & 0xff));
put_byte(s, (uint8_t)((strm->adler >> 8) & 0xff));
put_byte(s, (uint8_t)((strm->adler >> 16) & 0xff));
put_byte(s, (uint8_t)((strm->adler >> 24) & 0xff));
put_byte(s, (uint8_t)(strm->total_in & 0xff));
put_byte(s, (uint8_t)((strm->total_in >> 8) & 0xff));
put_byte(s, (uint8_t)((strm->total_in >> 16) & 0xff));
put_byte(s, (uint8_t)((strm->total_in >> 24) & 0xff));
}
else
#endif
{
putShortMSB(s, (uint16_t)(strm->adler >> 16));
putShortMSB(s, (uint16_t)(strm->adler & 0xffff));
}
flush_pending(strm);
/* If avail_out is zero, the application will call deflate again
* to flush the rest.
*/
if (s->wrap > 0) s->wrap = -s->wrap; /* write the trailer only once! */
return s->pending != 0 ? Z_OK : Z_STREAM_END;
}