1168404Spjd/* deflate.h -- internal compression state 2168404Spjd * Copyright (C) 1995-2004 Jean-loup Gailly 3168404Spjd * For conditions of distribution and use, see copyright notice in zlib.h 4168404Spjd */ 5168404Spjd 6168404Spjd/* WARNING: this file should *not* be used by applications. It is 7168404Spjd part of the implementation of the compression library and is 8168404Spjd subject to change. Applications should only use zlib.h. 9168404Spjd */ 10168404Spjd 11168404Spjd#ifndef _DEFLATE_H 12168404Spjd#define _DEFLATE_H 13168404Spjd 14168404Spjd#pragma ident "%Z%%M% %I% %E% SMI" 15168404Spjd 16168404Spjd#include "zutil.h" 17168404Spjd 18168404Spjd/* define NO_GZIP when compiling if you want to disable gzip header and 19168404Spjd trailer creation by deflate(). NO_GZIP would be used to avoid linking in 20168404Spjd the crc code when it is not needed. For shared libraries, gzip encoding 21168404Spjd should be left enabled. */ 22168404Spjd#ifndef NO_GZIP 23168404Spjd# define GZIP 24168404Spjd#endif 25168404Spjd 26168404Spjd/* =========================================================================== 27168404Spjd * Internal compression state. 28168404Spjd */ 29168404Spjd 30168404Spjd#define LENGTH_CODES 29 31168404Spjd/* number of length codes, not counting the special END_BLOCK code */ 32168404Spjd 33168404Spjd#define LITERALS 256 34168404Spjd/* number of literal bytes 0..255 */ 35168404Spjd 36168404Spjd#define L_CODES (LITERALS+1+LENGTH_CODES) 37168404Spjd/* number of Literal or Length codes, including the END_BLOCK code */ 38168404Spjd 39168404Spjd#define D_CODES 30 40168404Spjd/* number of distance codes */ 41168404Spjd 42168404Spjd#define BL_CODES 19 43168404Spjd/* number of codes used to transfer the bit lengths */ 44168404Spjd 45168404Spjd#define HEAP_SIZE (2*L_CODES+1) 46168404Spjd/* maximum heap size */ 47168404Spjd 48168404Spjd#define MAX_BITS 15 49168404Spjd/* All codes must not exceed MAX_BITS bits */ 50168404Spjd 51168404Spjd#define INIT_STATE 42 52168404Spjd#define EXTRA_STATE 69 53168404Spjd#define NAME_STATE 73 54168404Spjd#define COMMENT_STATE 91 55168404Spjd#define HCRC_STATE 103 56168404Spjd#define BUSY_STATE 113 57168404Spjd#define FINISH_STATE 666 58168404Spjd/* Stream status */ 59168404Spjd 60168404Spjd 61168404Spjd/* Data structure describing a single value and its code string. */ 62168404Spjdtypedef struct ct_data_s { 63168404Spjd union { 64168404Spjd ush freq; /* frequency count */ 65168404Spjd ush code; /* bit string */ 66168404Spjd } fc; 67168404Spjd union { 68168404Spjd ush dad; /* father node in Huffman tree */ 69168404Spjd ush len; /* length of bit string */ 70168404Spjd } dl; 71168404Spjd} FAR ct_data; 72168404Spjd 73168404Spjd#define Freq fc.freq 74168404Spjd#define Code fc.code 75168404Spjd#define Dad dl.dad 76168404Spjd#define Len dl.len 77168404Spjd 78168404Spjdtypedef struct static_tree_desc_s static_tree_desc; 79168404Spjd 80168404Spjdtypedef struct tree_desc_s { 81168404Spjd ct_data *dyn_tree; /* the dynamic tree */ 82168404Spjd int max_code; /* largest code with non zero frequency */ 83168404Spjd static_tree_desc *stat_desc; /* the corresponding static tree */ 84168404Spjd} FAR tree_desc; 85168404Spjd 86168404Spjdtypedef ush Pos; 87168404Spjdtypedef Pos FAR Posf; 88168404Spjdtypedef unsigned IPos; 89168404Spjd 90168404Spjd/* A Pos is an index in the character window. We use short instead of int to 91168404Spjd * save space in the various tables. IPos is used only for parameter passing. 92168404Spjd */ 93168404Spjd 94168404Spjdtypedef struct internal_state { 95168404Spjd z_streamp strm; /* pointer back to this zlib stream */ 96168404Spjd int status; /* as the name implies */ 97168404Spjd Bytef *pending_buf; /* output still pending */ 98168404Spjd ulg pending_buf_size; /* size of pending_buf */ 99168404Spjd Bytef *pending_out; /* next pending byte to output to the stream */ 100168404Spjd uInt pending; /* nb of bytes in the pending buffer */ 101168404Spjd int wrap; /* bit 0 true for zlib, bit 1 true for gzip */ 102168404Spjd gz_headerp gzhead; /* gzip header information to write */ 103168404Spjd uInt gzindex; /* where in extra, name, or comment */ 104168404Spjd Byte method; /* STORED (for zip only) or DEFLATED */ 105168404Spjd int last_flush; /* value of flush param for previous deflate call */ 106168404Spjd 107168404Spjd /* used by deflate.c: */ 108168404Spjd 109168404Spjd uInt w_size; /* LZ77 window size (32K by default) */ 110168404Spjd uInt w_bits; /* log2(w_size) (8..16) */ 111168404Spjd uInt w_mask; /* w_size - 1 */ 112168404Spjd 113168404Spjd Bytef *window; 114168404Spjd /* Sliding window. Input bytes are read into the second half of the window, 115168404Spjd * and move to the first half later to keep a dictionary of at least wSize 116168404Spjd * bytes. With this organization, matches are limited to a distance of 117168404Spjd * wSize-MAX_MATCH bytes, but this ensures that IO is always 118168404Spjd * performed with a length multiple of the block size. Also, it limits 119168404Spjd * the window size to 64K, which is quite useful on MSDOS. 120168404Spjd * To do: use the user input buffer as sliding window. 121168404Spjd */ 122168404Spjd 123168404Spjd ulg window_size; 124168404Spjd /* Actual size of window: 2*wSize, except when the user input buffer 125168404Spjd * is directly used as sliding window. 126168404Spjd */ 127168404Spjd 128168404Spjd Posf *prev; 129168404Spjd /* Link to older string with same hash index. To limit the size of this 130168404Spjd * array to 64K, this link is maintained only for the last 32K strings. 131168404Spjd * An index in this array is thus a window index modulo 32K. 132168404Spjd */ 133168404Spjd 134168404Spjd Posf *head; /* Heads of the hash chains or NIL. */ 135168404Spjd 136168404Spjd uInt ins_h; /* hash index of string to be inserted */ 137168404Spjd uInt hash_size; /* number of elements in hash table */ 138168404Spjd uInt hash_bits; /* log2(hash_size) */ 139168404Spjd uInt hash_mask; /* hash_size-1 */ 140168404Spjd 141168404Spjd uInt hash_shift; 142168404Spjd /* Number of bits by which ins_h must be shifted at each input 143168404Spjd * step. It must be such that after MIN_MATCH steps, the oldest 144168404Spjd * byte no longer takes part in the hash key, that is: 145168404Spjd * hash_shift * MIN_MATCH >= hash_bits 146168404Spjd */ 147168404Spjd 148168404Spjd long block_start; 149168404Spjd /* Window position at the beginning of the current output block. Gets 150168404Spjd * negative when the window is moved backwards. 151168404Spjd */ 152168404Spjd 153168404Spjd uInt match_length; /* length of best match */ 154168404Spjd IPos prev_match; /* previous match */ 155168404Spjd int match_available; /* set if previous match exists */ 156168404Spjd uInt strstart; /* start of string to insert */ 157168404Spjd uInt match_start; /* start of matching string */ 158168404Spjd uInt lookahead; /* number of valid bytes ahead in window */ 159168404Spjd 160168404Spjd uInt prev_length; 161168404Spjd /* Length of the best match at previous step. Matches not greater than this 162168404Spjd * are discarded. This is used in the lazy match evaluation. 163168404Spjd */ 164168404Spjd 165168404Spjd uInt max_chain_length; 166168404Spjd /* To speed up deflation, hash chains are never searched beyond this 167168404Spjd * length. A higher limit improves compression ratio but degrades the 168168404Spjd * speed. 169168404Spjd */ 170168404Spjd 171168404Spjd uInt max_lazy_match; 172168404Spjd /* Attempt to find a better match only when the current match is strictly 173168404Spjd * smaller than this value. This mechanism is used only for compression 174168404Spjd * levels >= 4. 175168404Spjd */ 176168404Spjd# define max_insert_length max_lazy_match 177168404Spjd /* Insert new strings in the hash table only if the match length is not 178168404Spjd * greater than this length. This saves time but degrades compression. 179168404Spjd * max_insert_length is used only for compression levels <= 3. 180168404Spjd */ 181168404Spjd 182168404Spjd int level; /* compression level (1..9) */ 183168404Spjd int strategy; /* favor or force Huffman coding*/ 184168404Spjd 185168404Spjd uInt good_match; 186168404Spjd /* Use a faster search when the previous match is longer than this */ 187168404Spjd 188168404Spjd int nice_match; /* Stop searching when current match exceeds this */ 189168404Spjd 190168404Spjd /* used by trees.c: */ 191168404Spjd /* Didn't use ct_data typedef below to supress compiler warning */ 192168404Spjd struct ct_data_s dyn_ltree[HEAP_SIZE]; /* literal and length tree */ 193168404Spjd struct ct_data_s dyn_dtree[2*D_CODES+1]; /* distance tree */ 194168404Spjd struct ct_data_s bl_tree[2*BL_CODES+1]; /* Huffman tree for bit lengths */ 195168404Spjd 196168404Spjd struct tree_desc_s l_desc; /* desc. for literal tree */ 197168404Spjd struct tree_desc_s d_desc; /* desc. for distance tree */ 198168404Spjd struct tree_desc_s bl_desc; /* desc. for bit length tree */ 199168404Spjd 200168404Spjd ush bl_count[MAX_BITS+1]; 201168404Spjd /* number of codes at each bit length for an optimal tree */ 202168404Spjd 203168404Spjd int heap[2*L_CODES+1]; /* heap used to build the Huffman trees */ 204168404Spjd int heap_len; /* number of elements in the heap */ 205168404Spjd int heap_max; /* element of largest frequency */ 206168404Spjd /* The sons of heap[n] are heap[2*n] and heap[2*n+1]. heap[0] is not used. 207168404Spjd * The same heap array is used to build all trees. 208168404Spjd */ 209168404Spjd 210168404Spjd uch depth[2*L_CODES+1]; 211168404Spjd /* Depth of each subtree used as tie breaker for trees of equal frequency 212168404Spjd */ 213168404Spjd 214168404Spjd uchf *l_buf; /* buffer for literals or lengths */ 215168404Spjd 216168404Spjd uInt lit_bufsize; 217168404Spjd /* Size of match buffer for literals/lengths. There are 4 reasons for 218168404Spjd * limiting lit_bufsize to 64K: 219168404Spjd * - frequencies can be kept in 16 bit counters 220168404Spjd * - if compression is not successful for the first block, all input 221168404Spjd * data is still in the window so we can still emit a stored block even 222168404Spjd * when input comes from standard input. (This can also be done for 223168404Spjd * all blocks if lit_bufsize is not greater than 32K.) 224168404Spjd * - if compression is not successful for a file smaller than 64K, we can 225168404Spjd * even emit a stored file instead of a stored block (saving 5 bytes). 226168404Spjd * This is applicable only for zip (not gzip or zlib). 227168404Spjd * - creating new Huffman trees less frequently may not provide fast 228168404Spjd * adaptation to changes in the input data statistics. (Take for 229168404Spjd * example a binary file with poorly compressible code followed by 230168404Spjd * a highly compressible string table.) Smaller buffer sizes give 231168404Spjd * fast adaptation but have of course the overhead of transmitting 232168404Spjd * trees more frequently. 233168404Spjd * - I can't count above 4 234168404Spjd */ 235168404Spjd 236168404Spjd uInt last_lit; /* running index in l_buf */ 237168404Spjd 238168404Spjd ushf *d_buf; 239168404Spjd /* Buffer for distances. To simplify the code, d_buf and l_buf have 240168404Spjd * the same number of elements. To use different lengths, an extra flag 241168404Spjd * array would be necessary. 242168404Spjd */ 243168404Spjd 244168404Spjd ulg opt_len; /* bit length of current block with optimal trees */ 245168404Spjd ulg static_len; /* bit length of current block with static trees */ 246168404Spjd uInt matches; /* number of string matches in current block */ 247168404Spjd int last_eob_len; /* bit length of EOB code for last block */ 248168404Spjd 249168404Spjd#ifdef DEBUG 250168404Spjd ulg compressed_len; /* total bit length of compressed file mod 2^32 */ 251168404Spjd ulg bits_sent; /* bit length of compressed data sent mod 2^32 */ 252168404Spjd#endif 253168404Spjd 254168404Spjd ush bi_buf; 255168404Spjd /* Output buffer. bits are inserted starting at the bottom (least 256168404Spjd * significant bits). 257168404Spjd */ 258168404Spjd int bi_valid; 259168404Spjd /* Number of valid bits in bi_buf. All bits above the last valid bit 260168404Spjd * are always zero. 261168404Spjd */ 262168404Spjd 263168404Spjd} FAR deflate_state; 264168404Spjd 265168404Spjd/* Output a byte on the stream. 266168404Spjd * IN assertion: there is enough room in pending_buf. 267168404Spjd */ 268168404Spjd#define put_byte(s, c) {s->pending_buf[s->pending++] = (c);} 269168404Spjd 270168404Spjd 271168404Spjd#define MIN_LOOKAHEAD (MAX_MATCH+MIN_MATCH+1) 272168404Spjd/* Minimum amount of lookahead, except at the end of the input file. 273168404Spjd * See deflate.c for comments about the MIN_MATCH+1. 274168404Spjd */ 275168404Spjd 276168404Spjd#define MAX_DIST(s) ((s)->w_size-MIN_LOOKAHEAD) 277168404Spjd/* In order to simplify the code, particularly on 16 bit machines, match 278168404Spjd * distances are limited to MAX_DIST instead of WSIZE. 279168404Spjd */ 280168404Spjd 281168404Spjd /* in trees.c */ 282168404Spjdvoid _tr_init OF((deflate_state *s)); 283168404Spjdint _tr_tally OF((deflate_state *s, unsigned dist, unsigned lc)); 284168404Spjdvoid _tr_flush_block OF((deflate_state *s, charf *buf, ulg stored_len, 285168404Spjd int eof)); 286168404Spjdvoid _tr_align OF((deflate_state *s)); 287168404Spjdvoid _tr_stored_block OF((deflate_state *s, charf *buf, ulg stored_len, 288168404Spjd int eof)); 289168404Spjd 290168404Spjd#define d_code(dist) \ 291168404Spjd ((dist) < 256 ? _dist_code[dist] : _dist_code[256+((dist)>>7)]) 292168404Spjd/* Mapping from a distance to a distance code. dist is the distance - 1 and 293168404Spjd * must not have side effects. _dist_code[256] and _dist_code[257] are never 294168404Spjd * used. 295168404Spjd */ 296168404Spjd 297168404Spjd#ifndef DEBUG 298168404Spjd/* Inline versions of _tr_tally for speed: */ 299168404Spjd 300168404Spjd#if defined(GEN_TREES_H) || !defined(STDC) 301168404Spjd extern uch _length_code[]; 302168404Spjd extern uch _dist_code[]; 303168404Spjd#else 304168404Spjd extern const uch _length_code[]; 305168404Spjd extern const uch _dist_code[]; 306168404Spjd#endif 307168404Spjd 308168404Spjd# define _tr_tally_lit(s, c, flush) \ 309168404Spjd { uch cc = (c); \ 310168404Spjd s->d_buf[s->last_lit] = 0; \ 311168404Spjd s->l_buf[s->last_lit++] = cc; \ 312168404Spjd s->dyn_ltree[cc].Freq++; \ 313168404Spjd flush = (s->last_lit == s->lit_bufsize-1); \ 314168404Spjd } 315168404Spjd# define _tr_tally_dist(s, distance, length, flush) \ 316168404Spjd { uch len = (length); \ 317168404Spjd ush dist = (distance); \ 318168404Spjd s->d_buf[s->last_lit] = dist; \ 319168404Spjd s->l_buf[s->last_lit++] = len; \ 320168404Spjd dist--; \ 321168404Spjd s->dyn_ltree[_length_code[len]+LITERALS+1].Freq++; \ 322168404Spjd s->dyn_dtree[d_code(dist)].Freq++; \ 323168404Spjd flush = (s->last_lit == s->lit_bufsize-1); \ 324168404Spjd } 325168404Spjd#else 326168404Spjd# define _tr_tally_lit(s, c, flush) flush = _tr_tally(s, 0, c) 327168404Spjd# define _tr_tally_dist(s, distance, length, flush) \ 328168404Spjd flush = _tr_tally(s, distance, length) 329168404Spjd#endif 330168404Spjd 331168404Spjd#endif /* _DEFLATE_H */ 332