Deleted Added
full compact
trees.c (180208) trees.c (205471)
1/* trees.c -- output deflated data using Huffman coding
1/* trees.c -- output deflated data using Huffman coding
2 * Copyright (C) 1995-2005 Jean-loup Gailly
2 * Copyright (C) 1995-2009 Jean-loup Gailly
3 * detect_data_type() function provided freely by Cosmin Truta, 2006
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/*
7 * ALGORITHM
8 *
9 * The "deflation" process uses several Huffman trees. The more
10 * common source values are represented by shorter bit sequences.

--- 136 unchanged lines hidden (view full) ---

147local void build_tree OF((deflate_state *s, tree_desc *desc));
148local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
149local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
150local int build_bl_tree OF((deflate_state *s));
151local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
152 int blcodes));
153local void compress_block OF((deflate_state *s, ct_data *ltree,
154 ct_data *dtree));
4 * For conditions of distribution and use, see copyright notice in zlib.h
5 */
6
7/*
8 * ALGORITHM
9 *
10 * The "deflation" process uses several Huffman trees. The more
11 * common source values are represented by shorter bit sequences.

--- 136 unchanged lines hidden (view full) ---

148local void build_tree OF((deflate_state *s, tree_desc *desc));
149local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
150local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
151local int build_bl_tree OF((deflate_state *s));
152local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
153 int blcodes));
154local void compress_block OF((deflate_state *s, ct_data *ltree,
155 ct_data *dtree));
155local void set_data_type OF((deflate_state *s));
156local int detect_data_type OF((deflate_state *s));
156local unsigned bi_reverse OF((unsigned value, int length));
157local void bi_windup OF((deflate_state *s));
158local void bi_flush OF((deflate_state *s));
159local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
160 int header));
161
162#ifdef GEN_TREES_H
163local void gen_trees_header OF((void));

--- 34 unchanged lines hidden (view full) ---

198 Assert(length > 0 && length <= 15, "invalid length");
199 s->bits_sent += (ulg)length;
200
201 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
202 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
203 * unused bits in value.
204 */
205 if (s->bi_valid > (int)Buf_size - length) {
157local unsigned bi_reverse OF((unsigned value, int length));
158local void bi_windup OF((deflate_state *s));
159local void bi_flush OF((deflate_state *s));
160local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
161 int header));
162
163#ifdef GEN_TREES_H
164local void gen_trees_header OF((void));

--- 34 unchanged lines hidden (view full) ---

199 Assert(length > 0 && length <= 15, "invalid length");
200 s->bits_sent += (ulg)length;
201
202 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
203 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
204 * unused bits in value.
205 */
206 if (s->bi_valid > (int)Buf_size - length) {
206 s->bi_buf |= (value << s->bi_valid);
207 s->bi_buf |= (ush)value << s->bi_valid;
207 put_short(s, s->bi_buf);
208 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
209 s->bi_valid += length - Buf_size;
210 } else {
208 put_short(s, s->bi_buf);
209 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
210 s->bi_valid += length - Buf_size;
211 } else {
211 s->bi_buf |= value << s->bi_valid;
212 s->bi_buf |= (ush)value << s->bi_valid;
212 s->bi_valid += length;
213 }
214}
215#else /* !DEBUG */
216
217#define send_bits(s, value, length) \
218{ int len = length;\
219 if (s->bi_valid > (int)Buf_size - len) {\
220 int val = value;\
213 s->bi_valid += length;
214 }
215}
216#else /* !DEBUG */
217
218#define send_bits(s, value, length) \
219{ int len = length;\
220 if (s->bi_valid > (int)Buf_size - len) {\
221 int val = value;\
221 s->bi_buf |= (val << s->bi_valid);\
222 s->bi_buf |= (ush)val << s->bi_valid;\
222 put_short(s, s->bi_buf);\
223 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
224 s->bi_valid += len - Buf_size;\
225 } else {\
223 put_short(s, s->bi_buf);\
224 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
225 s->bi_valid += len - Buf_size;\
226 } else {\
226 s->bi_buf |= (value) << s->bi_valid;\
227 s->bi_buf |= (ush)(value) << s->bi_valid;\
227 s->bi_valid += len;\
228 }\
229}
230#endif /* DEBUG */
231
232
233/* the arguments must not have side effects */
234

--- 10 unchanged lines hidden (view full) ---

245 int code; /* code value */
246 int dist; /* distance index */
247 ush bl_count[MAX_BITS+1];
248 /* number of codes at each bit length for an optimal tree */
249
250 if (static_init_done) return;
251
252 /* For some embedded targets, global variables are not initialized: */
228 s->bi_valid += len;\
229 }\
230}
231#endif /* DEBUG */
232
233
234/* the arguments must not have side effects */
235

--- 10 unchanged lines hidden (view full) ---

246 int code; /* code value */
247 int dist; /* distance index */
248 ush bl_count[MAX_BITS+1];
249 /* number of codes at each bit length for an optimal tree */
250
251 if (static_init_done) return;
252
253 /* For some embedded targets, global variables are not initialized: */
254#ifdef NO_INIT_GLOBAL_POINTERS
253 static_l_desc.static_tree = static_ltree;
254 static_l_desc.extra_bits = extra_lbits;
255 static_d_desc.static_tree = static_dtree;
256 static_d_desc.extra_bits = extra_dbits;
257 static_bl_desc.extra_bits = extra_blbits;
255 static_l_desc.static_tree = static_ltree;
256 static_l_desc.extra_bits = extra_lbits;
257 static_d_desc.static_tree = static_dtree;
258 static_d_desc.extra_bits = extra_dbits;
259 static_bl_desc.extra_bits = extra_blbits;
260#endif
258
259 /* Initialize the mapping length (0..255) -> length code (0..28) */
260 length = 0;
261 for (code = 0; code < LENGTH_CODES-1; code++) {
262 base_length[code] = length;
263 for (n = 0; n < (1<<extra_lbits[code]); n++) {
264 _length_code[length++] = (uch)code;
265 }

--- 593 unchanged lines hidden (view full) ---

859
860 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
861 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
862}
863
864/* ===========================================================================
865 * Send a stored block
866 */
261
262 /* Initialize the mapping length (0..255) -> length code (0..28) */
263 length = 0;
264 for (code = 0; code < LENGTH_CODES-1; code++) {
265 base_length[code] = length;
266 for (n = 0; n < (1<<extra_lbits[code]); n++) {
267 _length_code[length++] = (uch)code;
268 }

--- 593 unchanged lines hidden (view full) ---

862
863 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
864 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
865}
866
867/* ===========================================================================
868 * Send a stored block
869 */
867void _tr_stored_block(s, buf, stored_len, eof)
870void _tr_stored_block(s, buf, stored_len, last)
868 deflate_state *s;
869 charf *buf; /* input block */
870 ulg stored_len; /* length of input block */
871 deflate_state *s;
872 charf *buf; /* input block */
873 ulg stored_len; /* length of input block */
871 int eof; /* true if this is the last block for a file */
874 int last; /* one if this is the last block for a file */
872{
875{
873 send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
876 send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */
874#ifdef DEBUG
875 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876 s->compressed_len += (stored_len + 4) << 3;
877#endif
878 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
879}
880
881/* ===========================================================================

--- 31 unchanged lines hidden (view full) ---

913 }
914 s->last_eob_len = 7;
915}
916
917/* ===========================================================================
918 * Determine the best encoding for the current block: dynamic trees, static
919 * trees or store, and output the encoded block to the zip file.
920 */
877#ifdef DEBUG
878 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
879 s->compressed_len += (stored_len + 4) << 3;
880#endif
881 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
882}
883
884/* ===========================================================================

--- 31 unchanged lines hidden (view full) ---

916 }
917 s->last_eob_len = 7;
918}
919
920/* ===========================================================================
921 * Determine the best encoding for the current block: dynamic trees, static
922 * trees or store, and output the encoded block to the zip file.
923 */
921void _tr_flush_block(s, buf, stored_len, eof)
924void _tr_flush_block(s, buf, stored_len, last)
922 deflate_state *s;
923 charf *buf; /* input block, or NULL if too old */
924 ulg stored_len; /* length of input block */
925 deflate_state *s;
926 charf *buf; /* input block, or NULL if too old */
927 ulg stored_len; /* length of input block */
925 int eof; /* true if this is the last block for a file */
928 int last; /* one if this is the last block for a file */
926{
927 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
928 int max_blindex = 0; /* index of last bit length code of non zero freq */
929
930 /* Build the Huffman trees unless a stored block is forced */
931 if (s->level > 0) {
932
933 /* Check if the file is binary or text */
929{
930 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
931 int max_blindex = 0; /* index of last bit length code of non zero freq */
932
933 /* Build the Huffman trees unless a stored block is forced */
934 if (s->level > 0) {
935
936 /* Check if the file is binary or text */
934 if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN)
935 set_data_type(s);
937 if (s->strm->data_type == Z_UNKNOWN)
938 s->strm->data_type = detect_data_type(s);
936
937 /* Construct the literal and distance trees */
938 build_tree(s, (tree_desc *)(&(s->l_desc)));
939 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
940 s->static_len));
941
942 build_tree(s, (tree_desc *)(&(s->d_desc)));
943 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,

--- 29 unchanged lines hidden (view full) ---

973 /* 4: two words for the lengths */
974#endif
975 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
976 * Otherwise we can't have processed more than WSIZE input bytes since
977 * the last block flush, because compression would have been
978 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
979 * transform a block into a stored block.
980 */
939
940 /* Construct the literal and distance trees */
941 build_tree(s, (tree_desc *)(&(s->l_desc)));
942 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
943 s->static_len));
944
945 build_tree(s, (tree_desc *)(&(s->d_desc)));
946 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,

--- 29 unchanged lines hidden (view full) ---

976 /* 4: two words for the lengths */
977#endif
978 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
979 * Otherwise we can't have processed more than WSIZE input bytes since
980 * the last block flush, because compression would have been
981 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
982 * transform a block into a stored block.
983 */
981 _tr_stored_block(s, buf, stored_len, eof);
984 _tr_stored_block(s, buf, stored_len, last);
982
983#ifdef FORCE_STATIC
984 } else if (static_lenb >= 0) { /* force static trees */
985#else
986 } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
987#endif
985
986#ifdef FORCE_STATIC
987 } else if (static_lenb >= 0) { /* force static trees */
988#else
989 } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) {
990#endif
988 send_bits(s, (STATIC_TREES<<1)+eof, 3);
991 send_bits(s, (STATIC_TREES<<1)+last, 3);
989 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
990#ifdef DEBUG
991 s->compressed_len += 3 + s->static_len;
992#endif
993 } else {
992 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
993#ifdef DEBUG
994 s->compressed_len += 3 + s->static_len;
995#endif
996 } else {
994 send_bits(s, (DYN_TREES<<1)+eof, 3);
997 send_bits(s, (DYN_TREES<<1)+last, 3);
995 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
996 max_blindex+1);
997 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
998#ifdef DEBUG
999 s->compressed_len += 3 + s->opt_len;
1000#endif
1001 }
1002 Assert (s->compressed_len == s->bits_sent, "bad compressed size");
1003 /* The above check is made mod 2^32, for files larger than 512 MB
1004 * and uLong implemented on 32 bits.
1005 */
1006 init_block(s);
1007
998 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
999 max_blindex+1);
1000 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
1001#ifdef DEBUG
1002 s->compressed_len += 3 + s->opt_len;
1003#endif
1004 }
1005 Assert (s->compressed_len == s->bits_sent, "bad compressed size");
1006 /* The above check is made mod 2^32, for files larger than 512 MB
1007 * and uLong implemented on 32 bits.
1008 */
1009 init_block(s);
1010
1008 if (eof) {
1011 if (last) {
1009 bi_windup(s);
1010#ifdef DEBUG
1011 s->compressed_len += 7; /* align on byte boundary */
1012#endif
1013 }
1014 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1012 bi_windup(s);
1013#ifdef DEBUG
1014 s->compressed_len += 7; /* align on byte boundary */
1015#endif
1016 }
1017 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1015 s->compressed_len-7*eof));
1018 s->compressed_len-7*last));
1016}
1017
1018/* ===========================================================================
1019 * Save the match info and tally the frequency counts. Return true if
1020 * the current block must be flushed.
1021 */
1022int _tr_tally (s, dist, lc)
1023 deflate_state *s;

--- 89 unchanged lines hidden (view full) ---

1113
1114 } while (lx < s->last_lit);
1115
1116 send_code(s, END_BLOCK, ltree);
1117 s->last_eob_len = ltree[END_BLOCK].Len;
1118}
1119
1120/* ===========================================================================
1019}
1020
1021/* ===========================================================================
1022 * Save the match info and tally the frequency counts. Return true if
1023 * the current block must be flushed.
1024 */
1025int _tr_tally (s, dist, lc)
1026 deflate_state *s;

--- 89 unchanged lines hidden (view full) ---

1116
1117 } while (lx < s->last_lit);
1118
1119 send_code(s, END_BLOCK, ltree);
1120 s->last_eob_len = ltree[END_BLOCK].Len;
1121}
1122
1123/* ===========================================================================
1121 * Set the data type to BINARY or TEXT, using a crude approximation:
1122 * set it to Z_TEXT if all symbols are either printable characters (33 to 255)
1123 * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise.
1124 * Check if the data type is TEXT or BINARY, using the following algorithm:
1125 * - TEXT if the two conditions below are satisfied:
1126 * a) There are no non-portable control characters belonging to the
1127 * "black list" (0..6, 14..25, 28..31).
1128 * b) There is at least one printable character belonging to the
1129 * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255).
1130 * - BINARY otherwise.
1131 * - The following partially-portable control characters form a
1132 * "gray list" that is ignored in this detection algorithm:
1133 * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}).
1124 * IN assertion: the fields Freq of dyn_ltree are set.
1125 */
1134 * IN assertion: the fields Freq of dyn_ltree are set.
1135 */
1126local void set_data_type(s)
1136local int detect_data_type(s)
1127 deflate_state *s;
1128{
1137 deflate_state *s;
1138{
1139 /* black_mask is the bit mask of black-listed bytes
1140 * set bits 0..6, 14..25, and 28..31
1141 * 0xf3ffc07f = binary 11110011111111111100000001111111
1142 */
1143 unsigned long black_mask = 0xf3ffc07fUL;
1129 int n;
1130
1144 int n;
1145
1131 for (n = 0; n < 9; n++)
1146 /* Check for non-textual ("black-listed") bytes. */
1147 for (n = 0; n <= 31; n++, black_mask >>= 1)
1148 if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0))
1149 return Z_BINARY;
1150
1151 /* Check for textual ("white-listed") bytes. */
1152 if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0
1153 || s->dyn_ltree[13].Freq != 0)
1154 return Z_TEXT;
1155 for (n = 32; n < LITERALS; n++)
1132 if (s->dyn_ltree[n].Freq != 0)
1156 if (s->dyn_ltree[n].Freq != 0)
1133 break;
1134 if (n == 9)
1135 for (n = 14; n < 32; n++)
1136 if (s->dyn_ltree[n].Freq != 0)
1137 break;
1138 s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY;
1157 return Z_TEXT;
1158
1159 /* There are no "black-listed" or "white-listed" bytes:
1160 * this stream either is empty or has tolerated ("gray-listed") bytes only.
1161 */
1162 return Z_BINARY;
1139}
1140
1141/* ===========================================================================
1142 * Reverse the first len bits of a code, using straightforward code (a faster
1143 * method would use a table)
1144 * IN assertion: 1 <= len <= 15
1145 */
1146local unsigned bi_reverse(code, len)

--- 73 unchanged lines hidden ---
1163}
1164
1165/* ===========================================================================
1166 * Reverse the first len bits of a code, using straightforward code (a faster
1167 * method would use a table)
1168 * IN assertion: 1 <= len <= 15
1169 */
1170local unsigned bi_reverse(code, len)

--- 73 unchanged lines hidden ---