trees.c (180208) | trees.c (205471) |
---|---|
1/* trees.c -- output deflated data using Huffman coding | 1/* trees.c -- output deflated data using Huffman coding |
2 * Copyright (C) 1995-2005 Jean-loup Gailly | 2 * Copyright (C) 1995-2009 Jean-loup Gailly 3 * detect_data_type() function provided freely by Cosmin Truta, 2006 |
3 * For conditions of distribution and use, see copyright notice in zlib.h 4 */ 5 6/* 7 * ALGORITHM 8 * 9 * The "deflation" process uses several Huffman trees. The more 10 * common source values are represented by shorter bit sequences. --- 136 unchanged lines hidden (view full) --- 147local void build_tree OF((deflate_state *s, tree_desc *desc)); 148local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); 149local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); 150local int build_bl_tree OF((deflate_state *s)); 151local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, 152 int blcodes)); 153local void compress_block OF((deflate_state *s, ct_data *ltree, 154 ct_data *dtree)); | 4 * For conditions of distribution and use, see copyright notice in zlib.h 5 */ 6 7/* 8 * ALGORITHM 9 * 10 * The "deflation" process uses several Huffman trees. The more 11 * common source values are represented by shorter bit sequences. --- 136 unchanged lines hidden (view full) --- 148local void build_tree OF((deflate_state *s, tree_desc *desc)); 149local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code)); 150local void send_tree OF((deflate_state *s, ct_data *tree, int max_code)); 151local int build_bl_tree OF((deflate_state *s)); 152local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes, 153 int blcodes)); 154local void compress_block OF((deflate_state *s, ct_data *ltree, 155 ct_data *dtree)); |
155local void set_data_type OF((deflate_state *s)); | 156local int detect_data_type OF((deflate_state *s)); |
156local unsigned bi_reverse OF((unsigned value, int length)); 157local void bi_windup OF((deflate_state *s)); 158local void bi_flush OF((deflate_state *s)); 159local void copy_block OF((deflate_state *s, charf *buf, unsigned len, 160 int header)); 161 162#ifdef GEN_TREES_H 163local void gen_trees_header OF((void)); --- 34 unchanged lines hidden (view full) --- 198 Assert(length > 0 && length <= 15, "invalid length"); 199 s->bits_sent += (ulg)length; 200 201 /* If not enough room in bi_buf, use (valid) bits from bi_buf and 202 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) 203 * unused bits in value. 204 */ 205 if (s->bi_valid > (int)Buf_size - length) { | 157local unsigned bi_reverse OF((unsigned value, int length)); 158local void bi_windup OF((deflate_state *s)); 159local void bi_flush OF((deflate_state *s)); 160local void copy_block OF((deflate_state *s, charf *buf, unsigned len, 161 int header)); 162 163#ifdef GEN_TREES_H 164local void gen_trees_header OF((void)); --- 34 unchanged lines hidden (view full) --- 199 Assert(length > 0 && length <= 15, "invalid length"); 200 s->bits_sent += (ulg)length; 201 202 /* If not enough room in bi_buf, use (valid) bits from bi_buf and 203 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid)) 204 * unused bits in value. 205 */ 206 if (s->bi_valid > (int)Buf_size - length) { |
206 s->bi_buf |= (value << s->bi_valid); | 207 s->bi_buf |= (ush)value << s->bi_valid; |
207 put_short(s, s->bi_buf); 208 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); 209 s->bi_valid += length - Buf_size; 210 } else { | 208 put_short(s, s->bi_buf); 209 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid); 210 s->bi_valid += length - Buf_size; 211 } else { |
211 s->bi_buf |= value << s->bi_valid; | 212 s->bi_buf |= (ush)value << s->bi_valid; |
212 s->bi_valid += length; 213 } 214} 215#else /* !DEBUG */ 216 217#define send_bits(s, value, length) \ 218{ int len = length;\ 219 if (s->bi_valid > (int)Buf_size - len) {\ 220 int val = value;\ | 213 s->bi_valid += length; 214 } 215} 216#else /* !DEBUG */ 217 218#define send_bits(s, value, length) \ 219{ int len = length;\ 220 if (s->bi_valid > (int)Buf_size - len) {\ 221 int val = value;\ |
221 s->bi_buf |= (val << s->bi_valid);\ | 222 s->bi_buf |= (ush)val << s->bi_valid;\ |
222 put_short(s, s->bi_buf);\ 223 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ 224 s->bi_valid += len - Buf_size;\ 225 } else {\ | 223 put_short(s, s->bi_buf);\ 224 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\ 225 s->bi_valid += len - Buf_size;\ 226 } else {\ |
226 s->bi_buf |= (value) << s->bi_valid;\ | 227 s->bi_buf |= (ush)(value) << s->bi_valid;\ |
227 s->bi_valid += len;\ 228 }\ 229} 230#endif /* DEBUG */ 231 232 233/* the arguments must not have side effects */ 234 --- 10 unchanged lines hidden (view full) --- 245 int code; /* code value */ 246 int dist; /* distance index */ 247 ush bl_count[MAX_BITS+1]; 248 /* number of codes at each bit length for an optimal tree */ 249 250 if (static_init_done) return; 251 252 /* For some embedded targets, global variables are not initialized: */ | 228 s->bi_valid += len;\ 229 }\ 230} 231#endif /* DEBUG */ 232 233 234/* the arguments must not have side effects */ 235 --- 10 unchanged lines hidden (view full) --- 246 int code; /* code value */ 247 int dist; /* distance index */ 248 ush bl_count[MAX_BITS+1]; 249 /* number of codes at each bit length for an optimal tree */ 250 251 if (static_init_done) return; 252 253 /* For some embedded targets, global variables are not initialized: */ |
254#ifdef NO_INIT_GLOBAL_POINTERS |
|
253 static_l_desc.static_tree = static_ltree; 254 static_l_desc.extra_bits = extra_lbits; 255 static_d_desc.static_tree = static_dtree; 256 static_d_desc.extra_bits = extra_dbits; 257 static_bl_desc.extra_bits = extra_blbits; | 255 static_l_desc.static_tree = static_ltree; 256 static_l_desc.extra_bits = extra_lbits; 257 static_d_desc.static_tree = static_dtree; 258 static_d_desc.extra_bits = extra_dbits; 259 static_bl_desc.extra_bits = extra_blbits; |
260#endif |
|
258 259 /* Initialize the mapping length (0..255) -> length code (0..28) */ 260 length = 0; 261 for (code = 0; code < LENGTH_CODES-1; code++) { 262 base_length[code] = length; 263 for (n = 0; n < (1<<extra_lbits[code]); n++) { 264 _length_code[length++] = (uch)code; 265 } --- 593 unchanged lines hidden (view full) --- 859 860 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ 861 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); 862} 863 864/* =========================================================================== 865 * Send a stored block 866 */ | 261 262 /* Initialize the mapping length (0..255) -> length code (0..28) */ 263 length = 0; 264 for (code = 0; code < LENGTH_CODES-1; code++) { 265 base_length[code] = length; 266 for (n = 0; n < (1<<extra_lbits[code]); n++) { 267 _length_code[length++] = (uch)code; 268 } --- 593 unchanged lines hidden (view full) --- 862 863 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */ 864 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent)); 865} 866 867/* =========================================================================== 868 * Send a stored block 869 */ |
867void _tr_stored_block(s, buf, stored_len, eof) | 870void _tr_stored_block(s, buf, stored_len, last) |
868 deflate_state *s; 869 charf *buf; /* input block */ 870 ulg stored_len; /* length of input block */ | 871 deflate_state *s; 872 charf *buf; /* input block */ 873 ulg stored_len; /* length of input block */ |
871 int eof; /* true if this is the last block for a file */ | 874 int last; /* one if this is the last block for a file */ |
872{ | 875{ |
873 send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */ | 876 send_bits(s, (STORED_BLOCK<<1)+last, 3); /* send block type */ |
874#ifdef DEBUG 875 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; 876 s->compressed_len += (stored_len + 4) << 3; 877#endif 878 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ 879} 880 881/* =========================================================================== --- 31 unchanged lines hidden (view full) --- 913 } 914 s->last_eob_len = 7; 915} 916 917/* =========================================================================== 918 * Determine the best encoding for the current block: dynamic trees, static 919 * trees or store, and output the encoded block to the zip file. 920 */ | 877#ifdef DEBUG 878 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L; 879 s->compressed_len += (stored_len + 4) << 3; 880#endif 881 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */ 882} 883 884/* =========================================================================== --- 31 unchanged lines hidden (view full) --- 916 } 917 s->last_eob_len = 7; 918} 919 920/* =========================================================================== 921 * Determine the best encoding for the current block: dynamic trees, static 922 * trees or store, and output the encoded block to the zip file. 923 */ |
921void _tr_flush_block(s, buf, stored_len, eof) | 924void _tr_flush_block(s, buf, stored_len, last) |
922 deflate_state *s; 923 charf *buf; /* input block, or NULL if too old */ 924 ulg stored_len; /* length of input block */ | 925 deflate_state *s; 926 charf *buf; /* input block, or NULL if too old */ 927 ulg stored_len; /* length of input block */ |
925 int eof; /* true if this is the last block for a file */ | 928 int last; /* one if this is the last block for a file */ |
926{ 927 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ 928 int max_blindex = 0; /* index of last bit length code of non zero freq */ 929 930 /* Build the Huffman trees unless a stored block is forced */ 931 if (s->level > 0) { 932 933 /* Check if the file is binary or text */ | 929{ 930 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ 931 int max_blindex = 0; /* index of last bit length code of non zero freq */ 932 933 /* Build the Huffman trees unless a stored block is forced */ 934 if (s->level > 0) { 935 936 /* Check if the file is binary or text */ |
934 if (stored_len > 0 && s->strm->data_type == Z_UNKNOWN) 935 set_data_type(s); | 937 if (s->strm->data_type == Z_UNKNOWN) 938 s->strm->data_type = detect_data_type(s); |
936 937 /* Construct the literal and distance trees */ 938 build_tree(s, (tree_desc *)(&(s->l_desc))); 939 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, 940 s->static_len)); 941 942 build_tree(s, (tree_desc *)(&(s->d_desc))); 943 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, --- 29 unchanged lines hidden (view full) --- 973 /* 4: two words for the lengths */ 974#endif 975 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 976 * Otherwise we can't have processed more than WSIZE input bytes since 977 * the last block flush, because compression would have been 978 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to 979 * transform a block into a stored block. 980 */ | 939 940 /* Construct the literal and distance trees */ 941 build_tree(s, (tree_desc *)(&(s->l_desc))); 942 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len, 943 s->static_len)); 944 945 build_tree(s, (tree_desc *)(&(s->d_desc))); 946 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len, --- 29 unchanged lines hidden (view full) --- 976 /* 4: two words for the lengths */ 977#endif 978 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. 979 * Otherwise we can't have processed more than WSIZE input bytes since 980 * the last block flush, because compression would have been 981 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to 982 * transform a block into a stored block. 983 */ |
981 _tr_stored_block(s, buf, stored_len, eof); | 984 _tr_stored_block(s, buf, stored_len, last); |
982 983#ifdef FORCE_STATIC 984 } else if (static_lenb >= 0) { /* force static trees */ 985#else 986 } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { 987#endif | 985 986#ifdef FORCE_STATIC 987 } else if (static_lenb >= 0) { /* force static trees */ 988#else 989 } else if (s->strategy == Z_FIXED || static_lenb == opt_lenb) { 990#endif |
988 send_bits(s, (STATIC_TREES<<1)+eof, 3); | 991 send_bits(s, (STATIC_TREES<<1)+last, 3); |
989 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); 990#ifdef DEBUG 991 s->compressed_len += 3 + s->static_len; 992#endif 993 } else { | 992 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree); 993#ifdef DEBUG 994 s->compressed_len += 3 + s->static_len; 995#endif 996 } else { |
994 send_bits(s, (DYN_TREES<<1)+eof, 3); | 997 send_bits(s, (DYN_TREES<<1)+last, 3); |
995 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, 996 max_blindex+1); 997 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); 998#ifdef DEBUG 999 s->compressed_len += 3 + s->opt_len; 1000#endif 1001 } 1002 Assert (s->compressed_len == s->bits_sent, "bad compressed size"); 1003 /* The above check is made mod 2^32, for files larger than 512 MB 1004 * and uLong implemented on 32 bits. 1005 */ 1006 init_block(s); 1007 | 998 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1, 999 max_blindex+1); 1000 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree); 1001#ifdef DEBUG 1002 s->compressed_len += 3 + s->opt_len; 1003#endif 1004 } 1005 Assert (s->compressed_len == s->bits_sent, "bad compressed size"); 1006 /* The above check is made mod 2^32, for files larger than 512 MB 1007 * and uLong implemented on 32 bits. 1008 */ 1009 init_block(s); 1010 |
1008 if (eof) { | 1011 if (last) { |
1009 bi_windup(s); 1010#ifdef DEBUG 1011 s->compressed_len += 7; /* align on byte boundary */ 1012#endif 1013 } 1014 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, | 1012 bi_windup(s); 1013#ifdef DEBUG 1014 s->compressed_len += 7; /* align on byte boundary */ 1015#endif 1016 } 1017 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3, |
1015 s->compressed_len-7*eof)); | 1018 s->compressed_len-7*last)); |
1016} 1017 1018/* =========================================================================== 1019 * Save the match info and tally the frequency counts. Return true if 1020 * the current block must be flushed. 1021 */ 1022int _tr_tally (s, dist, lc) 1023 deflate_state *s; --- 89 unchanged lines hidden (view full) --- 1113 1114 } while (lx < s->last_lit); 1115 1116 send_code(s, END_BLOCK, ltree); 1117 s->last_eob_len = ltree[END_BLOCK].Len; 1118} 1119 1120/* =========================================================================== | 1019} 1020 1021/* =========================================================================== 1022 * Save the match info and tally the frequency counts. Return true if 1023 * the current block must be flushed. 1024 */ 1025int _tr_tally (s, dist, lc) 1026 deflate_state *s; --- 89 unchanged lines hidden (view full) --- 1116 1117 } while (lx < s->last_lit); 1118 1119 send_code(s, END_BLOCK, ltree); 1120 s->last_eob_len = ltree[END_BLOCK].Len; 1121} 1122 1123/* =========================================================================== |
1121 * Set the data type to BINARY or TEXT, using a crude approximation: 1122 * set it to Z_TEXT if all symbols are either printable characters (33 to 255) 1123 * or white spaces (9 to 13, or 32); or set it to Z_BINARY otherwise. | 1124 * Check if the data type is TEXT or BINARY, using the following algorithm: 1125 * - TEXT if the two conditions below are satisfied: 1126 * a) There are no non-portable control characters belonging to the 1127 * "black list" (0..6, 14..25, 28..31). 1128 * b) There is at least one printable character belonging to the 1129 * "white list" (9 {TAB}, 10 {LF}, 13 {CR}, 32..255). 1130 * - BINARY otherwise. 1131 * - The following partially-portable control characters form a 1132 * "gray list" that is ignored in this detection algorithm: 1133 * (7 {BEL}, 8 {BS}, 11 {VT}, 12 {FF}, 26 {SUB}, 27 {ESC}). |
1124 * IN assertion: the fields Freq of dyn_ltree are set. 1125 */ | 1134 * IN assertion: the fields Freq of dyn_ltree are set. 1135 */ |
1126local void set_data_type(s) | 1136local int detect_data_type(s) |
1127 deflate_state *s; 1128{ | 1137 deflate_state *s; 1138{ |
1139 /* black_mask is the bit mask of black-listed bytes 1140 * set bits 0..6, 14..25, and 28..31 1141 * 0xf3ffc07f = binary 11110011111111111100000001111111 1142 */ 1143 unsigned long black_mask = 0xf3ffc07fUL; |
|
1129 int n; 1130 | 1144 int n; 1145 |
1131 for (n = 0; n < 9; n++) | 1146 /* Check for non-textual ("black-listed") bytes. */ 1147 for (n = 0; n <= 31; n++, black_mask >>= 1) 1148 if ((black_mask & 1) && (s->dyn_ltree[n].Freq != 0)) 1149 return Z_BINARY; 1150 1151 /* Check for textual ("white-listed") bytes. */ 1152 if (s->dyn_ltree[9].Freq != 0 || s->dyn_ltree[10].Freq != 0 1153 || s->dyn_ltree[13].Freq != 0) 1154 return Z_TEXT; 1155 for (n = 32; n < LITERALS; n++) |
1132 if (s->dyn_ltree[n].Freq != 0) | 1156 if (s->dyn_ltree[n].Freq != 0) |
1133 break; 1134 if (n == 9) 1135 for (n = 14; n < 32; n++) 1136 if (s->dyn_ltree[n].Freq != 0) 1137 break; 1138 s->strm->data_type = (n == 32) ? Z_TEXT : Z_BINARY; | 1157 return Z_TEXT; 1158 1159 /* There are no "black-listed" or "white-listed" bytes: 1160 * this stream either is empty or has tolerated ("gray-listed") bytes only. 1161 */ 1162 return Z_BINARY; |
1139} 1140 1141/* =========================================================================== 1142 * Reverse the first len bits of a code, using straightforward code (a faster 1143 * method would use a table) 1144 * IN assertion: 1 <= len <= 15 1145 */ 1146local unsigned bi_reverse(code, len) --- 73 unchanged lines hidden --- | 1163} 1164 1165/* =========================================================================== 1166 * Reverse the first len bits of a code, using straightforward code (a faster 1167 * method would use a table) 1168 * IN assertion: 1 <= len <= 15 1169 */ 1170local unsigned bi_reverse(code, len) --- 73 unchanged lines hidden --- |