huffman.c revision 177421
1275970Scy 2275970Scy/*-------------------------------------------------------------*/ 3275970Scy/*--- Huffman coding low-level stuff ---*/ 4275970Scy/*--- huffman.c ---*/ 5275970Scy/*-------------------------------------------------------------*/ 6275970Scy 7275970Scy/* ------------------------------------------------------------------ 8275970Scy This file is part of bzip2/libbzip2, a program and library for 9275970Scy lossless, block-sorting data compression. 10275970Scy 11275970Scy bzip2/libbzip2 version 1.0.5 of 10 December 2007 12275970Scy Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org> 13275970Scy 14275970Scy Please read the WARNING, DISCLAIMER and PATENTS sections in the 15275970Scy README file. 16275970Scy 17275970Scy This program is released under the terms of the license contained 18275970Scy in the file LICENSE. 19275970Scy ------------------------------------------------------------------ */ 20275970Scy 21275970Scy 22275970Scy#include "bzlib_private.h" 23275970Scy 24275970Scy/*---------------------------------------------------*/ 25275970Scy#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 26275970Scy#define DEPTHOF(zz1) ((zz1) & 0x000000ff) 27275970Scy#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 28275970Scy 29275970Scy#define ADDWEIGHTS(zw1,zw2) \ 30275970Scy (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 31275970Scy (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 32275970Scy 33275970Scy#define UPHEAP(z) \ 34275970Scy{ \ 35275970Scy Int32 zz, tmp; \ 36275970Scy zz = z; tmp = heap[zz]; \ 37275970Scy while (weight[tmp] < weight[heap[zz >> 1]]) { \ 38275970Scy heap[zz] = heap[zz >> 1]; \ 39275970Scy zz >>= 1; \ 40275970Scy } \ 41275970Scy heap[zz] = tmp; \ 42275970Scy} 43275970Scy 44275970Scy#define DOWNHEAP(z) \ 45275970Scy{ \ 46275970Scy Int32 zz, yy, tmp; \ 47275970Scy zz = z; tmp = heap[zz]; \ 48275970Scy while (True) { \ 49275970Scy yy = zz << 1; \ 50275970Scy if (yy > nHeap) break; \ 51275970Scy if (yy < nHeap && \ 52275970Scy weight[heap[yy+1]] < weight[heap[yy]]) \ 53275970Scy yy++; \ 54275970Scy if (weight[tmp] < weight[heap[yy]]) break; \ 55275970Scy heap[zz] = heap[yy]; \ 56275970Scy zz = yy; \ 57275970Scy } \ 58275970Scy heap[zz] = tmp; \ 59275970Scy} 60275970Scy 61275970Scy 62275970Scy/*---------------------------------------------------*/ 63275970Scyvoid BZ2_hbMakeCodeLengths ( UChar *len, 64275970Scy Int32 *freq, 65275970Scy Int32 alphaSize, 66275970Scy Int32 maxLen ) 67275970Scy{ 68275970Scy /*-- 69275970Scy Nodes and heap entries run from 1. Entry 0 70275970Scy for both the heap and nodes is a sentinel. 71275970Scy --*/ 72275970Scy Int32 nNodes, nHeap, n1, n2, i, j, k; 73275970Scy Bool tooLong; 74275970Scy 75275970Scy Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 76275970Scy Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 77275970Scy Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 78275970Scy 79275970Scy for (i = 0; i < alphaSize; i++) 80275970Scy weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 81275970Scy 82275970Scy while (True) { 83275970Scy 84 nNodes = alphaSize; 85 nHeap = 0; 86 87 heap[0] = 0; 88 weight[0] = 0; 89 parent[0] = -2; 90 91 for (i = 1; i <= alphaSize; i++) { 92 parent[i] = -1; 93 nHeap++; 94 heap[nHeap] = i; 95 UPHEAP(nHeap); 96 } 97 98 AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 99 100 while (nHeap > 1) { 101 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 102 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 103 nNodes++; 104 parent[n1] = parent[n2] = nNodes; 105 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 106 parent[nNodes] = -1; 107 nHeap++; 108 heap[nHeap] = nNodes; 109 UPHEAP(nHeap); 110 } 111 112 AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 113 114 tooLong = False; 115 for (i = 1; i <= alphaSize; i++) { 116 j = 0; 117 k = i; 118 while (parent[k] >= 0) { k = parent[k]; j++; } 119 len[i-1] = j; 120 if (j > maxLen) tooLong = True; 121 } 122 123 if (! tooLong) break; 124 125 /* 17 Oct 04: keep-going condition for the following loop used 126 to be 'i < alphaSize', which missed the last element, 127 theoretically leading to the possibility of the compressor 128 looping. However, this count-scaling step is only needed if 129 one of the generated Huffman code words is longer than 130 maxLen, which up to and including version 1.0.2 was 20 bits, 131 which is extremely unlikely. In version 1.0.3 maxLen was 132 changed to 17 bits, which has minimal effect on compression 133 ratio, but does mean this scaling step is used from time to 134 time, enough to verify that it works. 135 136 This means that bzip2-1.0.3 and later will only produce 137 Huffman codes with a maximum length of 17 bits. However, in 138 order to preserve backwards compatibility with bitstreams 139 produced by versions pre-1.0.3, the decompressor must still 140 handle lengths of up to 20. */ 141 142 for (i = 1; i <= alphaSize; i++) { 143 j = weight[i] >> 8; 144 j = 1 + (j / 2); 145 weight[i] = j << 8; 146 } 147 } 148} 149 150 151/*---------------------------------------------------*/ 152void BZ2_hbAssignCodes ( Int32 *code, 153 UChar *length, 154 Int32 minLen, 155 Int32 maxLen, 156 Int32 alphaSize ) 157{ 158 Int32 n, vec, i; 159 160 vec = 0; 161 for (n = minLen; n <= maxLen; n++) { 162 for (i = 0; i < alphaSize; i++) 163 if (length[i] == n) { code[i] = vec; vec++; }; 164 vec <<= 1; 165 } 166} 167 168 169/*---------------------------------------------------*/ 170void BZ2_hbCreateDecodeTables ( Int32 *limit, 171 Int32 *base, 172 Int32 *perm, 173 UChar *length, 174 Int32 minLen, 175 Int32 maxLen, 176 Int32 alphaSize ) 177{ 178 Int32 pp, i, j, vec; 179 180 pp = 0; 181 for (i = minLen; i <= maxLen; i++) 182 for (j = 0; j < alphaSize; j++) 183 if (length[j] == i) { perm[pp] = j; pp++; }; 184 185 for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 186 for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 187 188 for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 189 190 for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 191 vec = 0; 192 193 for (i = minLen; i <= maxLen; i++) { 194 vec += (base[i+1] - base[i]); 195 limit[i] = vec-1; 196 vec <<= 1; 197 } 198 for (i = minLen + 1; i <= maxLen; i++) 199 base[i] = ((limit[i-1] + 1) << 1) - base[i]; 200} 201 202 203/*-------------------------------------------------------------*/ 204/*--- end huffman.c ---*/ 205/*-------------------------------------------------------------*/ 206