huffman.c revision 177421
1275970Scy
2275970Scy/*-------------------------------------------------------------*/
3275970Scy/*--- Huffman coding low-level stuff                        ---*/
4275970Scy/*---                                             huffman.c ---*/
5275970Scy/*-------------------------------------------------------------*/
6275970Scy
7275970Scy/* ------------------------------------------------------------------
8275970Scy   This file is part of bzip2/libbzip2, a program and library for
9275970Scy   lossless, block-sorting data compression.
10275970Scy
11275970Scy   bzip2/libbzip2 version 1.0.5 of 10 December 2007
12275970Scy   Copyright (C) 1996-2007 Julian Seward <jseward@bzip.org>
13275970Scy
14275970Scy   Please read the WARNING, DISCLAIMER and PATENTS sections in the
15275970Scy   README file.
16275970Scy
17275970Scy   This program is released under the terms of the license contained
18275970Scy   in the file LICENSE.
19275970Scy   ------------------------------------------------------------------ */
20275970Scy
21275970Scy
22275970Scy#include "bzlib_private.h"
23275970Scy
24275970Scy/*---------------------------------------------------*/
25275970Scy#define WEIGHTOF(zz0)  ((zz0) & 0xffffff00)
26275970Scy#define DEPTHOF(zz1)   ((zz1) & 0x000000ff)
27275970Scy#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3))
28275970Scy
29275970Scy#define ADDWEIGHTS(zw1,zw2)                           \
30275970Scy   (WEIGHTOF(zw1)+WEIGHTOF(zw2)) |                    \
31275970Scy   (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2)))
32275970Scy
33275970Scy#define UPHEAP(z)                                     \
34275970Scy{                                                     \
35275970Scy   Int32 zz, tmp;                                     \
36275970Scy   zz = z; tmp = heap[zz];                            \
37275970Scy   while (weight[tmp] < weight[heap[zz >> 1]]) {      \
38275970Scy      heap[zz] = heap[zz >> 1];                       \
39275970Scy      zz >>= 1;                                       \
40275970Scy   }                                                  \
41275970Scy   heap[zz] = tmp;                                    \
42275970Scy}
43275970Scy
44275970Scy#define DOWNHEAP(z)                                   \
45275970Scy{                                                     \
46275970Scy   Int32 zz, yy, tmp;                                 \
47275970Scy   zz = z; tmp = heap[zz];                            \
48275970Scy   while (True) {                                     \
49275970Scy      yy = zz << 1;                                   \
50275970Scy      if (yy > nHeap) break;                          \
51275970Scy      if (yy < nHeap &&                               \
52275970Scy          weight[heap[yy+1]] < weight[heap[yy]])      \
53275970Scy         yy++;                                        \
54275970Scy      if (weight[tmp] < weight[heap[yy]]) break;      \
55275970Scy      heap[zz] = heap[yy];                            \
56275970Scy      zz = yy;                                        \
57275970Scy   }                                                  \
58275970Scy   heap[zz] = tmp;                                    \
59275970Scy}
60275970Scy
61275970Scy
62275970Scy/*---------------------------------------------------*/
63275970Scyvoid BZ2_hbMakeCodeLengths ( UChar *len,
64275970Scy                             Int32 *freq,
65275970Scy                             Int32 alphaSize,
66275970Scy                             Int32 maxLen )
67275970Scy{
68275970Scy   /*--
69275970Scy      Nodes and heap entries run from 1.  Entry 0
70275970Scy      for both the heap and nodes is a sentinel.
71275970Scy   --*/
72275970Scy   Int32 nNodes, nHeap, n1, n2, i, j, k;
73275970Scy   Bool  tooLong;
74275970Scy
75275970Scy   Int32 heap   [ BZ_MAX_ALPHA_SIZE + 2 ];
76275970Scy   Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ];
77275970Scy   Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ];
78275970Scy
79275970Scy   for (i = 0; i < alphaSize; i++)
80275970Scy      weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
81275970Scy
82275970Scy   while (True) {
83275970Scy
84      nNodes = alphaSize;
85      nHeap = 0;
86
87      heap[0] = 0;
88      weight[0] = 0;
89      parent[0] = -2;
90
91      for (i = 1; i <= alphaSize; i++) {
92         parent[i] = -1;
93         nHeap++;
94         heap[nHeap] = i;
95         UPHEAP(nHeap);
96      }
97
98      AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 );
99
100      while (nHeap > 1) {
101         n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
102         n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1);
103         nNodes++;
104         parent[n1] = parent[n2] = nNodes;
105         weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]);
106         parent[nNodes] = -1;
107         nHeap++;
108         heap[nHeap] = nNodes;
109         UPHEAP(nHeap);
110      }
111
112      AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 );
113
114      tooLong = False;
115      for (i = 1; i <= alphaSize; i++) {
116         j = 0;
117         k = i;
118         while (parent[k] >= 0) { k = parent[k]; j++; }
119         len[i-1] = j;
120         if (j > maxLen) tooLong = True;
121      }
122
123      if (! tooLong) break;
124
125      /* 17 Oct 04: keep-going condition for the following loop used
126         to be 'i < alphaSize', which missed the last element,
127         theoretically leading to the possibility of the compressor
128         looping.  However, this count-scaling step is only needed if
129         one of the generated Huffman code words is longer than
130         maxLen, which up to and including version 1.0.2 was 20 bits,
131         which is extremely unlikely.  In version 1.0.3 maxLen was
132         changed to 17 bits, which has minimal effect on compression
133         ratio, but does mean this scaling step is used from time to
134         time, enough to verify that it works.
135
136         This means that bzip2-1.0.3 and later will only produce
137         Huffman codes with a maximum length of 17 bits.  However, in
138         order to preserve backwards compatibility with bitstreams
139         produced by versions pre-1.0.3, the decompressor must still
140         handle lengths of up to 20. */
141
142      for (i = 1; i <= alphaSize; i++) {
143         j = weight[i] >> 8;
144         j = 1 + (j / 2);
145         weight[i] = j << 8;
146      }
147   }
148}
149
150
151/*---------------------------------------------------*/
152void BZ2_hbAssignCodes ( Int32 *code,
153                         UChar *length,
154                         Int32 minLen,
155                         Int32 maxLen,
156                         Int32 alphaSize )
157{
158   Int32 n, vec, i;
159
160   vec = 0;
161   for (n = minLen; n <= maxLen; n++) {
162      for (i = 0; i < alphaSize; i++)
163         if (length[i] == n) { code[i] = vec; vec++; };
164      vec <<= 1;
165   }
166}
167
168
169/*---------------------------------------------------*/
170void BZ2_hbCreateDecodeTables ( Int32 *limit,
171                                Int32 *base,
172                                Int32 *perm,
173                                UChar *length,
174                                Int32 minLen,
175                                Int32 maxLen,
176                                Int32 alphaSize )
177{
178   Int32 pp, i, j, vec;
179
180   pp = 0;
181   for (i = minLen; i <= maxLen; i++)
182      for (j = 0; j < alphaSize; j++)
183         if (length[j] == i) { perm[pp] = j; pp++; };
184
185   for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0;
186   for (i = 0; i < alphaSize; i++) base[length[i]+1]++;
187
188   for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1];
189
190   for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0;
191   vec = 0;
192
193   for (i = minLen; i <= maxLen; i++) {
194      vec += (base[i+1] - base[i]);
195      limit[i] = vec-1;
196      vec <<= 1;
197   }
198   for (i = minLen + 1; i <= maxLen; i++)
199      base[i] = ((limit[i-1] + 1) << 1) - base[i];
200}
201
202
203/*-------------------------------------------------------------*/
204/*--- end                                         huffman.c ---*/
205/*-------------------------------------------------------------*/
206