1/* $NetBSD: huffman.c,v 1.1.1.3 2019/07/21 11:35:30 maya Exp $ */ 2 3 4/*-------------------------------------------------------------*/ 5/*--- Huffman coding low-level stuff ---*/ 6/*--- huffman.c ---*/ 7/*-------------------------------------------------------------*/ 8 9/* ------------------------------------------------------------------ 10 This file is part of bzip2/libbzip2, a program and library for 11 lossless, block-sorting data compression. 12 13 bzip2/libbzip2 version 1.0.8 of 13 July 2019 14 Copyright (C) 1996-2019 Julian Seward <jseward@acm.org> 15 16 Please read the WARNING, DISCLAIMER and PATENTS sections in the 17 README file. 18 19 This program is released under the terms of the license contained 20 in the file LICENSE. 21 ------------------------------------------------------------------ */ 22 23 24#include "bzlib_private.h" 25 26/*---------------------------------------------------*/ 27#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) 28#define DEPTHOF(zz1) ((zz1) & 0x000000ff) 29#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) 30 31#define ADDWEIGHTS(zw1,zw2) \ 32 (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ 33 (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) 34 35#define UPHEAP(z) \ 36{ \ 37 Int32 zz, tmp; \ 38 zz = z; tmp = heap[zz]; \ 39 while (weight[tmp] < weight[heap[zz >> 1]]) { \ 40 heap[zz] = heap[zz >> 1]; \ 41 zz >>= 1; \ 42 } \ 43 heap[zz] = tmp; \ 44} 45 46#define DOWNHEAP(z) \ 47{ \ 48 Int32 zz, yy, tmp; \ 49 zz = z; tmp = heap[zz]; \ 50 while (True) { \ 51 yy = zz << 1; \ 52 if (yy > nHeap) break; \ 53 if (yy < nHeap && \ 54 weight[heap[yy+1]] < weight[heap[yy]]) \ 55 yy++; \ 56 if (weight[tmp] < weight[heap[yy]]) break; \ 57 heap[zz] = heap[yy]; \ 58 zz = yy; \ 59 } \ 60 heap[zz] = tmp; \ 61} 62 63 64/*---------------------------------------------------*/ 65void BZ2_hbMakeCodeLengths ( UChar *len, 66 Int32 *freq, 67 Int32 alphaSize, 68 Int32 maxLen ) 69{ 70 /*-- 71 Nodes and heap entries run from 1. Entry 0 72 for both the heap and nodes is a sentinel. 73 --*/ 74 Int32 nNodes, nHeap, n1, n2, i, j, k; 75 Bool tooLong; 76 77 Int32 heap [ BZ_MAX_ALPHA_SIZE + 2 ]; 78 Int32 weight [ BZ_MAX_ALPHA_SIZE * 2 ]; 79 Int32 parent [ BZ_MAX_ALPHA_SIZE * 2 ]; 80 81 for (i = 0; i < alphaSize; i++) 82 weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; 83 84 while (True) { 85 86 nNodes = alphaSize; 87 nHeap = 0; 88 89 heap[0] = 0; 90 weight[0] = 0; 91 parent[0] = -2; 92 93 for (i = 1; i <= alphaSize; i++) { 94 parent[i] = -1; 95 nHeap++; 96 heap[nHeap] = i; 97 UPHEAP(nHeap); 98 } 99 100 AssertH( nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001 ); 101 102 while (nHeap > 1) { 103 n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 104 n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP(1); 105 nNodes++; 106 parent[n1] = parent[n2] = nNodes; 107 weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); 108 parent[nNodes] = -1; 109 nHeap++; 110 heap[nHeap] = nNodes; 111 UPHEAP(nHeap); 112 } 113 114 AssertH( nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002 ); 115 116 tooLong = False; 117 for (i = 1; i <= alphaSize; i++) { 118 j = 0; 119 k = i; 120 while (parent[k] >= 0) { k = parent[k]; j++; } 121 len[i-1] = j; 122 if (j > maxLen) tooLong = True; 123 } 124 125 if (! tooLong) break; 126 127 /* 17 Oct 04: keep-going condition for the following loop used 128 to be 'i < alphaSize', which missed the last element, 129 theoretically leading to the possibility of the compressor 130 looping. However, this count-scaling step is only needed if 131 one of the generated Huffman code words is longer than 132 maxLen, which up to and including version 1.0.2 was 20 bits, 133 which is extremely unlikely. In version 1.0.3 maxLen was 134 changed to 17 bits, which has minimal effect on compression 135 ratio, but does mean this scaling step is used from time to 136 time, enough to verify that it works. 137 138 This means that bzip2-1.0.3 and later will only produce 139 Huffman codes with a maximum length of 17 bits. However, in 140 order to preserve backwards compatibility with bitstreams 141 produced by versions pre-1.0.3, the decompressor must still 142 handle lengths of up to 20. */ 143 144 for (i = 1; i <= alphaSize; i++) { 145 j = weight[i] >> 8; 146 j = 1 + (j / 2); 147 weight[i] = j << 8; 148 } 149 } 150} 151 152 153/*---------------------------------------------------*/ 154void BZ2_hbAssignCodes ( Int32 *code, 155 UChar *length, 156 Int32 minLen, 157 Int32 maxLen, 158 Int32 alphaSize ) 159{ 160 Int32 n, vec, i; 161 162 vec = 0; 163 for (n = minLen; n <= maxLen; n++) { 164 for (i = 0; i < alphaSize; i++) 165 if (length[i] == n) { code[i] = vec; vec++; }; 166 vec <<= 1; 167 } 168} 169 170 171/*---------------------------------------------------*/ 172void BZ2_hbCreateDecodeTables ( Int32 *limit, 173 Int32 *base, 174 Int32 *perm, 175 UChar *length, 176 Int32 minLen, 177 Int32 maxLen, 178 Int32 alphaSize ) 179{ 180 Int32 pp, i, j, vec; 181 182 pp = 0; 183 for (i = minLen; i <= maxLen; i++) 184 for (j = 0; j < alphaSize; j++) 185 if (length[j] == i) { perm[pp] = j; pp++; }; 186 187 for (i = 0; i < BZ_MAX_CODE_LEN; i++) base[i] = 0; 188 for (i = 0; i < alphaSize; i++) base[length[i]+1]++; 189 190 for (i = 1; i < BZ_MAX_CODE_LEN; i++) base[i] += base[i-1]; 191 192 for (i = 0; i < BZ_MAX_CODE_LEN; i++) limit[i] = 0; 193 vec = 0; 194 195 for (i = minLen; i <= maxLen; i++) { 196 vec += (base[i+1] - base[i]); 197 limit[i] = vec-1; 198 vec <<= 1; 199 } 200 for (i = minLen + 1; i <= maxLen; i++) 201 base[i] = ((limit[i-1] + 1) << 1) - base[i]; 202} 203 204 205/*-------------------------------------------------------------*/ 206/*--- end huffman.c ---*/ 207/*-------------------------------------------------------------*/ 208