compress.c revision 146293
1202375Srdivacky 2202375Srdivacky/*-------------------------------------------------------------*/ 3202375Srdivacky/*--- Compression machinery (not incl block sorting) ---*/ 4202375Srdivacky/*--- compress.c ---*/ 5202375Srdivacky/*-------------------------------------------------------------*/ 6202375Srdivacky 7202375Srdivacky/*-- 8202375Srdivacky This file is a part of bzip2 and/or libbzip2, a program and 9202375Srdivacky library for lossless, block-sorting data compression. 10202375Srdivacky 11202375Srdivacky Copyright (C) 1996-2005 Julian R Seward. All rights reserved. 12202375Srdivacky 13202375Srdivacky Redistribution and use in source and binary forms, with or without 14202375Srdivacky modification, are permitted provided that the following conditions 15263508Sdim are met: 16202375Srdivacky 17249423Sdim 1. Redistributions of source code must retain the above copyright 18202375Srdivacky notice, this list of conditions and the following disclaimer. 19202375Srdivacky 20202375Srdivacky 2. The origin of this software must not be misrepresented; you must 21202375Srdivacky not claim that you wrote the original software. If you use this 22202375Srdivacky software in a product, an acknowledgment in the product 23249423Sdim documentation would be appreciated but is not required. 24249423Sdim 25249423Sdim 3. Altered source versions must be plainly marked as such, and must 26249423Sdim not be misrepresented as being the original software. 27249423Sdim 28251662Sdim 4. The name of the author may not be used to endorse or promote 29249423Sdim products derived from this software without specific prior written 30251662Sdim permission. 31249423Sdim 32249423Sdim THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 33249423Sdim OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 34249423Sdim WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 35249423Sdim ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 36249423Sdim DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 37249423Sdim DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 38249423Sdim GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 39249423Sdim INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 40249423Sdim WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 41251662Sdim NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 42249423Sdim SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 43249423Sdim 44249423Sdim Julian Seward, Cambridge, UK. 45249423Sdim jseward@bzip.org 46251662Sdim bzip2/libbzip2 version 1.0 of 21 March 2000 47249423Sdim 48249423Sdim This program is based on (at least) the work of: 49249423Sdim Mike Burrows 50251662Sdim David Wheeler 51249423Sdim Peter Fenwick 52249423Sdim Alistair Moffat 53251662Sdim Radford Neal 54249423Sdim Ian H. Witten 55249423Sdim Robert Sedgewick 56249423Sdim Jon L. Bentley 57249423Sdim 58249423Sdim For more information on these sources, see the manual. 59249423Sdim--*/ 60251662Sdim 61249423Sdim/*-- 62249423Sdim CHANGES 63249423Sdim ~~~~~~~ 64249423Sdim 0.9.0 -- original version. 65251662Sdim 66249423Sdim 0.9.0a/b -- no changes in this file. 67249423Sdim 68249423Sdim 0.9.0c 69249423Sdim * changed setting of nGroups in sendMTFValues() so as to 70249423Sdim do a bit better on small files 71249423Sdim--*/ 72249423Sdim 73249423Sdim#include "bzlib_private.h" 74249423Sdim 75249423Sdim 76249423Sdim/*---------------------------------------------------*/ 77249423Sdim/*--- Bit stream I/O ---*/ 78251662Sdim/*---------------------------------------------------*/ 79251662Sdim 80251662Sdim/*---------------------------------------------------*/ 81251662Sdimvoid BZ2_bsInitWrite ( EState* s ) 82251662Sdim{ 83249423Sdim s->bsLive = 0; 84249423Sdim s->bsBuff = 0; 85249423Sdim} 86251662Sdim 87249423Sdim 88249423Sdim/*---------------------------------------------------*/ 89249423Sdimstatic 90249423Sdimvoid bsFinishWrite ( EState* s ) 91251662Sdim{ 92249423Sdim while (s->bsLive > 0) { 93249423Sdim s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24); 94249423Sdim s->numZ++; 95249423Sdim s->bsBuff <<= 8; 96251662Sdim s->bsLive -= 8; 97249423Sdim } 98249423Sdim} 99251662Sdim 100249423Sdim 101249423Sdim/*---------------------------------------------------*/ 102249423Sdim#define bsNEEDW(nz) \ 103249423Sdim{ \ 104249423Sdim while (s->bsLive >= 8) { \ 105249423Sdim s->zbits[s->numZ] \ 106249423Sdim = (UChar)(s->bsBuff >> 24); \ 107249423Sdim s->numZ++; \ 108251662Sdim s->bsBuff <<= 8; \ 109249423Sdim s->bsLive -= 8; \ 110249423Sdim } \ 111249423Sdim} 112249423Sdim 113249423Sdim 114249423Sdim/*---------------------------------------------------*/ 115249423Sdimstatic 116251662Sdim__inline__ 117249423Sdimvoid bsW ( EState* s, Int32 n, UInt32 v ) 118249423Sdim{ 119251662Sdim bsNEEDW ( n ); 120249423Sdim s->bsBuff |= (v << (32 - s->bsLive - n)); 121249423Sdim s->bsLive += n; 122249423Sdim} 123249423Sdim 124249423Sdim 125249423Sdim/*---------------------------------------------------*/ 126249423Sdimstatic 127249423Sdimvoid bsPutUInt32 ( EState* s, UInt32 u ) 128251662Sdim{ 129249423Sdim bsW ( s, 8, (u >> 24) & 0xffL ); 130251662Sdim bsW ( s, 8, (u >> 16) & 0xffL ); 131249423Sdim bsW ( s, 8, (u >> 8) & 0xffL ); 132249423Sdim bsW ( s, 8, u & 0xffL ); 133249423Sdim} 134251662Sdim 135249423Sdim 136249423Sdim/*---------------------------------------------------*/ 137249423Sdimstatic 138251662Sdimvoid bsPutUChar ( EState* s, UChar c ) 139249423Sdim{ 140249423Sdim bsW( s, 8, (UInt32)c ); 141249423Sdim} 142249423Sdim 143249423Sdim 144249423Sdim/*---------------------------------------------------*/ 145249423Sdim/*--- The back end proper ---*/ 146251662Sdim/*---------------------------------------------------*/ 147249423Sdim 148249423Sdim/*---------------------------------------------------*/ 149249423Sdimstatic 150249423Sdimvoid makeMaps_e ( EState* s ) 151251662Sdim{ 152249423Sdim Int32 i; 153249423Sdim s->nInUse = 0; 154249423Sdim for (i = 0; i < 256; i++) 155249423Sdim if (s->inUse[i]) { 156249423Sdim s->unseqToSeq[i] = s->nInUse; 157249423Sdim s->nInUse++; 158249423Sdim } 159251662Sdim} 160249423Sdim 161249423Sdim 162251662Sdim/*---------------------------------------------------*/ 163249423Sdimstatic 164249423Sdimvoid generateMTFValues ( EState* s ) 165249423Sdim{ 166249423Sdim UChar yy[256]; 167249423Sdim Int32 i, j; 168249423Sdim Int32 zPend; 169251662Sdim Int32 wr; 170249423Sdim Int32 EOB; 171249423Sdim 172249423Sdim /* 173249423Sdim After sorting (eg, here), 174249423Sdim s->arr1 [ 0 .. s->nblock-1 ] holds sorted order, 175249423Sdim and 176249423Sdim ((UChar*)s->arr2) [ 0 .. s->nblock-1 ] 177249423Sdim holds the original block data. 178249423Sdim 179251662Sdim The first thing to do is generate the MTF values, 180249423Sdim and put them in 181249423Sdim ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ]. 182251662Sdim Because there are strictly fewer or equal MTF values 183249423Sdim than block values, ptr values in this area are overwritten 184249423Sdim with MTF values only when they are no longer needed. 185249423Sdim 186249423Sdim The final compressed bitstream is generated into the 187249423Sdim area starting at 188249423Sdim (UChar*) (&((UChar*)s->arr2)[s->nblock]) 189249423Sdim 190249423Sdim These storage aliases are set up in bzCompressInit(), 191249423Sdim except for the last one, which is arranged in 192249423Sdim compressBlock(). 193249423Sdim */ 194251662Sdim UInt32* ptr = s->ptr; 195249423Sdim UChar* block = s->block; 196249423Sdim UInt16* mtfv = s->mtfv; 197249423Sdim 198249423Sdim makeMaps_e ( s ); 199249423Sdim EOB = s->nInUse+1; 200249423Sdim 201249423Sdim for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0; 202249423Sdim 203249423Sdim wr = 0; 204249423Sdim zPend = 0; 205249423Sdim for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i; 206249423Sdim 207249423Sdim for (i = 0; i < s->nblock; i++) { 208249423Sdim UChar ll_i; 209249423Sdim AssertD ( wr <= i, "generateMTFValues(1)" ); 210249423Sdim j = ptr[i]-1; if (j < 0) j += s->nblock; 211249423Sdim ll_i = s->unseqToSeq[block[j]]; 212249423Sdim AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" ); 213249423Sdim 214249423Sdim if (yy[0] == ll_i) { 215249423Sdim zPend++; 216249423Sdim } else { 217251662Sdim 218249423Sdim if (zPend > 0) { 219249423Sdim zPend--; 220249423Sdim while (True) { 221249423Sdim if (zPend & 1) { 222249423Sdim mtfv[wr] = BZ_RUNB; wr++; 223249423Sdim s->mtfFreq[BZ_RUNB]++; 224249423Sdim } else { 225249423Sdim mtfv[wr] = BZ_RUNA; wr++; 226249423Sdim s->mtfFreq[BZ_RUNA]++; 227249423Sdim } 228249423Sdim if (zPend < 2) break; 229249423Sdim zPend = (zPend - 2) / 2; 230249423Sdim }; 231251662Sdim zPend = 0; 232249423Sdim } 233249423Sdim { 234249423Sdim register UChar rtmp; 235249423Sdim register UChar* ryy_j; 236249423Sdim register UChar rll_i; 237249423Sdim rtmp = yy[1]; 238249423Sdim yy[1] = yy[0]; 239249423Sdim ryy_j = &(yy[1]); 240249423Sdim rll_i = ll_i; 241249423Sdim while ( rll_i != rtmp ) { 242249423Sdim register UChar rtmp2; 243249423Sdim ryy_j++; 244249423Sdim rtmp2 = rtmp; 245249423Sdim rtmp = *ryy_j; 246249423Sdim *ryy_j = rtmp2; 247249423Sdim }; 248249423Sdim yy[0] = rtmp; 249249423Sdim j = ryy_j - &(yy[0]); 250249423Sdim mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++; 251249423Sdim } 252249423Sdim 253249423Sdim } 254249423Sdim } 255249423Sdim 256249423Sdim if (zPend > 0) { 257249423Sdim zPend--; 258249423Sdim while (True) { 259249423Sdim if (zPend & 1) { 260251662Sdim mtfv[wr] = BZ_RUNB; wr++; 261249423Sdim s->mtfFreq[BZ_RUNB]++; 262249423Sdim } else { 263249423Sdim mtfv[wr] = BZ_RUNA; wr++; 264249423Sdim s->mtfFreq[BZ_RUNA]++; 265249423Sdim } 266249423Sdim if (zPend < 2) break; 267251662Sdim zPend = (zPend - 2) / 2; 268249423Sdim }; 269249423Sdim zPend = 0; 270249423Sdim } 271249423Sdim 272249423Sdim mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++; 273249423Sdim 274249423Sdim s->nMTF = wr; 275249423Sdim} 276249423Sdim 277249423Sdim 278249423Sdim/*---------------------------------------------------*/ 279249423Sdim#define BZ_LESSER_ICOST 0 280249423Sdim#define BZ_GREATER_ICOST 15 281251662Sdim 282249423Sdimstatic 283249423Sdimvoid sendMTFValues ( EState* s ) 284249423Sdim{ 285249423Sdim Int32 v, t, i, j, gs, ge, totc, bt, bc, iter; 286249423Sdim Int32 nSelectors, alphaSize, minLen, maxLen, selCtr; 287249423Sdim Int32 nGroups, nBytes; 288249423Sdim 289249423Sdim /*-- 290249423Sdim UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 291249423Sdim is a global since the decoder also needs it. 292249423Sdim 293249423Sdim Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 294249423Sdim Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; 295249423Sdim are also globals only used in this proc. 296249423Sdim Made global to keep stack frame size small. 297249423Sdim --*/ 298249423Sdim 299249423Sdim 300249423Sdim UInt16 cost[BZ_N_GROUPS]; 301249423Sdim Int32 fave[BZ_N_GROUPS]; 302249423Sdim 303249423Sdim UInt16* mtfv = s->mtfv; 304249423Sdim 305249423Sdim if (s->verbosity >= 3) 306249423Sdim VPrintf3( " %d in block, %d after MTF & 1-2 coding, " 307249423Sdim "%d+2 syms in use\n", 308249423Sdim s->nblock, s->nMTF, s->nInUse ); 309251662Sdim 310249423Sdim alphaSize = s->nInUse+2; 311249423Sdim for (t = 0; t < BZ_N_GROUPS; t++) 312249423Sdim for (v = 0; v < alphaSize; v++) 313249423Sdim s->len[t][v] = BZ_GREATER_ICOST; 314249423Sdim 315249423Sdim /*--- Decide how many coding tables to use ---*/ 316249423Sdim AssertH ( s->nMTF > 0, 3001 ); 317249423Sdim if (s->nMTF < 200) nGroups = 2; else 318249423Sdim if (s->nMTF < 600) nGroups = 3; else 319249423Sdim if (s->nMTF < 1200) nGroups = 4; else 320249423Sdim if (s->nMTF < 2400) nGroups = 5; else 321249423Sdim nGroups = 6; 322249423Sdim 323249423Sdim /*--- Generate an initial set of coding tables ---*/ 324249423Sdim { 325249423Sdim Int32 nPart, remF, tFreq, aFreq; 326249423Sdim 327249423Sdim nPart = nGroups; 328249423Sdim remF = s->nMTF; 329249423Sdim gs = 0; 330249423Sdim while (nPart > 0) { 331249423Sdim tFreq = remF / nPart; 332249423Sdim ge = gs-1; 333249423Sdim aFreq = 0; 334249423Sdim while (aFreq < tFreq && ge < alphaSize-1) { 335249423Sdim ge++; 336249423Sdim aFreq += s->mtfFreq[ge]; 337249423Sdim } 338249423Sdim 339249423Sdim if (ge > gs 340249423Sdim && nPart != nGroups && nPart != 1 341249423Sdim && ((nGroups-nPart) % 2 == 1)) { 342249423Sdim aFreq -= s->mtfFreq[ge]; 343249423Sdim ge--; 344251662Sdim } 345249423Sdim 346249423Sdim if (s->verbosity >= 3) 347249423Sdim VPrintf5( " initial group %d, [%d .. %d], " 348251662Sdim "has %d syms (%4.1f%%)\n", 349249423Sdim nPart, gs, ge, aFreq, 350249423Sdim (100.0 * (float)aFreq) / (float)(s->nMTF) ); 351249423Sdim 352249423Sdim for (v = 0; v < alphaSize; v++) 353249423Sdim if (v >= gs && v <= ge) 354249423Sdim s->len[nPart-1][v] = BZ_LESSER_ICOST; else 355249423Sdim s->len[nPart-1][v] = BZ_GREATER_ICOST; 356249423Sdim 357249423Sdim nPart--; 358249423Sdim gs = ge+1; 359249423Sdim remF -= aFreq; 360249423Sdim } 361249423Sdim } 362249423Sdim 363249423Sdim /*--- 364249423Sdim Iterate up to BZ_N_ITERS times to improve the tables. 365249423Sdim ---*/ 366249423Sdim for (iter = 0; iter < BZ_N_ITERS; iter++) { 367249423Sdim 368249423Sdim for (t = 0; t < nGroups; t++) fave[t] = 0; 369249423Sdim 370249423Sdim for (t = 0; t < nGroups; t++) 371249423Sdim for (v = 0; v < alphaSize; v++) 372249423Sdim s->rfreq[t][v] = 0; 373249423Sdim 374249423Sdim /*--- 375249423Sdim Set up an auxiliary length table which is used to fast-track 376249423Sdim the common case (nGroups == 6). 377249423Sdim ---*/ 378249423Sdim if (nGroups == 6) { 379249423Sdim for (v = 0; v < alphaSize; v++) { 380249423Sdim s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; 381249423Sdim s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; 382249423Sdim s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; 383249423Sdim } 384249423Sdim } 385249423Sdim 386249423Sdim nSelectors = 0; 387249423Sdim totc = 0; 388249423Sdim gs = 0; 389249423Sdim while (True) { 390249423Sdim 391249423Sdim /*--- Set group start & end marks. --*/ 392249423Sdim if (gs >= s->nMTF) break; 393249423Sdim ge = gs + BZ_G_SIZE - 1; 394249423Sdim if (ge >= s->nMTF) ge = s->nMTF-1; 395249423Sdim 396249423Sdim /*-- 397249423Sdim Calculate the cost of this group as coded 398249423Sdim by each of the coding tables. 399249423Sdim --*/ 400249423Sdim for (t = 0; t < nGroups; t++) cost[t] = 0; 401249423Sdim 402249423Sdim if (nGroups == 6 && 50 == ge-gs+1) { 403249423Sdim /*--- fast track the common case ---*/ 404249423Sdim register UInt32 cost01, cost23, cost45; 405249423Sdim register UInt16 icv; 406249423Sdim cost01 = cost23 = cost45 = 0; 407249423Sdim 408249423Sdim# define BZ_ITER(nn) \ 409249423Sdim icv = mtfv[gs+(nn)]; \ 410249423Sdim cost01 += s->len_pack[icv][0]; \ 411249423Sdim cost23 += s->len_pack[icv][1]; \ 412249423Sdim cost45 += s->len_pack[icv][2]; \ 413249423Sdim 414249423Sdim BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); 415249423Sdim BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); 416249423Sdim BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); 417249423Sdim BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); 418249423Sdim BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); 419251662Sdim BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); 420249423Sdim BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); 421249423Sdim BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); 422249423Sdim BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); 423249423Sdim BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); 424249423Sdim 425249423Sdim# undef BZ_ITER 426249423Sdim 427249423Sdim cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; 428249423Sdim cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; 429249423Sdim cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; 430249423Sdim 431249423Sdim } else { 432249423Sdim /*--- slow version which correctly handles all situations ---*/ 433249423Sdim for (i = gs; i <= ge; i++) { 434249423Sdim UInt16 icv = mtfv[i]; 435249423Sdim for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv]; 436249423Sdim } 437249423Sdim } 438249423Sdim 439249423Sdim /*-- 440249423Sdim Find the coding table which is best for this group, 441251662Sdim and record its identity in the selector table. 442249423Sdim --*/ 443249423Sdim bc = 999999999; bt = -1; 444251662Sdim for (t = 0; t < nGroups; t++) 445249423Sdim if (cost[t] < bc) { bc = cost[t]; bt = t; }; 446249423Sdim totc += bc; 447249423Sdim fave[bt]++; 448249423Sdim s->selector[nSelectors] = bt; 449249423Sdim nSelectors++; 450249423Sdim 451249423Sdim /*-- 452249423Sdim Increment the symbol frequencies for the selected table. 453249423Sdim --*/ 454249423Sdim if (nGroups == 6 && 50 == ge-gs+1) { 455249423Sdim /*--- fast track the common case ---*/ 456249423Sdim 457249423Sdim# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++ 458249423Sdim 459251662Sdim BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); 460249423Sdim BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); 461249423Sdim BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); 462249423Sdim BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); 463249423Sdim BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); 464249423Sdim BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); 465249423Sdim BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); 466251662Sdim BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); 467249423Sdim BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); 468249423Sdim BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); 469249423Sdim 470249423Sdim# undef BZ_ITUR 471249423Sdim 472249423Sdim } else { 473249423Sdim /*--- slow version which correctly handles all situations ---*/ 474249423Sdim for (i = gs; i <= ge; i++) 475249423Sdim s->rfreq[bt][ mtfv[i] ]++; 476249423Sdim } 477249423Sdim 478249423Sdim gs = ge+1; 479249423Sdim } 480249423Sdim if (s->verbosity >= 3) { 481249423Sdim VPrintf2 ( " pass %d: size is %d, grp uses are ", 482249423Sdim iter+1, totc/8 ); 483249423Sdim for (t = 0; t < nGroups; t++) 484249423Sdim VPrintf1 ( "%d ", fave[t] ); 485249423Sdim VPrintf0 ( "\n" ); 486249423Sdim } 487249423Sdim 488249423Sdim /*-- 489249423Sdim Recompute the tables based on the accumulated frequencies. 490249423Sdim --*/ 491249423Sdim /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See 492263508Sdim comment in huffman.c for details. */ 493249423Sdim for (t = 0; t < nGroups; t++) 494249423Sdim BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]), 495249423Sdim alphaSize, 17 /*20*/ ); 496249423Sdim } 497249423Sdim 498251662Sdim 499249423Sdim AssertH( nGroups < 8, 3002 ); 500249423Sdim AssertH( nSelectors < 32768 && 501249423Sdim nSelectors <= (2 + (900000 / BZ_G_SIZE)), 502249423Sdim 3003 ); 503249423Sdim 504249423Sdim 505249423Sdim /*--- Compute MTF values for the selectors. ---*/ 506249423Sdim { 507249423Sdim UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp; 508249423Sdim for (i = 0; i < nGroups; i++) pos[i] = i; 509249423Sdim for (i = 0; i < nSelectors; i++) { 510249423Sdim ll_i = s->selector[i]; 511249423Sdim j = 0; 512251662Sdim tmp = pos[j]; 513249423Sdim while ( ll_i != tmp ) { 514249423Sdim j++; 515249423Sdim tmp2 = tmp; 516249423Sdim tmp = pos[j]; 517249423Sdim pos[j] = tmp2; 518249423Sdim }; 519249423Sdim pos[0] = tmp; 520249423Sdim s->selectorMtf[i] = j; 521249423Sdim } 522249423Sdim }; 523251662Sdim 524249423Sdim /*--- Assign actual codes for the tables. --*/ 525249423Sdim for (t = 0; t < nGroups; t++) { 526249423Sdim minLen = 32; 527249423Sdim maxLen = 0; 528249423Sdim for (i = 0; i < alphaSize; i++) { 529249423Sdim if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; 530249423Sdim if (s->len[t][i] < minLen) minLen = s->len[t][i]; 531249423Sdim } 532249423Sdim AssertH ( !(maxLen > 17 /*20*/ ), 3004 ); 533249423Sdim AssertH ( !(minLen < 1), 3005 ); 534249423Sdim BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]), 535249423Sdim minLen, maxLen, alphaSize ); 536249423Sdim } 537249423Sdim 538249423Sdim /*--- Transmit the mapping table. ---*/ 539249423Sdim { 540249423Sdim Bool inUse16[16]; 541249423Sdim for (i = 0; i < 16; i++) { 542249423Sdim inUse16[i] = False; 543249423Sdim for (j = 0; j < 16; j++) 544249423Sdim if (s->inUse[i * 16 + j]) inUse16[i] = True; 545251662Sdim } 546249423Sdim 547249423Sdim nBytes = s->numZ; 548249423Sdim for (i = 0; i < 16; i++) 549249423Sdim if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0); 550249423Sdim 551249423Sdim for (i = 0; i < 16; i++) 552249423Sdim if (inUse16[i]) 553249423Sdim for (j = 0; j < 16; j++) { 554249423Sdim if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0); 555249423Sdim } 556249423Sdim 557249423Sdim if (s->verbosity >= 3) 558249423Sdim VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes ); 559249423Sdim } 560249423Sdim 561249423Sdim /*--- Now the selectors. ---*/ 562249423Sdim nBytes = s->numZ; 563249423Sdim bsW ( s, 3, nGroups ); 564249423Sdim bsW ( s, 15, nSelectors ); 565249423Sdim for (i = 0; i < nSelectors; i++) { 566249423Sdim for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1); 567249423Sdim bsW(s,1,0); 568249423Sdim } 569249423Sdim if (s->verbosity >= 3) 570249423Sdim VPrintf1( "selectors %d, ", s->numZ-nBytes ); 571249423Sdim 572249423Sdim /*--- Now the coding tables. ---*/ 573249423Sdim nBytes = s->numZ; 574249423Sdim 575249423Sdim for (t = 0; t < nGroups; t++) { 576249423Sdim Int32 curr = s->len[t][0]; 577249423Sdim bsW ( s, 5, curr ); 578249423Sdim for (i = 0; i < alphaSize; i++) { 579249423Sdim while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ }; 580249423Sdim while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ }; 581249423Sdim bsW ( s, 1, 0 ); 582249423Sdim } 583249423Sdim } 584249423Sdim 585251662Sdim if (s->verbosity >= 3) 586249423Sdim VPrintf1 ( "code lengths %d, ", s->numZ-nBytes ); 587249423Sdim 588249423Sdim /*--- And finally, the block data proper ---*/ 589249423Sdim nBytes = s->numZ; 590249423Sdim selCtr = 0; 591249423Sdim gs = 0; 592249423Sdim while (True) { 593249423Sdim if (gs >= s->nMTF) break; 594251662Sdim ge = gs + BZ_G_SIZE - 1; 595249423Sdim if (ge >= s->nMTF) ge = s->nMTF-1; 596249423Sdim AssertH ( s->selector[selCtr] < nGroups, 3006 ); 597249423Sdim 598249423Sdim if (nGroups == 6 && 50 == ge-gs+1) { 599249423Sdim /*--- fast track the common case ---*/ 600249423Sdim UInt16 mtfv_i; 601249423Sdim UChar* s_len_sel_selCtr 602249423Sdim = &(s->len[s->selector[selCtr]][0]); 603249423Sdim Int32* s_code_sel_selCtr 604249423Sdim = &(s->code[s->selector[selCtr]][0]); 605249423Sdim 606249423Sdim# define BZ_ITAH(nn) \ 607249423Sdim mtfv_i = mtfv[gs+(nn)]; \ 608249423Sdim bsW ( s, \ 609249423Sdim s_len_sel_selCtr[mtfv_i], \ 610251662Sdim s_code_sel_selCtr[mtfv_i] ) 611249423Sdim 612249423Sdim BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); 613249423Sdim BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); 614249423Sdim BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); 615249423Sdim BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); 616249423Sdim BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); 617249423Sdim BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); 618249423Sdim BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); 619249423Sdim BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); 620249423Sdim BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); 621249423Sdim BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); 622249423Sdim 623249423Sdim# undef BZ_ITAH 624249423Sdim 625249423Sdim } else { 626249423Sdim /*--- slow version which correctly handles all situations ---*/ 627249423Sdim for (i = gs; i <= ge; i++) { 628249423Sdim bsW ( s, 629249423Sdim s->len [s->selector[selCtr]] [mtfv[i]], 630249423Sdim s->code [s->selector[selCtr]] [mtfv[i]] ); 631249423Sdim } 632249423Sdim } 633249423Sdim 634249423Sdim 635249423Sdim gs = ge+1; 636249423Sdim selCtr++; 637251662Sdim } 638249423Sdim AssertH( selCtr == nSelectors, 3007 ); 639249423Sdim 640249423Sdim if (s->verbosity >= 3) 641249423Sdim VPrintf1( "codes %d\n", s->numZ-nBytes ); 642249423Sdim} 643249423Sdim 644249423Sdim 645249423Sdim/*---------------------------------------------------*/ 646249423Sdimvoid BZ2_compressBlock ( EState* s, Bool is_last_block ) 647249423Sdim{ 648249423Sdim if (s->nblock > 0) { 649249423Sdim 650251662Sdim BZ_FINALISE_CRC ( s->blockCRC ); 651249423Sdim s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); 652249423Sdim s->combinedCRC ^= s->blockCRC; 653249423Sdim if (s->blockNo > 1) s->numZ = 0; 654249423Sdim 655249423Sdim if (s->verbosity >= 2) 656249423Sdim VPrintf4( " block %d: crc = 0x%08x, " 657249423Sdim "combined CRC = 0x%08x, size = %d\n", 658249423Sdim s->blockNo, s->blockCRC, s->combinedCRC, s->nblock ); 659249423Sdim 660249423Sdim BZ2_blockSort ( s ); 661249423Sdim } 662249423Sdim 663263508Sdim s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]); 664249423Sdim 665249423Sdim /*-- If this is the first block, create the stream header. --*/ 666249423Sdim if (s->blockNo == 1) { 667249423Sdim BZ2_bsInitWrite ( s ); 668249423Sdim bsPutUChar ( s, BZ_HDR_B ); 669249423Sdim bsPutUChar ( s, BZ_HDR_Z ); 670249423Sdim bsPutUChar ( s, BZ_HDR_h ); 671249423Sdim bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) ); 672249423Sdim } 673249423Sdim 674249423Sdim if (s->nblock > 0) { 675249423Sdim 676249423Sdim bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 ); 677249423Sdim bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 ); 678249423Sdim bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 ); 679249423Sdim 680249423Sdim /*-- Now the block's CRC, so it is in a known place. --*/ 681249423Sdim bsPutUInt32 ( s, s->blockCRC ); 682249423Sdim 683249423Sdim /*-- 684249423Sdim Now a single bit indicating (non-)randomisation. 685251662Sdim As of version 0.9.5, we use a better sorting algorithm 686249423Sdim which makes randomisation unnecessary. So always set 687249423Sdim the randomised bit to 'no'. Of course, the decoder 688249423Sdim still needs to be able to handle randomised blocks 689249423Sdim so as to maintain backwards compatibility with 690249423Sdim older versions of bzip2. 691249423Sdim --*/ 692249423Sdim bsW(s,1,0); 693249423Sdim 694249423Sdim bsW ( s, 24, s->origPtr ); 695249423Sdim generateMTFValues ( s ); 696249423Sdim sendMTFValues ( s ); 697249423Sdim } 698249423Sdim 699249423Sdim 700249423Sdim /*-- If this is the last block, add the stream trailer. --*/ 701249423Sdim if (is_last_block) { 702249423Sdim 703249423Sdim bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 ); 704249423Sdim bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 ); 705249423Sdim bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 ); 706251662Sdim bsPutUInt32 ( s, s->combinedCRC ); 707249423Sdim if (s->verbosity >= 2) 708249423Sdim VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC ); 709249423Sdim bsFinishWrite ( s ); 710249423Sdim } 711249423Sdim} 712249423Sdim 713249423Sdim 714249423Sdim/*-------------------------------------------------------------*/ 715249423Sdim/*--- end compress.c ---*/ 716249423Sdim/*-------------------------------------------------------------*/ 717249423Sdim