compress.c revision 146293
1202375Srdivacky
2202375Srdivacky/*-------------------------------------------------------------*/
3202375Srdivacky/*--- Compression machinery (not incl block sorting)        ---*/
4202375Srdivacky/*---                                            compress.c ---*/
5202375Srdivacky/*-------------------------------------------------------------*/
6202375Srdivacky
7202375Srdivacky/*--
8202375Srdivacky  This file is a part of bzip2 and/or libbzip2, a program and
9202375Srdivacky  library for lossless, block-sorting data compression.
10202375Srdivacky
11202375Srdivacky  Copyright (C) 1996-2005 Julian R Seward.  All rights reserved.
12202375Srdivacky
13202375Srdivacky  Redistribution and use in source and binary forms, with or without
14202375Srdivacky  modification, are permitted provided that the following conditions
15263508Sdim  are met:
16202375Srdivacky
17249423Sdim  1. Redistributions of source code must retain the above copyright
18202375Srdivacky     notice, this list of conditions and the following disclaimer.
19202375Srdivacky
20202375Srdivacky  2. The origin of this software must not be misrepresented; you must
21202375Srdivacky     not claim that you wrote the original software.  If you use this
22202375Srdivacky     software in a product, an acknowledgment in the product
23249423Sdim     documentation would be appreciated but is not required.
24249423Sdim
25249423Sdim  3. Altered source versions must be plainly marked as such, and must
26249423Sdim     not be misrepresented as being the original software.
27249423Sdim
28251662Sdim  4. The name of the author may not be used to endorse or promote
29249423Sdim     products derived from this software without specific prior written
30251662Sdim     permission.
31249423Sdim
32249423Sdim  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
33249423Sdim  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
34249423Sdim  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
35249423Sdim  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
36249423Sdim  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
37249423Sdim  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
38249423Sdim  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
39249423Sdim  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
40249423Sdim  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
41251662Sdim  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
42249423Sdim  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43249423Sdim
44249423Sdim  Julian Seward, Cambridge, UK.
45249423Sdim  jseward@bzip.org
46251662Sdim  bzip2/libbzip2 version 1.0 of 21 March 2000
47249423Sdim
48249423Sdim  This program is based on (at least) the work of:
49249423Sdim     Mike Burrows
50251662Sdim     David Wheeler
51249423Sdim     Peter Fenwick
52249423Sdim     Alistair Moffat
53251662Sdim     Radford Neal
54249423Sdim     Ian H. Witten
55249423Sdim     Robert Sedgewick
56249423Sdim     Jon L. Bentley
57249423Sdim
58249423Sdim  For more information on these sources, see the manual.
59249423Sdim--*/
60251662Sdim
61249423Sdim/*--
62249423Sdim   CHANGES
63249423Sdim   ~~~~~~~
64249423Sdim   0.9.0 -- original version.
65251662Sdim
66249423Sdim   0.9.0a/b -- no changes in this file.
67249423Sdim
68249423Sdim   0.9.0c
69249423Sdim      * changed setting of nGroups in sendMTFValues() so as to
70249423Sdim        do a bit better on small files
71249423Sdim--*/
72249423Sdim
73249423Sdim#include "bzlib_private.h"
74249423Sdim
75249423Sdim
76249423Sdim/*---------------------------------------------------*/
77249423Sdim/*--- Bit stream I/O                              ---*/
78251662Sdim/*---------------------------------------------------*/
79251662Sdim
80251662Sdim/*---------------------------------------------------*/
81251662Sdimvoid BZ2_bsInitWrite ( EState* s )
82251662Sdim{
83249423Sdim   s->bsLive = 0;
84249423Sdim   s->bsBuff = 0;
85249423Sdim}
86251662Sdim
87249423Sdim
88249423Sdim/*---------------------------------------------------*/
89249423Sdimstatic
90249423Sdimvoid bsFinishWrite ( EState* s )
91251662Sdim{
92249423Sdim   while (s->bsLive > 0) {
93249423Sdim      s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
94249423Sdim      s->numZ++;
95249423Sdim      s->bsBuff <<= 8;
96251662Sdim      s->bsLive -= 8;
97249423Sdim   }
98249423Sdim}
99251662Sdim
100249423Sdim
101249423Sdim/*---------------------------------------------------*/
102249423Sdim#define bsNEEDW(nz)                           \
103249423Sdim{                                             \
104249423Sdim   while (s->bsLive >= 8) {                   \
105249423Sdim      s->zbits[s->numZ]                       \
106249423Sdim         = (UChar)(s->bsBuff >> 24);          \
107249423Sdim      s->numZ++;                              \
108251662Sdim      s->bsBuff <<= 8;                        \
109249423Sdim      s->bsLive -= 8;                         \
110249423Sdim   }                                          \
111249423Sdim}
112249423Sdim
113249423Sdim
114249423Sdim/*---------------------------------------------------*/
115249423Sdimstatic
116251662Sdim__inline__
117249423Sdimvoid bsW ( EState* s, Int32 n, UInt32 v )
118249423Sdim{
119251662Sdim   bsNEEDW ( n );
120249423Sdim   s->bsBuff |= (v << (32 - s->bsLive - n));
121249423Sdim   s->bsLive += n;
122249423Sdim}
123249423Sdim
124249423Sdim
125249423Sdim/*---------------------------------------------------*/
126249423Sdimstatic
127249423Sdimvoid bsPutUInt32 ( EState* s, UInt32 u )
128251662Sdim{
129249423Sdim   bsW ( s, 8, (u >> 24) & 0xffL );
130251662Sdim   bsW ( s, 8, (u >> 16) & 0xffL );
131249423Sdim   bsW ( s, 8, (u >>  8) & 0xffL );
132249423Sdim   bsW ( s, 8,  u        & 0xffL );
133249423Sdim}
134251662Sdim
135249423Sdim
136249423Sdim/*---------------------------------------------------*/
137249423Sdimstatic
138251662Sdimvoid bsPutUChar ( EState* s, UChar c )
139249423Sdim{
140249423Sdim   bsW( s, 8, (UInt32)c );
141249423Sdim}
142249423Sdim
143249423Sdim
144249423Sdim/*---------------------------------------------------*/
145249423Sdim/*--- The back end proper                         ---*/
146251662Sdim/*---------------------------------------------------*/
147249423Sdim
148249423Sdim/*---------------------------------------------------*/
149249423Sdimstatic
150249423Sdimvoid makeMaps_e ( EState* s )
151251662Sdim{
152249423Sdim   Int32 i;
153249423Sdim   s->nInUse = 0;
154249423Sdim   for (i = 0; i < 256; i++)
155249423Sdim      if (s->inUse[i]) {
156249423Sdim         s->unseqToSeq[i] = s->nInUse;
157249423Sdim         s->nInUse++;
158249423Sdim      }
159251662Sdim}
160249423Sdim
161249423Sdim
162251662Sdim/*---------------------------------------------------*/
163249423Sdimstatic
164249423Sdimvoid generateMTFValues ( EState* s )
165249423Sdim{
166249423Sdim   UChar   yy[256];
167249423Sdim   Int32   i, j;
168249423Sdim   Int32   zPend;
169251662Sdim   Int32   wr;
170249423Sdim   Int32   EOB;
171249423Sdim
172249423Sdim   /*
173249423Sdim      After sorting (eg, here),
174249423Sdim         s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
175249423Sdim         and
176249423Sdim         ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
177249423Sdim         holds the original block data.
178249423Sdim
179251662Sdim      The first thing to do is generate the MTF values,
180249423Sdim      and put them in
181249423Sdim         ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
182251662Sdim      Because there are strictly fewer or equal MTF values
183249423Sdim      than block values, ptr values in this area are overwritten
184249423Sdim      with MTF values only when they are no longer needed.
185249423Sdim
186249423Sdim      The final compressed bitstream is generated into the
187249423Sdim      area starting at
188249423Sdim         (UChar*) (&((UChar*)s->arr2)[s->nblock])
189249423Sdim
190249423Sdim      These storage aliases are set up in bzCompressInit(),
191249423Sdim      except for the last one, which is arranged in
192249423Sdim      compressBlock().
193249423Sdim   */
194251662Sdim   UInt32* ptr   = s->ptr;
195249423Sdim   UChar* block  = s->block;
196249423Sdim   UInt16* mtfv  = s->mtfv;
197249423Sdim
198249423Sdim   makeMaps_e ( s );
199249423Sdim   EOB = s->nInUse+1;
200249423Sdim
201249423Sdim   for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
202249423Sdim
203249423Sdim   wr = 0;
204249423Sdim   zPend = 0;
205249423Sdim   for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
206249423Sdim
207249423Sdim   for (i = 0; i < s->nblock; i++) {
208249423Sdim      UChar ll_i;
209249423Sdim      AssertD ( wr <= i, "generateMTFValues(1)" );
210249423Sdim      j = ptr[i]-1; if (j < 0) j += s->nblock;
211249423Sdim      ll_i = s->unseqToSeq[block[j]];
212249423Sdim      AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
213249423Sdim
214249423Sdim      if (yy[0] == ll_i) {
215249423Sdim         zPend++;
216249423Sdim      } else {
217251662Sdim
218249423Sdim         if (zPend > 0) {
219249423Sdim            zPend--;
220249423Sdim            while (True) {
221249423Sdim               if (zPend & 1) {
222249423Sdim                  mtfv[wr] = BZ_RUNB; wr++;
223249423Sdim                  s->mtfFreq[BZ_RUNB]++;
224249423Sdim               } else {
225249423Sdim                  mtfv[wr] = BZ_RUNA; wr++;
226249423Sdim                  s->mtfFreq[BZ_RUNA]++;
227249423Sdim               }
228249423Sdim               if (zPend < 2) break;
229249423Sdim               zPend = (zPend - 2) / 2;
230249423Sdim            };
231251662Sdim            zPend = 0;
232249423Sdim         }
233249423Sdim         {
234249423Sdim            register UChar  rtmp;
235249423Sdim            register UChar* ryy_j;
236249423Sdim            register UChar  rll_i;
237249423Sdim            rtmp  = yy[1];
238249423Sdim            yy[1] = yy[0];
239249423Sdim            ryy_j = &(yy[1]);
240249423Sdim            rll_i = ll_i;
241249423Sdim            while ( rll_i != rtmp ) {
242249423Sdim               register UChar rtmp2;
243249423Sdim               ryy_j++;
244249423Sdim               rtmp2  = rtmp;
245249423Sdim               rtmp   = *ryy_j;
246249423Sdim               *ryy_j = rtmp2;
247249423Sdim            };
248249423Sdim            yy[0] = rtmp;
249249423Sdim            j = ryy_j - &(yy[0]);
250249423Sdim            mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
251249423Sdim         }
252249423Sdim
253249423Sdim      }
254249423Sdim   }
255249423Sdim
256249423Sdim   if (zPend > 0) {
257249423Sdim      zPend--;
258249423Sdim      while (True) {
259249423Sdim         if (zPend & 1) {
260251662Sdim            mtfv[wr] = BZ_RUNB; wr++;
261249423Sdim            s->mtfFreq[BZ_RUNB]++;
262249423Sdim         } else {
263249423Sdim            mtfv[wr] = BZ_RUNA; wr++;
264249423Sdim            s->mtfFreq[BZ_RUNA]++;
265249423Sdim         }
266249423Sdim         if (zPend < 2) break;
267251662Sdim         zPend = (zPend - 2) / 2;
268249423Sdim      };
269249423Sdim      zPend = 0;
270249423Sdim   }
271249423Sdim
272249423Sdim   mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
273249423Sdim
274249423Sdim   s->nMTF = wr;
275249423Sdim}
276249423Sdim
277249423Sdim
278249423Sdim/*---------------------------------------------------*/
279249423Sdim#define BZ_LESSER_ICOST  0
280249423Sdim#define BZ_GREATER_ICOST 15
281251662Sdim
282249423Sdimstatic
283249423Sdimvoid sendMTFValues ( EState* s )
284249423Sdim{
285249423Sdim   Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
286249423Sdim   Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
287249423Sdim   Int32 nGroups, nBytes;
288249423Sdim
289249423Sdim   /*--
290249423Sdim   UChar  len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
291249423Sdim   is a global since the decoder also needs it.
292249423Sdim
293249423Sdim   Int32  code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
294249423Sdim   Int32  rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
295249423Sdim   are also globals only used in this proc.
296249423Sdim   Made global to keep stack frame size small.
297249423Sdim   --*/
298249423Sdim
299249423Sdim
300249423Sdim   UInt16 cost[BZ_N_GROUPS];
301249423Sdim   Int32  fave[BZ_N_GROUPS];
302249423Sdim
303249423Sdim   UInt16* mtfv = s->mtfv;
304249423Sdim
305249423Sdim   if (s->verbosity >= 3)
306249423Sdim      VPrintf3( "      %d in block, %d after MTF & 1-2 coding, "
307249423Sdim                "%d+2 syms in use\n",
308249423Sdim                s->nblock, s->nMTF, s->nInUse );
309251662Sdim
310249423Sdim   alphaSize = s->nInUse+2;
311249423Sdim   for (t = 0; t < BZ_N_GROUPS; t++)
312249423Sdim      for (v = 0; v < alphaSize; v++)
313249423Sdim         s->len[t][v] = BZ_GREATER_ICOST;
314249423Sdim
315249423Sdim   /*--- Decide how many coding tables to use ---*/
316249423Sdim   AssertH ( s->nMTF > 0, 3001 );
317249423Sdim   if (s->nMTF < 200)  nGroups = 2; else
318249423Sdim   if (s->nMTF < 600)  nGroups = 3; else
319249423Sdim   if (s->nMTF < 1200) nGroups = 4; else
320249423Sdim   if (s->nMTF < 2400) nGroups = 5; else
321249423Sdim                       nGroups = 6;
322249423Sdim
323249423Sdim   /*--- Generate an initial set of coding tables ---*/
324249423Sdim   {
325249423Sdim      Int32 nPart, remF, tFreq, aFreq;
326249423Sdim
327249423Sdim      nPart = nGroups;
328249423Sdim      remF  = s->nMTF;
329249423Sdim      gs = 0;
330249423Sdim      while (nPart > 0) {
331249423Sdim         tFreq = remF / nPart;
332249423Sdim         ge = gs-1;
333249423Sdim         aFreq = 0;
334249423Sdim         while (aFreq < tFreq && ge < alphaSize-1) {
335249423Sdim            ge++;
336249423Sdim            aFreq += s->mtfFreq[ge];
337249423Sdim         }
338249423Sdim
339249423Sdim         if (ge > gs
340249423Sdim             && nPart != nGroups && nPart != 1
341249423Sdim             && ((nGroups-nPart) % 2 == 1)) {
342249423Sdim            aFreq -= s->mtfFreq[ge];
343249423Sdim            ge--;
344251662Sdim         }
345249423Sdim
346249423Sdim         if (s->verbosity >= 3)
347249423Sdim            VPrintf5( "      initial group %d, [%d .. %d], "
348251662Sdim                      "has %d syms (%4.1f%%)\n",
349249423Sdim                      nPart, gs, ge, aFreq,
350249423Sdim                      (100.0 * (float)aFreq) / (float)(s->nMTF) );
351249423Sdim
352249423Sdim         for (v = 0; v < alphaSize; v++)
353249423Sdim            if (v >= gs && v <= ge)
354249423Sdim               s->len[nPart-1][v] = BZ_LESSER_ICOST; else
355249423Sdim               s->len[nPart-1][v] = BZ_GREATER_ICOST;
356249423Sdim
357249423Sdim         nPart--;
358249423Sdim         gs = ge+1;
359249423Sdim         remF -= aFreq;
360249423Sdim      }
361249423Sdim   }
362249423Sdim
363249423Sdim   /*---
364249423Sdim      Iterate up to BZ_N_ITERS times to improve the tables.
365249423Sdim   ---*/
366249423Sdim   for (iter = 0; iter < BZ_N_ITERS; iter++) {
367249423Sdim
368249423Sdim      for (t = 0; t < nGroups; t++) fave[t] = 0;
369249423Sdim
370249423Sdim      for (t = 0; t < nGroups; t++)
371249423Sdim         for (v = 0; v < alphaSize; v++)
372249423Sdim            s->rfreq[t][v] = 0;
373249423Sdim
374249423Sdim      /*---
375249423Sdim        Set up an auxiliary length table which is used to fast-track
376249423Sdim	the common case (nGroups == 6).
377249423Sdim      ---*/
378249423Sdim      if (nGroups == 6) {
379249423Sdim         for (v = 0; v < alphaSize; v++) {
380249423Sdim            s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
381249423Sdim            s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
382249423Sdim            s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
383249423Sdim	 }
384249423Sdim      }
385249423Sdim
386249423Sdim      nSelectors = 0;
387249423Sdim      totc = 0;
388249423Sdim      gs = 0;
389249423Sdim      while (True) {
390249423Sdim
391249423Sdim         /*--- Set group start & end marks. --*/
392249423Sdim         if (gs >= s->nMTF) break;
393249423Sdim         ge = gs + BZ_G_SIZE - 1;
394249423Sdim         if (ge >= s->nMTF) ge = s->nMTF-1;
395249423Sdim
396249423Sdim         /*--
397249423Sdim            Calculate the cost of this group as coded
398249423Sdim            by each of the coding tables.
399249423Sdim         --*/
400249423Sdim         for (t = 0; t < nGroups; t++) cost[t] = 0;
401249423Sdim
402249423Sdim         if (nGroups == 6 && 50 == ge-gs+1) {
403249423Sdim            /*--- fast track the common case ---*/
404249423Sdim            register UInt32 cost01, cost23, cost45;
405249423Sdim            register UInt16 icv;
406249423Sdim            cost01 = cost23 = cost45 = 0;
407249423Sdim
408249423Sdim#           define BZ_ITER(nn)                \
409249423Sdim               icv = mtfv[gs+(nn)];           \
410249423Sdim               cost01 += s->len_pack[icv][0]; \
411249423Sdim               cost23 += s->len_pack[icv][1]; \
412249423Sdim               cost45 += s->len_pack[icv][2]; \
413249423Sdim
414249423Sdim            BZ_ITER(0);  BZ_ITER(1);  BZ_ITER(2);  BZ_ITER(3);  BZ_ITER(4);
415249423Sdim            BZ_ITER(5);  BZ_ITER(6);  BZ_ITER(7);  BZ_ITER(8);  BZ_ITER(9);
416249423Sdim            BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
417249423Sdim            BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
418249423Sdim            BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
419251662Sdim            BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
420249423Sdim            BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
421249423Sdim            BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
422249423Sdim            BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
423249423Sdim            BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
424249423Sdim
425249423Sdim#           undef BZ_ITER
426249423Sdim
427249423Sdim            cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
428249423Sdim            cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
429249423Sdim            cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
430249423Sdim
431249423Sdim         } else {
432249423Sdim	    /*--- slow version which correctly handles all situations ---*/
433249423Sdim            for (i = gs; i <= ge; i++) {
434249423Sdim               UInt16 icv = mtfv[i];
435249423Sdim               for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
436249423Sdim            }
437249423Sdim         }
438249423Sdim
439249423Sdim         /*--
440249423Sdim            Find the coding table which is best for this group,
441251662Sdim            and record its identity in the selector table.
442249423Sdim         --*/
443249423Sdim         bc = 999999999; bt = -1;
444251662Sdim         for (t = 0; t < nGroups; t++)
445249423Sdim            if (cost[t] < bc) { bc = cost[t]; bt = t; };
446249423Sdim         totc += bc;
447249423Sdim         fave[bt]++;
448249423Sdim         s->selector[nSelectors] = bt;
449249423Sdim         nSelectors++;
450249423Sdim
451249423Sdim         /*--
452249423Sdim            Increment the symbol frequencies for the selected table.
453249423Sdim          --*/
454249423Sdim         if (nGroups == 6 && 50 == ge-gs+1) {
455249423Sdim            /*--- fast track the common case ---*/
456249423Sdim
457249423Sdim#           define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
458249423Sdim
459251662Sdim            BZ_ITUR(0);  BZ_ITUR(1);  BZ_ITUR(2);  BZ_ITUR(3);  BZ_ITUR(4);
460249423Sdim            BZ_ITUR(5);  BZ_ITUR(6);  BZ_ITUR(7);  BZ_ITUR(8);  BZ_ITUR(9);
461249423Sdim            BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
462249423Sdim            BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
463249423Sdim            BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
464249423Sdim            BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
465249423Sdim            BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
466251662Sdim            BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
467249423Sdim            BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
468249423Sdim            BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
469249423Sdim
470249423Sdim#           undef BZ_ITUR
471249423Sdim
472249423Sdim         } else {
473249423Sdim	    /*--- slow version which correctly handles all situations ---*/
474249423Sdim            for (i = gs; i <= ge; i++)
475249423Sdim               s->rfreq[bt][ mtfv[i] ]++;
476249423Sdim         }
477249423Sdim
478249423Sdim         gs = ge+1;
479249423Sdim      }
480249423Sdim      if (s->verbosity >= 3) {
481249423Sdim         VPrintf2 ( "      pass %d: size is %d, grp uses are ",
482249423Sdim                   iter+1, totc/8 );
483249423Sdim         for (t = 0; t < nGroups; t++)
484249423Sdim            VPrintf1 ( "%d ", fave[t] );
485249423Sdim         VPrintf0 ( "\n" );
486249423Sdim      }
487249423Sdim
488249423Sdim      /*--
489249423Sdim        Recompute the tables based on the accumulated frequencies.
490249423Sdim      --*/
491249423Sdim      /* maxLen was changed from 20 to 17 in bzip2-1.0.3.  See
492263508Sdim         comment in huffman.c for details. */
493249423Sdim      for (t = 0; t < nGroups; t++)
494249423Sdim         BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
495249423Sdim                                 alphaSize, 17 /*20*/ );
496249423Sdim   }
497249423Sdim
498251662Sdim
499249423Sdim   AssertH( nGroups < 8, 3002 );
500249423Sdim   AssertH( nSelectors < 32768 &&
501249423Sdim            nSelectors <= (2 + (900000 / BZ_G_SIZE)),
502249423Sdim            3003 );
503249423Sdim
504249423Sdim
505249423Sdim   /*--- Compute MTF values for the selectors. ---*/
506249423Sdim   {
507249423Sdim      UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
508249423Sdim      for (i = 0; i < nGroups; i++) pos[i] = i;
509249423Sdim      for (i = 0; i < nSelectors; i++) {
510249423Sdim         ll_i = s->selector[i];
511249423Sdim         j = 0;
512251662Sdim         tmp = pos[j];
513249423Sdim         while ( ll_i != tmp ) {
514249423Sdim            j++;
515249423Sdim            tmp2 = tmp;
516249423Sdim            tmp = pos[j];
517249423Sdim            pos[j] = tmp2;
518249423Sdim         };
519249423Sdim         pos[0] = tmp;
520249423Sdim         s->selectorMtf[i] = j;
521249423Sdim      }
522249423Sdim   };
523251662Sdim
524249423Sdim   /*--- Assign actual codes for the tables. --*/
525249423Sdim   for (t = 0; t < nGroups; t++) {
526249423Sdim      minLen = 32;
527249423Sdim      maxLen = 0;
528249423Sdim      for (i = 0; i < alphaSize; i++) {
529249423Sdim         if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
530249423Sdim         if (s->len[t][i] < minLen) minLen = s->len[t][i];
531249423Sdim      }
532249423Sdim      AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
533249423Sdim      AssertH ( !(minLen < 1),  3005 );
534249423Sdim      BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
535249423Sdim                          minLen, maxLen, alphaSize );
536249423Sdim   }
537249423Sdim
538249423Sdim   /*--- Transmit the mapping table. ---*/
539249423Sdim   {
540249423Sdim      Bool inUse16[16];
541249423Sdim      for (i = 0; i < 16; i++) {
542249423Sdim          inUse16[i] = False;
543249423Sdim          for (j = 0; j < 16; j++)
544249423Sdim             if (s->inUse[i * 16 + j]) inUse16[i] = True;
545251662Sdim      }
546249423Sdim
547249423Sdim      nBytes = s->numZ;
548249423Sdim      for (i = 0; i < 16; i++)
549249423Sdim         if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
550249423Sdim
551249423Sdim      for (i = 0; i < 16; i++)
552249423Sdim         if (inUse16[i])
553249423Sdim            for (j = 0; j < 16; j++) {
554249423Sdim               if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
555249423Sdim            }
556249423Sdim
557249423Sdim      if (s->verbosity >= 3)
558249423Sdim         VPrintf1( "      bytes: mapping %d, ", s->numZ-nBytes );
559249423Sdim   }
560249423Sdim
561249423Sdim   /*--- Now the selectors. ---*/
562249423Sdim   nBytes = s->numZ;
563249423Sdim   bsW ( s, 3, nGroups );
564249423Sdim   bsW ( s, 15, nSelectors );
565249423Sdim   for (i = 0; i < nSelectors; i++) {
566249423Sdim      for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
567249423Sdim      bsW(s,1,0);
568249423Sdim   }
569249423Sdim   if (s->verbosity >= 3)
570249423Sdim      VPrintf1( "selectors %d, ", s->numZ-nBytes );
571249423Sdim
572249423Sdim   /*--- Now the coding tables. ---*/
573249423Sdim   nBytes = s->numZ;
574249423Sdim
575249423Sdim   for (t = 0; t < nGroups; t++) {
576249423Sdim      Int32 curr = s->len[t][0];
577249423Sdim      bsW ( s, 5, curr );
578249423Sdim      for (i = 0; i < alphaSize; i++) {
579249423Sdim         while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
580249423Sdim         while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
581249423Sdim         bsW ( s, 1, 0 );
582249423Sdim      }
583249423Sdim   }
584249423Sdim
585251662Sdim   if (s->verbosity >= 3)
586249423Sdim      VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
587249423Sdim
588249423Sdim   /*--- And finally, the block data proper ---*/
589249423Sdim   nBytes = s->numZ;
590249423Sdim   selCtr = 0;
591249423Sdim   gs = 0;
592249423Sdim   while (True) {
593249423Sdim      if (gs >= s->nMTF) break;
594251662Sdim      ge = gs + BZ_G_SIZE - 1;
595249423Sdim      if (ge >= s->nMTF) ge = s->nMTF-1;
596249423Sdim      AssertH ( s->selector[selCtr] < nGroups, 3006 );
597249423Sdim
598249423Sdim      if (nGroups == 6 && 50 == ge-gs+1) {
599249423Sdim            /*--- fast track the common case ---*/
600249423Sdim            UInt16 mtfv_i;
601249423Sdim            UChar* s_len_sel_selCtr
602249423Sdim               = &(s->len[s->selector[selCtr]][0]);
603249423Sdim            Int32* s_code_sel_selCtr
604249423Sdim               = &(s->code[s->selector[selCtr]][0]);
605249423Sdim
606249423Sdim#           define BZ_ITAH(nn)                      \
607249423Sdim               mtfv_i = mtfv[gs+(nn)];              \
608249423Sdim               bsW ( s,                             \
609249423Sdim                     s_len_sel_selCtr[mtfv_i],      \
610251662Sdim                     s_code_sel_selCtr[mtfv_i] )
611249423Sdim
612249423Sdim            BZ_ITAH(0);  BZ_ITAH(1);  BZ_ITAH(2);  BZ_ITAH(3);  BZ_ITAH(4);
613249423Sdim            BZ_ITAH(5);  BZ_ITAH(6);  BZ_ITAH(7);  BZ_ITAH(8);  BZ_ITAH(9);
614249423Sdim            BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
615249423Sdim            BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
616249423Sdim            BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
617249423Sdim            BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
618249423Sdim            BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
619249423Sdim            BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
620249423Sdim            BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
621249423Sdim            BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
622249423Sdim
623249423Sdim#           undef BZ_ITAH
624249423Sdim
625249423Sdim      } else {
626249423Sdim	 /*--- slow version which correctly handles all situations ---*/
627249423Sdim         for (i = gs; i <= ge; i++) {
628249423Sdim            bsW ( s,
629249423Sdim                  s->len  [s->selector[selCtr]] [mtfv[i]],
630249423Sdim                  s->code [s->selector[selCtr]] [mtfv[i]] );
631249423Sdim         }
632249423Sdim      }
633249423Sdim
634249423Sdim
635249423Sdim      gs = ge+1;
636249423Sdim      selCtr++;
637251662Sdim   }
638249423Sdim   AssertH( selCtr == nSelectors, 3007 );
639249423Sdim
640249423Sdim   if (s->verbosity >= 3)
641249423Sdim      VPrintf1( "codes %d\n", s->numZ-nBytes );
642249423Sdim}
643249423Sdim
644249423Sdim
645249423Sdim/*---------------------------------------------------*/
646249423Sdimvoid BZ2_compressBlock ( EState* s, Bool is_last_block )
647249423Sdim{
648249423Sdim   if (s->nblock > 0) {
649249423Sdim
650251662Sdim      BZ_FINALISE_CRC ( s->blockCRC );
651249423Sdim      s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
652249423Sdim      s->combinedCRC ^= s->blockCRC;
653249423Sdim      if (s->blockNo > 1) s->numZ = 0;
654249423Sdim
655249423Sdim      if (s->verbosity >= 2)
656249423Sdim         VPrintf4( "    block %d: crc = 0x%08x, "
657249423Sdim                   "combined CRC = 0x%08x, size = %d\n",
658249423Sdim                   s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
659249423Sdim
660249423Sdim      BZ2_blockSort ( s );
661249423Sdim   }
662249423Sdim
663263508Sdim   s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
664249423Sdim
665249423Sdim   /*-- If this is the first block, create the stream header. --*/
666249423Sdim   if (s->blockNo == 1) {
667249423Sdim      BZ2_bsInitWrite ( s );
668249423Sdim      bsPutUChar ( s, BZ_HDR_B );
669249423Sdim      bsPutUChar ( s, BZ_HDR_Z );
670249423Sdim      bsPutUChar ( s, BZ_HDR_h );
671249423Sdim      bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
672249423Sdim   }
673249423Sdim
674249423Sdim   if (s->nblock > 0) {
675249423Sdim
676249423Sdim      bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
677249423Sdim      bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
678249423Sdim      bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
679249423Sdim
680249423Sdim      /*-- Now the block's CRC, so it is in a known place. --*/
681249423Sdim      bsPutUInt32 ( s, s->blockCRC );
682249423Sdim
683249423Sdim      /*--
684249423Sdim         Now a single bit indicating (non-)randomisation.
685251662Sdim         As of version 0.9.5, we use a better sorting algorithm
686249423Sdim         which makes randomisation unnecessary.  So always set
687249423Sdim         the randomised bit to 'no'.  Of course, the decoder
688249423Sdim         still needs to be able to handle randomised blocks
689249423Sdim         so as to maintain backwards compatibility with
690249423Sdim         older versions of bzip2.
691249423Sdim      --*/
692249423Sdim      bsW(s,1,0);
693249423Sdim
694249423Sdim      bsW ( s, 24, s->origPtr );
695249423Sdim      generateMTFValues ( s );
696249423Sdim      sendMTFValues ( s );
697249423Sdim   }
698249423Sdim
699249423Sdim
700249423Sdim   /*-- If this is the last block, add the stream trailer. --*/
701249423Sdim   if (is_last_block) {
702249423Sdim
703249423Sdim      bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
704249423Sdim      bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
705249423Sdim      bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
706251662Sdim      bsPutUInt32 ( s, s->combinedCRC );
707249423Sdim      if (s->verbosity >= 2)
708249423Sdim         VPrintf1( "    final combined CRC = 0x%08x\n   ", s->combinedCRC );
709249423Sdim      bsFinishWrite ( s );
710249423Sdim   }
711249423Sdim}
712249423Sdim
713249423Sdim
714249423Sdim/*-------------------------------------------------------------*/
715249423Sdim/*--- end                                        compress.c ---*/
716249423Sdim/*-------------------------------------------------------------*/
717249423Sdim