blocksort.c revision 167974
126175Sfenner
275107Sfenner/*-------------------------------------------------------------*/
326175Sfenner/*--- Block sorting machinery                               ---*/
475107Sfenner/*---                                           blocksort.c ---*/
575107Sfenner/*-------------------------------------------------------------*/
6127664Sbms
775107Sfenner/* ------------------------------------------------------------------
8127664Sbms   This file is part of bzip2/libbzip2, a program and library for
975107Sfenner   lossless, block-sorting data compression.
1075107Sfenner
1175107Sfenner   bzip2/libbzip2 version 1.0.4 of 20 December 2006
12127664Sbms   Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org>
1375107Sfenner
1475107Sfenner   Please read the WARNING, DISCLAIMER and PATENTS sections in the
1575107Sfenner   README file.
1675107Sfenner
1775107Sfenner   This program is released under the terms of the license contained
1875107Sfenner   in the file LICENSE.
1975107Sfenner   ------------------------------------------------------------------ */
2075107Sfenner
2175107Sfenner
22127664Sbms#include "bzlib_private.h"
2375107Sfenner
2475107Sfenner/*---------------------------------------------*/
2575107Sfenner/*--- Fallback O(N log(N)^2) sorting        ---*/
2626175Sfenner/*--- algorithm, for repetitive blocks      ---*/
27127664Sbms/*---------------------------------------------*/
2826175Sfenner
29127664Sbms/*---------------------------------------------*/
30172677Smlaierstatic
3126175Sfenner__inline__
3226175Sfennervoid fallbackSimpleSort ( UInt32* fmap,
3375107Sfenner                          UInt32* eclass,
3475107Sfenner                          Int32   lo,
3575107Sfenner                          Int32   hi )
3675107Sfenner{
3775107Sfenner   Int32 i, j, tmp;
3875107Sfenner   UInt32 ec_tmp;
3975107Sfenner
4075107Sfenner   if (lo == hi) return;
4175107Sfenner
4275107Sfenner   if (hi - lo > 3) {
4375107Sfenner      for ( i = hi-4; i >= lo; i-- ) {
4475107Sfenner         tmp = fmap[i];
4575107Sfenner         ec_tmp = eclass[tmp];
4675107Sfenner         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
4775107Sfenner            fmap[j-4] = fmap[j];
4875107Sfenner         fmap[j-4] = tmp;
4975107Sfenner      }
5075107Sfenner   }
5175107Sfenner
5275107Sfenner   for ( i = hi-1; i >= lo; i-- ) {
5375107Sfenner      tmp = fmap[i];
5475107Sfenner      ec_tmp = eclass[tmp];
5598530Sfenner      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
5698530Sfenner         fmap[j-1] = fmap[j];
5798530Sfenner      fmap[j-1] = tmp;
5898530Sfenner   }
5998530Sfenner}
6098530Sfenner
6198530Sfenner
6298530Sfenner/*---------------------------------------------*/
6398530Sfenner#define fswap(zz1, zz2) \
6498530Sfenner   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
6598530Sfenner
6698530Sfenner#define fvswap(zzp1, zzp2, zzn)       \
6798530Sfenner{                                     \
6898530Sfenner   Int32 yyp1 = (zzp1);               \
6998530Sfenner   Int32 yyp2 = (zzp2);               \
7098530Sfenner   Int32 yyn  = (zzn);                \
7198530Sfenner   while (yyn > 0) {                  \
7298530Sfenner      fswap(fmap[yyp1], fmap[yyp2]);  \
7375107Sfenner      yyp1++; yyp2++; yyn--;          \
7426175Sfenner   }                                  \
7575107Sfenner}
7675107Sfenner
7775107Sfenner
7839291Sfenner#define fmin(a,b) ((a) < (b)) ? (a) : (b)
7926175Sfenner
8075107Sfenner#define fpush(lz,hz) { stackLo[sp] = lz; \
8175107Sfenner                       stackHi[sp] = hz; \
8226175Sfenner                       sp++; }
83127664Sbms
84127664Sbms#define fpop(lz,hz) { sp--;              \
85127664Sbms                      lz = stackLo[sp];  \
86147894Ssam                      hz = stackHi[sp]; }
87147894Ssam
88147894Ssam#define FALLBACK_QSORT_SMALL_THRESH 10
89147894Ssam#define FALLBACK_QSORT_STACK_SIZE   100
90127664Sbms
9126175Sfenner
9226175Sfennerstatic
9375107Sfennervoid fallbackQSort3 ( UInt32* fmap,
9475107Sfenner                      UInt32* eclass,
9526175Sfenner                      Int32   loSt,
9675107Sfenner                      Int32   hiSt )
9775107Sfenner{
9875107Sfenner   Int32 unLo, unHi, ltLo, gtHi, n, m;
9975107Sfenner   Int32 sp, lo, hi;
10075107Sfenner   UInt32 med, r, r3;
10175107Sfenner   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
10275107Sfenner   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
10326175Sfenner
10498530Sfenner   r = 0;
10598530Sfenner
10698530Sfenner   sp = 0;
10798530Sfenner   fpush ( loSt, hiSt );
10898530Sfenner
10998530Sfenner   while (sp > 0) {
11098530Sfenner
11198530Sfenner      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
11298530Sfenner
11398530Sfenner      fpop ( lo, hi );
11498530Sfenner      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
11598530Sfenner         fallbackSimpleSort ( fmap, eclass, lo, hi );
11698530Sfenner         continue;
11798530Sfenner      }
11898530Sfenner
11998530Sfenner      /* Random partitioning.  Median of 3 sometimes fails to
12098530Sfenner         avoid bad cases.  Median of 9 seems to help but
12198530Sfenner         looks rather expensive.  This too seems to work but
12298530Sfenner         is cheaper.  Guidance for the magic constants
12398530Sfenner         7621 and 32768 is taken from Sedgewick's algorithms
12498530Sfenner         book, chapter 35.
12598530Sfenner      */
12698530Sfenner      r = ((r * 7621) + 1) % 32768;
12798530Sfenner      r3 = r % 3;
12898530Sfenner      if (r3 == 0) med = eclass[fmap[lo]]; else
129127664Sbms      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
13098530Sfenner                   med = eclass[fmap[hi]];
13126175Sfenner
13275107Sfenner      unLo = ltLo = lo;
13398530Sfenner      unHi = gtHi = hi;
13498530Sfenner
13598530Sfenner      while (1) {
13675107Sfenner         while (1) {
13775107Sfenner            if (unLo > unHi) break;
13875107Sfenner            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
13975107Sfenner            if (n == 0) {
14075107Sfenner               fswap(fmap[unLo], fmap[ltLo]);
14175107Sfenner               ltLo++; unLo++;
14275107Sfenner               continue;
14375107Sfenner            };
14475107Sfenner            if (n > 0) break;
14598530Sfenner            unLo++;
14675107Sfenner         }
14775107Sfenner         while (1) {
14875107Sfenner            if (unLo > unHi) break;
14975107Sfenner            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
15026175Sfenner            if (n == 0) {
15126175Sfenner               fswap(fmap[unHi], fmap[gtHi]);
15275107Sfenner               gtHi--; unHi--;
15375107Sfenner               continue;
15475107Sfenner            };
15526175Sfenner            if (n < 0) break;
15675107Sfenner            unHi--;
15798530Sfenner         }
15898530Sfenner         if (unLo > unHi) break;
15998530Sfenner         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
16098530Sfenner      }
16198530Sfenner
16298530Sfenner      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
16398530Sfenner
16498530Sfenner      if (gtHi < ltLo) continue;
16598530Sfenner
16675107Sfenner      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
16775107Sfenner      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
168127664Sbms
169127664Sbms      n = lo + unLo - ltLo - 1;
170127664Sbms      m = hi - (gtHi - unHi) + 1;
171127664Sbms
172127664Sbms      if (n - lo > hi - m) {
173127664Sbms         fpush ( lo, n );
174127664Sbms         fpush ( m, hi );
175127664Sbms      } else {
176127664Sbms         fpush ( m, hi );
177127664Sbms         fpush ( lo, n );
17875107Sfenner      }
17975107Sfenner   }
180127664Sbms}
181127664Sbms
18275107Sfenner#undef fmin
18375107Sfenner#undef fpush
18475107Sfenner#undef fpop
18575107Sfenner#undef fswap
18675107Sfenner#undef fvswap
18775107Sfenner#undef FALLBACK_QSORT_SMALL_THRESH
18875107Sfenner#undef FALLBACK_QSORT_STACK_SIZE
18975107Sfenner
190127664Sbms
191127664Sbms/*---------------------------------------------*/
192127664Sbms/* Pre:
193127664Sbms      nblock > 0
19475107Sfenner      eclass exists for [0 .. nblock-1]
195146768Ssam      ((UChar*)eclass) [0 .. nblock-1] holds block
196127664Sbms      ptr exists for [0 .. nblock-1]
197127664Sbms
198162012Ssam   Post:
199127664Sbms      ((UChar*)eclass) [0 .. nblock-1] holds block
20075107Sfenner      All other areas of eclass destroyed
20175107Sfenner      fmap [0 .. nblock-1] holds sorted order
20275107Sfenner      bhtab [ 0 .. 2+(nblock/32) ] destroyed
20375107Sfenner*/
20475107Sfenner
20575107Sfenner#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
20675107Sfenner#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
20775107Sfenner#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
20875107Sfenner#define      WORD_BH(zz)  bhtab[(zz) >> 5]
20975107Sfenner#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
21075107Sfenner
21175107Sfennerstatic
21275107Sfennervoid fallbackSort ( UInt32* fmap,
21375107Sfenner                    UInt32* eclass,
21475107Sfenner                    UInt32* bhtab,
21575107Sfenner                    Int32   nblock,
21675107Sfenner                    Int32   verb )
21798530Sfenner{
21898530Sfenner   Int32 ftab[257];
21998530Sfenner   Int32 ftabCopy[256];
22098530Sfenner   Int32 H, i, j, k, l, r, cc, cc1;
22198530Sfenner   Int32 nNotDone;
22298530Sfenner   Int32 nBhtab;
22398530Sfenner   UChar* eclass8 = (UChar*)eclass;
22475107Sfenner
22575107Sfenner   /*--
22675107Sfenner      Initial 1-char radix sort to generate
227127664Sbms      initial fmap and initial BH bits.
228127664Sbms   --*/
22975107Sfenner   if (verb >= 4)
230127664Sbms      VPrintf0 ( "        bucket sorting ...\n" );
23175107Sfenner   for (i = 0; i < 257;    i++) ftab[i] = 0;
23275107Sfenner   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
233127664Sbms   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
23475107Sfenner   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
23575107Sfenner
23675107Sfenner   for (i = 0; i < nblock; i++) {
237127664Sbms      j = eclass8[i];
238127664Sbms      k = ftab[j] - 1;
23926175Sfenner      ftab[j] = k;
24098530Sfenner      fmap[k] = i;
24198530Sfenner   }
242127664Sbms
243127664Sbms   nBhtab = 2 + (nblock / 32);
24498530Sfenner   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
24598530Sfenner   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
246127664Sbms
247127664Sbms   /*--
248127664Sbms      Inductively refine the buckets.  Kind-of an
249127664Sbms      "exponential radix sort" (!), inspired by the
250127664Sbms      Manber-Myers suffix array construction algorithm.
251127664Sbms   --*/
252147894Ssam
253147894Ssam   /*-- set sentinel bits for block-end detection --*/
254147894Ssam   for (i = 0; i < 32; i++) {
255147894Ssam      SET_BH(nblock + 2*i);
256147894Ssam      CLEAR_BH(nblock + 2*i + 1);
25726175Sfenner   }
258147894Ssam
259147894Ssam   /*-- the log(N) loop --*/
26098530Sfenner   H = 1;
26175107Sfenner   while (1) {
26275107Sfenner
26375107Sfenner      if (verb >= 4)
26475107Sfenner         VPrintf1 ( "        depth %6d has ", H );
26575107Sfenner
26675107Sfenner      j = 0;
26775107Sfenner      for (i = 0; i < nblock; i++) {
26875107Sfenner         if (ISSET_BH(i)) j = i;
26975107Sfenner         k = fmap[i] - H; if (k < 0) k += nblock;
27075107Sfenner         eclass[k] = j;
27175107Sfenner      }
27275107Sfenner
27375107Sfenner      nNotDone = 0;
274127664Sbms      r = -1;
27575107Sfenner      while (1) {
27675107Sfenner
27775107Sfenner	 /*-- find the next non-singleton bucket --*/
27875107Sfenner         k = r + 1;
27975107Sfenner         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28098530Sfenner         if (ISSET_BH(k)) {
28198530Sfenner            while (WORD_BH(k) == 0xffffffff) k += 32;
28298530Sfenner            while (ISSET_BH(k)) k++;
28398530Sfenner         }
28498530Sfenner         l = k - 1;
28598530Sfenner         if (l >= nblock) break;
286127664Sbms         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28775107Sfenner         if (!ISSET_BH(k)) {
28875107Sfenner            while (WORD_BH(k) == 0x00000000) k += 32;
28975107Sfenner            while (!ISSET_BH(k)) k++;
29075107Sfenner         }
29175107Sfenner         r = k - 1;
29275107Sfenner         if (r >= nblock) break;
29375107Sfenner
29475107Sfenner         /*-- now [l, r] bracket current bucket --*/
29575107Sfenner         if (r > l) {
29675107Sfenner            nNotDone += (r - l + 1);
297127664Sbms            fallbackQSort3 ( fmap, eclass, l, r );
298127664Sbms
299127664Sbms            /*-- scan bucket and generate header bits-- */
300127664Sbms            cc = -1;
30175107Sfenner            for (i = l; i <= r; i++) {
30275107Sfenner               cc1 = eclass[fmap[i]];
303127664Sbms               if (cc != cc1) { SET_BH(i); cc = cc1; };
30475107Sfenner            }
30575107Sfenner         }
30675107Sfenner      }
307127664Sbms
308127664Sbms      if (verb >= 4)
309127664Sbms         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
310127664Sbms
311127664Sbms      H *= 2;
312127664Sbms      if (H > nblock || nNotDone == 0) break;
313127664Sbms   }
314127664Sbms
315127664Sbms   /*--
31675107Sfenner      Reconstruct the original block in
31775107Sfenner      eclass8 [0 .. nblock-1], since the
318127664Sbms      previous phase destroyed it.
31975107Sfenner   --*/
32075107Sfenner   if (verb >= 4)
321127664Sbms      VPrintf0 ( "        reconstructing block ...\n" );
322127664Sbms   j = 0;
32375107Sfenner   for (i = 0; i < nblock; i++) {
32475107Sfenner      while (ftabCopy[j] == 0) j++;
32575107Sfenner      ftabCopy[j]--;
32675107Sfenner      eclass8[fmap[i]] = (UChar)j;
32798530Sfenner   }
32898530Sfenner   AssertH ( j < 256, 1005 );
32998530Sfenner}
33098530Sfenner
33198530Sfenner#undef       SET_BH
33298530Sfenner#undef     CLEAR_BH
33398530Sfenner#undef     ISSET_BH
33498530Sfenner#undef      WORD_BH
33598530Sfenner#undef UNALIGNED_BH
33698530Sfenner
33798530Sfenner
33898530Sfenner/*---------------------------------------------*/
33998530Sfenner/*--- The main, O(N^2 log(N)) sorting       ---*/
34098530Sfenner/*--- algorithm.  Faster for "normal"       ---*/
34198530Sfenner/*--- non-repetitive blocks.                ---*/
34298530Sfenner/*---------------------------------------------*/
34398530Sfenner
34498530Sfenner/*---------------------------------------------*/
34598530Sfennerstatic
34698530Sfenner__inline__
34798530SfennerBool mainGtU ( UInt32  i1,
34898530Sfenner               UInt32  i2,
34998530Sfenner               UChar*  block,
35098530Sfenner               UInt16* quadrant,
35198530Sfenner               UInt32  nblock,
35298530Sfenner               Int32*  budget )
35398530Sfenner{
35498530Sfenner   Int32  k;
35598530Sfenner   UChar  c1, c2;
35698530Sfenner   UInt16 s1, s2;
35798530Sfenner
35898530Sfenner   AssertD ( i1 != i2, "mainGtU" );
35998530Sfenner   /* 1 */
36098530Sfenner   c1 = block[i1]; c2 = block[i2];
36198530Sfenner   if (c1 != c2) return (c1 > c2);
36298530Sfenner   i1++; i2++;
36398530Sfenner   /* 2 */
36498530Sfenner   c1 = block[i1]; c2 = block[i2];
36598530Sfenner   if (c1 != c2) return (c1 > c2);
36698530Sfenner   i1++; i2++;
36798530Sfenner   /* 3 */
36898530Sfenner   c1 = block[i1]; c2 = block[i2];
36998530Sfenner   if (c1 != c2) return (c1 > c2);
37098530Sfenner   i1++; i2++;
37198530Sfenner   /* 4 */
37298530Sfenner   c1 = block[i1]; c2 = block[i2];
37398530Sfenner   if (c1 != c2) return (c1 > c2);
37498530Sfenner   i1++; i2++;
37598530Sfenner   /* 5 */
37698530Sfenner   c1 = block[i1]; c2 = block[i2];
37798530Sfenner   if (c1 != c2) return (c1 > c2);
37898530Sfenner   i1++; i2++;
37998530Sfenner   /* 6 */
380127664Sbms   c1 = block[i1]; c2 = block[i2];
38198530Sfenner   if (c1 != c2) return (c1 > c2);
38298530Sfenner   i1++; i2++;
38398530Sfenner   /* 7 */
38498530Sfenner   c1 = block[i1]; c2 = block[i2];
38598530Sfenner   if (c1 != c2) return (c1 > c2);
38698530Sfenner   i1++; i2++;
38798530Sfenner   /* 8 */
38898530Sfenner   c1 = block[i1]; c2 = block[i2];
38998530Sfenner   if (c1 != c2) return (c1 > c2);
39098530Sfenner   i1++; i2++;
39198530Sfenner   /* 9 */
39298530Sfenner   c1 = block[i1]; c2 = block[i2];
39398530Sfenner   if (c1 != c2) return (c1 > c2);
39498530Sfenner   i1++; i2++;
39598530Sfenner   /* 10 */
39698530Sfenner   c1 = block[i1]; c2 = block[i2];
397172677Smlaier   if (c1 != c2) return (c1 > c2);
398172677Smlaier   i1++; i2++;
399172677Smlaier   /* 11 */
400172677Smlaier   c1 = block[i1]; c2 = block[i2];
401172677Smlaier   if (c1 != c2) return (c1 > c2);
40298530Sfenner   i1++; i2++;
403172677Smlaier   /* 12 */
404172677Smlaier   c1 = block[i1]; c2 = block[i2];
405172677Smlaier   if (c1 != c2) return (c1 > c2);
406172677Smlaier   i1++; i2++;
40798530Sfenner
40898530Sfenner   k = nblock + 8;
40998530Sfenner
41098530Sfenner   do {
41198530Sfenner      /* 1 */
41298530Sfenner      c1 = block[i1]; c2 = block[i2];
41398530Sfenner      if (c1 != c2) return (c1 > c2);
41498530Sfenner      s1 = quadrant[i1]; s2 = quadrant[i2];
41598530Sfenner      if (s1 != s2) return (s1 > s2);
416127664Sbms      i1++; i2++;
41798530Sfenner      /* 2 */
41898530Sfenner      c1 = block[i1]; c2 = block[i2];
41998530Sfenner      if (c1 != c2) return (c1 > c2);
42098530Sfenner      s1 = quadrant[i1]; s2 = quadrant[i2];
421127664Sbms      if (s1 != s2) return (s1 > s2);
422127664Sbms      i1++; i2++;
423127664Sbms      /* 3 */
424127664Sbms      c1 = block[i1]; c2 = block[i2];
425127664Sbms      if (c1 != c2) return (c1 > c2);
426127664Sbms      s1 = quadrant[i1]; s2 = quadrant[i2];
427127664Sbms      if (s1 != s2) return (s1 > s2);
428146768Ssam      i1++; i2++;
429127664Sbms      /* 4 */
430147894Ssam      c1 = block[i1]; c2 = block[i2];
431127664Sbms      if (c1 != c2) return (c1 > c2);
432127664Sbms      s1 = quadrant[i1]; s2 = quadrant[i2];
433127664Sbms      if (s1 != s2) return (s1 > s2);
434127664Sbms      i1++; i2++;
435127664Sbms      /* 5 */
436127664Sbms      c1 = block[i1]; c2 = block[i2];
43775107Sfenner      if (c1 != c2) return (c1 > c2);
43826175Sfenner      s1 = quadrant[i1]; s2 = quadrant[i2];
43926175Sfenner      if (s1 != s2) return (s1 > s2);
44075107Sfenner      i1++; i2++;
44175107Sfenner      /* 6 */
44275107Sfenner      c1 = block[i1]; c2 = block[i2];
443127664Sbms      if (c1 != c2) return (c1 > c2);
44475107Sfenner      s1 = quadrant[i1]; s2 = quadrant[i2];
445127664Sbms      if (s1 != s2) return (s1 > s2);
446127664Sbms      i1++; i2++;
44726175Sfenner      /* 7 */
44875107Sfenner      c1 = block[i1]; c2 = block[i2];
44975107Sfenner      if (c1 != c2) return (c1 > c2);
45075107Sfenner      s1 = quadrant[i1]; s2 = quadrant[i2];
45175107Sfenner      if (s1 != s2) return (s1 > s2);
45275107Sfenner      i1++; i2++;
45375107Sfenner      /* 8 */
45426175Sfenner      c1 = block[i1]; c2 = block[i2];
45575107Sfenner      if (c1 != c2) return (c1 > c2);
456127664Sbms      s1 = quadrant[i1]; s2 = quadrant[i2];
45775107Sfenner      if (s1 != s2) return (s1 > s2);
45875107Sfenner      i1++; i2++;
45975107Sfenner
46075107Sfenner      if (i1 >= nblock) i1 -= nblock;
46175107Sfenner      if (i2 >= nblock) i2 -= nblock;
46275107Sfenner
46398530Sfenner      k -= 8;
46475107Sfenner      (*budget)--;
46575107Sfenner   }
46675107Sfenner      while (k >= 0);
46775107Sfenner
46875107Sfenner   return False;
46975107Sfenner}
47075107Sfenner
47175107Sfenner
47275107Sfenner/*---------------------------------------------*/
47375107Sfenner/*--
47426175Sfenner   Knuth's increments seem to work better
47575107Sfenner   than Incerpi-Sedgewick here.  Possibly
47675107Sfenner   because the number of elems to sort is
47775107Sfenner   usually small, typically <= 20.
47875107Sfenner--*/
47975107Sfennerstatic
48075107SfennerInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
48175107Sfenner                   9841, 29524, 88573, 265720,
48275107Sfenner                   797161, 2391484 };
48375107Sfenner
48475107Sfennerstatic
48575107Sfennervoid mainSimpleSort ( UInt32* ptr,
48675107Sfenner                      UChar*  block,
48775107Sfenner                      UInt16* quadrant,
48875107Sfenner                      Int32   nblock,
48975107Sfenner                      Int32   lo,
49075107Sfenner                      Int32   hi,
49175107Sfenner                      Int32   d,
49275107Sfenner                      Int32*  budget )
49375107Sfenner{
49498530Sfenner   Int32 i, j, h, bigN, hp;
49526175Sfenner   UInt32 v;
496127664Sbms
497127664Sbms   bigN = hi - lo + 1;
498127664Sbms   if (bigN < 2) return;
499127664Sbms
500127664Sbms   hp = 0;
501127664Sbms   while (incs[hp] < bigN) hp++;
502127664Sbms   hp--;
503127664Sbms
504127664Sbms   for (; hp >= 0; hp--) {
505127664Sbms      h = incs[hp];
506127664Sbms
507127664Sbms      i = lo + h;
50826175Sfenner      while (True) {
509127664Sbms
51098530Sfenner         /*-- copy 1 --*/
511127664Sbms         if (i > hi) break;
51275107Sfenner         v = ptr[i];
51375107Sfenner         j = i;
51426175Sfenner         while ( mainGtU (
51575107Sfenner                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
51626175Sfenner                 ) ) {
51775107Sfenner            ptr[j] = ptr[j-h];
51875107Sfenner            j = j - h;
51975107Sfenner            if (j <= (lo + h - 1)) break;
52075107Sfenner         }
52175107Sfenner         ptr[j] = v;
52275107Sfenner         i++;
52375107Sfenner
52426175Sfenner         /*-- copy 2 --*/
52575107Sfenner         if (i > hi) break;
52626175Sfenner         v = ptr[i];
52775107Sfenner         j = i;
528147894Ssam         while ( mainGtU (
529147894Ssam                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
530172677Smlaier                 ) ) {
531172677Smlaier            ptr[j] = ptr[j-h];
532172677Smlaier            j = j - h;
533172677Smlaier            if (j <= (lo + h - 1)) break;
534172677Smlaier         }
535172677Smlaier         ptr[j] = v;
536172677Smlaier         i++;
537172677Smlaier
538172677Smlaier         /*-- copy 3 --*/
539172677Smlaier         if (i > hi) break;
540172677Smlaier         v = ptr[i];
541172677Smlaier         j = i;
542172677Smlaier         while ( mainGtU (
543172677Smlaier                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
544172677Smlaier                 ) ) {
545172677Smlaier            ptr[j] = ptr[j-h];
546147894Ssam            j = j - h;
547147894Ssam            if (j <= (lo + h - 1)) break;
548147894Ssam         }
549147894Ssam         ptr[j] = v;
550147894Ssam         i++;
551147894Ssam
552147894Ssam         if (*budget < 0) return;
553147894Ssam      }
554147894Ssam   }
555147894Ssam}
556147894Ssam
557147894Ssam
558147894Ssam/*---------------------------------------------*/
559147894Ssam/*--
560147894Ssam   The following is an implementation of
561147894Ssam   an elegant 3-way quicksort for strings,
562147894Ssam   described in a paper "Fast Algorithms for
563147894Ssam   Sorting and Searching Strings", by Robert
564162012Ssam   Sedgewick and Jon L. Bentley.
565147894Ssam--*/
566147894Ssam
567147894Ssam#define mswap(zz1, zz2) \
568147894Ssam   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
569147894Ssam
570147894Ssam#define mvswap(zzp1, zzp2, zzn)       \
571162012Ssam{                                     \
572147894Ssam   Int32 yyp1 = (zzp1);               \
573147894Ssam   Int32 yyp2 = (zzp2);               \
574147894Ssam   Int32 yyn  = (zzn);                \
57575107Sfenner   while (yyn > 0) {                  \
57626175Sfenner      mswap(ptr[yyp1], ptr[yyp2]);    \
57775107Sfenner      yyp1++; yyp2++; yyn--;          \
57875107Sfenner   }                                  \
57975107Sfenner}
58075107Sfenner
58175107Sfennerstatic
58275107Sfenner__inline__
58375107SfennerUChar mmed3 ( UChar a, UChar b, UChar c )
58475107Sfenner{
58575107Sfenner   UChar t;
58675107Sfenner   if (a > b) { t = a; a = b; b = t; };
58726175Sfenner   if (b > c) {
58898530Sfenner      b = c;
58926175Sfenner      if (a > b) b = a;
59075107Sfenner   }
59175107Sfenner   return b;
59275107Sfenner}
59375107Sfenner
59475107Sfenner#define mmin(a,b) ((a) < (b)) ? (a) : (b)
59575107Sfenner
59675107Sfenner#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
59775107Sfenner                          stackHi[sp] = hz; \
59875107Sfenner                          stackD [sp] = dz; \
59926175Sfenner                          sp++; }
60075107Sfenner
60175107Sfenner#define mpop(lz,hz,dz) { sp--;             \
60275107Sfenner                         lz = stackLo[sp]; \
60326175Sfenner                         hz = stackHi[sp]; \
60475107Sfenner                         dz = stackD [sp]; }
60575107Sfenner
60675107Sfenner
60775107Sfenner#define mnextsize(az) (nextHi[az]-nextLo[az])
60875107Sfenner
60975107Sfenner#define mnextswap(az,bz)                                        \
61075107Sfenner   { Int32 tz;                                                  \
61175107Sfenner     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
61275107Sfenner     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
61375107Sfenner     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
61475107Sfenner
61575107Sfenner
61675107Sfenner#define MAIN_QSORT_SMALL_THRESH 20
61775107Sfenner#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
61875107Sfenner#define MAIN_QSORT_STACK_SIZE 100
61975107Sfenner
62075107Sfennerstatic
62175107Sfennervoid mainQSort3 ( UInt32* ptr,
62275107Sfenner                  UChar*  block,
62326175Sfenner                  UInt16* quadrant,
62475107Sfenner                  Int32   nblock,
62575107Sfenner                  Int32   loSt,
62675107Sfenner                  Int32   hiSt,
62775107Sfenner                  Int32   dSt,
62875107Sfenner                  Int32*  budget )
62975107Sfenner{
63075107Sfenner   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
63175107Sfenner   Int32 sp, lo, hi, d;
63226175Sfenner
63375107Sfenner   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
63475107Sfenner   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
63575107Sfenner   Int32 stackD [MAIN_QSORT_STACK_SIZE];
636127664Sbms
637127664Sbms   Int32 nextLo[3];
63875107Sfenner   Int32 nextHi[3];
639127664Sbms   Int32 nextD [3];
64075107Sfenner
64175107Sfenner   sp = 0;
64275107Sfenner   mpush ( loSt, hiSt, dSt );
643127664Sbms
64475107Sfenner   while (sp > 0) {
645127664Sbms
64675107Sfenner      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
64775107Sfenner
64875107Sfenner      mpop ( lo, hi, d );
64975107Sfenner      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
65075107Sfenner          d > MAIN_QSORT_DEPTH_THRESH) {
65175107Sfenner         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
65275107Sfenner         if (*budget < 0) return;
65375107Sfenner         continue;
65475107Sfenner      }
655127664Sbms
65675107Sfenner      med = (Int32)
657127664Sbms            mmed3 ( block[ptr[ lo         ]+d],
658127664Sbms                    block[ptr[ hi         ]+d],
65975107Sfenner                    block[ptr[ (lo+hi)>>1 ]+d] );
66075107Sfenner
66175107Sfenner      unLo = ltLo = lo;
66275107Sfenner      unHi = gtHi = hi;
66375107Sfenner
66475107Sfenner      while (True) {
66575107Sfenner         while (True) {
66675107Sfenner            if (unLo > unHi) break;
66775107Sfenner            n = ((Int32)block[ptr[unLo]+d]) - med;
66875107Sfenner            if (n == 0) {
66975107Sfenner               mswap(ptr[unLo], ptr[ltLo]);
67075107Sfenner               ltLo++; unLo++; continue;
67175107Sfenner            };
67275107Sfenner            if (n >  0) break;
67398530Sfenner            unLo++;
67475107Sfenner         }
67575107Sfenner         while (True) {
67675107Sfenner            if (unLo > unHi) break;
67775107Sfenner            n = ((Int32)block[ptr[unHi]+d]) - med;
67875107Sfenner            if (n == 0) {
67975107Sfenner               mswap(ptr[unHi], ptr[gtHi]);
68075107Sfenner               gtHi--; unHi--; continue;
68175107Sfenner            };
68275107Sfenner            if (n <  0) break;
68375107Sfenner            unHi--;
68475107Sfenner         }
685172677Smlaier         if (unLo > unHi) break;
68675107Sfenner         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
68775107Sfenner      }
68875107Sfenner
68975107Sfenner      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
69075107Sfenner
69198530Sfenner      if (gtHi < ltLo) {
69298530Sfenner         mpush(lo, hi, d+1 );
69398530Sfenner         continue;
69498530Sfenner      }
69598530Sfenner
69698530Sfenner      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
69798530Sfenner      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
69898530Sfenner
69998530Sfenner      n = lo + unLo - ltLo - 1;
70098530Sfenner      m = hi - (gtHi - unHi) + 1;
70198530Sfenner
70298530Sfenner      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
70398530Sfenner      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
70498530Sfenner      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
70598530Sfenner
70698530Sfenner      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70798530Sfenner      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
70898530Sfenner      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70998530Sfenner
71098530Sfenner      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
71198530Sfenner      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
71298530Sfenner
71398530Sfenner      mpush (nextLo[0], nextHi[0], nextD[0]);
71498530Sfenner      mpush (nextLo[1], nextHi[1], nextD[1]);
71598530Sfenner      mpush (nextLo[2], nextHi[2], nextD[2]);
71698530Sfenner   }
71798530Sfenner}
71898530Sfenner
71998530Sfenner#undef mswap
72098530Sfenner#undef mvswap
72198530Sfenner#undef mpush
72298530Sfenner#undef mpop
723172677Smlaier#undef mmin
724172677Smlaier#undef mnextsize
725172677Smlaier#undef mnextswap
726172677Smlaier#undef MAIN_QSORT_SMALL_THRESH
727172677Smlaier#undef MAIN_QSORT_DEPTH_THRESH
728172677Smlaier#undef MAIN_QSORT_STACK_SIZE
729172677Smlaier
730172677Smlaier
731172677Smlaier/*---------------------------------------------*/
732172677Smlaier/* Pre:
73398530Sfenner      nblock > N_OVERSHOOT
734172677Smlaier      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
73575107Sfenner      ((UChar*)block32) [0 .. nblock-1] holds block
73698530Sfenner      ptr exists for [0 .. nblock-1]
73798530Sfenner
73898530Sfenner   Post:
73975107Sfenner      ((UChar*)block32) [0 .. nblock-1] holds block
74026175Sfenner      All other areas of block32 destroyed
74126175Sfenner      ftab [0 .. 65536 ] destroyed
742146768Ssam      ptr [0 .. nblock-1] holds sorted order
743146768Ssam      if (*budget < 0), sorting was abandoned
744146768Ssam*/
745146768Ssam
746146768Ssam#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
747146768Ssam#define SETMASK (1 << 21)
748146768Ssam#define CLEARMASK (~(SETMASK))
749146768Ssam
750146768Ssamstatic
751146768Ssamvoid mainSort ( UInt32* ptr,
752146768Ssam                UChar*  block,
753146768Ssam                UInt16* quadrant,
754146768Ssam                UInt32* ftab,
755146768Ssam                Int32   nblock,
756146768Ssam                Int32   verb,
757146768Ssam                Int32*  budget )
758146768Ssam{
759146768Ssam   Int32  i, j, k, ss, sb;
760146768Ssam   Int32  runningOrder[256];
761146768Ssam   Bool   bigDone[256];
762146768Ssam   Int32  copyStart[256];
763146768Ssam   Int32  copyEnd  [256];
764146768Ssam   UChar  c1;
765146768Ssam   Int32  numQSorted;
766146768Ssam   UInt16 s;
767146768Ssam   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
768146768Ssam
769146768Ssam   /*-- set up the 2-byte frequency table --*/
770146768Ssam   for (i = 65536; i >= 0; i--) ftab[i] = 0;
771146768Ssam
772146768Ssam   j = block[0] << 8;
773146768Ssam   i = nblock-1;
774146768Ssam   for (; i >= 3; i -= 4) {
775146768Ssam      quadrant[i] = 0;
776146768Ssam      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
777146768Ssam      ftab[j]++;
778146768Ssam      quadrant[i-1] = 0;
779146768Ssam      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
780146768Ssam      ftab[j]++;
781146768Ssam      quadrant[i-2] = 0;
782146768Ssam      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
783146768Ssam      ftab[j]++;
784146768Ssam      quadrant[i-3] = 0;
78575107Sfenner      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
78675107Sfenner      ftab[j]++;
78798530Sfenner   }
78898530Sfenner   for (; i >= 0; i--) {
78998530Sfenner      quadrant[i] = 0;
79098530Sfenner      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
79198530Sfenner      ftab[j]++;
79275107Sfenner   }
793127664Sbms
794127664Sbms   /*-- (emphasises close relationship of block & quadrant) --*/
79526175Sfenner   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
79698530Sfenner      block   [nblock+i] = block[i];
79798530Sfenner      quadrant[nblock+i] = 0;
79898530Sfenner   }
799127664Sbms
80098530Sfenner   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
801127664Sbms
80298530Sfenner   /*-- Complete the initial radix sort --*/
80398530Sfenner   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
80498530Sfenner
80598530Sfenner   s = block[0] << 8;
80698530Sfenner   i = nblock-1;
80798530Sfenner   for (; i >= 3; i -= 4) {
808172677Smlaier      s = (s >> 8) | (block[i] << 8);
809172677Smlaier      j = ftab[s] -1;
810172677Smlaier      ftab[s] = j;
811172677Smlaier      ptr[j] = i;
812172677Smlaier      s = (s >> 8) | (block[i-1] << 8);
813172677Smlaier      j = ftab[s] -1;
814172677Smlaier      ftab[s] = j;
815172677Smlaier      ptr[j] = i-1;
816172677Smlaier      s = (s >> 8) | (block[i-2] << 8);
817172677Smlaier      j = ftab[s] -1;
818172677Smlaier      ftab[s] = j;
819172677Smlaier      ptr[j] = i-2;
820172677Smlaier      s = (s >> 8) | (block[i-3] << 8);
821172677Smlaier      j = ftab[s] -1;
822172677Smlaier      ftab[s] = j;
823172677Smlaier      ptr[j] = i-3;
824172677Smlaier   }
82598530Sfenner   for (; i >= 0; i--) {
82698530Sfenner      s = (s >> 8) | (block[i] << 8);
82798530Sfenner      j = ftab[s] -1;
82898530Sfenner      ftab[s] = j;
82998530Sfenner      ptr[j] = i;
83098530Sfenner   }
83198530Sfenner
83298530Sfenner   /*--
83398530Sfenner      Now ftab contains the first loc of every small bucket.
83498530Sfenner      Calculate the running order, from smallest to largest
83598530Sfenner      big bucket.
83698530Sfenner   --*/
83798530Sfenner   for (i = 0; i <= 255; i++) {
83898530Sfenner      bigDone     [i] = False;
83998530Sfenner      runningOrder[i] = i;
84098530Sfenner   }
84198530Sfenner
84298530Sfenner   {
84398530Sfenner      Int32 vv;
84498530Sfenner      Int32 h = 1;
84598530Sfenner      do h = 3 * h + 1; while (h <= 256);
84698530Sfenner      do {
847146768Ssam         h = h / 3;
848146768Ssam         for (i = h; i <= 255; i++) {
849146768Ssam            vv = runningOrder[i];
850146768Ssam            j = i;
85198530Sfenner            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
852146768Ssam               runningOrder[j] = runningOrder[j-h];
853146768Ssam               j = j - h;
854172677Smlaier               if (j <= (h - 1)) goto zero;
855172677Smlaier            }
85698530Sfenner            zero:
85798530Sfenner            runningOrder[j] = vv;
85898530Sfenner         }
85998530Sfenner      } while (h != 1);
86098530Sfenner   }
86198530Sfenner
86298530Sfenner   /*--
86398530Sfenner      The main sorting loop.
86498530Sfenner   --*/
86598530Sfenner
86698530Sfenner   numQSorted = 0;
86798530Sfenner
86898530Sfenner   for (i = 0; i <= 255; i++) {
86998530Sfenner
87098530Sfenner      /*--
87198530Sfenner         Process big buckets, starting with the least full.
87298530Sfenner         Basically this is a 3-step process in which we call
87398530Sfenner         mainQSort3 to sort the small buckets [ss, j], but
87498530Sfenner         also make a big effort to avoid the calls if we can.
87598530Sfenner      --*/
87698530Sfenner      ss = runningOrder[i];
87798530Sfenner
87898530Sfenner      /*--
87998530Sfenner         Step 1:
88098530Sfenner         Complete the big bucket [ss] by quicksorting
88198530Sfenner         any unsorted small buckets [ss, j], for j != ss.
88298530Sfenner         Hopefully previous pointer-scanning phases have already
88398530Sfenner         completed many of the small buckets [ss, j], so
88498530Sfenner         we don't have to sort them at all.
88598530Sfenner      --*/
886172677Smlaier      for (j = 0; j <= 255; j++) {
887172677Smlaier         if (j != ss) {
888172677Smlaier            sb = (ss << 8) + j;
889172677Smlaier            if ( ! (ftab[sb] & SETMASK) ) {
890172677Smlaier               Int32 lo = ftab[sb]   & CLEARMASK;
89198530Sfenner               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
892172677Smlaier               if (hi > lo) {
893172677Smlaier                  if (verb >= 4)
89475107Sfenner                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
89575107Sfenner                                "done %d   this %d\n",
89626175Sfenner                                ss, j, numQSorted, hi - lo + 1 );
89775107Sfenner                  mainQSort3 (
898127664Sbms                     ptr, block, quadrant, nblock,
89975107Sfenner                     lo, hi, BZ_N_RADIX, budget
900127664Sbms                  );
901127664Sbms                  numQSorted += (hi - lo + 1);
90275107Sfenner                  if (*budget < 0) return;
903127664Sbms               }
90475107Sfenner            }
905127664Sbms            ftab[sb] |= SETMASK;
906127664Sbms         }
907127664Sbms      }
908127664Sbms
909127664Sbms      AssertH ( !bigDone[ss], 1006 );
910127664Sbms
911127664Sbms      /*--
912127664Sbms         Step 2:
913147894Ssam         Now scan this big bucket [ss] so as to synthesise the
914147894Ssam         sorted order for small buckets [t, ss] for all t,
915147894Ssam         including, magically, the bucket [ss,ss] too.
916147894Ssam         This will avoid doing Real Work in subsequent Step 1's.
917147894Ssam      --*/
918127664Sbms      {
919127664Sbms         for (j = 0; j <= 255; j++) {
920127664Sbms            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
921127664Sbms            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
922127664Sbms         }
923127664Sbms         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
924127664Sbms            k = ptr[j]-1; if (k < 0) k += nblock;
925127664Sbms            c1 = block[k];
926127664Sbms            if (!bigDone[c1])
92775107Sfenner               ptr[ copyStart[c1]++ ] = k;
92875107Sfenner         }
92975107Sfenner         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
930127664Sbms            k = ptr[j]-1; if (k < 0) k += nblock;
93175107Sfenner            c1 = block[k];
93275107Sfenner            if (!bigDone[c1])
93375107Sfenner               ptr[ copyEnd[c1]-- ] = k;
93475107Sfenner         }
93575107Sfenner      }
93675107Sfenner
93775107Sfenner      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
93875107Sfenner                ||
93926175Sfenner                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
94026175Sfenner                   Necessity for this case is demonstrated by compressing
94175107Sfenner                   a sequence of approximately 48.5 million of character
94275107Sfenner                   251; 1.0.0/1.0.1 will then die here. */
943127664Sbms                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
944127664Sbms                1007 )
94575107Sfenner
94639291Sfenner      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
947127664Sbms
948127664Sbms      /*--
949127664Sbms         Step 3:
95075107Sfenner         The [ss] big bucket is now done.  Record this fact,
95175107Sfenner         and update the quadrant descriptors.  Remember to
95275107Sfenner         update quadrants in the overshoot area too, if
95375107Sfenner         necessary.  The "if (i < 255)" test merely skips
95475107Sfenner         this updating for the last bucket processed, since
95575107Sfenner         updating for the last bucket is pointless.
95675107Sfenner
95775107Sfenner         The quadrant array provides a way to incrementally
95875107Sfenner         cache sort orderings, as they appear, so as to
959127664Sbms         make subsequent comparisons in fullGtU() complete
96075107Sfenner         faster.  For repetitive blocks this makes a big
96175107Sfenner         difference (but not big enough to be able to avoid
96275107Sfenner         the fallback sorting mechanism, exponential radix sort).
96375107Sfenner
96475107Sfenner         The precise meaning is: at all times:
965172677Smlaier
96675107Sfenner            for 0 <= i < nblock and 0 <= j <= nblock
96775107Sfenner
96875107Sfenner            if block[i] != block[j],
96975107Sfenner
97075107Sfenner               then the relative values of quadrant[i] and
97175107Sfenner                    quadrant[j] are meaningless.
97275107Sfenner
97375107Sfenner               else {
97475107Sfenner                  if quadrant[i] < quadrant[j]
97575107Sfenner                     then the string starting at i lexicographically
97675107Sfenner                     precedes the string starting at j
97775107Sfenner
97875107Sfenner                  else if quadrant[i] > quadrant[j]
97975107Sfenner                     then the string starting at j lexicographically
98075107Sfenner                     precedes the string starting at i
98175107Sfenner
98275107Sfenner                  else
98375107Sfenner                     the relative ordering of the strings starting
98475107Sfenner                     at i and j has not yet been determined.
98575107Sfenner               }
98675107Sfenner      --*/
98775107Sfenner      bigDone[ss] = True;
98875107Sfenner
98975107Sfenner      if (i < 255) {
99075107Sfenner         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
99175107Sfenner         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
99275107Sfenner         Int32 shifts   = 0;
99375107Sfenner
99475107Sfenner         while ((bbSize >> shifts) > 65534) shifts++;
99575107Sfenner
99675107Sfenner         for (j = bbSize-1; j >= 0; j--) {
99775107Sfenner            Int32 a2update     = ptr[bbStart + j];
99875107Sfenner            UInt16 qVal        = (UInt16)(j >> shifts);
99975107Sfenner            quadrant[a2update] = qVal;
100075107Sfenner            if (a2update < BZ_N_OVERSHOOT)
100175107Sfenner               quadrant[a2update + nblock] = qVal;
100275107Sfenner         }
100375107Sfenner         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
100475107Sfenner      }
100575107Sfenner
100675107Sfenner   }
100775107Sfenner
100875107Sfenner   if (verb >= 4)
100975107Sfenner      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
101039291Sfenner                 nblock, numQSorted, nblock - numQSorted );
101139291Sfenner}
101275107Sfenner
1013127664Sbms#undef BIGFREQ
101475107Sfenner#undef SETMASK
101575107Sfenner#undef CLEARMASK
101675107Sfenner
101775107Sfenner
1018127664Sbms/*---------------------------------------------*/
101975107Sfenner/* Pre:
1020127664Sbms      nblock > 0
102175107Sfenner      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
102275107Sfenner      ((UChar*)arr2)  [0 .. nblock-1] holds block
102375107Sfenner      arr1 exists for [0 .. nblock-1]
102475107Sfenner
102575107Sfenner   Post:
102675107Sfenner      ((UChar*)arr2) [0 .. nblock-1] holds block
1027127664Sbms      All other areas of block destroyed
102875107Sfenner      ftab [ 0 .. 65536 ] destroyed
102975107Sfenner      arr1 [0 .. nblock-1] holds sorted order
103075107Sfenner*/
103139291Sfennervoid BZ2_blockSort ( EState* s )
103239291Sfenner{
103375107Sfenner   UInt32* ptr    = s->ptr;
103498530Sfenner   UChar*  block  = s->block;
103598530Sfenner   UInt32* ftab   = s->ftab;
103698530Sfenner   Int32   nblock = s->nblock;
103798530Sfenner   Int32   verb   = s->verbosity;
103898530Sfenner   Int32   wfact  = s->workFactor;
103998530Sfenner   UInt16* quadrant;
104098530Sfenner   Int32   budget;
104198530Sfenner   Int32   budgetInit;
104298530Sfenner   Int32   i;
104398530Sfenner
104498530Sfenner   if (nblock < 10000) {
104575107Sfenner      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
104675107Sfenner   } else {
104775107Sfenner      /* Calculate the location for quadrant, remembering to get
104875107Sfenner         the alignment right.  Assumes that &(block[0]) is at least
1049127664Sbms         2-byte aligned -- this should be ok since block is really
1050127664Sbms         the first section of arr2.
1051127664Sbms      */
1052127664Sbms      i = nblock+BZ_N_OVERSHOOT;
105375107Sfenner      if (i & 1) i++;
105475107Sfenner      quadrant = (UInt16*)(&(block[i]));
105575107Sfenner
105675107Sfenner      /* (wfact-1) / 3 puts the default-factor-30
105775107Sfenner         transition point at very roughly the same place as
105875107Sfenner         with v0.1 and v0.9.0.
1059147894Ssam         Not that it particularly matters any more, since the
1060147894Ssam         resulting compressed stream is now the same regardless
1061147894Ssam         of whether or not we use the main sort or fallback sort.
1062147894Ssam      */
1063162012Ssam      if (wfact < 1  ) wfact = 1;
1064147894Ssam      if (wfact > 100) wfact = 100;
1065147894Ssam      budgetInit = nblock * ((wfact-1) / 3);
1066147894Ssam      budget = budgetInit;
1067147894Ssam
1068147894Ssam      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1069147894Ssam      if (verb >= 3)
1070147894Ssam         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1071147894Ssam                    budgetInit - budget,
1072147894Ssam                    nblock,
1073147894Ssam                    (float)(budgetInit - budget) /
1074147894Ssam                    (float)(nblock==0 ? 1 : nblock) );
1075147894Ssam      if (budget < 0) {
1076147894Ssam         if (verb >= 2)
1077147894Ssam            VPrintf0 ( "    too repetitive; using fallback"
1078147894Ssam                       " sorting algorithm\n" );
1079147894Ssam         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1080147894Ssam      }
1081127664Sbms   }
1082127664Sbms
108398530Sfenner   s->origPtr = -1;
108498530Sfenner   for (i = 0; i < s->nblock; i++)
108598530Sfenner      if (ptr[i] == 0)
108698530Sfenner         { s->origPtr = i; break; };
108798530Sfenner
108898530Sfenner   AssertH( s->origPtr != -1, 1003 );
108998530Sfenner}
109098530Sfenner
1091127664Sbms
1092127664Sbms/*-------------------------------------------------------------*/
1093127664Sbms/*--- end                                       blocksort.c ---*/
1094127664Sbms/*-------------------------------------------------------------*/
1095127664Sbms