178556Sobrien
278556Sobrien/*-------------------------------------------------------------*/
378556Sobrien/*--- Block sorting machinery                               ---*/
478556Sobrien/*---                                           blocksort.c ---*/
578556Sobrien/*-------------------------------------------------------------*/
678556Sobrien
7167974Sdelphij/* ------------------------------------------------------------------
8167974Sdelphij   This file is part of bzip2/libbzip2, a program and library for
9167974Sdelphij   lossless, block-sorting data compression.
1078556Sobrien
11215041Sobrien   bzip2/libbzip2 version 1.0.6 of 6 September 2010
12215041Sobrien   Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
1378556Sobrien
14167974Sdelphij   Please read the WARNING, DISCLAIMER and PATENTS sections in the
15167974Sdelphij   README file.
1678556Sobrien
17167974Sdelphij   This program is released under the terms of the license contained
18167974Sdelphij   in the file LICENSE.
19167974Sdelphij   ------------------------------------------------------------------ */
2078556Sobrien
2178556Sobrien
2278556Sobrien#include "bzlib_private.h"
2378556Sobrien
2478556Sobrien/*---------------------------------------------*/
2578556Sobrien/*--- Fallback O(N log(N)^2) sorting        ---*/
2678556Sobrien/*--- algorithm, for repetitive blocks      ---*/
2778556Sobrien/*---------------------------------------------*/
2878556Sobrien
2978556Sobrien/*---------------------------------------------*/
3078556Sobrienstatic
3178556Sobrien__inline__
3278556Sobrienvoid fallbackSimpleSort ( UInt32* fmap,
3378556Sobrien                          UInt32* eclass,
3478556Sobrien                          Int32   lo,
3578556Sobrien                          Int32   hi )
3678556Sobrien{
3778556Sobrien   Int32 i, j, tmp;
3878556Sobrien   UInt32 ec_tmp;
3978556Sobrien
4078556Sobrien   if (lo == hi) return;
4178556Sobrien
4278556Sobrien   if (hi - lo > 3) {
4378556Sobrien      for ( i = hi-4; i >= lo; i-- ) {
4478556Sobrien         tmp = fmap[i];
4578556Sobrien         ec_tmp = eclass[tmp];
4678556Sobrien         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
4778556Sobrien            fmap[j-4] = fmap[j];
4878556Sobrien         fmap[j-4] = tmp;
4978556Sobrien      }
5078556Sobrien   }
5178556Sobrien
5278556Sobrien   for ( i = hi-1; i >= lo; i-- ) {
5378556Sobrien      tmp = fmap[i];
5478556Sobrien      ec_tmp = eclass[tmp];
5578556Sobrien      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
5678556Sobrien         fmap[j-1] = fmap[j];
5778556Sobrien      fmap[j-1] = tmp;
5878556Sobrien   }
5978556Sobrien}
6078556Sobrien
6178556Sobrien
6278556Sobrien/*---------------------------------------------*/
6378556Sobrien#define fswap(zz1, zz2) \
6478556Sobrien   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
6578556Sobrien
6678556Sobrien#define fvswap(zzp1, zzp2, zzn)       \
6778556Sobrien{                                     \
6878556Sobrien   Int32 yyp1 = (zzp1);               \
6978556Sobrien   Int32 yyp2 = (zzp2);               \
7078556Sobrien   Int32 yyn  = (zzn);                \
7178556Sobrien   while (yyn > 0) {                  \
7278556Sobrien      fswap(fmap[yyp1], fmap[yyp2]);  \
7378556Sobrien      yyp1++; yyp2++; yyn--;          \
7478556Sobrien   }                                  \
7578556Sobrien}
7678556Sobrien
7778556Sobrien
7878556Sobrien#define fmin(a,b) ((a) < (b)) ? (a) : (b)
7978556Sobrien
8078556Sobrien#define fpush(lz,hz) { stackLo[sp] = lz; \
8178556Sobrien                       stackHi[sp] = hz; \
8278556Sobrien                       sp++; }
8378556Sobrien
8478556Sobrien#define fpop(lz,hz) { sp--;              \
8578556Sobrien                      lz = stackLo[sp];  \
8678556Sobrien                      hz = stackHi[sp]; }
8778556Sobrien
8878556Sobrien#define FALLBACK_QSORT_SMALL_THRESH 10
8978556Sobrien#define FALLBACK_QSORT_STACK_SIZE   100
9078556Sobrien
9178556Sobrien
9278556Sobrienstatic
9378556Sobrienvoid fallbackQSort3 ( UInt32* fmap,
9478556Sobrien                      UInt32* eclass,
9578556Sobrien                      Int32   loSt,
9678556Sobrien                      Int32   hiSt )
9778556Sobrien{
9878556Sobrien   Int32 unLo, unHi, ltLo, gtHi, n, m;
9978556Sobrien   Int32 sp, lo, hi;
10078556Sobrien   UInt32 med, r, r3;
10178556Sobrien   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
10278556Sobrien   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
10378556Sobrien
10478556Sobrien   r = 0;
10578556Sobrien
10678556Sobrien   sp = 0;
10778556Sobrien   fpush ( loSt, hiSt );
10878556Sobrien
10978556Sobrien   while (sp > 0) {
11078556Sobrien
111167974Sdelphij      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
11278556Sobrien
11378556Sobrien      fpop ( lo, hi );
11478556Sobrien      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
11578556Sobrien         fallbackSimpleSort ( fmap, eclass, lo, hi );
11678556Sobrien         continue;
11778556Sobrien      }
11878556Sobrien
11978556Sobrien      /* Random partitioning.  Median of 3 sometimes fails to
12078556Sobrien         avoid bad cases.  Median of 9 seems to help but
12178556Sobrien         looks rather expensive.  This too seems to work but
12278556Sobrien         is cheaper.  Guidance for the magic constants
12378556Sobrien         7621 and 32768 is taken from Sedgewick's algorithms
12478556Sobrien         book, chapter 35.
12578556Sobrien      */
12678556Sobrien      r = ((r * 7621) + 1) % 32768;
12778556Sobrien      r3 = r % 3;
12878556Sobrien      if (r3 == 0) med = eclass[fmap[lo]]; else
12978556Sobrien      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
13078556Sobrien                   med = eclass[fmap[hi]];
13178556Sobrien
13278556Sobrien      unLo = ltLo = lo;
13378556Sobrien      unHi = gtHi = hi;
13478556Sobrien
13578556Sobrien      while (1) {
13678556Sobrien         while (1) {
13778556Sobrien            if (unLo > unHi) break;
13878556Sobrien            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
13978556Sobrien            if (n == 0) {
14078556Sobrien               fswap(fmap[unLo], fmap[ltLo]);
14178556Sobrien               ltLo++; unLo++;
14278556Sobrien               continue;
14378556Sobrien            };
14478556Sobrien            if (n > 0) break;
14578556Sobrien            unLo++;
14678556Sobrien         }
14778556Sobrien         while (1) {
14878556Sobrien            if (unLo > unHi) break;
14978556Sobrien            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
15078556Sobrien            if (n == 0) {
15178556Sobrien               fswap(fmap[unHi], fmap[gtHi]);
15278556Sobrien               gtHi--; unHi--;
15378556Sobrien               continue;
15478556Sobrien            };
15578556Sobrien            if (n < 0) break;
15678556Sobrien            unHi--;
15778556Sobrien         }
15878556Sobrien         if (unLo > unHi) break;
15978556Sobrien         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
16078556Sobrien      }
16178556Sobrien
16278556Sobrien      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
16378556Sobrien
16478556Sobrien      if (gtHi < ltLo) continue;
16578556Sobrien
16678556Sobrien      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
16778556Sobrien      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
16878556Sobrien
16978556Sobrien      n = lo + unLo - ltLo - 1;
17078556Sobrien      m = hi - (gtHi - unHi) + 1;
17178556Sobrien
17278556Sobrien      if (n - lo > hi - m) {
17378556Sobrien         fpush ( lo, n );
17478556Sobrien         fpush ( m, hi );
17578556Sobrien      } else {
17678556Sobrien         fpush ( m, hi );
17778556Sobrien         fpush ( lo, n );
17878556Sobrien      }
17978556Sobrien   }
18078556Sobrien}
18178556Sobrien
18278556Sobrien#undef fmin
18378556Sobrien#undef fpush
18478556Sobrien#undef fpop
18578556Sobrien#undef fswap
18678556Sobrien#undef fvswap
18778556Sobrien#undef FALLBACK_QSORT_SMALL_THRESH
18878556Sobrien#undef FALLBACK_QSORT_STACK_SIZE
18978556Sobrien
19078556Sobrien
19178556Sobrien/*---------------------------------------------*/
19278556Sobrien/* Pre:
19378556Sobrien      nblock > 0
19478556Sobrien      eclass exists for [0 .. nblock-1]
19578556Sobrien      ((UChar*)eclass) [0 .. nblock-1] holds block
19678556Sobrien      ptr exists for [0 .. nblock-1]
19778556Sobrien
19878556Sobrien   Post:
19978556Sobrien      ((UChar*)eclass) [0 .. nblock-1] holds block
20078556Sobrien      All other areas of eclass destroyed
20178556Sobrien      fmap [0 .. nblock-1] holds sorted order
20278556Sobrien      bhtab [ 0 .. 2+(nblock/32) ] destroyed
20378556Sobrien*/
20478556Sobrien
20578556Sobrien#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
20678556Sobrien#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
20778556Sobrien#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
20878556Sobrien#define      WORD_BH(zz)  bhtab[(zz) >> 5]
20978556Sobrien#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
21078556Sobrien
21178556Sobrienstatic
21278556Sobrienvoid fallbackSort ( UInt32* fmap,
21378556Sobrien                    UInt32* eclass,
21478556Sobrien                    UInt32* bhtab,
21578556Sobrien                    Int32   nblock,
21678556Sobrien                    Int32   verb )
21778556Sobrien{
21878556Sobrien   Int32 ftab[257];
21978556Sobrien   Int32 ftabCopy[256];
22078556Sobrien   Int32 H, i, j, k, l, r, cc, cc1;
22178556Sobrien   Int32 nNotDone;
22278556Sobrien   Int32 nBhtab;
22378556Sobrien   UChar* eclass8 = (UChar*)eclass;
22478556Sobrien
22578556Sobrien   /*--
22678556Sobrien      Initial 1-char radix sort to generate
22778556Sobrien      initial fmap and initial BH bits.
22878556Sobrien   --*/
22978556Sobrien   if (verb >= 4)
23078556Sobrien      VPrintf0 ( "        bucket sorting ...\n" );
23178556Sobrien   for (i = 0; i < 257;    i++) ftab[i] = 0;
23278556Sobrien   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
23378556Sobrien   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
23478556Sobrien   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
23578556Sobrien
23678556Sobrien   for (i = 0; i < nblock; i++) {
23778556Sobrien      j = eclass8[i];
23878556Sobrien      k = ftab[j] - 1;
23978556Sobrien      ftab[j] = k;
24078556Sobrien      fmap[k] = i;
24178556Sobrien   }
24278556Sobrien
24378556Sobrien   nBhtab = 2 + (nblock / 32);
24478556Sobrien   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
24578556Sobrien   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
24678556Sobrien
24778556Sobrien   /*--
24878556Sobrien      Inductively refine the buckets.  Kind-of an
24978556Sobrien      "exponential radix sort" (!), inspired by the
25078556Sobrien      Manber-Myers suffix array construction algorithm.
25178556Sobrien   --*/
25278556Sobrien
25378556Sobrien   /*-- set sentinel bits for block-end detection --*/
25478556Sobrien   for (i = 0; i < 32; i++) {
25578556Sobrien      SET_BH(nblock + 2*i);
25678556Sobrien      CLEAR_BH(nblock + 2*i + 1);
25778556Sobrien   }
25878556Sobrien
25978556Sobrien   /*-- the log(N) loop --*/
26078556Sobrien   H = 1;
26178556Sobrien   while (1) {
26278556Sobrien
26378556Sobrien      if (verb >= 4)
26478556Sobrien         VPrintf1 ( "        depth %6d has ", H );
26578556Sobrien
26678556Sobrien      j = 0;
26778556Sobrien      for (i = 0; i < nblock; i++) {
26878556Sobrien         if (ISSET_BH(i)) j = i;
26978556Sobrien         k = fmap[i] - H; if (k < 0) k += nblock;
27078556Sobrien         eclass[k] = j;
27178556Sobrien      }
27278556Sobrien
27378556Sobrien      nNotDone = 0;
27478556Sobrien      r = -1;
27578556Sobrien      while (1) {
27678556Sobrien
27778556Sobrien	 /*-- find the next non-singleton bucket --*/
27878556Sobrien         k = r + 1;
27978556Sobrien         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28078556Sobrien         if (ISSET_BH(k)) {
28178556Sobrien            while (WORD_BH(k) == 0xffffffff) k += 32;
28278556Sobrien            while (ISSET_BH(k)) k++;
28378556Sobrien         }
28478556Sobrien         l = k - 1;
28578556Sobrien         if (l >= nblock) break;
28678556Sobrien         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
28778556Sobrien         if (!ISSET_BH(k)) {
28878556Sobrien            while (WORD_BH(k) == 0x00000000) k += 32;
28978556Sobrien            while (!ISSET_BH(k)) k++;
29078556Sobrien         }
29178556Sobrien         r = k - 1;
29278556Sobrien         if (r >= nblock) break;
29378556Sobrien
29478556Sobrien         /*-- now [l, r] bracket current bucket --*/
29578556Sobrien         if (r > l) {
29678556Sobrien            nNotDone += (r - l + 1);
29778556Sobrien            fallbackQSort3 ( fmap, eclass, l, r );
29878556Sobrien
29978556Sobrien            /*-- scan bucket and generate header bits-- */
30078556Sobrien            cc = -1;
30178556Sobrien            for (i = l; i <= r; i++) {
30278556Sobrien               cc1 = eclass[fmap[i]];
30378556Sobrien               if (cc != cc1) { SET_BH(i); cc = cc1; };
30478556Sobrien            }
30578556Sobrien         }
30678556Sobrien      }
30778556Sobrien
30878556Sobrien      if (verb >= 4)
30978556Sobrien         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
31078556Sobrien
31178556Sobrien      H *= 2;
31278556Sobrien      if (H > nblock || nNotDone == 0) break;
31378556Sobrien   }
31478556Sobrien
31578556Sobrien   /*--
31678556Sobrien      Reconstruct the original block in
31778556Sobrien      eclass8 [0 .. nblock-1], since the
31878556Sobrien      previous phase destroyed it.
31978556Sobrien   --*/
32078556Sobrien   if (verb >= 4)
32178556Sobrien      VPrintf0 ( "        reconstructing block ...\n" );
32278556Sobrien   j = 0;
32378556Sobrien   for (i = 0; i < nblock; i++) {
32478556Sobrien      while (ftabCopy[j] == 0) j++;
32578556Sobrien      ftabCopy[j]--;
32678556Sobrien      eclass8[fmap[i]] = (UChar)j;
32778556Sobrien   }
32878556Sobrien   AssertH ( j < 256, 1005 );
32978556Sobrien}
33078556Sobrien
33178556Sobrien#undef       SET_BH
33278556Sobrien#undef     CLEAR_BH
33378556Sobrien#undef     ISSET_BH
33478556Sobrien#undef      WORD_BH
33578556Sobrien#undef UNALIGNED_BH
33678556Sobrien
33778556Sobrien
33878556Sobrien/*---------------------------------------------*/
33978556Sobrien/*--- The main, O(N^2 log(N)) sorting       ---*/
34078556Sobrien/*--- algorithm.  Faster for "normal"       ---*/
34178556Sobrien/*--- non-repetitive blocks.                ---*/
34278556Sobrien/*---------------------------------------------*/
34378556Sobrien
34478556Sobrien/*---------------------------------------------*/
34578556Sobrienstatic
34678556Sobrien__inline__
34778556SobrienBool mainGtU ( UInt32  i1,
34878556Sobrien               UInt32  i2,
34978556Sobrien               UChar*  block,
35078556Sobrien               UInt16* quadrant,
35178556Sobrien               UInt32  nblock,
35278556Sobrien               Int32*  budget )
35378556Sobrien{
35478556Sobrien   Int32  k;
35578556Sobrien   UChar  c1, c2;
35678556Sobrien   UInt16 s1, s2;
35778556Sobrien
35878556Sobrien   AssertD ( i1 != i2, "mainGtU" );
35978556Sobrien   /* 1 */
36078556Sobrien   c1 = block[i1]; c2 = block[i2];
36178556Sobrien   if (c1 != c2) return (c1 > c2);
36278556Sobrien   i1++; i2++;
36378556Sobrien   /* 2 */
36478556Sobrien   c1 = block[i1]; c2 = block[i2];
36578556Sobrien   if (c1 != c2) return (c1 > c2);
36678556Sobrien   i1++; i2++;
36778556Sobrien   /* 3 */
36878556Sobrien   c1 = block[i1]; c2 = block[i2];
36978556Sobrien   if (c1 != c2) return (c1 > c2);
37078556Sobrien   i1++; i2++;
37178556Sobrien   /* 4 */
37278556Sobrien   c1 = block[i1]; c2 = block[i2];
37378556Sobrien   if (c1 != c2) return (c1 > c2);
37478556Sobrien   i1++; i2++;
37578556Sobrien   /* 5 */
37678556Sobrien   c1 = block[i1]; c2 = block[i2];
37778556Sobrien   if (c1 != c2) return (c1 > c2);
37878556Sobrien   i1++; i2++;
37978556Sobrien   /* 6 */
38078556Sobrien   c1 = block[i1]; c2 = block[i2];
38178556Sobrien   if (c1 != c2) return (c1 > c2);
38278556Sobrien   i1++; i2++;
38378556Sobrien   /* 7 */
38478556Sobrien   c1 = block[i1]; c2 = block[i2];
38578556Sobrien   if (c1 != c2) return (c1 > c2);
38678556Sobrien   i1++; i2++;
38778556Sobrien   /* 8 */
38878556Sobrien   c1 = block[i1]; c2 = block[i2];
38978556Sobrien   if (c1 != c2) return (c1 > c2);
39078556Sobrien   i1++; i2++;
39178556Sobrien   /* 9 */
39278556Sobrien   c1 = block[i1]; c2 = block[i2];
39378556Sobrien   if (c1 != c2) return (c1 > c2);
39478556Sobrien   i1++; i2++;
39578556Sobrien   /* 10 */
39678556Sobrien   c1 = block[i1]; c2 = block[i2];
39778556Sobrien   if (c1 != c2) return (c1 > c2);
39878556Sobrien   i1++; i2++;
39978556Sobrien   /* 11 */
40078556Sobrien   c1 = block[i1]; c2 = block[i2];
40178556Sobrien   if (c1 != c2) return (c1 > c2);
40278556Sobrien   i1++; i2++;
40378556Sobrien   /* 12 */
40478556Sobrien   c1 = block[i1]; c2 = block[i2];
40578556Sobrien   if (c1 != c2) return (c1 > c2);
40678556Sobrien   i1++; i2++;
40778556Sobrien
40878556Sobrien   k = nblock + 8;
40978556Sobrien
41078556Sobrien   do {
41178556Sobrien      /* 1 */
41278556Sobrien      c1 = block[i1]; c2 = block[i2];
41378556Sobrien      if (c1 != c2) return (c1 > c2);
41478556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
41578556Sobrien      if (s1 != s2) return (s1 > s2);
41678556Sobrien      i1++; i2++;
41778556Sobrien      /* 2 */
41878556Sobrien      c1 = block[i1]; c2 = block[i2];
41978556Sobrien      if (c1 != c2) return (c1 > c2);
42078556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
42178556Sobrien      if (s1 != s2) return (s1 > s2);
42278556Sobrien      i1++; i2++;
42378556Sobrien      /* 3 */
42478556Sobrien      c1 = block[i1]; c2 = block[i2];
42578556Sobrien      if (c1 != c2) return (c1 > c2);
42678556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
42778556Sobrien      if (s1 != s2) return (s1 > s2);
42878556Sobrien      i1++; i2++;
42978556Sobrien      /* 4 */
43078556Sobrien      c1 = block[i1]; c2 = block[i2];
43178556Sobrien      if (c1 != c2) return (c1 > c2);
43278556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
43378556Sobrien      if (s1 != s2) return (s1 > s2);
43478556Sobrien      i1++; i2++;
43578556Sobrien      /* 5 */
43678556Sobrien      c1 = block[i1]; c2 = block[i2];
43778556Sobrien      if (c1 != c2) return (c1 > c2);
43878556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
43978556Sobrien      if (s1 != s2) return (s1 > s2);
44078556Sobrien      i1++; i2++;
44178556Sobrien      /* 6 */
44278556Sobrien      c1 = block[i1]; c2 = block[i2];
44378556Sobrien      if (c1 != c2) return (c1 > c2);
44478556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
44578556Sobrien      if (s1 != s2) return (s1 > s2);
44678556Sobrien      i1++; i2++;
44778556Sobrien      /* 7 */
44878556Sobrien      c1 = block[i1]; c2 = block[i2];
44978556Sobrien      if (c1 != c2) return (c1 > c2);
45078556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
45178556Sobrien      if (s1 != s2) return (s1 > s2);
45278556Sobrien      i1++; i2++;
45378556Sobrien      /* 8 */
45478556Sobrien      c1 = block[i1]; c2 = block[i2];
45578556Sobrien      if (c1 != c2) return (c1 > c2);
45678556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
45778556Sobrien      if (s1 != s2) return (s1 > s2);
45878556Sobrien      i1++; i2++;
45978556Sobrien
46078556Sobrien      if (i1 >= nblock) i1 -= nblock;
46178556Sobrien      if (i2 >= nblock) i2 -= nblock;
46278556Sobrien
46378556Sobrien      k -= 8;
46478556Sobrien      (*budget)--;
46578556Sobrien   }
46678556Sobrien      while (k >= 0);
46778556Sobrien
46878556Sobrien   return False;
46978556Sobrien}
47078556Sobrien
47178556Sobrien
47278556Sobrien/*---------------------------------------------*/
47378556Sobrien/*--
47478556Sobrien   Knuth's increments seem to work better
47578556Sobrien   than Incerpi-Sedgewick here.  Possibly
47678556Sobrien   because the number of elems to sort is
47778556Sobrien   usually small, typically <= 20.
47878556Sobrien--*/
47978556Sobrienstatic
48078556SobrienInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
48178556Sobrien                   9841, 29524, 88573, 265720,
48278556Sobrien                   797161, 2391484 };
48378556Sobrien
48478556Sobrienstatic
48578556Sobrienvoid mainSimpleSort ( UInt32* ptr,
48678556Sobrien                      UChar*  block,
48778556Sobrien                      UInt16* quadrant,
48878556Sobrien                      Int32   nblock,
48978556Sobrien                      Int32   lo,
49078556Sobrien                      Int32   hi,
49178556Sobrien                      Int32   d,
49278556Sobrien                      Int32*  budget )
49378556Sobrien{
49478556Sobrien   Int32 i, j, h, bigN, hp;
49578556Sobrien   UInt32 v;
49678556Sobrien
49778556Sobrien   bigN = hi - lo + 1;
49878556Sobrien   if (bigN < 2) return;
49978556Sobrien
50078556Sobrien   hp = 0;
50178556Sobrien   while (incs[hp] < bigN) hp++;
50278556Sobrien   hp--;
50378556Sobrien
50478556Sobrien   for (; hp >= 0; hp--) {
50578556Sobrien      h = incs[hp];
50678556Sobrien
50778556Sobrien      i = lo + h;
50878556Sobrien      while (True) {
50978556Sobrien
51078556Sobrien         /*-- copy 1 --*/
51178556Sobrien         if (i > hi) break;
51278556Sobrien         v = ptr[i];
51378556Sobrien         j = i;
51478556Sobrien         while ( mainGtU (
51578556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
51678556Sobrien                 ) ) {
51778556Sobrien            ptr[j] = ptr[j-h];
51878556Sobrien            j = j - h;
51978556Sobrien            if (j <= (lo + h - 1)) break;
52078556Sobrien         }
52178556Sobrien         ptr[j] = v;
52278556Sobrien         i++;
52378556Sobrien
52478556Sobrien         /*-- copy 2 --*/
52578556Sobrien         if (i > hi) break;
52678556Sobrien         v = ptr[i];
52778556Sobrien         j = i;
52878556Sobrien         while ( mainGtU (
52978556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
53078556Sobrien                 ) ) {
53178556Sobrien            ptr[j] = ptr[j-h];
53278556Sobrien            j = j - h;
53378556Sobrien            if (j <= (lo + h - 1)) break;
53478556Sobrien         }
53578556Sobrien         ptr[j] = v;
53678556Sobrien         i++;
53778556Sobrien
53878556Sobrien         /*-- copy 3 --*/
53978556Sobrien         if (i > hi) break;
54078556Sobrien         v = ptr[i];
54178556Sobrien         j = i;
54278556Sobrien         while ( mainGtU (
54378556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
54478556Sobrien                 ) ) {
54578556Sobrien            ptr[j] = ptr[j-h];
54678556Sobrien            j = j - h;
54778556Sobrien            if (j <= (lo + h - 1)) break;
54878556Sobrien         }
54978556Sobrien         ptr[j] = v;
55078556Sobrien         i++;
55178556Sobrien
55278556Sobrien         if (*budget < 0) return;
55378556Sobrien      }
55478556Sobrien   }
55578556Sobrien}
55678556Sobrien
55778556Sobrien
55878556Sobrien/*---------------------------------------------*/
55978556Sobrien/*--
56078556Sobrien   The following is an implementation of
56178556Sobrien   an elegant 3-way quicksort for strings,
56278556Sobrien   described in a paper "Fast Algorithms for
56378556Sobrien   Sorting and Searching Strings", by Robert
56478556Sobrien   Sedgewick and Jon L. Bentley.
56578556Sobrien--*/
56678556Sobrien
56778556Sobrien#define mswap(zz1, zz2) \
56878556Sobrien   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
56978556Sobrien
57078556Sobrien#define mvswap(zzp1, zzp2, zzn)       \
57178556Sobrien{                                     \
57278556Sobrien   Int32 yyp1 = (zzp1);               \
57378556Sobrien   Int32 yyp2 = (zzp2);               \
57478556Sobrien   Int32 yyn  = (zzn);                \
57578556Sobrien   while (yyn > 0) {                  \
57678556Sobrien      mswap(ptr[yyp1], ptr[yyp2]);    \
57778556Sobrien      yyp1++; yyp2++; yyn--;          \
57878556Sobrien   }                                  \
57978556Sobrien}
58078556Sobrien
58178556Sobrienstatic
58278556Sobrien__inline__
58378556SobrienUChar mmed3 ( UChar a, UChar b, UChar c )
58478556Sobrien{
58578556Sobrien   UChar t;
58678556Sobrien   if (a > b) { t = a; a = b; b = t; };
58778556Sobrien   if (b > c) {
58878556Sobrien      b = c;
58978556Sobrien      if (a > b) b = a;
59078556Sobrien   }
59178556Sobrien   return b;
59278556Sobrien}
59378556Sobrien
59478556Sobrien#define mmin(a,b) ((a) < (b)) ? (a) : (b)
59578556Sobrien
59678556Sobrien#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
59778556Sobrien                          stackHi[sp] = hz; \
59878556Sobrien                          stackD [sp] = dz; \
59978556Sobrien                          sp++; }
60078556Sobrien
60178556Sobrien#define mpop(lz,hz,dz) { sp--;             \
60278556Sobrien                         lz = stackLo[sp]; \
60378556Sobrien                         hz = stackHi[sp]; \
60478556Sobrien                         dz = stackD [sp]; }
60578556Sobrien
60678556Sobrien
60778556Sobrien#define mnextsize(az) (nextHi[az]-nextLo[az])
60878556Sobrien
60978556Sobrien#define mnextswap(az,bz)                                        \
61078556Sobrien   { Int32 tz;                                                  \
61178556Sobrien     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
61278556Sobrien     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
61378556Sobrien     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
61478556Sobrien
61578556Sobrien
61678556Sobrien#define MAIN_QSORT_SMALL_THRESH 20
61778556Sobrien#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
61878556Sobrien#define MAIN_QSORT_STACK_SIZE 100
61978556Sobrien
62078556Sobrienstatic
62178556Sobrienvoid mainQSort3 ( UInt32* ptr,
62278556Sobrien                  UChar*  block,
62378556Sobrien                  UInt16* quadrant,
62478556Sobrien                  Int32   nblock,
62578556Sobrien                  Int32   loSt,
62678556Sobrien                  Int32   hiSt,
62778556Sobrien                  Int32   dSt,
62878556Sobrien                  Int32*  budget )
62978556Sobrien{
63078556Sobrien   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
63178556Sobrien   Int32 sp, lo, hi, d;
63278556Sobrien
63378556Sobrien   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
63478556Sobrien   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
63578556Sobrien   Int32 stackD [MAIN_QSORT_STACK_SIZE];
63678556Sobrien
63778556Sobrien   Int32 nextLo[3];
63878556Sobrien   Int32 nextHi[3];
63978556Sobrien   Int32 nextD [3];
64078556Sobrien
64178556Sobrien   sp = 0;
64278556Sobrien   mpush ( loSt, hiSt, dSt );
64378556Sobrien
64478556Sobrien   while (sp > 0) {
64578556Sobrien
646167974Sdelphij      AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
64778556Sobrien
64878556Sobrien      mpop ( lo, hi, d );
64978556Sobrien      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
65078556Sobrien          d > MAIN_QSORT_DEPTH_THRESH) {
65178556Sobrien         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
65278556Sobrien         if (*budget < 0) return;
65378556Sobrien         continue;
65478556Sobrien      }
65578556Sobrien
65678556Sobrien      med = (Int32)
65778556Sobrien            mmed3 ( block[ptr[ lo         ]+d],
65878556Sobrien                    block[ptr[ hi         ]+d],
65978556Sobrien                    block[ptr[ (lo+hi)>>1 ]+d] );
66078556Sobrien
66178556Sobrien      unLo = ltLo = lo;
66278556Sobrien      unHi = gtHi = hi;
66378556Sobrien
66478556Sobrien      while (True) {
66578556Sobrien         while (True) {
66678556Sobrien            if (unLo > unHi) break;
66778556Sobrien            n = ((Int32)block[ptr[unLo]+d]) - med;
66878556Sobrien            if (n == 0) {
66978556Sobrien               mswap(ptr[unLo], ptr[ltLo]);
67078556Sobrien               ltLo++; unLo++; continue;
67178556Sobrien            };
67278556Sobrien            if (n >  0) break;
67378556Sobrien            unLo++;
67478556Sobrien         }
67578556Sobrien         while (True) {
67678556Sobrien            if (unLo > unHi) break;
67778556Sobrien            n = ((Int32)block[ptr[unHi]+d]) - med;
67878556Sobrien            if (n == 0) {
67978556Sobrien               mswap(ptr[unHi], ptr[gtHi]);
68078556Sobrien               gtHi--; unHi--; continue;
68178556Sobrien            };
68278556Sobrien            if (n <  0) break;
68378556Sobrien            unHi--;
68478556Sobrien         }
68578556Sobrien         if (unLo > unHi) break;
68678556Sobrien         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
68778556Sobrien      }
68878556Sobrien
68978556Sobrien      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
69078556Sobrien
69178556Sobrien      if (gtHi < ltLo) {
69278556Sobrien         mpush(lo, hi, d+1 );
69378556Sobrien         continue;
69478556Sobrien      }
69578556Sobrien
69678556Sobrien      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
69778556Sobrien      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
69878556Sobrien
69978556Sobrien      n = lo + unLo - ltLo - 1;
70078556Sobrien      m = hi - (gtHi - unHi) + 1;
70178556Sobrien
70278556Sobrien      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
70378556Sobrien      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
70478556Sobrien      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
70578556Sobrien
70678556Sobrien      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70778556Sobrien      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
70878556Sobrien      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
70978556Sobrien
71078556Sobrien      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
71178556Sobrien      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
71278556Sobrien
71378556Sobrien      mpush (nextLo[0], nextHi[0], nextD[0]);
71478556Sobrien      mpush (nextLo[1], nextHi[1], nextD[1]);
71578556Sobrien      mpush (nextLo[2], nextHi[2], nextD[2]);
71678556Sobrien   }
71778556Sobrien}
71878556Sobrien
71978556Sobrien#undef mswap
72078556Sobrien#undef mvswap
72178556Sobrien#undef mpush
72278556Sobrien#undef mpop
72378556Sobrien#undef mmin
72478556Sobrien#undef mnextsize
72578556Sobrien#undef mnextswap
72678556Sobrien#undef MAIN_QSORT_SMALL_THRESH
72778556Sobrien#undef MAIN_QSORT_DEPTH_THRESH
72878556Sobrien#undef MAIN_QSORT_STACK_SIZE
72978556Sobrien
73078556Sobrien
73178556Sobrien/*---------------------------------------------*/
73278556Sobrien/* Pre:
73378556Sobrien      nblock > N_OVERSHOOT
73478556Sobrien      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
73578556Sobrien      ((UChar*)block32) [0 .. nblock-1] holds block
73678556Sobrien      ptr exists for [0 .. nblock-1]
73778556Sobrien
73878556Sobrien   Post:
73978556Sobrien      ((UChar*)block32) [0 .. nblock-1] holds block
74078556Sobrien      All other areas of block32 destroyed
74178556Sobrien      ftab [0 .. 65536 ] destroyed
74278556Sobrien      ptr [0 .. nblock-1] holds sorted order
74378556Sobrien      if (*budget < 0), sorting was abandoned
74478556Sobrien*/
74578556Sobrien
74678556Sobrien#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
74778556Sobrien#define SETMASK (1 << 21)
74878556Sobrien#define CLEARMASK (~(SETMASK))
74978556Sobrien
75078556Sobrienstatic
75178556Sobrienvoid mainSort ( UInt32* ptr,
75278556Sobrien                UChar*  block,
75378556Sobrien                UInt16* quadrant,
75478556Sobrien                UInt32* ftab,
75578556Sobrien                Int32   nblock,
75678556Sobrien                Int32   verb,
75778556Sobrien                Int32*  budget )
75878556Sobrien{
75978556Sobrien   Int32  i, j, k, ss, sb;
76078556Sobrien   Int32  runningOrder[256];
76178556Sobrien   Bool   bigDone[256];
76278556Sobrien   Int32  copyStart[256];
76378556Sobrien   Int32  copyEnd  [256];
76478556Sobrien   UChar  c1;
76578556Sobrien   Int32  numQSorted;
76678556Sobrien   UInt16 s;
76778556Sobrien   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
76878556Sobrien
76978556Sobrien   /*-- set up the 2-byte frequency table --*/
77078556Sobrien   for (i = 65536; i >= 0; i--) ftab[i] = 0;
77178556Sobrien
77278556Sobrien   j = block[0] << 8;
77378556Sobrien   i = nblock-1;
77478556Sobrien   for (; i >= 3; i -= 4) {
77578556Sobrien      quadrant[i] = 0;
77678556Sobrien      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
77778556Sobrien      ftab[j]++;
77878556Sobrien      quadrant[i-1] = 0;
77978556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
78078556Sobrien      ftab[j]++;
78178556Sobrien      quadrant[i-2] = 0;
78278556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
78378556Sobrien      ftab[j]++;
78478556Sobrien      quadrant[i-3] = 0;
78578556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
78678556Sobrien      ftab[j]++;
78778556Sobrien   }
78878556Sobrien   for (; i >= 0; i--) {
78978556Sobrien      quadrant[i] = 0;
79078556Sobrien      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
79178556Sobrien      ftab[j]++;
79278556Sobrien   }
79378556Sobrien
79478556Sobrien   /*-- (emphasises close relationship of block & quadrant) --*/
79578556Sobrien   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
79678556Sobrien      block   [nblock+i] = block[i];
79778556Sobrien      quadrant[nblock+i] = 0;
79878556Sobrien   }
79978556Sobrien
80078556Sobrien   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
80178556Sobrien
80278556Sobrien   /*-- Complete the initial radix sort --*/
80378556Sobrien   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
80478556Sobrien
80578556Sobrien   s = block[0] << 8;
80678556Sobrien   i = nblock-1;
80778556Sobrien   for (; i >= 3; i -= 4) {
80878556Sobrien      s = (s >> 8) | (block[i] << 8);
80978556Sobrien      j = ftab[s] -1;
81078556Sobrien      ftab[s] = j;
81178556Sobrien      ptr[j] = i;
81278556Sobrien      s = (s >> 8) | (block[i-1] << 8);
81378556Sobrien      j = ftab[s] -1;
81478556Sobrien      ftab[s] = j;
81578556Sobrien      ptr[j] = i-1;
81678556Sobrien      s = (s >> 8) | (block[i-2] << 8);
81778556Sobrien      j = ftab[s] -1;
81878556Sobrien      ftab[s] = j;
81978556Sobrien      ptr[j] = i-2;
82078556Sobrien      s = (s >> 8) | (block[i-3] << 8);
82178556Sobrien      j = ftab[s] -1;
82278556Sobrien      ftab[s] = j;
82378556Sobrien      ptr[j] = i-3;
82478556Sobrien   }
82578556Sobrien   for (; i >= 0; i--) {
82678556Sobrien      s = (s >> 8) | (block[i] << 8);
82778556Sobrien      j = ftab[s] -1;
82878556Sobrien      ftab[s] = j;
82978556Sobrien      ptr[j] = i;
83078556Sobrien   }
83178556Sobrien
83278556Sobrien   /*--
83378556Sobrien      Now ftab contains the first loc of every small bucket.
83478556Sobrien      Calculate the running order, from smallest to largest
83578556Sobrien      big bucket.
83678556Sobrien   --*/
83778556Sobrien   for (i = 0; i <= 255; i++) {
83878556Sobrien      bigDone     [i] = False;
83978556Sobrien      runningOrder[i] = i;
84078556Sobrien   }
84178556Sobrien
84278556Sobrien   {
84378556Sobrien      Int32 vv;
84478556Sobrien      Int32 h = 1;
84578556Sobrien      do h = 3 * h + 1; while (h <= 256);
84678556Sobrien      do {
84778556Sobrien         h = h / 3;
84878556Sobrien         for (i = h; i <= 255; i++) {
84978556Sobrien            vv = runningOrder[i];
85078556Sobrien            j = i;
85178556Sobrien            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
85278556Sobrien               runningOrder[j] = runningOrder[j-h];
85378556Sobrien               j = j - h;
85478556Sobrien               if (j <= (h - 1)) goto zero;
85578556Sobrien            }
85678556Sobrien            zero:
85778556Sobrien            runningOrder[j] = vv;
85878556Sobrien         }
85978556Sobrien      } while (h != 1);
86078556Sobrien   }
86178556Sobrien
86278556Sobrien   /*--
86378556Sobrien      The main sorting loop.
86478556Sobrien   --*/
86578556Sobrien
86678556Sobrien   numQSorted = 0;
86778556Sobrien
86878556Sobrien   for (i = 0; i <= 255; i++) {
86978556Sobrien
87078556Sobrien      /*--
87178556Sobrien         Process big buckets, starting with the least full.
87278556Sobrien         Basically this is a 3-step process in which we call
87378556Sobrien         mainQSort3 to sort the small buckets [ss, j], but
87478556Sobrien         also make a big effort to avoid the calls if we can.
87578556Sobrien      --*/
87678556Sobrien      ss = runningOrder[i];
87778556Sobrien
87878556Sobrien      /*--
87978556Sobrien         Step 1:
88078556Sobrien         Complete the big bucket [ss] by quicksorting
88178556Sobrien         any unsorted small buckets [ss, j], for j != ss.
88278556Sobrien         Hopefully previous pointer-scanning phases have already
88378556Sobrien         completed many of the small buckets [ss, j], so
88478556Sobrien         we don't have to sort them at all.
88578556Sobrien      --*/
88678556Sobrien      for (j = 0; j <= 255; j++) {
88778556Sobrien         if (j != ss) {
88878556Sobrien            sb = (ss << 8) + j;
88978556Sobrien            if ( ! (ftab[sb] & SETMASK) ) {
89078556Sobrien               Int32 lo = ftab[sb]   & CLEARMASK;
89178556Sobrien               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
89278556Sobrien               if (hi > lo) {
89378556Sobrien                  if (verb >= 4)
89478556Sobrien                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
89578556Sobrien                                "done %d   this %d\n",
89678556Sobrien                                ss, j, numQSorted, hi - lo + 1 );
89778556Sobrien                  mainQSort3 (
89878556Sobrien                     ptr, block, quadrant, nblock,
89978556Sobrien                     lo, hi, BZ_N_RADIX, budget
90078556Sobrien                  );
90178556Sobrien                  numQSorted += (hi - lo + 1);
90278556Sobrien                  if (*budget < 0) return;
90378556Sobrien               }
90478556Sobrien            }
90578556Sobrien            ftab[sb] |= SETMASK;
90678556Sobrien         }
90778556Sobrien      }
90878556Sobrien
90978556Sobrien      AssertH ( !bigDone[ss], 1006 );
91078556Sobrien
91178556Sobrien      /*--
91278556Sobrien         Step 2:
91378556Sobrien         Now scan this big bucket [ss] so as to synthesise the
91478556Sobrien         sorted order for small buckets [t, ss] for all t,
91578556Sobrien         including, magically, the bucket [ss,ss] too.
91678556Sobrien         This will avoid doing Real Work in subsequent Step 1's.
91778556Sobrien      --*/
91878556Sobrien      {
91978556Sobrien         for (j = 0; j <= 255; j++) {
92078556Sobrien            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
92178556Sobrien            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
92278556Sobrien         }
92378556Sobrien         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
92478556Sobrien            k = ptr[j]-1; if (k < 0) k += nblock;
92578556Sobrien            c1 = block[k];
92678556Sobrien            if (!bigDone[c1])
92778556Sobrien               ptr[ copyStart[c1]++ ] = k;
92878556Sobrien         }
92978556Sobrien         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
93078556Sobrien            k = ptr[j]-1; if (k < 0) k += nblock;
93178556Sobrien            c1 = block[k];
93278556Sobrien            if (!bigDone[c1])
93378556Sobrien               ptr[ copyEnd[c1]-- ] = k;
93478556Sobrien         }
93578556Sobrien      }
93678556Sobrien
93790067Ssobomax      AssertH ( (copyStart[ss]-1 == copyEnd[ss])
93890067Ssobomax                ||
93990067Ssobomax                /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
94090067Ssobomax                   Necessity for this case is demonstrated by compressing
94190067Ssobomax                   a sequence of approximately 48.5 million of character
94290067Ssobomax                   251; 1.0.0/1.0.1 will then die here. */
94390067Ssobomax                (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
94490067Ssobomax                1007 )
94578556Sobrien
94678556Sobrien      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
94778556Sobrien
94878556Sobrien      /*--
94978556Sobrien         Step 3:
95078556Sobrien         The [ss] big bucket is now done.  Record this fact,
95178556Sobrien         and update the quadrant descriptors.  Remember to
95278556Sobrien         update quadrants in the overshoot area too, if
95378556Sobrien         necessary.  The "if (i < 255)" test merely skips
95478556Sobrien         this updating for the last bucket processed, since
95578556Sobrien         updating for the last bucket is pointless.
95678556Sobrien
95778556Sobrien         The quadrant array provides a way to incrementally
95878556Sobrien         cache sort orderings, as they appear, so as to
95978556Sobrien         make subsequent comparisons in fullGtU() complete
96078556Sobrien         faster.  For repetitive blocks this makes a big
96178556Sobrien         difference (but not big enough to be able to avoid
96278556Sobrien         the fallback sorting mechanism, exponential radix sort).
96378556Sobrien
96478556Sobrien         The precise meaning is: at all times:
96578556Sobrien
96678556Sobrien            for 0 <= i < nblock and 0 <= j <= nblock
96778556Sobrien
96878556Sobrien            if block[i] != block[j],
96978556Sobrien
97078556Sobrien               then the relative values of quadrant[i] and
97178556Sobrien                    quadrant[j] are meaningless.
97278556Sobrien
97378556Sobrien               else {
97478556Sobrien                  if quadrant[i] < quadrant[j]
97578556Sobrien                     then the string starting at i lexicographically
97678556Sobrien                     precedes the string starting at j
97778556Sobrien
97878556Sobrien                  else if quadrant[i] > quadrant[j]
97978556Sobrien                     then the string starting at j lexicographically
98078556Sobrien                     precedes the string starting at i
98178556Sobrien
98278556Sobrien                  else
98378556Sobrien                     the relative ordering of the strings starting
98478556Sobrien                     at i and j has not yet been determined.
98578556Sobrien               }
98678556Sobrien      --*/
98778556Sobrien      bigDone[ss] = True;
98878556Sobrien
98978556Sobrien      if (i < 255) {
99078556Sobrien         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
99178556Sobrien         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
99278556Sobrien         Int32 shifts   = 0;
99378556Sobrien
99478556Sobrien         while ((bbSize >> shifts) > 65534) shifts++;
99578556Sobrien
99678556Sobrien         for (j = bbSize-1; j >= 0; j--) {
99778556Sobrien            Int32 a2update     = ptr[bbStart + j];
99878556Sobrien            UInt16 qVal        = (UInt16)(j >> shifts);
99978556Sobrien            quadrant[a2update] = qVal;
100078556Sobrien            if (a2update < BZ_N_OVERSHOOT)
100178556Sobrien               quadrant[a2update + nblock] = qVal;
100278556Sobrien         }
100378556Sobrien         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
100478556Sobrien      }
100578556Sobrien
100678556Sobrien   }
100778556Sobrien
100878556Sobrien   if (verb >= 4)
100978556Sobrien      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
101078556Sobrien                 nblock, numQSorted, nblock - numQSorted );
101178556Sobrien}
101278556Sobrien
101378556Sobrien#undef BIGFREQ
101478556Sobrien#undef SETMASK
101578556Sobrien#undef CLEARMASK
101678556Sobrien
101778556Sobrien
101878556Sobrien/*---------------------------------------------*/
101978556Sobrien/* Pre:
102078556Sobrien      nblock > 0
102178556Sobrien      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
102278556Sobrien      ((UChar*)arr2)  [0 .. nblock-1] holds block
102378556Sobrien      arr1 exists for [0 .. nblock-1]
102478556Sobrien
102578556Sobrien   Post:
102678556Sobrien      ((UChar*)arr2) [0 .. nblock-1] holds block
102778556Sobrien      All other areas of block destroyed
102878556Sobrien      ftab [ 0 .. 65536 ] destroyed
102978556Sobrien      arr1 [0 .. nblock-1] holds sorted order
103078556Sobrien*/
103178556Sobrienvoid BZ2_blockSort ( EState* s )
103278556Sobrien{
103378556Sobrien   UInt32* ptr    = s->ptr;
103478556Sobrien   UChar*  block  = s->block;
103578556Sobrien   UInt32* ftab   = s->ftab;
103678556Sobrien   Int32   nblock = s->nblock;
103778556Sobrien   Int32   verb   = s->verbosity;
103878556Sobrien   Int32   wfact  = s->workFactor;
103978556Sobrien   UInt16* quadrant;
104078556Sobrien   Int32   budget;
104178556Sobrien   Int32   budgetInit;
104278556Sobrien   Int32   i;
104378556Sobrien
104478556Sobrien   if (nblock < 10000) {
104578556Sobrien      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
104678556Sobrien   } else {
104778556Sobrien      /* Calculate the location for quadrant, remembering to get
104878556Sobrien         the alignment right.  Assumes that &(block[0]) is at least
104978556Sobrien         2-byte aligned -- this should be ok since block is really
105078556Sobrien         the first section of arr2.
105178556Sobrien      */
105278556Sobrien      i = nblock+BZ_N_OVERSHOOT;
105378556Sobrien      if (i & 1) i++;
105478556Sobrien      quadrant = (UInt16*)(&(block[i]));
105578556Sobrien
105678556Sobrien      /* (wfact-1) / 3 puts the default-factor-30
105778556Sobrien         transition point at very roughly the same place as
105878556Sobrien         with v0.1 and v0.9.0.
105978556Sobrien         Not that it particularly matters any more, since the
106078556Sobrien         resulting compressed stream is now the same regardless
106178556Sobrien         of whether or not we use the main sort or fallback sort.
106278556Sobrien      */
106378556Sobrien      if (wfact < 1  ) wfact = 1;
106478556Sobrien      if (wfact > 100) wfact = 100;
106578556Sobrien      budgetInit = nblock * ((wfact-1) / 3);
106678556Sobrien      budget = budgetInit;
106778556Sobrien
106878556Sobrien      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
106978556Sobrien      if (verb >= 3)
107078556Sobrien         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
107178556Sobrien                    budgetInit - budget,
107278556Sobrien                    nblock,
107378556Sobrien                    (float)(budgetInit - budget) /
107478556Sobrien                    (float)(nblock==0 ? 1 : nblock) );
107578556Sobrien      if (budget < 0) {
107678556Sobrien         if (verb >= 2)
107778556Sobrien            VPrintf0 ( "    too repetitive; using fallback"
107878556Sobrien                       " sorting algorithm\n" );
107978556Sobrien         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
108078556Sobrien      }
108178556Sobrien   }
108278556Sobrien
108378556Sobrien   s->origPtr = -1;
108478556Sobrien   for (i = 0; i < s->nblock; i++)
108578556Sobrien      if (ptr[i] == 0)
108678556Sobrien         { s->origPtr = i; break; };
108778556Sobrien
108878556Sobrien   AssertH( s->origPtr != -1, 1003 );
108978556Sobrien}
109078556Sobrien
109178556Sobrien
109278556Sobrien/*-------------------------------------------------------------*/
109378556Sobrien/*--- end                                       blocksort.c ---*/
109478556Sobrien/*-------------------------------------------------------------*/
1095