blocksort.c revision 78556
178556Sobrien
278556Sobrien/*-------------------------------------------------------------*/
378556Sobrien/*--- Block sorting machinery                               ---*/
478556Sobrien/*---                                           blocksort.c ---*/
578556Sobrien/*-------------------------------------------------------------*/
678556Sobrien
778556Sobrien/*--
878556Sobrien  This file is a part of bzip2 and/or libbzip2, a program and
978556Sobrien  library for lossless, block-sorting data compression.
1078556Sobrien
1178556Sobrien  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
1278556Sobrien
1378556Sobrien  Redistribution and use in source and binary forms, with or without
1478556Sobrien  modification, are permitted provided that the following conditions
1578556Sobrien  are met:
1678556Sobrien
1778556Sobrien  1. Redistributions of source code must retain the above copyright
1878556Sobrien     notice, this list of conditions and the following disclaimer.
1978556Sobrien
2078556Sobrien  2. The origin of this software must not be misrepresented; you must
2178556Sobrien     not claim that you wrote the original software.  If you use this
2278556Sobrien     software in a product, an acknowledgment in the product
2378556Sobrien     documentation would be appreciated but is not required.
2478556Sobrien
2578556Sobrien  3. Altered source versions must be plainly marked as such, and must
2678556Sobrien     not be misrepresented as being the original software.
2778556Sobrien
2878556Sobrien  4. The name of the author may not be used to endorse or promote
2978556Sobrien     products derived from this software without specific prior written
3078556Sobrien     permission.
3178556Sobrien
3278556Sobrien  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
3378556Sobrien  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
3478556Sobrien  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3578556Sobrien  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
3678556Sobrien  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3778556Sobrien  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
3878556Sobrien  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3978556Sobrien  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4078556Sobrien  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4178556Sobrien  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4278556Sobrien  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4378556Sobrien
4478556Sobrien  Julian Seward, Cambridge, UK.
4578556Sobrien  jseward@acm.org
4678556Sobrien  bzip2/libbzip2 version 1.0 of 21 March 2000
4778556Sobrien
4878556Sobrien  This program is based on (at least) the work of:
4978556Sobrien     Mike Burrows
5078556Sobrien     David Wheeler
5178556Sobrien     Peter Fenwick
5278556Sobrien     Alistair Moffat
5378556Sobrien     Radford Neal
5478556Sobrien     Ian H. Witten
5578556Sobrien     Robert Sedgewick
5678556Sobrien     Jon L. Bentley
5778556Sobrien
5878556Sobrien  For more information on these sources, see the manual.
5978556Sobrien
6078556Sobrien  To get some idea how the block sorting algorithms in this file
6178556Sobrien  work, read my paper
6278556Sobrien     On the Performance of BWT Sorting Algorithms
6378556Sobrien  in Proceedings of the IEEE Data Compression Conference 2000,
6478556Sobrien  Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
6578556Sobrien  file implements the algorithm called  cache  in the paper.
6678556Sobrien--*/
6778556Sobrien
6878556Sobrien
6978556Sobrien#include "bzlib_private.h"
7078556Sobrien
7178556Sobrien/*---------------------------------------------*/
7278556Sobrien/*--- Fallback O(N log(N)^2) sorting        ---*/
7378556Sobrien/*--- algorithm, for repetitive blocks      ---*/
7478556Sobrien/*---------------------------------------------*/
7578556Sobrien
7678556Sobrien/*---------------------------------------------*/
7778556Sobrienstatic
7878556Sobrien__inline__
7978556Sobrienvoid fallbackSimpleSort ( UInt32* fmap,
8078556Sobrien                          UInt32* eclass,
8178556Sobrien                          Int32   lo,
8278556Sobrien                          Int32   hi )
8378556Sobrien{
8478556Sobrien   Int32 i, j, tmp;
8578556Sobrien   UInt32 ec_tmp;
8678556Sobrien
8778556Sobrien   if (lo == hi) return;
8878556Sobrien
8978556Sobrien   if (hi - lo > 3) {
9078556Sobrien      for ( i = hi-4; i >= lo; i-- ) {
9178556Sobrien         tmp = fmap[i];
9278556Sobrien         ec_tmp = eclass[tmp];
9378556Sobrien         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
9478556Sobrien            fmap[j-4] = fmap[j];
9578556Sobrien         fmap[j-4] = tmp;
9678556Sobrien      }
9778556Sobrien   }
9878556Sobrien
9978556Sobrien   for ( i = hi-1; i >= lo; i-- ) {
10078556Sobrien      tmp = fmap[i];
10178556Sobrien      ec_tmp = eclass[tmp];
10278556Sobrien      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
10378556Sobrien         fmap[j-1] = fmap[j];
10478556Sobrien      fmap[j-1] = tmp;
10578556Sobrien   }
10678556Sobrien}
10778556Sobrien
10878556Sobrien
10978556Sobrien/*---------------------------------------------*/
11078556Sobrien#define fswap(zz1, zz2) \
11178556Sobrien   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
11278556Sobrien
11378556Sobrien#define fvswap(zzp1, zzp2, zzn)       \
11478556Sobrien{                                     \
11578556Sobrien   Int32 yyp1 = (zzp1);               \
11678556Sobrien   Int32 yyp2 = (zzp2);               \
11778556Sobrien   Int32 yyn  = (zzn);                \
11878556Sobrien   while (yyn > 0) {                  \
11978556Sobrien      fswap(fmap[yyp1], fmap[yyp2]);  \
12078556Sobrien      yyp1++; yyp2++; yyn--;          \
12178556Sobrien   }                                  \
12278556Sobrien}
12378556Sobrien
12478556Sobrien
12578556Sobrien#define fmin(a,b) ((a) < (b)) ? (a) : (b)
12678556Sobrien
12778556Sobrien#define fpush(lz,hz) { stackLo[sp] = lz; \
12878556Sobrien                       stackHi[sp] = hz; \
12978556Sobrien                       sp++; }
13078556Sobrien
13178556Sobrien#define fpop(lz,hz) { sp--;              \
13278556Sobrien                      lz = stackLo[sp];  \
13378556Sobrien                      hz = stackHi[sp]; }
13478556Sobrien
13578556Sobrien#define FALLBACK_QSORT_SMALL_THRESH 10
13678556Sobrien#define FALLBACK_QSORT_STACK_SIZE   100
13778556Sobrien
13878556Sobrien
13978556Sobrienstatic
14078556Sobrienvoid fallbackQSort3 ( UInt32* fmap,
14178556Sobrien                      UInt32* eclass,
14278556Sobrien                      Int32   loSt,
14378556Sobrien                      Int32   hiSt )
14478556Sobrien{
14578556Sobrien   Int32 unLo, unHi, ltLo, gtHi, n, m;
14678556Sobrien   Int32 sp, lo, hi;
14778556Sobrien   UInt32 med, r, r3;
14878556Sobrien   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
14978556Sobrien   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
15078556Sobrien
15178556Sobrien   r = 0;
15278556Sobrien
15378556Sobrien   sp = 0;
15478556Sobrien   fpush ( loSt, hiSt );
15578556Sobrien
15678556Sobrien   while (sp > 0) {
15778556Sobrien
15878556Sobrien      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
15978556Sobrien
16078556Sobrien      fpop ( lo, hi );
16178556Sobrien      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
16278556Sobrien         fallbackSimpleSort ( fmap, eclass, lo, hi );
16378556Sobrien         continue;
16478556Sobrien      }
16578556Sobrien
16678556Sobrien      /* Random partitioning.  Median of 3 sometimes fails to
16778556Sobrien         avoid bad cases.  Median of 9 seems to help but
16878556Sobrien         looks rather expensive.  This too seems to work but
16978556Sobrien         is cheaper.  Guidance for the magic constants
17078556Sobrien         7621 and 32768 is taken from Sedgewick's algorithms
17178556Sobrien         book, chapter 35.
17278556Sobrien      */
17378556Sobrien      r = ((r * 7621) + 1) % 32768;
17478556Sobrien      r3 = r % 3;
17578556Sobrien      if (r3 == 0) med = eclass[fmap[lo]]; else
17678556Sobrien      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
17778556Sobrien                   med = eclass[fmap[hi]];
17878556Sobrien
17978556Sobrien      unLo = ltLo = lo;
18078556Sobrien      unHi = gtHi = hi;
18178556Sobrien
18278556Sobrien      while (1) {
18378556Sobrien         while (1) {
18478556Sobrien            if (unLo > unHi) break;
18578556Sobrien            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
18678556Sobrien            if (n == 0) {
18778556Sobrien               fswap(fmap[unLo], fmap[ltLo]);
18878556Sobrien               ltLo++; unLo++;
18978556Sobrien               continue;
19078556Sobrien            };
19178556Sobrien            if (n > 0) break;
19278556Sobrien            unLo++;
19378556Sobrien         }
19478556Sobrien         while (1) {
19578556Sobrien            if (unLo > unHi) break;
19678556Sobrien            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
19778556Sobrien            if (n == 0) {
19878556Sobrien               fswap(fmap[unHi], fmap[gtHi]);
19978556Sobrien               gtHi--; unHi--;
20078556Sobrien               continue;
20178556Sobrien            };
20278556Sobrien            if (n < 0) break;
20378556Sobrien            unHi--;
20478556Sobrien         }
20578556Sobrien         if (unLo > unHi) break;
20678556Sobrien         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
20778556Sobrien      }
20878556Sobrien
20978556Sobrien      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
21078556Sobrien
21178556Sobrien      if (gtHi < ltLo) continue;
21278556Sobrien
21378556Sobrien      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
21478556Sobrien      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
21578556Sobrien
21678556Sobrien      n = lo + unLo - ltLo - 1;
21778556Sobrien      m = hi - (gtHi - unHi) + 1;
21878556Sobrien
21978556Sobrien      if (n - lo > hi - m) {
22078556Sobrien         fpush ( lo, n );
22178556Sobrien         fpush ( m, hi );
22278556Sobrien      } else {
22378556Sobrien         fpush ( m, hi );
22478556Sobrien         fpush ( lo, n );
22578556Sobrien      }
22678556Sobrien   }
22778556Sobrien}
22878556Sobrien
22978556Sobrien#undef fmin
23078556Sobrien#undef fpush
23178556Sobrien#undef fpop
23278556Sobrien#undef fswap
23378556Sobrien#undef fvswap
23478556Sobrien#undef FALLBACK_QSORT_SMALL_THRESH
23578556Sobrien#undef FALLBACK_QSORT_STACK_SIZE
23678556Sobrien
23778556Sobrien
23878556Sobrien/*---------------------------------------------*/
23978556Sobrien/* Pre:
24078556Sobrien      nblock > 0
24178556Sobrien      eclass exists for [0 .. nblock-1]
24278556Sobrien      ((UChar*)eclass) [0 .. nblock-1] holds block
24378556Sobrien      ptr exists for [0 .. nblock-1]
24478556Sobrien
24578556Sobrien   Post:
24678556Sobrien      ((UChar*)eclass) [0 .. nblock-1] holds block
24778556Sobrien      All other areas of eclass destroyed
24878556Sobrien      fmap [0 .. nblock-1] holds sorted order
24978556Sobrien      bhtab [ 0 .. 2+(nblock/32) ] destroyed
25078556Sobrien*/
25178556Sobrien
25278556Sobrien#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
25378556Sobrien#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
25478556Sobrien#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
25578556Sobrien#define      WORD_BH(zz)  bhtab[(zz) >> 5]
25678556Sobrien#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
25778556Sobrien
25878556Sobrienstatic
25978556Sobrienvoid fallbackSort ( UInt32* fmap,
26078556Sobrien                    UInt32* eclass,
26178556Sobrien                    UInt32* bhtab,
26278556Sobrien                    Int32   nblock,
26378556Sobrien                    Int32   verb )
26478556Sobrien{
26578556Sobrien   Int32 ftab[257];
26678556Sobrien   Int32 ftabCopy[256];
26778556Sobrien   Int32 H, i, j, k, l, r, cc, cc1;
26878556Sobrien   Int32 nNotDone;
26978556Sobrien   Int32 nBhtab;
27078556Sobrien   UChar* eclass8 = (UChar*)eclass;
27178556Sobrien
27278556Sobrien   /*--
27378556Sobrien      Initial 1-char radix sort to generate
27478556Sobrien      initial fmap and initial BH bits.
27578556Sobrien   --*/
27678556Sobrien   if (verb >= 4)
27778556Sobrien      VPrintf0 ( "        bucket sorting ...\n" );
27878556Sobrien   for (i = 0; i < 257;    i++) ftab[i] = 0;
27978556Sobrien   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
28078556Sobrien   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
28178556Sobrien   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
28278556Sobrien
28378556Sobrien   for (i = 0; i < nblock; i++) {
28478556Sobrien      j = eclass8[i];
28578556Sobrien      k = ftab[j] - 1;
28678556Sobrien      ftab[j] = k;
28778556Sobrien      fmap[k] = i;
28878556Sobrien   }
28978556Sobrien
29078556Sobrien   nBhtab = 2 + (nblock / 32);
29178556Sobrien   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
29278556Sobrien   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
29378556Sobrien
29478556Sobrien   /*--
29578556Sobrien      Inductively refine the buckets.  Kind-of an
29678556Sobrien      "exponential radix sort" (!), inspired by the
29778556Sobrien      Manber-Myers suffix array construction algorithm.
29878556Sobrien   --*/
29978556Sobrien
30078556Sobrien   /*-- set sentinel bits for block-end detection --*/
30178556Sobrien   for (i = 0; i < 32; i++) {
30278556Sobrien      SET_BH(nblock + 2*i);
30378556Sobrien      CLEAR_BH(nblock + 2*i + 1);
30478556Sobrien   }
30578556Sobrien
30678556Sobrien   /*-- the log(N) loop --*/
30778556Sobrien   H = 1;
30878556Sobrien   while (1) {
30978556Sobrien
31078556Sobrien      if (verb >= 4)
31178556Sobrien         VPrintf1 ( "        depth %6d has ", H );
31278556Sobrien
31378556Sobrien      j = 0;
31478556Sobrien      for (i = 0; i < nblock; i++) {
31578556Sobrien         if (ISSET_BH(i)) j = i;
31678556Sobrien         k = fmap[i] - H; if (k < 0) k += nblock;
31778556Sobrien         eclass[k] = j;
31878556Sobrien      }
31978556Sobrien
32078556Sobrien      nNotDone = 0;
32178556Sobrien      r = -1;
32278556Sobrien      while (1) {
32378556Sobrien
32478556Sobrien	 /*-- find the next non-singleton bucket --*/
32578556Sobrien         k = r + 1;
32678556Sobrien         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
32778556Sobrien         if (ISSET_BH(k)) {
32878556Sobrien            while (WORD_BH(k) == 0xffffffff) k += 32;
32978556Sobrien            while (ISSET_BH(k)) k++;
33078556Sobrien         }
33178556Sobrien         l = k - 1;
33278556Sobrien         if (l >= nblock) break;
33378556Sobrien         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
33478556Sobrien         if (!ISSET_BH(k)) {
33578556Sobrien            while (WORD_BH(k) == 0x00000000) k += 32;
33678556Sobrien            while (!ISSET_BH(k)) k++;
33778556Sobrien         }
33878556Sobrien         r = k - 1;
33978556Sobrien         if (r >= nblock) break;
34078556Sobrien
34178556Sobrien         /*-- now [l, r] bracket current bucket --*/
34278556Sobrien         if (r > l) {
34378556Sobrien            nNotDone += (r - l + 1);
34478556Sobrien            fallbackQSort3 ( fmap, eclass, l, r );
34578556Sobrien
34678556Sobrien            /*-- scan bucket and generate header bits-- */
34778556Sobrien            cc = -1;
34878556Sobrien            for (i = l; i <= r; i++) {
34978556Sobrien               cc1 = eclass[fmap[i]];
35078556Sobrien               if (cc != cc1) { SET_BH(i); cc = cc1; };
35178556Sobrien            }
35278556Sobrien         }
35378556Sobrien      }
35478556Sobrien
35578556Sobrien      if (verb >= 4)
35678556Sobrien         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
35778556Sobrien
35878556Sobrien      H *= 2;
35978556Sobrien      if (H > nblock || nNotDone == 0) break;
36078556Sobrien   }
36178556Sobrien
36278556Sobrien   /*--
36378556Sobrien      Reconstruct the original block in
36478556Sobrien      eclass8 [0 .. nblock-1], since the
36578556Sobrien      previous phase destroyed it.
36678556Sobrien   --*/
36778556Sobrien   if (verb >= 4)
36878556Sobrien      VPrintf0 ( "        reconstructing block ...\n" );
36978556Sobrien   j = 0;
37078556Sobrien   for (i = 0; i < nblock; i++) {
37178556Sobrien      while (ftabCopy[j] == 0) j++;
37278556Sobrien      ftabCopy[j]--;
37378556Sobrien      eclass8[fmap[i]] = (UChar)j;
37478556Sobrien   }
37578556Sobrien   AssertH ( j < 256, 1005 );
37678556Sobrien}
37778556Sobrien
37878556Sobrien#undef       SET_BH
37978556Sobrien#undef     CLEAR_BH
38078556Sobrien#undef     ISSET_BH
38178556Sobrien#undef      WORD_BH
38278556Sobrien#undef UNALIGNED_BH
38378556Sobrien
38478556Sobrien
38578556Sobrien/*---------------------------------------------*/
38678556Sobrien/*--- The main, O(N^2 log(N)) sorting       ---*/
38778556Sobrien/*--- algorithm.  Faster for "normal"       ---*/
38878556Sobrien/*--- non-repetitive blocks.                ---*/
38978556Sobrien/*---------------------------------------------*/
39078556Sobrien
39178556Sobrien/*---------------------------------------------*/
39278556Sobrienstatic
39378556Sobrien__inline__
39478556SobrienBool mainGtU ( UInt32  i1,
39578556Sobrien               UInt32  i2,
39678556Sobrien               UChar*  block,
39778556Sobrien               UInt16* quadrant,
39878556Sobrien               UInt32  nblock,
39978556Sobrien               Int32*  budget )
40078556Sobrien{
40178556Sobrien   Int32  k;
40278556Sobrien   UChar  c1, c2;
40378556Sobrien   UInt16 s1, s2;
40478556Sobrien
40578556Sobrien   AssertD ( i1 != i2, "mainGtU" );
40678556Sobrien   /* 1 */
40778556Sobrien   c1 = block[i1]; c2 = block[i2];
40878556Sobrien   if (c1 != c2) return (c1 > c2);
40978556Sobrien   i1++; i2++;
41078556Sobrien   /* 2 */
41178556Sobrien   c1 = block[i1]; c2 = block[i2];
41278556Sobrien   if (c1 != c2) return (c1 > c2);
41378556Sobrien   i1++; i2++;
41478556Sobrien   /* 3 */
41578556Sobrien   c1 = block[i1]; c2 = block[i2];
41678556Sobrien   if (c1 != c2) return (c1 > c2);
41778556Sobrien   i1++; i2++;
41878556Sobrien   /* 4 */
41978556Sobrien   c1 = block[i1]; c2 = block[i2];
42078556Sobrien   if (c1 != c2) return (c1 > c2);
42178556Sobrien   i1++; i2++;
42278556Sobrien   /* 5 */
42378556Sobrien   c1 = block[i1]; c2 = block[i2];
42478556Sobrien   if (c1 != c2) return (c1 > c2);
42578556Sobrien   i1++; i2++;
42678556Sobrien   /* 6 */
42778556Sobrien   c1 = block[i1]; c2 = block[i2];
42878556Sobrien   if (c1 != c2) return (c1 > c2);
42978556Sobrien   i1++; i2++;
43078556Sobrien   /* 7 */
43178556Sobrien   c1 = block[i1]; c2 = block[i2];
43278556Sobrien   if (c1 != c2) return (c1 > c2);
43378556Sobrien   i1++; i2++;
43478556Sobrien   /* 8 */
43578556Sobrien   c1 = block[i1]; c2 = block[i2];
43678556Sobrien   if (c1 != c2) return (c1 > c2);
43778556Sobrien   i1++; i2++;
43878556Sobrien   /* 9 */
43978556Sobrien   c1 = block[i1]; c2 = block[i2];
44078556Sobrien   if (c1 != c2) return (c1 > c2);
44178556Sobrien   i1++; i2++;
44278556Sobrien   /* 10 */
44378556Sobrien   c1 = block[i1]; c2 = block[i2];
44478556Sobrien   if (c1 != c2) return (c1 > c2);
44578556Sobrien   i1++; i2++;
44678556Sobrien   /* 11 */
44778556Sobrien   c1 = block[i1]; c2 = block[i2];
44878556Sobrien   if (c1 != c2) return (c1 > c2);
44978556Sobrien   i1++; i2++;
45078556Sobrien   /* 12 */
45178556Sobrien   c1 = block[i1]; c2 = block[i2];
45278556Sobrien   if (c1 != c2) return (c1 > c2);
45378556Sobrien   i1++; i2++;
45478556Sobrien
45578556Sobrien   k = nblock + 8;
45678556Sobrien
45778556Sobrien   do {
45878556Sobrien      /* 1 */
45978556Sobrien      c1 = block[i1]; c2 = block[i2];
46078556Sobrien      if (c1 != c2) return (c1 > c2);
46178556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
46278556Sobrien      if (s1 != s2) return (s1 > s2);
46378556Sobrien      i1++; i2++;
46478556Sobrien      /* 2 */
46578556Sobrien      c1 = block[i1]; c2 = block[i2];
46678556Sobrien      if (c1 != c2) return (c1 > c2);
46778556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
46878556Sobrien      if (s1 != s2) return (s1 > s2);
46978556Sobrien      i1++; i2++;
47078556Sobrien      /* 3 */
47178556Sobrien      c1 = block[i1]; c2 = block[i2];
47278556Sobrien      if (c1 != c2) return (c1 > c2);
47378556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
47478556Sobrien      if (s1 != s2) return (s1 > s2);
47578556Sobrien      i1++; i2++;
47678556Sobrien      /* 4 */
47778556Sobrien      c1 = block[i1]; c2 = block[i2];
47878556Sobrien      if (c1 != c2) return (c1 > c2);
47978556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
48078556Sobrien      if (s1 != s2) return (s1 > s2);
48178556Sobrien      i1++; i2++;
48278556Sobrien      /* 5 */
48378556Sobrien      c1 = block[i1]; c2 = block[i2];
48478556Sobrien      if (c1 != c2) return (c1 > c2);
48578556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
48678556Sobrien      if (s1 != s2) return (s1 > s2);
48778556Sobrien      i1++; i2++;
48878556Sobrien      /* 6 */
48978556Sobrien      c1 = block[i1]; c2 = block[i2];
49078556Sobrien      if (c1 != c2) return (c1 > c2);
49178556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
49278556Sobrien      if (s1 != s2) return (s1 > s2);
49378556Sobrien      i1++; i2++;
49478556Sobrien      /* 7 */
49578556Sobrien      c1 = block[i1]; c2 = block[i2];
49678556Sobrien      if (c1 != c2) return (c1 > c2);
49778556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
49878556Sobrien      if (s1 != s2) return (s1 > s2);
49978556Sobrien      i1++; i2++;
50078556Sobrien      /* 8 */
50178556Sobrien      c1 = block[i1]; c2 = block[i2];
50278556Sobrien      if (c1 != c2) return (c1 > c2);
50378556Sobrien      s1 = quadrant[i1]; s2 = quadrant[i2];
50478556Sobrien      if (s1 != s2) return (s1 > s2);
50578556Sobrien      i1++; i2++;
50678556Sobrien
50778556Sobrien      if (i1 >= nblock) i1 -= nblock;
50878556Sobrien      if (i2 >= nblock) i2 -= nblock;
50978556Sobrien
51078556Sobrien      k -= 8;
51178556Sobrien      (*budget)--;
51278556Sobrien   }
51378556Sobrien      while (k >= 0);
51478556Sobrien
51578556Sobrien   return False;
51678556Sobrien}
51778556Sobrien
51878556Sobrien
51978556Sobrien/*---------------------------------------------*/
52078556Sobrien/*--
52178556Sobrien   Knuth's increments seem to work better
52278556Sobrien   than Incerpi-Sedgewick here.  Possibly
52378556Sobrien   because the number of elems to sort is
52478556Sobrien   usually small, typically <= 20.
52578556Sobrien--*/
52678556Sobrienstatic
52778556SobrienInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
52878556Sobrien                   9841, 29524, 88573, 265720,
52978556Sobrien                   797161, 2391484 };
53078556Sobrien
53178556Sobrienstatic
53278556Sobrienvoid mainSimpleSort ( UInt32* ptr,
53378556Sobrien                      UChar*  block,
53478556Sobrien                      UInt16* quadrant,
53578556Sobrien                      Int32   nblock,
53678556Sobrien                      Int32   lo,
53778556Sobrien                      Int32   hi,
53878556Sobrien                      Int32   d,
53978556Sobrien                      Int32*  budget )
54078556Sobrien{
54178556Sobrien   Int32 i, j, h, bigN, hp;
54278556Sobrien   UInt32 v;
54378556Sobrien
54478556Sobrien   bigN = hi - lo + 1;
54578556Sobrien   if (bigN < 2) return;
54678556Sobrien
54778556Sobrien   hp = 0;
54878556Sobrien   while (incs[hp] < bigN) hp++;
54978556Sobrien   hp--;
55078556Sobrien
55178556Sobrien   for (; hp >= 0; hp--) {
55278556Sobrien      h = incs[hp];
55378556Sobrien
55478556Sobrien      i = lo + h;
55578556Sobrien      while (True) {
55678556Sobrien
55778556Sobrien         /*-- copy 1 --*/
55878556Sobrien         if (i > hi) break;
55978556Sobrien         v = ptr[i];
56078556Sobrien         j = i;
56178556Sobrien         while ( mainGtU (
56278556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
56378556Sobrien                 ) ) {
56478556Sobrien            ptr[j] = ptr[j-h];
56578556Sobrien            j = j - h;
56678556Sobrien            if (j <= (lo + h - 1)) break;
56778556Sobrien         }
56878556Sobrien         ptr[j] = v;
56978556Sobrien         i++;
57078556Sobrien
57178556Sobrien         /*-- copy 2 --*/
57278556Sobrien         if (i > hi) break;
57378556Sobrien         v = ptr[i];
57478556Sobrien         j = i;
57578556Sobrien         while ( mainGtU (
57678556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
57778556Sobrien                 ) ) {
57878556Sobrien            ptr[j] = ptr[j-h];
57978556Sobrien            j = j - h;
58078556Sobrien            if (j <= (lo + h - 1)) break;
58178556Sobrien         }
58278556Sobrien         ptr[j] = v;
58378556Sobrien         i++;
58478556Sobrien
58578556Sobrien         /*-- copy 3 --*/
58678556Sobrien         if (i > hi) break;
58778556Sobrien         v = ptr[i];
58878556Sobrien         j = i;
58978556Sobrien         while ( mainGtU (
59078556Sobrien                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
59178556Sobrien                 ) ) {
59278556Sobrien            ptr[j] = ptr[j-h];
59378556Sobrien            j = j - h;
59478556Sobrien            if (j <= (lo + h - 1)) break;
59578556Sobrien         }
59678556Sobrien         ptr[j] = v;
59778556Sobrien         i++;
59878556Sobrien
59978556Sobrien         if (*budget < 0) return;
60078556Sobrien      }
60178556Sobrien   }
60278556Sobrien}
60378556Sobrien
60478556Sobrien
60578556Sobrien/*---------------------------------------------*/
60678556Sobrien/*--
60778556Sobrien   The following is an implementation of
60878556Sobrien   an elegant 3-way quicksort for strings,
60978556Sobrien   described in a paper "Fast Algorithms for
61078556Sobrien   Sorting and Searching Strings", by Robert
61178556Sobrien   Sedgewick and Jon L. Bentley.
61278556Sobrien--*/
61378556Sobrien
61478556Sobrien#define mswap(zz1, zz2) \
61578556Sobrien   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
61678556Sobrien
61778556Sobrien#define mvswap(zzp1, zzp2, zzn)       \
61878556Sobrien{                                     \
61978556Sobrien   Int32 yyp1 = (zzp1);               \
62078556Sobrien   Int32 yyp2 = (zzp2);               \
62178556Sobrien   Int32 yyn  = (zzn);                \
62278556Sobrien   while (yyn > 0) {                  \
62378556Sobrien      mswap(ptr[yyp1], ptr[yyp2]);    \
62478556Sobrien      yyp1++; yyp2++; yyn--;          \
62578556Sobrien   }                                  \
62678556Sobrien}
62778556Sobrien
62878556Sobrienstatic
62978556Sobrien__inline__
63078556SobrienUChar mmed3 ( UChar a, UChar b, UChar c )
63178556Sobrien{
63278556Sobrien   UChar t;
63378556Sobrien   if (a > b) { t = a; a = b; b = t; };
63478556Sobrien   if (b > c) {
63578556Sobrien      b = c;
63678556Sobrien      if (a > b) b = a;
63778556Sobrien   }
63878556Sobrien   return b;
63978556Sobrien}
64078556Sobrien
64178556Sobrien#define mmin(a,b) ((a) < (b)) ? (a) : (b)
64278556Sobrien
64378556Sobrien#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
64478556Sobrien                          stackHi[sp] = hz; \
64578556Sobrien                          stackD [sp] = dz; \
64678556Sobrien                          sp++; }
64778556Sobrien
64878556Sobrien#define mpop(lz,hz,dz) { sp--;             \
64978556Sobrien                         lz = stackLo[sp]; \
65078556Sobrien                         hz = stackHi[sp]; \
65178556Sobrien                         dz = stackD [sp]; }
65278556Sobrien
65378556Sobrien
65478556Sobrien#define mnextsize(az) (nextHi[az]-nextLo[az])
65578556Sobrien
65678556Sobrien#define mnextswap(az,bz)                                        \
65778556Sobrien   { Int32 tz;                                                  \
65878556Sobrien     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
65978556Sobrien     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
66078556Sobrien     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
66178556Sobrien
66278556Sobrien
66378556Sobrien#define MAIN_QSORT_SMALL_THRESH 20
66478556Sobrien#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
66578556Sobrien#define MAIN_QSORT_STACK_SIZE 100
66678556Sobrien
66778556Sobrienstatic
66878556Sobrienvoid mainQSort3 ( UInt32* ptr,
66978556Sobrien                  UChar*  block,
67078556Sobrien                  UInt16* quadrant,
67178556Sobrien                  Int32   nblock,
67278556Sobrien                  Int32   loSt,
67378556Sobrien                  Int32   hiSt,
67478556Sobrien                  Int32   dSt,
67578556Sobrien                  Int32*  budget )
67678556Sobrien{
67778556Sobrien   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
67878556Sobrien   Int32 sp, lo, hi, d;
67978556Sobrien
68078556Sobrien   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
68178556Sobrien   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
68278556Sobrien   Int32 stackD [MAIN_QSORT_STACK_SIZE];
68378556Sobrien
68478556Sobrien   Int32 nextLo[3];
68578556Sobrien   Int32 nextHi[3];
68678556Sobrien   Int32 nextD [3];
68778556Sobrien
68878556Sobrien   sp = 0;
68978556Sobrien   mpush ( loSt, hiSt, dSt );
69078556Sobrien
69178556Sobrien   while (sp > 0) {
69278556Sobrien
69378556Sobrien      AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
69478556Sobrien
69578556Sobrien      mpop ( lo, hi, d );
69678556Sobrien      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
69778556Sobrien          d > MAIN_QSORT_DEPTH_THRESH) {
69878556Sobrien         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
69978556Sobrien         if (*budget < 0) return;
70078556Sobrien         continue;
70178556Sobrien      }
70278556Sobrien
70378556Sobrien      med = (Int32)
70478556Sobrien            mmed3 ( block[ptr[ lo         ]+d],
70578556Sobrien                    block[ptr[ hi         ]+d],
70678556Sobrien                    block[ptr[ (lo+hi)>>1 ]+d] );
70778556Sobrien
70878556Sobrien      unLo = ltLo = lo;
70978556Sobrien      unHi = gtHi = hi;
71078556Sobrien
71178556Sobrien      while (True) {
71278556Sobrien         while (True) {
71378556Sobrien            if (unLo > unHi) break;
71478556Sobrien            n = ((Int32)block[ptr[unLo]+d]) - med;
71578556Sobrien            if (n == 0) {
71678556Sobrien               mswap(ptr[unLo], ptr[ltLo]);
71778556Sobrien               ltLo++; unLo++; continue;
71878556Sobrien            };
71978556Sobrien            if (n >  0) break;
72078556Sobrien            unLo++;
72178556Sobrien         }
72278556Sobrien         while (True) {
72378556Sobrien            if (unLo > unHi) break;
72478556Sobrien            n = ((Int32)block[ptr[unHi]+d]) - med;
72578556Sobrien            if (n == 0) {
72678556Sobrien               mswap(ptr[unHi], ptr[gtHi]);
72778556Sobrien               gtHi--; unHi--; continue;
72878556Sobrien            };
72978556Sobrien            if (n <  0) break;
73078556Sobrien            unHi--;
73178556Sobrien         }
73278556Sobrien         if (unLo > unHi) break;
73378556Sobrien         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
73478556Sobrien      }
73578556Sobrien
73678556Sobrien      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
73778556Sobrien
73878556Sobrien      if (gtHi < ltLo) {
73978556Sobrien         mpush(lo, hi, d+1 );
74078556Sobrien         continue;
74178556Sobrien      }
74278556Sobrien
74378556Sobrien      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
74478556Sobrien      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
74578556Sobrien
74678556Sobrien      n = lo + unLo - ltLo - 1;
74778556Sobrien      m = hi - (gtHi - unHi) + 1;
74878556Sobrien
74978556Sobrien      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
75078556Sobrien      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
75178556Sobrien      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
75278556Sobrien
75378556Sobrien      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
75478556Sobrien      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
75578556Sobrien      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
75678556Sobrien
75778556Sobrien      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
75878556Sobrien      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
75978556Sobrien
76078556Sobrien      mpush (nextLo[0], nextHi[0], nextD[0]);
76178556Sobrien      mpush (nextLo[1], nextHi[1], nextD[1]);
76278556Sobrien      mpush (nextLo[2], nextHi[2], nextD[2]);
76378556Sobrien   }
76478556Sobrien}
76578556Sobrien
76678556Sobrien#undef mswap
76778556Sobrien#undef mvswap
76878556Sobrien#undef mpush
76978556Sobrien#undef mpop
77078556Sobrien#undef mmin
77178556Sobrien#undef mnextsize
77278556Sobrien#undef mnextswap
77378556Sobrien#undef MAIN_QSORT_SMALL_THRESH
77478556Sobrien#undef MAIN_QSORT_DEPTH_THRESH
77578556Sobrien#undef MAIN_QSORT_STACK_SIZE
77678556Sobrien
77778556Sobrien
77878556Sobrien/*---------------------------------------------*/
77978556Sobrien/* Pre:
78078556Sobrien      nblock > N_OVERSHOOT
78178556Sobrien      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
78278556Sobrien      ((UChar*)block32) [0 .. nblock-1] holds block
78378556Sobrien      ptr exists for [0 .. nblock-1]
78478556Sobrien
78578556Sobrien   Post:
78678556Sobrien      ((UChar*)block32) [0 .. nblock-1] holds block
78778556Sobrien      All other areas of block32 destroyed
78878556Sobrien      ftab [0 .. 65536 ] destroyed
78978556Sobrien      ptr [0 .. nblock-1] holds sorted order
79078556Sobrien      if (*budget < 0), sorting was abandoned
79178556Sobrien*/
79278556Sobrien
79378556Sobrien#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
79478556Sobrien#define SETMASK (1 << 21)
79578556Sobrien#define CLEARMASK (~(SETMASK))
79678556Sobrien
79778556Sobrienstatic
79878556Sobrienvoid mainSort ( UInt32* ptr,
79978556Sobrien                UChar*  block,
80078556Sobrien                UInt16* quadrant,
80178556Sobrien                UInt32* ftab,
80278556Sobrien                Int32   nblock,
80378556Sobrien                Int32   verb,
80478556Sobrien                Int32*  budget )
80578556Sobrien{
80678556Sobrien   Int32  i, j, k, ss, sb;
80778556Sobrien   Int32  runningOrder[256];
80878556Sobrien   Bool   bigDone[256];
80978556Sobrien   Int32  copyStart[256];
81078556Sobrien   Int32  copyEnd  [256];
81178556Sobrien   UChar  c1;
81278556Sobrien   Int32  numQSorted;
81378556Sobrien   UInt16 s;
81478556Sobrien   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
81578556Sobrien
81678556Sobrien   /*-- set up the 2-byte frequency table --*/
81778556Sobrien   for (i = 65536; i >= 0; i--) ftab[i] = 0;
81878556Sobrien
81978556Sobrien   j = block[0] << 8;
82078556Sobrien   i = nblock-1;
82178556Sobrien   for (; i >= 3; i -= 4) {
82278556Sobrien      quadrant[i] = 0;
82378556Sobrien      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
82478556Sobrien      ftab[j]++;
82578556Sobrien      quadrant[i-1] = 0;
82678556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
82778556Sobrien      ftab[j]++;
82878556Sobrien      quadrant[i-2] = 0;
82978556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
83078556Sobrien      ftab[j]++;
83178556Sobrien      quadrant[i-3] = 0;
83278556Sobrien      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
83378556Sobrien      ftab[j]++;
83478556Sobrien   }
83578556Sobrien   for (; i >= 0; i--) {
83678556Sobrien      quadrant[i] = 0;
83778556Sobrien      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
83878556Sobrien      ftab[j]++;
83978556Sobrien   }
84078556Sobrien
84178556Sobrien   /*-- (emphasises close relationship of block & quadrant) --*/
84278556Sobrien   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
84378556Sobrien      block   [nblock+i] = block[i];
84478556Sobrien      quadrant[nblock+i] = 0;
84578556Sobrien   }
84678556Sobrien
84778556Sobrien   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
84878556Sobrien
84978556Sobrien   /*-- Complete the initial radix sort --*/
85078556Sobrien   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
85178556Sobrien
85278556Sobrien   s = block[0] << 8;
85378556Sobrien   i = nblock-1;
85478556Sobrien   for (; i >= 3; i -= 4) {
85578556Sobrien      s = (s >> 8) | (block[i] << 8);
85678556Sobrien      j = ftab[s] -1;
85778556Sobrien      ftab[s] = j;
85878556Sobrien      ptr[j] = i;
85978556Sobrien      s = (s >> 8) | (block[i-1] << 8);
86078556Sobrien      j = ftab[s] -1;
86178556Sobrien      ftab[s] = j;
86278556Sobrien      ptr[j] = i-1;
86378556Sobrien      s = (s >> 8) | (block[i-2] << 8);
86478556Sobrien      j = ftab[s] -1;
86578556Sobrien      ftab[s] = j;
86678556Sobrien      ptr[j] = i-2;
86778556Sobrien      s = (s >> 8) | (block[i-3] << 8);
86878556Sobrien      j = ftab[s] -1;
86978556Sobrien      ftab[s] = j;
87078556Sobrien      ptr[j] = i-3;
87178556Sobrien   }
87278556Sobrien   for (; i >= 0; i--) {
87378556Sobrien      s = (s >> 8) | (block[i] << 8);
87478556Sobrien      j = ftab[s] -1;
87578556Sobrien      ftab[s] = j;
87678556Sobrien      ptr[j] = i;
87778556Sobrien   }
87878556Sobrien
87978556Sobrien   /*--
88078556Sobrien      Now ftab contains the first loc of every small bucket.
88178556Sobrien      Calculate the running order, from smallest to largest
88278556Sobrien      big bucket.
88378556Sobrien   --*/
88478556Sobrien   for (i = 0; i <= 255; i++) {
88578556Sobrien      bigDone     [i] = False;
88678556Sobrien      runningOrder[i] = i;
88778556Sobrien   }
88878556Sobrien
88978556Sobrien   {
89078556Sobrien      Int32 vv;
89178556Sobrien      Int32 h = 1;
89278556Sobrien      do h = 3 * h + 1; while (h <= 256);
89378556Sobrien      do {
89478556Sobrien         h = h / 3;
89578556Sobrien         for (i = h; i <= 255; i++) {
89678556Sobrien            vv = runningOrder[i];
89778556Sobrien            j = i;
89878556Sobrien            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
89978556Sobrien               runningOrder[j] = runningOrder[j-h];
90078556Sobrien               j = j - h;
90178556Sobrien               if (j <= (h - 1)) goto zero;
90278556Sobrien            }
90378556Sobrien            zero:
90478556Sobrien            runningOrder[j] = vv;
90578556Sobrien         }
90678556Sobrien      } while (h != 1);
90778556Sobrien   }
90878556Sobrien
90978556Sobrien   /*--
91078556Sobrien      The main sorting loop.
91178556Sobrien   --*/
91278556Sobrien
91378556Sobrien   numQSorted = 0;
91478556Sobrien
91578556Sobrien   for (i = 0; i <= 255; i++) {
91678556Sobrien
91778556Sobrien      /*--
91878556Sobrien         Process big buckets, starting with the least full.
91978556Sobrien         Basically this is a 3-step process in which we call
92078556Sobrien         mainQSort3 to sort the small buckets [ss, j], but
92178556Sobrien         also make a big effort to avoid the calls if we can.
92278556Sobrien      --*/
92378556Sobrien      ss = runningOrder[i];
92478556Sobrien
92578556Sobrien      /*--
92678556Sobrien         Step 1:
92778556Sobrien         Complete the big bucket [ss] by quicksorting
92878556Sobrien         any unsorted small buckets [ss, j], for j != ss.
92978556Sobrien         Hopefully previous pointer-scanning phases have already
93078556Sobrien         completed many of the small buckets [ss, j], so
93178556Sobrien         we don't have to sort them at all.
93278556Sobrien      --*/
93378556Sobrien      for (j = 0; j <= 255; j++) {
93478556Sobrien         if (j != ss) {
93578556Sobrien            sb = (ss << 8) + j;
93678556Sobrien            if ( ! (ftab[sb] & SETMASK) ) {
93778556Sobrien               Int32 lo = ftab[sb]   & CLEARMASK;
93878556Sobrien               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
93978556Sobrien               if (hi > lo) {
94078556Sobrien                  if (verb >= 4)
94178556Sobrien                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
94278556Sobrien                                "done %d   this %d\n",
94378556Sobrien                                ss, j, numQSorted, hi - lo + 1 );
94478556Sobrien                  mainQSort3 (
94578556Sobrien                     ptr, block, quadrant, nblock,
94678556Sobrien                     lo, hi, BZ_N_RADIX, budget
94778556Sobrien                  );
94878556Sobrien                  numQSorted += (hi - lo + 1);
94978556Sobrien                  if (*budget < 0) return;
95078556Sobrien               }
95178556Sobrien            }
95278556Sobrien            ftab[sb] |= SETMASK;
95378556Sobrien         }
95478556Sobrien      }
95578556Sobrien
95678556Sobrien      AssertH ( !bigDone[ss], 1006 );
95778556Sobrien
95878556Sobrien      /*--
95978556Sobrien         Step 2:
96078556Sobrien         Now scan this big bucket [ss] so as to synthesise the
96178556Sobrien         sorted order for small buckets [t, ss] for all t,
96278556Sobrien         including, magically, the bucket [ss,ss] too.
96378556Sobrien         This will avoid doing Real Work in subsequent Step 1's.
96478556Sobrien      --*/
96578556Sobrien      {
96678556Sobrien         for (j = 0; j <= 255; j++) {
96778556Sobrien            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
96878556Sobrien            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
96978556Sobrien         }
97078556Sobrien         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
97178556Sobrien            k = ptr[j]-1; if (k < 0) k += nblock;
97278556Sobrien            c1 = block[k];
97378556Sobrien            if (!bigDone[c1])
97478556Sobrien               ptr[ copyStart[c1]++ ] = k;
97578556Sobrien         }
97678556Sobrien         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
97778556Sobrien            k = ptr[j]-1; if (k < 0) k += nblock;
97878556Sobrien            c1 = block[k];
97978556Sobrien            if (!bigDone[c1])
98078556Sobrien               ptr[ copyEnd[c1]-- ] = k;
98178556Sobrien         }
98278556Sobrien      }
98378556Sobrien
98478556Sobrien      AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
98578556Sobrien
98678556Sobrien      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
98778556Sobrien
98878556Sobrien      /*--
98978556Sobrien         Step 3:
99078556Sobrien         The [ss] big bucket is now done.  Record this fact,
99178556Sobrien         and update the quadrant descriptors.  Remember to
99278556Sobrien         update quadrants in the overshoot area too, if
99378556Sobrien         necessary.  The "if (i < 255)" test merely skips
99478556Sobrien         this updating for the last bucket processed, since
99578556Sobrien         updating for the last bucket is pointless.
99678556Sobrien
99778556Sobrien         The quadrant array provides a way to incrementally
99878556Sobrien         cache sort orderings, as they appear, so as to
99978556Sobrien         make subsequent comparisons in fullGtU() complete
100078556Sobrien         faster.  For repetitive blocks this makes a big
100178556Sobrien         difference (but not big enough to be able to avoid
100278556Sobrien         the fallback sorting mechanism, exponential radix sort).
100378556Sobrien
100478556Sobrien         The precise meaning is: at all times:
100578556Sobrien
100678556Sobrien            for 0 <= i < nblock and 0 <= j <= nblock
100778556Sobrien
100878556Sobrien            if block[i] != block[j],
100978556Sobrien
101078556Sobrien               then the relative values of quadrant[i] and
101178556Sobrien                    quadrant[j] are meaningless.
101278556Sobrien
101378556Sobrien               else {
101478556Sobrien                  if quadrant[i] < quadrant[j]
101578556Sobrien                     then the string starting at i lexicographically
101678556Sobrien                     precedes the string starting at j
101778556Sobrien
101878556Sobrien                  else if quadrant[i] > quadrant[j]
101978556Sobrien                     then the string starting at j lexicographically
102078556Sobrien                     precedes the string starting at i
102178556Sobrien
102278556Sobrien                  else
102378556Sobrien                     the relative ordering of the strings starting
102478556Sobrien                     at i and j has not yet been determined.
102578556Sobrien               }
102678556Sobrien      --*/
102778556Sobrien      bigDone[ss] = True;
102878556Sobrien
102978556Sobrien      if (i < 255) {
103078556Sobrien         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
103178556Sobrien         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
103278556Sobrien         Int32 shifts   = 0;
103378556Sobrien
103478556Sobrien         while ((bbSize >> shifts) > 65534) shifts++;
103578556Sobrien
103678556Sobrien         for (j = bbSize-1; j >= 0; j--) {
103778556Sobrien            Int32 a2update     = ptr[bbStart + j];
103878556Sobrien            UInt16 qVal        = (UInt16)(j >> shifts);
103978556Sobrien            quadrant[a2update] = qVal;
104078556Sobrien            if (a2update < BZ_N_OVERSHOOT)
104178556Sobrien               quadrant[a2update + nblock] = qVal;
104278556Sobrien         }
104378556Sobrien         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
104478556Sobrien      }
104578556Sobrien
104678556Sobrien   }
104778556Sobrien
104878556Sobrien   if (verb >= 4)
104978556Sobrien      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
105078556Sobrien                 nblock, numQSorted, nblock - numQSorted );
105178556Sobrien}
105278556Sobrien
105378556Sobrien#undef BIGFREQ
105478556Sobrien#undef SETMASK
105578556Sobrien#undef CLEARMASK
105678556Sobrien
105778556Sobrien
105878556Sobrien/*---------------------------------------------*/
105978556Sobrien/* Pre:
106078556Sobrien      nblock > 0
106178556Sobrien      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
106278556Sobrien      ((UChar*)arr2)  [0 .. nblock-1] holds block
106378556Sobrien      arr1 exists for [0 .. nblock-1]
106478556Sobrien
106578556Sobrien   Post:
106678556Sobrien      ((UChar*)arr2) [0 .. nblock-1] holds block
106778556Sobrien      All other areas of block destroyed
106878556Sobrien      ftab [ 0 .. 65536 ] destroyed
106978556Sobrien      arr1 [0 .. nblock-1] holds sorted order
107078556Sobrien*/
107178556Sobrienvoid BZ2_blockSort ( EState* s )
107278556Sobrien{
107378556Sobrien   UInt32* ptr    = s->ptr;
107478556Sobrien   UChar*  block  = s->block;
107578556Sobrien   UInt32* ftab   = s->ftab;
107678556Sobrien   Int32   nblock = s->nblock;
107778556Sobrien   Int32   verb   = s->verbosity;
107878556Sobrien   Int32   wfact  = s->workFactor;
107978556Sobrien   UInt16* quadrant;
108078556Sobrien   Int32   budget;
108178556Sobrien   Int32   budgetInit;
108278556Sobrien   Int32   i;
108378556Sobrien
108478556Sobrien   if (nblock < 10000) {
108578556Sobrien      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
108678556Sobrien   } else {
108778556Sobrien      /* Calculate the location for quadrant, remembering to get
108878556Sobrien         the alignment right.  Assumes that &(block[0]) is at least
108978556Sobrien         2-byte aligned -- this should be ok since block is really
109078556Sobrien         the first section of arr2.
109178556Sobrien      */
109278556Sobrien      i = nblock+BZ_N_OVERSHOOT;
109378556Sobrien      if (i & 1) i++;
109478556Sobrien      quadrant = (UInt16*)(&(block[i]));
109578556Sobrien
109678556Sobrien      /* (wfact-1) / 3 puts the default-factor-30
109778556Sobrien         transition point at very roughly the same place as
109878556Sobrien         with v0.1 and v0.9.0.
109978556Sobrien         Not that it particularly matters any more, since the
110078556Sobrien         resulting compressed stream is now the same regardless
110178556Sobrien         of whether or not we use the main sort or fallback sort.
110278556Sobrien      */
110378556Sobrien      if (wfact < 1  ) wfact = 1;
110478556Sobrien      if (wfact > 100) wfact = 100;
110578556Sobrien      budgetInit = nblock * ((wfact-1) / 3);
110678556Sobrien      budget = budgetInit;
110778556Sobrien
110878556Sobrien      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
110978556Sobrien      if (verb >= 3)
111078556Sobrien         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
111178556Sobrien                    budgetInit - budget,
111278556Sobrien                    nblock,
111378556Sobrien                    (float)(budgetInit - budget) /
111478556Sobrien                    (float)(nblock==0 ? 1 : nblock) );
111578556Sobrien      if (budget < 0) {
111678556Sobrien         if (verb >= 2)
111778556Sobrien            VPrintf0 ( "    too repetitive; using fallback"
111878556Sobrien                       " sorting algorithm\n" );
111978556Sobrien         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
112078556Sobrien      }
112178556Sobrien   }
112278556Sobrien
112378556Sobrien   s->origPtr = -1;
112478556Sobrien   for (i = 0; i < s->nblock; i++)
112578556Sobrien      if (ptr[i] == 0)
112678556Sobrien         { s->origPtr = i; break; };
112778556Sobrien
112878556Sobrien   AssertH( s->origPtr != -1, 1003 );
112978556Sobrien}
113078556Sobrien
113178556Sobrien
113278556Sobrien/*-------------------------------------------------------------*/
113378556Sobrien/*--- end                                       blocksort.c ---*/
113478556Sobrien/*-------------------------------------------------------------*/
1135