blocksort.c revision 78556
142421Syokota
242421Syokota/*-------------------------------------------------------------*/
342421Syokota/*--- Block sorting machinery                               ---*/
442421Syokota/*---                                           blocksort.c ---*/
542421Syokota/*-------------------------------------------------------------*/
642421Syokota
742421Syokota/*--
842421Syokota  This file is a part of bzip2 and/or libbzip2, a program and
942421Syokota  library for lossless, block-sorting data compression.
1042421Syokota
1142421Syokota  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
1242421Syokota
1342421Syokota  Redistribution and use in source and binary forms, with or without
1442421Syokota  modification, are permitted provided that the following conditions
1542421Syokota  are met:
1642421Syokota
1742421Syokota  1. Redistributions of source code must retain the above copyright
1842421Syokota     notice, this list of conditions and the following disclaimer.
1942421Syokota
2042421Syokota  2. The origin of this software must not be misrepresented; you must
2142421Syokota     not claim that you wrote the original software.  If you use this
2242421Syokota     software in a product, an acknowledgment in the product
2342421Syokota     documentation would be appreciated but is not required.
2442421Syokota
2542421Syokota  3. Altered source versions must be plainly marked as such, and must
2642421Syokota     not be misrepresented as being the original software.
2742421Syokota
2842421Syokota  4. The name of the author may not be used to endorse or promote
2942421Syokota     products derived from this software without specific prior written
3050477Speter     permission.
3142421Syokota
3242421Syokota  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
3342421Syokota  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
3442421Syokota  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
3542421Syokota  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
3642421Syokota  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
3742421Syokota  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
3858271Syokota  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
3942421Syokota  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
4042421Syokota  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
4158271Syokota  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
4258271Syokota  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4358271Syokota
4458271Syokota  Julian Seward, Cambridge, UK.
4542421Syokota  jseward@acm.org
4642421Syokota  bzip2/libbzip2 version 1.0 of 21 March 2000
4742421Syokota
4842421Syokota  This program is based on (at least) the work of:
4942421Syokota     Mike Burrows
5042421Syokota     David Wheeler
5142421Syokota     Peter Fenwick
5242421Syokota     Alistair Moffat
53102149Speter     Radford Neal
5442421Syokota     Ian H. Witten
5542421Syokota     Robert Sedgewick
5642421Syokota     Jon L. Bentley
5742421Syokota
5842421Syokota  For more information on these sources, see the manual.
5942421Syokota
6042421Syokota  To get some idea how the block sorting algorithms in this file
6142421Syokota  work, read my paper
6242421Syokota     On the Performance of BWT Sorting Algorithms
6342421Syokota  in Proceedings of the IEEE Data Compression Conference 2000,
6442421Syokota  Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
6542421Syokota  file implements the algorithm called  cache  in the paper.
6642421Syokota--*/
6742421Syokota
6842421Syokota
6942421Syokota#include "bzlib_private.h"
7058271Syokota
7158271Syokota/*---------------------------------------------*/
7258271Syokota/*--- Fallback O(N log(N)^2) sorting        ---*/
7358271Syokota/*--- algorithm, for repetitive blocks      ---*/
7458271Syokota/*---------------------------------------------*/
7558271Syokota
7658271Syokota/*---------------------------------------------*/
7742421Syokotastatic
7842421Syokota__inline__
7942421Syokotavoid fallbackSimpleSort ( UInt32* fmap,
8042421Syokota                          UInt32* eclass,
8142421Syokota                          Int32   lo,
8242421Syokota                          Int32   hi )
8342421Syokota{
8442421Syokota   Int32 i, j, tmp;
8542421Syokota   UInt32 ec_tmp;
8642421Syokota
8742421Syokota   if (lo == hi) return;
8842421Syokota
8942421Syokota   if (hi - lo > 3) {
9042421Syokota      for ( i = hi-4; i >= lo; i-- ) {
9142421Syokota         tmp = fmap[i];
9258271Syokota         ec_tmp = eclass[tmp];
9358271Syokota         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
9442421Syokota            fmap[j-4] = fmap[j];
9542421Syokota         fmap[j-4] = tmp;
9642421Syokota      }
9742421Syokota   }
9842421Syokota
9942421Syokota   for ( i = hi-1; i >= lo; i-- ) {
10042421Syokota      tmp = fmap[i];
10142421Syokota      ec_tmp = eclass[tmp];
10242421Syokota      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
10342421Syokota         fmap[j-1] = fmap[j];
10442421Syokota      fmap[j-1] = tmp;
10542421Syokota   }
10642421Syokota}
10742421Syokota
10842421Syokota
10942421Syokota/*---------------------------------------------*/
11042421Syokota#define fswap(zz1, zz2) \
11142421Syokota   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
11242421Syokota
11369781Sdwmalone#define fvswap(zzp1, zzp2, zzn)       \
11442421Syokota{                                     \
11542421Syokota   Int32 yyp1 = (zzp1);               \
11642421Syokota   Int32 yyp2 = (zzp2);               \
11742421Syokota   Int32 yyn  = (zzn);                \
11842421Syokota   while (yyn > 0) {                  \
11942421Syokota      fswap(fmap[yyp1], fmap[yyp2]);  \
12042421Syokota      yyp1++; yyp2++; yyn--;          \
12158271Syokota   }                                  \
12242421Syokota}
12358271Syokota
12447296Syokota
12558271Syokota#define fmin(a,b) ((a) < (b)) ? (a) : (b)
12658271Syokota
12747296Syokota#define fpush(lz,hz) { stackLo[sp] = lz; \
12847296Syokota                       stackHi[sp] = hz; \
12947296Syokota                       sp++; }
13047296Syokota
13158271Syokota#define fpop(lz,hz) { sp--;              \
13258271Syokota                      lz = stackLo[sp];  \
13347296Syokota                      hz = stackHi[sp]; }
13458271Syokota
13558271Syokota#define FALLBACK_QSORT_SMALL_THRESH 10
13658271Syokota#define FALLBACK_QSORT_STACK_SIZE   100
13742421Syokota
13842421Syokota
13942421Syokotastatic
14042421Syokotavoid fallbackQSort3 ( UInt32* fmap,
14142421Syokota                      UInt32* eclass,
14242421Syokota                      Int32   loSt,
14358271Syokota                      Int32   hiSt )
14458271Syokota{
14558271Syokota   Int32 unLo, unHi, ltLo, gtHi, n, m;
14658271Syokota   Int32 sp, lo, hi;
14758271Syokota   UInt32 med, r, r3;
14858271Syokota   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
14958271Syokota   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
15058271Syokota
15158271Syokota   r = 0;
15258271Syokota
15358271Syokota   sp = 0;
15458271Syokota   fpush ( loSt, hiSt );
15558271Syokota
15658271Syokota   while (sp > 0) {
157114930Speter
15858271Syokota      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
159114930Speter
160114930Speter      fpop ( lo, hi );
16158271Syokota      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
16265176Sdfr         fallbackSimpleSort ( fmap, eclass, lo, hi );
16392661Speter         continue;
16492661Speter      }
16592661Speter
16692661Speter      /* Random partitioning.  Median of 3 sometimes fails to
16758271Syokota         avoid bad cases.  Median of 9 seems to help but
16858271Syokota         looks rather expensive.  This too seems to work but
16958271Syokota         is cheaper.  Guidance for the magic constants
17058271Syokota         7621 and 32768 is taken from Sedgewick's algorithms
17158271Syokota         book, chapter 35.
17258271Syokota      */
17358271Syokota      r = ((r * 7621) + 1) % 32768;
17458271Syokota      r3 = r % 3;
17558271Syokota      if (r3 == 0) med = eclass[fmap[lo]]; else
17658271Syokota      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
17742421Syokota                   med = eclass[fmap[hi]];
17842421Syokota
17942421Syokota      unLo = ltLo = lo;
18058271Syokota      unHi = gtHi = hi;
18158271Syokota
18242421Syokota      while (1) {
18358271Syokota         while (1) {
18442421Syokota            if (unLo > unHi) break;
18542421Syokota            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
18642421Syokota            if (n == 0) {
18742421Syokota               fswap(fmap[unLo], fmap[ltLo]);
18842421Syokota               ltLo++; unLo++;
18942421Syokota               continue;
19042421Syokota            };
19142421Syokota            if (n > 0) break;
19242421Syokota            unLo++;
19342421Syokota         }
19442421Syokota         while (1) {
19542421Syokota            if (unLo > unHi) break;
19658271Syokota            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
19758271Syokota            if (n == 0) {
19858271Syokota               fswap(fmap[unHi], fmap[gtHi]);
19942421Syokota               gtHi--; unHi--;
20042421Syokota               continue;
20142421Syokota            };
20258271Syokota            if (n < 0) break;
20342421Syokota            unHi--;
20458271Syokota         }
20542421Syokota         if (unLo > unHi) break;
20658271Syokota         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
20758271Syokota      }
20858271Syokota
20958271Syokota      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
21058271Syokota
21158271Syokota      if (gtHi < ltLo) continue;
21258271Syokota
21342421Syokota      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
21442421Syokota      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
21542421Syokota
21642421Syokota      n = lo + unLo - ltLo - 1;
21742421Syokota      m = hi - (gtHi - unHi) + 1;
21842421Syokota
21942421Syokota      if (n - lo > hi - m) {
22042421Syokota         fpush ( lo, n );
22142421Syokota         fpush ( m, hi );
22242421Syokota      } else {
22393279Smurray         fpush ( m, hi );
22442421Syokota         fpush ( lo, n );
22542421Syokota      }
22693279Smurray   }
22742421Syokota}
22842421Syokota
22942421Syokota#undef fmin
23042421Syokota#undef fpush
23142421Syokota#undef fpop
23242421Syokota#undef fswap
23342421Syokota#undef fvswap
23442421Syokota#undef FALLBACK_QSORT_SMALL_THRESH
23542421Syokota#undef FALLBACK_QSORT_STACK_SIZE
23642421Syokota
23742421Syokota
23842421Syokota/*---------------------------------------------*/
23942421Syokota/* Pre:
24042421Syokota      nblock > 0
24142421Syokota      eclass exists for [0 .. nblock-1]
24242421Syokota      ((UChar*)eclass) [0 .. nblock-1] holds block
24342421Syokota      ptr exists for [0 .. nblock-1]
24442421Syokota
24593279Smurray   Post:
24693279Smurray      ((UChar*)eclass) [0 .. nblock-1] holds block
24742421Syokota      All other areas of eclass destroyed
24893279Smurray      fmap [0 .. nblock-1] holds sorted order
24993279Smurray      bhtab [ 0 .. 2+(nblock/32) ] destroyed
25093279Smurray*/
25142421Syokota
25242421Syokota#define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
25342421Syokota#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
25442421Syokota#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
25542421Syokota#define      WORD_BH(zz)  bhtab[(zz) >> 5]
25642421Syokota#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
25742421Syokota
25842421Syokotastatic
25942421Syokotavoid fallbackSort ( UInt32* fmap,
26042421Syokota                    UInt32* eclass,
26142421Syokota                    UInt32* bhtab,
26242421Syokota                    Int32   nblock,
26342421Syokota                    Int32   verb )
26442421Syokota{
26542421Syokota   Int32 ftab[257];
26642421Syokota   Int32 ftabCopy[256];
26742421Syokota   Int32 H, i, j, k, l, r, cc, cc1;
26842421Syokota   Int32 nNotDone;
26942421Syokota   Int32 nBhtab;
27042421Syokota   UChar* eclass8 = (UChar*)eclass;
27158271Syokota
27242421Syokota   /*--
27342421Syokota      Initial 1-char radix sort to generate
27442421Syokota      initial fmap and initial BH bits.
27542421Syokota   --*/
27642421Syokota   if (verb >= 4)
27742421Syokota      VPrintf0 ( "        bucket sorting ...\n" );
27842421Syokota   for (i = 0; i < 257;    i++) ftab[i] = 0;
27942421Syokota   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
28042421Syokota   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
28142421Syokota   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
28242421Syokota
28342421Syokota   for (i = 0; i < nblock; i++) {
28442421Syokota      j = eclass8[i];
28542421Syokota      k = ftab[j] - 1;
28642421Syokota      ftab[j] = k;
28742421Syokota      fmap[k] = i;
28842421Syokota   }
28942421Syokota
29042421Syokota   nBhtab = 2 + (nblock / 32);
29142421Syokota   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
29242421Syokota   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
29342421Syokota
29442421Syokota   /*--
29542421Syokota      Inductively refine the buckets.  Kind-of an
29642421Syokota      "exponential radix sort" (!), inspired by the
29742421Syokota      Manber-Myers suffix array construction algorithm.
29842421Syokota   --*/
29942421Syokota
30042421Syokota   /*-- set sentinel bits for block-end detection --*/
30142421Syokota   for (i = 0; i < 32; i++) {
30242421Syokota      SET_BH(nblock + 2*i);
30342421Syokota      CLEAR_BH(nblock + 2*i + 1);
30442421Syokota   }
30542421Syokota
30642421Syokota   /*-- the log(N) loop --*/
30742421Syokota   H = 1;
30842421Syokota   while (1) {
30942421Syokota
31042421Syokota      if (verb >= 4)
31142421Syokota         VPrintf1 ( "        depth %6d has ", H );
31242421Syokota
31342421Syokota      j = 0;
31442421Syokota      for (i = 0; i < nblock; i++) {
31542421Syokota         if (ISSET_BH(i)) j = i;
31642421Syokota         k = fmap[i] - H; if (k < 0) k += nblock;
31742421Syokota         eclass[k] = j;
31842421Syokota      }
31958271Syokota
32042421Syokota      nNotDone = 0;
32142421Syokota      r = -1;
32258271Syokota      while (1) {
32342421Syokota
32442421Syokota	 /*-- find the next non-singleton bucket --*/
32558271Syokota         k = r + 1;
32642421Syokota         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
32742421Syokota         if (ISSET_BH(k)) {
32842421Syokota            while (WORD_BH(k) == 0xffffffff) k += 32;
32942421Syokota            while (ISSET_BH(k)) k++;
33042421Syokota         }
33142421Syokota         l = k - 1;
33242421Syokota         if (l >= nblock) break;
33342421Syokota         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
33442421Syokota         if (!ISSET_BH(k)) {
33542421Syokota            while (WORD_BH(k) == 0x00000000) k += 32;
33642421Syokota            while (!ISSET_BH(k)) k++;
33742421Syokota         }
33842421Syokota         r = k - 1;
33942421Syokota         if (r >= nblock) break;
34042421Syokota
34142421Syokota         /*-- now [l, r] bracket current bucket --*/
34242421Syokota         if (r > l) {
34342421Syokota            nNotDone += (r - l + 1);
34442421Syokota            fallbackQSort3 ( fmap, eclass, l, r );
34558271Syokota
34642421Syokota            /*-- scan bucket and generate header bits-- */
34742421Syokota            cc = -1;
34842421Syokota            for (i = l; i <= r; i++) {
34942421Syokota               cc1 = eclass[fmap[i]];
35042421Syokota               if (cc != cc1) { SET_BH(i); cc = cc1; };
35142421Syokota            }
35242421Syokota         }
35342421Syokota      }
35442421Syokota
35542421Syokota      if (verb >= 4)
35642421Syokota         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
35742421Syokota
35842421Syokota      H *= 2;
35942421Syokota      if (H > nblock || nNotDone == 0) break;
36042421Syokota   }
36142421Syokota
36258271Syokota   /*--
36342421Syokota      Reconstruct the original block in
36442421Syokota      eclass8 [0 .. nblock-1], since the
36542421Syokota      previous phase destroyed it.
36658271Syokota   --*/
36742421Syokota   if (verb >= 4)
36842421Syokota      VPrintf0 ( "        reconstructing block ...\n" );
36942421Syokota   j = 0;
37042421Syokota   for (i = 0; i < nblock; i++) {
37142421Syokota      while (ftabCopy[j] == 0) j++;
37242421Syokota      ftabCopy[j]--;
37342421Syokota      eclass8[fmap[i]] = (UChar)j;
37442421Syokota   }
37542421Syokota   AssertH ( j < 256, 1005 );
37642421Syokota}
37742421Syokota
37842421Syokota#undef       SET_BH
37942421Syokota#undef     CLEAR_BH
38042421Syokota#undef     ISSET_BH
38142421Syokota#undef      WORD_BH
38242421Syokota#undef UNALIGNED_BH
38342421Syokota
38442421Syokota
38542421Syokota/*---------------------------------------------*/
38642421Syokota/*--- The main, O(N^2 log(N)) sorting       ---*/
38742421Syokota/*--- algorithm.  Faster for "normal"       ---*/
38842421Syokota/*--- non-repetitive blocks.                ---*/
38958271Syokota/*---------------------------------------------*/
39042421Syokota
39158271Syokota/*---------------------------------------------*/
39242421Syokotastatic
39342421Syokota__inline__
39442421SyokotaBool mainGtU ( UInt32  i1,
39542421Syokota               UInt32  i2,
39642421Syokota               UChar*  block,
39742421Syokota               UInt16* quadrant,
39842421Syokota               UInt32  nblock,
39942421Syokota               Int32*  budget )
40042421Syokota{
40142421Syokota   Int32  k;
40242421Syokota   UChar  c1, c2;
40342421Syokota   UInt16 s1, s2;
40442421Syokota
40542421Syokota   AssertD ( i1 != i2, "mainGtU" );
40642421Syokota   /* 1 */
40742421Syokota   c1 = block[i1]; c2 = block[i2];
40842421Syokota   if (c1 != c2) return (c1 > c2);
40942421Syokota   i1++; i2++;
41042421Syokota   /* 2 */
41142421Syokota   c1 = block[i1]; c2 = block[i2];
41242421Syokota   if (c1 != c2) return (c1 > c2);
41342421Syokota   i1++; i2++;
41458271Syokota   /* 3 */
41542421Syokota   c1 = block[i1]; c2 = block[i2];
41642421Syokota   if (c1 != c2) return (c1 > c2);
41742421Syokota   i1++; i2++;
41858271Syokota   /* 4 */
41942421Syokota   c1 = block[i1]; c2 = block[i2];
42042421Syokota   if (c1 != c2) return (c1 > c2);
42142421Syokota   i1++; i2++;
42242421Syokota   /* 5 */
42342421Syokota   c1 = block[i1]; c2 = block[i2];
42442421Syokota   if (c1 != c2) return (c1 > c2);
42542421Syokota   i1++; i2++;
42642421Syokota   /* 6 */
42742421Syokota   c1 = block[i1]; c2 = block[i2];
42842421Syokota   if (c1 != c2) return (c1 > c2);
42942421Syokota   i1++; i2++;
43042421Syokota   /* 7 */
43142421Syokota   c1 = block[i1]; c2 = block[i2];
43242421Syokota   if (c1 != c2) return (c1 > c2);
43342421Syokota   i1++; i2++;
43442421Syokota   /* 8 */
43542421Syokota   c1 = block[i1]; c2 = block[i2];
43642421Syokota   if (c1 != c2) return (c1 > c2);
43742421Syokota   i1++; i2++;
43842421Syokota   /* 9 */
43942421Syokota   c1 = block[i1]; c2 = block[i2];
44042421Syokota   if (c1 != c2) return (c1 > c2);
44158271Syokota   i1++; i2++;
44242421Syokota   /* 10 */
44358271Syokota   c1 = block[i1]; c2 = block[i2];
44442421Syokota   if (c1 != c2) return (c1 > c2);
44542421Syokota   i1++; i2++;
44642421Syokota   /* 11 */
44742421Syokota   c1 = block[i1]; c2 = block[i2];
44842421Syokota   if (c1 != c2) return (c1 > c2);
44942421Syokota   i1++; i2++;
45042421Syokota   /* 12 */
45142421Syokota   c1 = block[i1]; c2 = block[i2];
45242421Syokota   if (c1 != c2) return (c1 > c2);
45342421Syokota   i1++; i2++;
45442421Syokota
45542421Syokota   k = nblock + 8;
45642421Syokota
45742421Syokota   do {
45842421Syokota      /* 1 */
45942421Syokota      c1 = block[i1]; c2 = block[i2];
46042421Syokota      if (c1 != c2) return (c1 > c2);
46142421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
46242421Syokota      if (s1 != s2) return (s1 > s2);
46342421Syokota      i1++; i2++;
46458271Syokota      /* 2 */
46542421Syokota      c1 = block[i1]; c2 = block[i2];
46642421Syokota      if (c1 != c2) return (c1 > c2);
46742421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
46842421Syokota      if (s1 != s2) return (s1 > s2);
46942421Syokota      i1++; i2++;
47042421Syokota      /* 3 */
47142421Syokota      c1 = block[i1]; c2 = block[i2];
47242421Syokota      if (c1 != c2) return (c1 > c2);
47342421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
47458271Syokota      if (s1 != s2) return (s1 > s2);
47542421Syokota      i1++; i2++;
47642421Syokota      /* 4 */
47742421Syokota      c1 = block[i1]; c2 = block[i2];
47842421Syokota      if (c1 != c2) return (c1 > c2);
47942421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
48042421Syokota      if (s1 != s2) return (s1 > s2);
48142421Syokota      i1++; i2++;
48242421Syokota      /* 5 */
48342421Syokota      c1 = block[i1]; c2 = block[i2];
48458271Syokota      if (c1 != c2) return (c1 > c2);
48542421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
48642421Syokota      if (s1 != s2) return (s1 > s2);
48742421Syokota      i1++; i2++;
48842421Syokota      /* 6 */
48942421Syokota      c1 = block[i1]; c2 = block[i2];
49042421Syokota      if (c1 != c2) return (c1 > c2);
49142421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
49242421Syokota      if (s1 != s2) return (s1 > s2);
49342421Syokota      i1++; i2++;
49442421Syokota      /* 7 */
49542421Syokota      c1 = block[i1]; c2 = block[i2];
49642421Syokota      if (c1 != c2) return (c1 > c2);
49742421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
49842421Syokota      if (s1 != s2) return (s1 > s2);
49942421Syokota      i1++; i2++;
50042421Syokota      /* 8 */
50142421Syokota      c1 = block[i1]; c2 = block[i2];
50242421Syokota      if (c1 != c2) return (c1 > c2);
50342421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
50442421Syokota      if (s1 != s2) return (s1 > s2);
50542421Syokota      i1++; i2++;
50642421Syokota
50742421Syokota      if (i1 >= nblock) i1 -= nblock;
50842421Syokota      if (i2 >= nblock) i2 -= nblock;
50942421Syokota
51042421Syokota      k -= 8;
51142421Syokota      (*budget)--;
51242421Syokota   }
51342421Syokota      while (k >= 0);
51442421Syokota
51542421Syokota   return False;
51642421Syokota}
51742421Syokota
51842421Syokota
51942421Syokota/*---------------------------------------------*/
52042421Syokota/*--
52142421Syokota   Knuth's increments seem to work better
52242421Syokota   than Incerpi-Sedgewick here.  Possibly
52342421Syokota   because the number of elems to sort is
52442421Syokota   usually small, typically <= 20.
52542421Syokota--*/
52642421Syokotastatic
52742421SyokotaInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
52842421Syokota                   9841, 29524, 88573, 265720,
52942421Syokota                   797161, 2391484 };
53042421Syokota
53142421Syokotastatic
53242421Syokotavoid mainSimpleSort ( UInt32* ptr,
53342421Syokota                      UChar*  block,
53442421Syokota                      UInt16* quadrant,
53542421Syokota                      Int32   nblock,
53642421Syokota                      Int32   lo,
53742421Syokota                      Int32   hi,
53842421Syokota                      Int32   d,
53942421Syokota                      Int32*  budget )
54042421Syokota{
54142421Syokota   Int32 i, j, h, bigN, hp;
54242421Syokota   UInt32 v;
54342421Syokota
54442421Syokota   bigN = hi - lo + 1;
54542421Syokota   if (bigN < 2) return;
54642421Syokota
54742421Syokota   hp = 0;
54842421Syokota   while (incs[hp] < bigN) hp++;
54942421Syokota   hp--;
55042421Syokota
55142421Syokota   for (; hp >= 0; hp--) {
55242421Syokota      h = incs[hp];
55342421Syokota
55442421Syokota      i = lo + h;
55542421Syokota      while (True) {
55642421Syokota
55742421Syokota         /*-- copy 1 --*/
55842421Syokota         if (i > hi) break;
55942421Syokota         v = ptr[i];
56042421Syokota         j = i;
56142421Syokota         while ( mainGtU (
56242421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
56342421Syokota                 ) ) {
56442421Syokota            ptr[j] = ptr[j-h];
56542421Syokota            j = j - h;
56642421Syokota            if (j <= (lo + h - 1)) break;
56742421Syokota         }
56842421Syokota         ptr[j] = v;
56942421Syokota         i++;
57042421Syokota
57142421Syokota         /*-- copy 2 --*/
57242421Syokota         if (i > hi) break;
57342421Syokota         v = ptr[i];
57442421Syokota         j = i;
57542421Syokota         while ( mainGtU (
57642421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
57742421Syokota                 ) ) {
57842421Syokota            ptr[j] = ptr[j-h];
57942421Syokota            j = j - h;
58042421Syokota            if (j <= (lo + h - 1)) break;
58142421Syokota         }
58242421Syokota         ptr[j] = v;
58342421Syokota         i++;
58442421Syokota
58542421Syokota         /*-- copy 3 --*/
58642421Syokota         if (i > hi) break;
58742421Syokota         v = ptr[i];
58842421Syokota         j = i;
58942421Syokota         while ( mainGtU (
59042421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
59142421Syokota                 ) ) {
59242421Syokota            ptr[j] = ptr[j-h];
59342421Syokota            j = j - h;
59442421Syokota            if (j <= (lo + h - 1)) break;
59542421Syokota         }
59642421Syokota         ptr[j] = v;
59742421Syokota         i++;
59842421Syokota
59942421Syokota         if (*budget < 0) return;
60042421Syokota      }
60142421Syokota   }
60242421Syokota}
60342421Syokota
60442421Syokota
60542421Syokota/*---------------------------------------------*/
60642421Syokota/*--
60742421Syokota   The following is an implementation of
60842421Syokota   an elegant 3-way quicksort for strings,
60942421Syokota   described in a paper "Fast Algorithms for
61042421Syokota   Sorting and Searching Strings", by Robert
61142421Syokota   Sedgewick and Jon L. Bentley.
61242421Syokota--*/
61358271Syokota
61442421Syokota#define mswap(zz1, zz2) \
61542421Syokota   { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
61642421Syokota
61742421Syokota#define mvswap(zzp1, zzp2, zzn)       \
61842421Syokota{                                     \
61942421Syokota   Int32 yyp1 = (zzp1);               \
62042421Syokota   Int32 yyp2 = (zzp2);               \
62142421Syokota   Int32 yyn  = (zzn);                \
62242421Syokota   while (yyn > 0) {                  \
62342421Syokota      mswap(ptr[yyp1], ptr[yyp2]);    \
62442421Syokota      yyp1++; yyp2++; yyn--;          \
62542421Syokota   }                                  \
62642421Syokota}
62742421Syokota
62842421Syokotastatic
62942421Syokota__inline__
63042421SyokotaUChar mmed3 ( UChar a, UChar b, UChar c )
63142421Syokota{
63242421Syokota   UChar t;
63342421Syokota   if (a > b) { t = a; a = b; b = t; };
63442421Syokota   if (b > c) {
63542421Syokota      b = c;
63642421Syokota      if (a > b) b = a;
63742421Syokota   }
63858271Syokota   return b;
63942421Syokota}
64042421Syokota
64142421Syokota#define mmin(a,b) ((a) < (b)) ? (a) : (b)
64242421Syokota
64342421Syokota#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
64442421Syokota                          stackHi[sp] = hz; \
64542421Syokota                          stackD [sp] = dz; \
64642421Syokota                          sp++; }
64742421Syokota
64842421Syokota#define mpop(lz,hz,dz) { sp--;             \
64942421Syokota                         lz = stackLo[sp]; \
65042421Syokota                         hz = stackHi[sp]; \
65142421Syokota                         dz = stackD [sp]; }
65242421Syokota
65342421Syokota
65442421Syokota#define mnextsize(az) (nextHi[az]-nextLo[az])
65542421Syokota
65642421Syokota#define mnextswap(az,bz)                                        \
65742421Syokota   { Int32 tz;                                                  \
65842421Syokota     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
65942421Syokota     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
66042421Syokota     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
66158271Syokota
66242421Syokota
66342421Syokota#define MAIN_QSORT_SMALL_THRESH 20
66458271Syokota#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
66558271Syokota#define MAIN_QSORT_STACK_SIZE 100
66642421Syokota
66742421Syokotastatic
66842421Syokotavoid mainQSort3 ( UInt32* ptr,
66958271Syokota                  UChar*  block,
67042421Syokota                  UInt16* quadrant,
67142421Syokota                  Int32   nblock,
67242421Syokota                  Int32   loSt,
67342421Syokota                  Int32   hiSt,
67442421Syokota                  Int32   dSt,
67542421Syokota                  Int32*  budget )
67642421Syokota{
67742421Syokota   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
67842421Syokota   Int32 sp, lo, hi, d;
67942421Syokota
68042421Syokota   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
68142421Syokota   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
68258271Syokota   Int32 stackD [MAIN_QSORT_STACK_SIZE];
68342421Syokota
68442421Syokota   Int32 nextLo[3];
68542421Syokota   Int32 nextHi[3];
68642421Syokota   Int32 nextD [3];
68742421Syokota
68842421Syokota   sp = 0;
68942421Syokota   mpush ( loSt, hiSt, dSt );
69042421Syokota
69142421Syokota   while (sp > 0) {
69242421Syokota
69342421Syokota      AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
69442421Syokota
69558271Syokota      mpop ( lo, hi, d );
69642421Syokota      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
69742421Syokota          d > MAIN_QSORT_DEPTH_THRESH) {
69858271Syokota         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
69958271Syokota         if (*budget < 0) return;
70042421Syokota         continue;
70142421Syokota      }
70242421Syokota
70358271Syokota      med = (Int32)
70442421Syokota            mmed3 ( block[ptr[ lo         ]+d],
70542421Syokota                    block[ptr[ hi         ]+d],
70642421Syokota                    block[ptr[ (lo+hi)>>1 ]+d] );
70742421Syokota
70842421Syokota      unLo = ltLo = lo;
70942421Syokota      unHi = gtHi = hi;
71042421Syokota
71142421Syokota      while (True) {
71242421Syokota         while (True) {
71342421Syokota            if (unLo > unHi) break;
71442421Syokota            n = ((Int32)block[ptr[unLo]+d]) - med;
71542421Syokota            if (n == 0) {
71642421Syokota               mswap(ptr[unLo], ptr[ltLo]);
71742421Syokota               ltLo++; unLo++; continue;
71842421Syokota            };
71942421Syokota            if (n >  0) break;
72042421Syokota            unLo++;
72142421Syokota         }
72258271Syokota         while (True) {
72342421Syokota            if (unLo > unHi) break;
72458271Syokota            n = ((Int32)block[ptr[unHi]+d]) - med;
72542421Syokota            if (n == 0) {
72642421Syokota               mswap(ptr[unHi], ptr[gtHi]);
72742421Syokota               gtHi--; unHi--; continue;
72842421Syokota            };
72942421Syokota            if (n <  0) break;
73042421Syokota            unHi--;
73142421Syokota         }
73242421Syokota         if (unLo > unHi) break;
73342421Syokota         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
73442421Syokota      }
73542421Syokota
73642421Syokota      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
73742421Syokota
73842421Syokota      if (gtHi < ltLo) {
73942421Syokota         mpush(lo, hi, d+1 );
74042421Syokota         continue;
74142421Syokota      }
74242421Syokota
74342421Syokota      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
74442421Syokota      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
74542421Syokota
74642421Syokota      n = lo + unLo - ltLo - 1;
74742421Syokota      m = hi - (gtHi - unHi) + 1;
74842421Syokota
74942421Syokota      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
75042421Syokota      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
75142421Syokota      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
75242421Syokota
75342421Syokota      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
75442421Syokota      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
75542421Syokota      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
75642421Syokota
75742421Syokota      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
75842421Syokota      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
75942421Syokota
76042421Syokota      mpush (nextLo[0], nextHi[0], nextD[0]);
76158271Syokota      mpush (nextLo[1], nextHi[1], nextD[1]);
76242421Syokota      mpush (nextLo[2], nextHi[2], nextD[2]);
76358271Syokota   }
76442421Syokota}
76542421Syokota
76642421Syokota#undef mswap
76742421Syokota#undef mvswap
76842421Syokota#undef mpush
76942421Syokota#undef mpop
77042421Syokota#undef mmin
77142421Syokota#undef mnextsize
77242421Syokota#undef mnextswap
77342421Syokota#undef MAIN_QSORT_SMALL_THRESH
77442421Syokota#undef MAIN_QSORT_DEPTH_THRESH
77542421Syokota#undef MAIN_QSORT_STACK_SIZE
77642421Syokota
77742421Syokota
77842421Syokota/*---------------------------------------------*/
77942421Syokota/* Pre:
78042421Syokota      nblock > N_OVERSHOOT
78142421Syokota      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
78242421Syokota      ((UChar*)block32) [0 .. nblock-1] holds block
78342421Syokota      ptr exists for [0 .. nblock-1]
78442421Syokota
78542421Syokota   Post:
78642421Syokota      ((UChar*)block32) [0 .. nblock-1] holds block
78742421Syokota      All other areas of block32 destroyed
78842421Syokota      ftab [0 .. 65536 ] destroyed
78942421Syokota      ptr [0 .. nblock-1] holds sorted order
79042421Syokota      if (*budget < 0), sorting was abandoned
79142421Syokota*/
79242421Syokota
79342421Syokota#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
79442421Syokota#define SETMASK (1 << 21)
79542421Syokota#define CLEARMASK (~(SETMASK))
79642421Syokota
79742421Syokotastatic
79842421Syokotavoid mainSort ( UInt32* ptr,
79958271Syokota                UChar*  block,
80042421Syokota                UInt16* quadrant,
80158271Syokota                UInt32* ftab,
80242421Syokota                Int32   nblock,
80342421Syokota                Int32   verb,
80442421Syokota                Int32*  budget )
80542421Syokota{
80642421Syokota   Int32  i, j, k, ss, sb;
80742421Syokota   Int32  runningOrder[256];
80842421Syokota   Bool   bigDone[256];
80942421Syokota   Int32  copyStart[256];
81042421Syokota   Int32  copyEnd  [256];
81142421Syokota   UChar  c1;
81242421Syokota   Int32  numQSorted;
81342421Syokota   UInt16 s;
81442421Syokota   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
81542421Syokota
81642421Syokota   /*-- set up the 2-byte frequency table --*/
81742421Syokota   for (i = 65536; i >= 0; i--) ftab[i] = 0;
81842421Syokota
81942421Syokota   j = block[0] << 8;
82042421Syokota   i = nblock-1;
82142421Syokota   for (; i >= 3; i -= 4) {
82242421Syokota      quadrant[i] = 0;
82342421Syokota      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
82442421Syokota      ftab[j]++;
82542421Syokota      quadrant[i-1] = 0;
82642421Syokota      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
82742421Syokota      ftab[j]++;
82842421Syokota      quadrant[i-2] = 0;
82942421Syokota      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
83042421Syokota      ftab[j]++;
83142421Syokota      quadrant[i-3] = 0;
83242421Syokota      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
83342421Syokota      ftab[j]++;
83442421Syokota   }
83542421Syokota   for (; i >= 0; i--) {
83642421Syokota      quadrant[i] = 0;
83742421Syokota      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
83842421Syokota      ftab[j]++;
83942421Syokota   }
84042421Syokota
84142421Syokota   /*-- (emphasises close relationship of block & quadrant) --*/
84242421Syokota   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
84342421Syokota      block   [nblock+i] = block[i];
84442421Syokota      quadrant[nblock+i] = 0;
84542421Syokota   }
84642421Syokota
84742421Syokota   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
84842421Syokota
84942421Syokota   /*-- Complete the initial radix sort --*/
85042421Syokota   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
85142421Syokota
85242421Syokota   s = block[0] << 8;
85342421Syokota   i = nblock-1;
85442421Syokota   for (; i >= 3; i -= 4) {
85542421Syokota      s = (s >> 8) | (block[i] << 8);
85642421Syokota      j = ftab[s] -1;
85742421Syokota      ftab[s] = j;
85842421Syokota      ptr[j] = i;
85942421Syokota      s = (s >> 8) | (block[i-1] << 8);
86042421Syokota      j = ftab[s] -1;
86142421Syokota      ftab[s] = j;
86242421Syokota      ptr[j] = i-1;
86342421Syokota      s = (s >> 8) | (block[i-2] << 8);
86442421Syokota      j = ftab[s] -1;
86542421Syokota      ftab[s] = j;
86642421Syokota      ptr[j] = i-2;
86742421Syokota      s = (s >> 8) | (block[i-3] << 8);
86842421Syokota      j = ftab[s] -1;
86942421Syokota      ftab[s] = j;
87042421Syokota      ptr[j] = i-3;
87142421Syokota   }
87242421Syokota   for (; i >= 0; i--) {
87342421Syokota      s = (s >> 8) | (block[i] << 8);
87442421Syokota      j = ftab[s] -1;
87542421Syokota      ftab[s] = j;
87642421Syokota      ptr[j] = i;
87742421Syokota   }
87842421Syokota
87942421Syokota   /*--
88042421Syokota      Now ftab contains the first loc of every small bucket.
88142421Syokota      Calculate the running order, from smallest to largest
88242421Syokota      big bucket.
88342421Syokota   --*/
88442421Syokota   for (i = 0; i <= 255; i++) {
88542421Syokota      bigDone     [i] = False;
88642421Syokota      runningOrder[i] = i;
88742421Syokota   }
88842421Syokota
88942421Syokota   {
89042421Syokota      Int32 vv;
89142421Syokota      Int32 h = 1;
89242421Syokota      do h = 3 * h + 1; while (h <= 256);
89342421Syokota      do {
89442421Syokota         h = h / 3;
89542421Syokota         for (i = h; i <= 255; i++) {
89642421Syokota            vv = runningOrder[i];
89742421Syokota            j = i;
89842421Syokota            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
89942421Syokota               runningOrder[j] = runningOrder[j-h];
90042421Syokota               j = j - h;
90142421Syokota               if (j <= (h - 1)) goto zero;
90242421Syokota            }
90342421Syokota            zero:
90442421Syokota            runningOrder[j] = vv;
90542421Syokota         }
90642421Syokota      } while (h != 1);
90742421Syokota   }
90842421Syokota
90942421Syokota   /*--
91042421Syokota      The main sorting loop.
91142421Syokota   --*/
91242421Syokota
91342421Syokota   numQSorted = 0;
91442421Syokota
91542421Syokota   for (i = 0; i <= 255; i++) {
91642421Syokota
91742421Syokota      /*--
91842421Syokota         Process big buckets, starting with the least full.
91942421Syokota         Basically this is a 3-step process in which we call
92042421Syokota         mainQSort3 to sort the small buckets [ss, j], but
92142421Syokota         also make a big effort to avoid the calls if we can.
92242421Syokota      --*/
92342421Syokota      ss = runningOrder[i];
92442421Syokota
92542421Syokota      /*--
92642421Syokota         Step 1:
92742421Syokota         Complete the big bucket [ss] by quicksorting
92842421Syokota         any unsorted small buckets [ss, j], for j != ss.
92942421Syokota         Hopefully previous pointer-scanning phases have already
93042421Syokota         completed many of the small buckets [ss, j], so
93142421Syokota         we don't have to sort them at all.
93242421Syokota      --*/
93342421Syokota      for (j = 0; j <= 255; j++) {
93442421Syokota         if (j != ss) {
93542421Syokota            sb = (ss << 8) + j;
93642421Syokota            if ( ! (ftab[sb] & SETMASK) ) {
93742421Syokota               Int32 lo = ftab[sb]   & CLEARMASK;
93842421Syokota               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
93942421Syokota               if (hi > lo) {
94042421Syokota                  if (verb >= 4)
94142421Syokota                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
94242421Syokota                                "done %d   this %d\n",
94342421Syokota                                ss, j, numQSorted, hi - lo + 1 );
94442421Syokota                  mainQSort3 (
94542421Syokota                     ptr, block, quadrant, nblock,
94642421Syokota                     lo, hi, BZ_N_RADIX, budget
94742421Syokota                  );
94842421Syokota                  numQSorted += (hi - lo + 1);
94942421Syokota                  if (*budget < 0) return;
95042421Syokota               }
95142421Syokota            }
95242421Syokota            ftab[sb] |= SETMASK;
95342421Syokota         }
95442421Syokota      }
95542421Syokota
95642421Syokota      AssertH ( !bigDone[ss], 1006 );
95742421Syokota
95842421Syokota      /*--
95942421Syokota         Step 2:
96042421Syokota         Now scan this big bucket [ss] so as to synthesise the
96142421Syokota         sorted order for small buckets [t, ss] for all t,
96242421Syokota         including, magically, the bucket [ss,ss] too.
96342421Syokota         This will avoid doing Real Work in subsequent Step 1's.
96442421Syokota      --*/
96542421Syokota      {
96642421Syokota         for (j = 0; j <= 255; j++) {
96742421Syokota            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
96842421Syokota            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
96942421Syokota         }
97042421Syokota         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
97142421Syokota            k = ptr[j]-1; if (k < 0) k += nblock;
97242421Syokota            c1 = block[k];
97342421Syokota            if (!bigDone[c1])
97442421Syokota               ptr[ copyStart[c1]++ ] = k;
97542421Syokota         }
97642421Syokota         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
97742421Syokota            k = ptr[j]-1; if (k < 0) k += nblock;
97842421Syokota            c1 = block[k];
97942421Syokota            if (!bigDone[c1])
98042421Syokota               ptr[ copyEnd[c1]-- ] = k;
98142421Syokota         }
98242421Syokota      }
98342421Syokota
98442421Syokota      AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
98542421Syokota
98642421Syokota      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
98742421Syokota
98842421Syokota      /*--
98942421Syokota         Step 3:
99042421Syokota         The [ss] big bucket is now done.  Record this fact,
99142421Syokota         and update the quadrant descriptors.  Remember to
99242421Syokota         update quadrants in the overshoot area too, if
99342421Syokota         necessary.  The "if (i < 255)" test merely skips
99442421Syokota         this updating for the last bucket processed, since
99542421Syokota         updating for the last bucket is pointless.
99642421Syokota
99742421Syokota         The quadrant array provides a way to incrementally
99842421Syokota         cache sort orderings, as they appear, so as to
99942421Syokota         make subsequent comparisons in fullGtU() complete
100042421Syokota         faster.  For repetitive blocks this makes a big
100142421Syokota         difference (but not big enough to be able to avoid
100242421Syokota         the fallback sorting mechanism, exponential radix sort).
100342421Syokota
100442421Syokota         The precise meaning is: at all times:
100542421Syokota
100642421Syokota            for 0 <= i < nblock and 0 <= j <= nblock
100742421Syokota
100842421Syokota            if block[i] != block[j],
100942421Syokota
101042421Syokota               then the relative values of quadrant[i] and
101142421Syokota                    quadrant[j] are meaningless.
101242421Syokota
101342421Syokota               else {
101442421Syokota                  if quadrant[i] < quadrant[j]
101542421Syokota                     then the string starting at i lexicographically
101642421Syokota                     precedes the string starting at j
101742421Syokota
101842421Syokota                  else if quadrant[i] > quadrant[j]
101942421Syokota                     then the string starting at j lexicographically
102042421Syokota                     precedes the string starting at i
102142421Syokota
102242421Syokota                  else
102342421Syokota                     the relative ordering of the strings starting
102442421Syokota                     at i and j has not yet been determined.
102542421Syokota               }
102642421Syokota      --*/
102742421Syokota      bigDone[ss] = True;
102842421Syokota
102942421Syokota      if (i < 255) {
103042421Syokota         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
103142421Syokota         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
103242421Syokota         Int32 shifts   = 0;
103342421Syokota
103442421Syokota         while ((bbSize >> shifts) > 65534) shifts++;
103542421Syokota
103642421Syokota         for (j = bbSize-1; j >= 0; j--) {
103742421Syokota            Int32 a2update     = ptr[bbStart + j];
103842421Syokota            UInt16 qVal        = (UInt16)(j >> shifts);
103942421Syokota            quadrant[a2update] = qVal;
104042421Syokota            if (a2update < BZ_N_OVERSHOOT)
104142421Syokota               quadrant[a2update + nblock] = qVal;
1042         }
1043         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
1044      }
1045
1046   }
1047
1048   if (verb >= 4)
1049      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
1050                 nblock, numQSorted, nblock - numQSorted );
1051}
1052
1053#undef BIGFREQ
1054#undef SETMASK
1055#undef CLEARMASK
1056
1057
1058/*---------------------------------------------*/
1059/* Pre:
1060      nblock > 0
1061      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
1062      ((UChar*)arr2)  [0 .. nblock-1] holds block
1063      arr1 exists for [0 .. nblock-1]
1064
1065   Post:
1066      ((UChar*)arr2) [0 .. nblock-1] holds block
1067      All other areas of block destroyed
1068      ftab [ 0 .. 65536 ] destroyed
1069      arr1 [0 .. nblock-1] holds sorted order
1070*/
1071void BZ2_blockSort ( EState* s )
1072{
1073   UInt32* ptr    = s->ptr;
1074   UChar*  block  = s->block;
1075   UInt32* ftab   = s->ftab;
1076   Int32   nblock = s->nblock;
1077   Int32   verb   = s->verbosity;
1078   Int32   wfact  = s->workFactor;
1079   UInt16* quadrant;
1080   Int32   budget;
1081   Int32   budgetInit;
1082   Int32   i;
1083
1084   if (nblock < 10000) {
1085      fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1086   } else {
1087      /* Calculate the location for quadrant, remembering to get
1088         the alignment right.  Assumes that &(block[0]) is at least
1089         2-byte aligned -- this should be ok since block is really
1090         the first section of arr2.
1091      */
1092      i = nblock+BZ_N_OVERSHOOT;
1093      if (i & 1) i++;
1094      quadrant = (UInt16*)(&(block[i]));
1095
1096      /* (wfact-1) / 3 puts the default-factor-30
1097         transition point at very roughly the same place as
1098         with v0.1 and v0.9.0.
1099         Not that it particularly matters any more, since the
1100         resulting compressed stream is now the same regardless
1101         of whether or not we use the main sort or fallback sort.
1102      */
1103      if (wfact < 1  ) wfact = 1;
1104      if (wfact > 100) wfact = 100;
1105      budgetInit = nblock * ((wfact-1) / 3);
1106      budget = budgetInit;
1107
1108      mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
1109      if (verb >= 3)
1110         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
1111                    budgetInit - budget,
1112                    nblock,
1113                    (float)(budgetInit - budget) /
1114                    (float)(nblock==0 ? 1 : nblock) );
1115      if (budget < 0) {
1116         if (verb >= 2)
1117            VPrintf0 ( "    too repetitive; using fallback"
1118                       " sorting algorithm\n" );
1119         fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
1120      }
1121   }
1122
1123   s->origPtr = -1;
1124   for (i = 0; i < s->nblock; i++)
1125      if (ptr[i] == 0)
1126         { s->origPtr = i; break; };
1127
1128   AssertH( s->origPtr != -1, 1003 );
1129}
1130
1131
1132/*-------------------------------------------------------------*/
1133/*--- end                                       blocksort.c ---*/
1134/*-------------------------------------------------------------*/
1135