contrib/bzip2/blocksort.c

42421Syokota
42421Syokota/*-------------------------------------------------------------*/
42421Syokota/*--- Block sorting machinery                               ---*/
42421Syokota/*---                                           blocksort.c ---*/
42421Syokota/*-------------------------------------------------------------*/
42421Syokota
42421Syokota/*--
42421Syokota  This file is a part of bzip2 and/or libbzip2, a program and
42421Syokota  library for lossless, block-sorting data compression.
42421Syokota
42421Syokota  Copyright (C) 1996-2000 Julian R Seward.  All rights reserved.
42421Syokota
42421Syokota  Redistribution and use in source and binary forms, with or without
42421Syokota  modification, are permitted provided that the following conditions
42421Syokota  are met:
42421Syokota
42421Syokota  1. Redistributions of source code must retain the above copyright
42421Syokota     notice, this list of conditions and the following disclaimer.
42421Syokota
42421Syokota  2. The origin of this software must not be misrepresented; you must
42421Syokota     not claim that you wrote the original software.  If you use this
42421Syokota     software in a product, an acknowledgment in the product
42421Syokota     documentation would be appreciated but is not required.
42421Syokota
42421Syokota  3. Altered source versions must be plainly marked as such, and must
42421Syokota     not be misrepresented as being the original software.
42421Syokota
42421Syokota  4. The name of the author may not be used to endorse or promote
42421Syokota     products derived from this software without specific prior written
50477Speter     permission.
42421Syokota
42421Syokota  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
42421Syokota  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
42421Syokota  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
42421Syokota  ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
42421Syokota  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42421Syokota  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
58271Syokota  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
42421Syokota  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
42421Syokota  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
58271Syokota  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
58271Syokota  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58271Syokota
58271Syokota  Julian Seward, Cambridge, UK.
42421Syokota  jseward@acm.org
42421Syokota  bzip2/libbzip2 version 1.0 of 21 March 2000
42421Syokota
42421Syokota  This program is based on (at least) the work of:
42421Syokota     Mike Burrows
42421Syokota     David Wheeler
42421Syokota     Peter Fenwick
42421Syokota     Alistair Moffat
102149Speter     Radford Neal
42421Syokota     Ian H. Witten
42421Syokota     Robert Sedgewick
42421Syokota     Jon L. Bentley
42421Syokota
42421Syokota  For more information on these sources, see the manual.
42421Syokota
42421Syokota  To get some idea how the block sorting algorithms in this file
42421Syokota  work, read my paper
42421Syokota     On the Performance of BWT Sorting Algorithms
42421Syokota  in Proceedings of the IEEE Data Compression Conference 2000,
42421Syokota  Snowbird, Utah, USA, 27-30 March 2000.  The main sort in this
42421Syokota  file implements the algorithm called  cache  in the paper.
42421Syokota--*/
42421Syokota
42421Syokota
42421Syokota#include "bzlib_private.h"
58271Syokota
58271Syokota/*---------------------------------------------*/
58271Syokota/*--- Fallback O(N log(N)^2) sorting        ---*/
58271Syokota/*--- algorithm, for repetitive blocks      ---*/
58271Syokota/*---------------------------------------------*/
58271Syokota
58271Syokota/*---------------------------------------------*/
42421Syokotastatic
42421Syokota__inline__
42421Syokotavoid fallbackSimpleSort ( UInt32* fmap,
42421Syokota                          UInt32* eclass,
42421Syokota                          Int32   lo,
42421Syokota                          Int32   hi )
42421Syokota{
42421Syokota   Int32 i, j, tmp;
42421Syokota   UInt32 ec_tmp;
42421Syokota
42421Syokota   if (lo == hi) return;
42421Syokota
42421Syokota   if (hi - lo > 3) {
42421Syokota      for ( i = hi-4; i >= lo; i-- ) {
42421Syokota         tmp = fmap[i];
58271Syokota         ec_tmp = eclass[tmp];
58271Syokota         for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
42421Syokota            fmap[j-4] = fmap[j];
42421Syokota         fmap[j-4] = tmp;
42421Syokota      }
42421Syokota   }
42421Syokota
42421Syokota   for ( i = hi-1; i >= lo; i-- ) {
42421Syokota      tmp = fmap[i];
42421Syokota      ec_tmp = eclass[tmp];
42421Syokota      for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
42421Syokota         fmap[j-1] = fmap[j];
42421Syokota      fmap[j-1] = tmp;
42421Syokota   }
42421Syokota}
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
/*--
   Helper macros for fallbackQSort3.  They rely on names that
   are in scope at the point of use:
      fvswap        uses the array  fmap
      fpush / fpop  use  stackLo, stackHi  and the index  sp
   Statement-like macros are wrapped in do { } while (0) so each
   use behaves as one statement; fmin is fully parenthesised so
   it can be embedded in a larger expression.
--*/

/*-- exchange two lvalues via an Int32 temporary --*/
#define fswap(zz1, zz2)                                   \
   do { Int32 zztmp = (zz1); (zz1) = (zz2); (zz2) = zztmp; } while (0)

/*-- exchange fmap[zzp1 .. zzp1+zzn-1] with fmap[zzp2 .. zzp2+zzn-1] --*/
#define fvswap(zzp1, zzp2, zzn)       \
do {                                  \
   Int32 yyp1 = (zzp1);               \
   Int32 yyp2 = (zzp2);               \
   Int32 yyn  = (zzn);                \
   while (yyn > 0) {                  \
      fswap(fmap[yyp1], fmap[yyp2]);  \
      yyp1++; yyp2++; yyn--;          \
   }                                  \
} while (0)


/*-- smaller of two values; note each argument may be evaluated twice --*/
#define fmin(a,b) (((a) < (b)) ? (a) : (b))

/*-- push the range [lz, hz] onto the explicit stack --*/
#define fpush(lz,hz) do { stackLo[sp] = (lz); \
                          stackHi[sp] = (hz); \
                          sp++; } while (0)

/*-- pop the most recently pushed range into lz, hz --*/
#define fpop(lz,hz) do { sp--;                \
                         (lz) = stackLo[sp];  \
                         (hz) = stackHi[sp]; } while (0)

/*-- ranges with hi - lo below this go to fallbackSimpleSort --*/
#define FALLBACK_QSORT_SMALL_THRESH 10
/*-- capacity of the explicit range stack --*/
#define FALLBACK_QSORT_STACK_SIZE   100
42421Syokota
42421Syokota
42421Syokotastatic
42421Syokotavoid fallbackQSort3 ( UInt32* fmap,
42421Syokota                      UInt32* eclass,
42421Syokota                      Int32   loSt,
58271Syokota                      Int32   hiSt )
58271Syokota{
58271Syokota   Int32 unLo, unHi, ltLo, gtHi, n, m;
58271Syokota   Int32 sp, lo, hi;
58271Syokota   UInt32 med, r, r3;
58271Syokota   Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
58271Syokota   Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
58271Syokota
58271Syokota   r = 0;
58271Syokota
58271Syokota   sp = 0;
58271Syokota   fpush ( loSt, hiSt );
58271Syokota
58271Syokota   while (sp > 0) {
114930Speter
58271Syokota      AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 );
114930Speter
114930Speter      fpop ( lo, hi );
58271Syokota      if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
65176Sdfr         fallbackSimpleSort ( fmap, eclass, lo, hi );
92661Speter         continue;
92661Speter      }
92661Speter
92661Speter      /* Random partitioning.  Median of 3 sometimes fails to
58271Syokota         avoid bad cases.  Median of 9 seems to help but
58271Syokota         looks rather expensive.  This too seems to work but
58271Syokota         is cheaper.  Guidance for the magic constants
58271Syokota         7621 and 32768 is taken from Sedgewick's algorithms
58271Syokota         book, chapter 35.
58271Syokota      */
58271Syokota      r = ((r * 7621) + 1) % 32768;
58271Syokota      r3 = r % 3;
58271Syokota      if (r3 == 0) med = eclass[fmap[lo]]; else
58271Syokota      if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
42421Syokota                   med = eclass[fmap[hi]];
42421Syokota
42421Syokota      unLo = ltLo = lo;
58271Syokota      unHi = gtHi = hi;
58271Syokota
42421Syokota      while (1) {
58271Syokota         while (1) {
42421Syokota            if (unLo > unHi) break;
42421Syokota            n = (Int32)eclass[fmap[unLo]] - (Int32)med;
42421Syokota            if (n == 0) {
42421Syokota               fswap(fmap[unLo], fmap[ltLo]);
42421Syokota               ltLo++; unLo++;
42421Syokota               continue;
42421Syokota            };
42421Syokota            if (n > 0) break;
42421Syokota            unLo++;
42421Syokota         }
42421Syokota         while (1) {
42421Syokota            if (unLo > unHi) break;
58271Syokota            n = (Int32)eclass[fmap[unHi]] - (Int32)med;
58271Syokota            if (n == 0) {
58271Syokota               fswap(fmap[unHi], fmap[gtHi]);
42421Syokota               gtHi--; unHi--;
42421Syokota               continue;
42421Syokota            };
58271Syokota            if (n < 0) break;
42421Syokota            unHi--;
58271Syokota         }
42421Syokota         if (unLo > unHi) break;
58271Syokota         fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
58271Syokota      }
58271Syokota
58271Syokota      AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
58271Syokota
58271Syokota      if (gtHi < ltLo) continue;
58271Syokota
42421Syokota      n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
42421Syokota      m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
42421Syokota
42421Syokota      n = lo + unLo - ltLo - 1;
42421Syokota      m = hi - (gtHi - unHi) + 1;
42421Syokota
42421Syokota      if (n - lo > hi - m) {
42421Syokota         fpush ( lo, n );
42421Syokota         fpush ( m, hi );
42421Syokota      } else {
93279Smurray         fpush ( m, hi );
42421Syokota         fpush ( lo, n );
42421Syokota      }
93279Smurray   }
42421Syokota}
42421Syokota
/*-- retire the helper macros that were private to fallbackQSort3 --*/
#undef FALLBACK_QSORT_STACK_SIZE
#undef FALLBACK_QSORT_SMALL_THRESH
#undef fvswap
#undef fswap
#undef fpop
#undef fpush
#undef fmin
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
42421Syokota/* Pre:
42421Syokota      nblock > 0
42421Syokota      eclass exists for [0 .. nblock-1]
42421Syokota      ((UChar*)eclass) [0 .. nblock-1] holds block
      fmap exists for [0 .. nblock-1]
42421Syokota
93279Smurray   Post:
93279Smurray      ((UChar*)eclass) [0 .. nblock-1] holds block
42421Syokota      All other areas of eclass destroyed
93279Smurray      fmap [0 .. nblock-1] holds sorted order
93279Smurray      bhtab [ 0 .. 2+(nblock/32) ] destroyed
93279Smurray*/
42421Syokota
/*--
   Bit-vector helpers over the UInt32 array  bhtab  (which must be
   in scope at the point of use).  Bit zz lives in word zz >> 5 at
   position zz & 31.  The mask is built from an unsigned 1 so that
   selecting bit 31 does not left-shift into the sign bit of a
   signed int, which is undefined behaviour.

      SET_BH / CLEAR_BH   set / clear bit zz
      ISSET_BH            non-zero iff bit zz is set
      WORD_BH             the whole 32-bit word holding bit zz
      UNALIGNED_BH        non-zero iff zz is not a multiple of 32
--*/
#define       SET_BH(zz)  bhtab[(zz) >> 5] |= ((UInt32)1 << ((zz) & 31))
#define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~((UInt32)1 << ((zz) & 31))
#define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & ((UInt32)1 << ((zz) & 31)))
#define      WORD_BH(zz)  bhtab[(zz) >> 5]
#define UNALIGNED_BH(zz)  ((zz) & 0x01f)
42421Syokota
42421Syokotastatic
42421Syokotavoid fallbackSort ( UInt32* fmap,
42421Syokota                    UInt32* eclass,
42421Syokota                    UInt32* bhtab,
42421Syokota                    Int32   nblock,
42421Syokota                    Int32   verb )
42421Syokota{
42421Syokota   Int32 ftab[257];
42421Syokota   Int32 ftabCopy[256];
42421Syokota   Int32 H, i, j, k, l, r, cc, cc1;
42421Syokota   Int32 nNotDone;
42421Syokota   Int32 nBhtab;
42421Syokota   UChar* eclass8 = (UChar*)eclass;
58271Syokota
42421Syokota   /*--
42421Syokota      Initial 1-char radix sort to generate
42421Syokota      initial fmap and initial BH bits.
42421Syokota   --*/
42421Syokota   if (verb >= 4)
42421Syokota      VPrintf0 ( "        bucket sorting ...\n" );
42421Syokota   for (i = 0; i < 257;    i++) ftab[i] = 0;
42421Syokota   for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
42421Syokota   for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i];
42421Syokota   for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1];
42421Syokota
42421Syokota   for (i = 0; i < nblock; i++) {
42421Syokota      j = eclass8[i];
42421Syokota      k = ftab[j] - 1;
42421Syokota      ftab[j] = k;
42421Syokota      fmap[k] = i;
42421Syokota   }
42421Syokota
42421Syokota   nBhtab = 2 + (nblock / 32);
42421Syokota   for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
42421Syokota   for (i = 0; i < 256; i++) SET_BH(ftab[i]);
42421Syokota
42421Syokota   /*--
42421Syokota      Inductively refine the buckets.  Kind-of an
42421Syokota      "exponential radix sort" (!), inspired by the
42421Syokota      Manber-Myers suffix array construction algorithm.
42421Syokota   --*/
42421Syokota
42421Syokota   /*-- set sentinel bits for block-end detection --*/
42421Syokota   for (i = 0; i < 32; i++) {
42421Syokota      SET_BH(nblock + 2*i);
42421Syokota      CLEAR_BH(nblock + 2*i + 1);
42421Syokota   }
42421Syokota
42421Syokota   /*-- the log(N) loop --*/
42421Syokota   H = 1;
42421Syokota   while (1) {
42421Syokota
42421Syokota      if (verb >= 4)
42421Syokota         VPrintf1 ( "        depth %6d has ", H );
42421Syokota
42421Syokota      j = 0;
42421Syokota      for (i = 0; i < nblock; i++) {
42421Syokota         if (ISSET_BH(i)) j = i;
42421Syokota         k = fmap[i] - H; if (k < 0) k += nblock;
42421Syokota         eclass[k] = j;
42421Syokota      }
58271Syokota
42421Syokota      nNotDone = 0;
42421Syokota      r = -1;
58271Syokota      while (1) {
42421Syokota
42421Syokota	 /*-- find the next non-singleton bucket --*/
58271Syokota         k = r + 1;
42421Syokota         while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
42421Syokota         if (ISSET_BH(k)) {
42421Syokota            while (WORD_BH(k) == 0xffffffff) k += 32;
42421Syokota            while (ISSET_BH(k)) k++;
42421Syokota         }
42421Syokota         l = k - 1;
42421Syokota         if (l >= nblock) break;
42421Syokota         while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
42421Syokota         if (!ISSET_BH(k)) {
42421Syokota            while (WORD_BH(k) == 0x00000000) k += 32;
42421Syokota            while (!ISSET_BH(k)) k++;
42421Syokota         }
42421Syokota         r = k - 1;
42421Syokota         if (r >= nblock) break;
42421Syokota
42421Syokota         /*-- now [l, r] bracket current bucket --*/
42421Syokota         if (r > l) {
42421Syokota            nNotDone += (r - l + 1);
42421Syokota            fallbackQSort3 ( fmap, eclass, l, r );
58271Syokota
42421Syokota            /*-- scan bucket and generate header bits-- */
42421Syokota            cc = -1;
42421Syokota            for (i = l; i <= r; i++) {
42421Syokota               cc1 = eclass[fmap[i]];
42421Syokota               if (cc != cc1) { SET_BH(i); cc = cc1; };
42421Syokota            }
42421Syokota         }
42421Syokota      }
42421Syokota
42421Syokota      if (verb >= 4)
42421Syokota         VPrintf1 ( "%6d unresolved strings\n", nNotDone );
42421Syokota
42421Syokota      H *= 2;
42421Syokota      if (H > nblock || nNotDone == 0) break;
42421Syokota   }
42421Syokota
58271Syokota   /*--
42421Syokota      Reconstruct the original block in
42421Syokota      eclass8 [0 .. nblock-1], since the
42421Syokota      previous phase destroyed it.
58271Syokota   --*/
42421Syokota   if (verb >= 4)
42421Syokota      VPrintf0 ( "        reconstructing block ...\n" );
42421Syokota   j = 0;
42421Syokota   for (i = 0; i < nblock; i++) {
42421Syokota      while (ftabCopy[j] == 0) j++;
42421Syokota      ftabCopy[j]--;
42421Syokota      eclass8[fmap[i]] = (UChar)j;
42421Syokota   }
42421Syokota   AssertH ( j < 256, 1005 );
42421Syokota}
42421Syokota
/*-- the bit-vector helpers are private to fallbackSort --*/
#undef UNALIGNED_BH
#undef      WORD_BH
#undef     ISSET_BH
#undef     CLEAR_BH
#undef       SET_BH
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
42421Syokota/*--- The main, O(N^2 log(N)) sorting       ---*/
42421Syokota/*--- algorithm.  Faster for "normal"       ---*/
42421Syokota/*--- non-repetitive blocks.                ---*/
58271Syokota/*---------------------------------------------*/
42421Syokota
58271Syokota/*---------------------------------------------*/
42421Syokotastatic
42421Syokota__inline__
42421SyokotaBool mainGtU ( UInt32  i1,
42421Syokota               UInt32  i2,
42421Syokota               UChar*  block,
42421Syokota               UInt16* quadrant,
42421Syokota               UInt32  nblock,
42421Syokota               Int32*  budget )
42421Syokota{
42421Syokota   Int32  k;
42421Syokota   UChar  c1, c2;
42421Syokota   UInt16 s1, s2;
42421Syokota
42421Syokota   AssertD ( i1 != i2, "mainGtU" );
42421Syokota   /* 1 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 2 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
58271Syokota   /* 3 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
58271Syokota   /* 4 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 5 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 6 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 7 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 8 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 9 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
58271Syokota   i1++; i2++;
42421Syokota   /* 10 */
58271Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 11 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota   /* 12 */
42421Syokota   c1 = block[i1]; c2 = block[i2];
42421Syokota   if (c1 != c2) return (c1 > c2);
42421Syokota   i1++; i2++;
42421Syokota
42421Syokota   k = nblock + 8;
42421Syokota
42421Syokota   do {
42421Syokota      /* 1 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
58271Syokota      /* 2 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 3 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
58271Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 4 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 5 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
58271Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 6 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 7 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota      /* 8 */
42421Syokota      c1 = block[i1]; c2 = block[i2];
42421Syokota      if (c1 != c2) return (c1 > c2);
42421Syokota      s1 = quadrant[i1]; s2 = quadrant[i2];
42421Syokota      if (s1 != s2) return (s1 > s2);
42421Syokota      i1++; i2++;
42421Syokota
42421Syokota      if (i1 >= nblock) i1 -= nblock;
42421Syokota      if (i2 >= nblock) i2 -= nblock;
42421Syokota
42421Syokota      k -= 8;
42421Syokota      (*budget)--;
42421Syokota   }
42421Syokota      while (k >= 0);
42421Syokota
42421Syokota   return False;
42421Syokota}
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
42421Syokota/*--
42421Syokota   Knuth's increments seem to work better
42421Syokota   than Incerpi-Sedgewick here.  Possibly
42421Syokota   because the number of elems to sort is
42421Syokota   usually small, typically <= 20.
42421Syokota--*/
42421Syokotastatic
42421SyokotaInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
42421Syokota                   9841, 29524, 88573, 265720,
42421Syokota                   797161, 2391484 };
42421Syokota
42421Syokotastatic
42421Syokotavoid mainSimpleSort ( UInt32* ptr,
42421Syokota                      UChar*  block,
42421Syokota                      UInt16* quadrant,
42421Syokota                      Int32   nblock,
42421Syokota                      Int32   lo,
42421Syokota                      Int32   hi,
42421Syokota                      Int32   d,
42421Syokota                      Int32*  budget )
42421Syokota{
42421Syokota   Int32 i, j, h, bigN, hp;
42421Syokota   UInt32 v;
42421Syokota
42421Syokota   bigN = hi - lo + 1;
42421Syokota   if (bigN < 2) return;
42421Syokota
42421Syokota   hp = 0;
42421Syokota   while (incs[hp] < bigN) hp++;
42421Syokota   hp--;
42421Syokota
42421Syokota   for (; hp >= 0; hp--) {
42421Syokota      h = incs[hp];
42421Syokota
42421Syokota      i = lo + h;
42421Syokota      while (True) {
42421Syokota
42421Syokota         /*-- copy 1 --*/
42421Syokota         if (i > hi) break;
42421Syokota         v = ptr[i];
42421Syokota         j = i;
42421Syokota         while ( mainGtU (
42421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
42421Syokota                 ) ) {
42421Syokota            ptr[j] = ptr[j-h];
42421Syokota            j = j - h;
42421Syokota            if (j <= (lo + h - 1)) break;
42421Syokota         }
42421Syokota         ptr[j] = v;
42421Syokota         i++;
42421Syokota
42421Syokota         /*-- copy 2 --*/
42421Syokota         if (i > hi) break;
42421Syokota         v = ptr[i];
42421Syokota         j = i;
42421Syokota         while ( mainGtU (
42421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
42421Syokota                 ) ) {
42421Syokota            ptr[j] = ptr[j-h];
42421Syokota            j = j - h;
42421Syokota            if (j <= (lo + h - 1)) break;
42421Syokota         }
42421Syokota         ptr[j] = v;
42421Syokota         i++;
42421Syokota
42421Syokota         /*-- copy 3 --*/
42421Syokota         if (i > hi) break;
42421Syokota         v = ptr[i];
42421Syokota         j = i;
42421Syokota         while ( mainGtU (
42421Syokota                    ptr[j-h]+d, v+d, block, quadrant, nblock, budget
42421Syokota                 ) ) {
42421Syokota            ptr[j] = ptr[j-h];
42421Syokota            j = j - h;
42421Syokota            if (j <= (lo + h - 1)) break;
42421Syokota         }
42421Syokota         ptr[j] = v;
42421Syokota         i++;
42421Syokota
42421Syokota         if (*budget < 0) return;
42421Syokota      }
42421Syokota   }
42421Syokota}
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
42421Syokota/*--
42421Syokota   The following is an implementation of
42421Syokota   an elegant 3-way quicksort for strings,
42421Syokota   described in a paper "Fast Algorithms for
42421Syokota   Sorting and Searching Strings", by Robert
42421Syokota   Sedgewick and Jon L. Bentley.
42421Syokota--*/
58271Syokota
/*-- exchange two lvalues via an Int32 temporary --*/
#define mswap(zz1, zz2)                                   \
   do { Int32 zztmp = (zz1); (zz1) = (zz2); (zz2) = zztmp; } while (0)

/*--
   exchange ptr[zzp1 .. zzp1+zzn-1] with ptr[zzp2 .. zzp2+zzn-1];
   the array  ptr  must be in scope at the point of use
--*/
#define mvswap(zzp1, zzp2, zzn)       \
do {                                  \
   Int32 yyp1 = (zzp1);               \
   Int32 yyp2 = (zzp2);               \
   Int32 yyn  = (zzn);                \
   for (; yyn > 0; yyn--) {           \
      mswap(ptr[yyp1], ptr[yyp2]);    \
      yyp1++; yyp2++;                 \
   }                                  \
} while (0)
42421Syokota
42421Syokotastatic
42421Syokota__inline__
42421SyokotaUChar mmed3 ( UChar a, UChar b, UChar c )
42421Syokota{
42421Syokota   UChar t;
42421Syokota   if (a > b) { t = a; a = b; b = t; };
42421Syokota   if (b > c) {
42421Syokota      b = c;
42421Syokota      if (a > b) b = a;
42421Syokota   }
58271Syokota   return b;
42421Syokota}
42421Syokota
/*--
   Helper macros for mainQSort3.  They rely on names that are in
   scope at the point of use:
      mpush / mpop          stackLo, stackHi, stackD and index sp
      mnextsize / mnextswap nextLo, nextHi, nextD
   Statement-like macros are wrapped in do { } while (0) so each
   use behaves as one statement; mmin is fully parenthesised so
   it can be embedded in a larger expression.
--*/

/*-- smaller of two values; note each argument may be evaluated twice --*/
#define mmin(a,b) (((a) < (b)) ? (a) : (b))

/*-- push the range [lz, hz] at depth dz onto the explicit stack --*/
#define mpush(lz,hz,dz) do { stackLo[sp] = (lz); \
                             stackHi[sp] = (hz); \
                             stackD [sp] = (dz); \
                             sp++; } while (0)

/*-- pop the most recently pushed range and depth into lz, hz, dz --*/
#define mpop(lz,hz,dz) do { sp--;               \
                            (lz) = stackLo[sp]; \
                            (hz) = stackHi[sp]; \
                            (dz) = stackD [sp]; } while (0)


/*-- extent (hi - lo) of candidate sub-range az --*/
#define mnextsize(az) (nextHi[az]-nextLo[az])

/*-- exchange candidate sub-ranges az and bz --*/
#define mnextswap(az,bz)                                        \
   do { Int32 tz;                                               \
     tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
     tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
     tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \
   } while (0)


/*-- ranges with hi - lo below this go to mainSimpleSort --*/
#define MAIN_QSORT_SMALL_THRESH 20
/*-- ranges deeper than this go to mainSimpleSort --*/
#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
/*-- capacity of the explicit range stack --*/
#define MAIN_QSORT_STACK_SIZE 100
42421Syokota
42421Syokotastatic
42421Syokotavoid mainQSort3 ( UInt32* ptr,
58271Syokota                  UChar*  block,
42421Syokota                  UInt16* quadrant,
42421Syokota                  Int32   nblock,
42421Syokota                  Int32   loSt,
42421Syokota                  Int32   hiSt,
42421Syokota                  Int32   dSt,
42421Syokota                  Int32*  budget )
42421Syokota{
42421Syokota   Int32 unLo, unHi, ltLo, gtHi, n, m, med;
42421Syokota   Int32 sp, lo, hi, d;
42421Syokota
42421Syokota   Int32 stackLo[MAIN_QSORT_STACK_SIZE];
42421Syokota   Int32 stackHi[MAIN_QSORT_STACK_SIZE];
58271Syokota   Int32 stackD [MAIN_QSORT_STACK_SIZE];
42421Syokota
42421Syokota   Int32 nextLo[3];
42421Syokota   Int32 nextHi[3];
42421Syokota   Int32 nextD [3];
42421Syokota
42421Syokota   sp = 0;
42421Syokota   mpush ( loSt, hiSt, dSt );
42421Syokota
42421Syokota   while (sp > 0) {
42421Syokota
42421Syokota      AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 );
42421Syokota
58271Syokota      mpop ( lo, hi, d );
42421Syokota      if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
42421Syokota          d > MAIN_QSORT_DEPTH_THRESH) {
58271Syokota         mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
58271Syokota         if (*budget < 0) return;
42421Syokota         continue;
42421Syokota      }
42421Syokota
58271Syokota      med = (Int32)
42421Syokota            mmed3 ( block[ptr[ lo         ]+d],
42421Syokota                    block[ptr[ hi         ]+d],
42421Syokota                    block[ptr[ (lo+hi)>>1 ]+d] );
42421Syokota
42421Syokota      unLo = ltLo = lo;
42421Syokota      unHi = gtHi = hi;
42421Syokota
42421Syokota      while (True) {
42421Syokota         while (True) {
42421Syokota            if (unLo > unHi) break;
42421Syokota            n = ((Int32)block[ptr[unLo]+d]) - med;
42421Syokota            if (n == 0) {
42421Syokota               mswap(ptr[unLo], ptr[ltLo]);
42421Syokota               ltLo++; unLo++; continue;
42421Syokota            };
42421Syokota            if (n >  0) break;
42421Syokota            unLo++;
42421Syokota         }
58271Syokota         while (True) {
42421Syokota            if (unLo > unHi) break;
58271Syokota            n = ((Int32)block[ptr[unHi]+d]) - med;
42421Syokota            if (n == 0) {
42421Syokota               mswap(ptr[unHi], ptr[gtHi]);
42421Syokota               gtHi--; unHi--; continue;
42421Syokota            };
42421Syokota            if (n <  0) break;
42421Syokota            unHi--;
42421Syokota         }
42421Syokota         if (unLo > unHi) break;
42421Syokota         mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
42421Syokota      }
42421Syokota
42421Syokota      AssertD ( unHi == unLo-1, "mainQSort3(2)" );
42421Syokota
42421Syokota      if (gtHi < ltLo) {
42421Syokota         mpush(lo, hi, d+1 );
42421Syokota         continue;
42421Syokota      }
42421Syokota
42421Syokota      n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
42421Syokota      m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
42421Syokota
42421Syokota      n = lo + unLo - ltLo - 1;
42421Syokota      m = hi - (gtHi - unHi) + 1;
42421Syokota
42421Syokota      nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d;
42421Syokota      nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d;
42421Syokota      nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
42421Syokota
42421Syokota      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
42421Syokota      if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
42421Syokota      if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
42421Syokota
42421Syokota      AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
42421Syokota      AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
42421Syokota
42421Syokota      mpush (nextLo[0], nextHi[0], nextD[0]);
58271Syokota      mpush (nextLo[1], nextHi[1], nextD[1]);
42421Syokota      mpush (nextLo[2], nextHi[2], nextD[2]);
58271Syokota   }
42421Syokota}
42421Syokota
/*-- retire the helper macros that were private to mainQSort3 --*/
#undef MAIN_QSORT_STACK_SIZE
#undef MAIN_QSORT_DEPTH_THRESH
#undef MAIN_QSORT_SMALL_THRESH
#undef mnextswap
#undef mnextsize
#undef mmin
#undef mpop
#undef mpush
#undef mvswap
#undef mswap
42421Syokota
42421Syokota
42421Syokota/*---------------------------------------------*/
42421Syokota/* Pre:
42421Syokota      nblock > N_OVERSHOOT
42421Syokota      block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
42421Syokota      ((UChar*)block32) [0 .. nblock-1] holds block
42421Syokota      ptr exists for [0 .. nblock-1]
42421Syokota
42421Syokota   Post:
42421Syokota      ((UChar*)block32) [0 .. nblock-1] holds block
42421Syokota      All other areas of block32 destroyed
42421Syokota      ftab [0 .. 65536 ] destroyed
42421Syokota      ptr [0 .. nblock-1] holds sorted order
42421Syokota      if (*budget < 0), sorting was abandoned
42421Syokota*/
42421Syokota
/*-- difference between the ftab entries at (b+1)*256 and b*256; ftab must be in scope --*/
#define BIGFREQ(b)   (ftab[((b) + 1) << 8] - ftab[(b) << 8])
/*-- bit 21, and its complement --*/
#define SETMASK      (1 << 21)
#define CLEARMASK    (~(SETMASK))
42421Syokota
42421Syokotastatic
42421Syokotavoid mainSort ( UInt32* ptr,
58271Syokota                UChar*  block,
42421Syokota                UInt16* quadrant,
58271Syokota                UInt32* ftab,
42421Syokota                Int32   nblock,
42421Syokota                Int32   verb,
42421Syokota                Int32*  budget )
42421Syokota{
42421Syokota   Int32  i, j, k, ss, sb;
42421Syokota   Int32  runningOrder[256];
42421Syokota   Bool   bigDone[256];
42421Syokota   Int32  copyStart[256];
42421Syokota   Int32  copyEnd  [256];
42421Syokota   UChar  c1;
42421Syokota   Int32  numQSorted;
42421Syokota   UInt16 s;
42421Syokota   if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" );
42421Syokota
42421Syokota   /*-- set up the 2-byte frequency table --*/
42421Syokota   for (i = 65536; i >= 0; i--) ftab[i] = 0;
42421Syokota
42421Syokota   j = block[0] << 8;
42421Syokota   i = nblock-1;
42421Syokota   for (; i >= 3; i -= 4) {
42421Syokota      quadrant[i] = 0;
42421Syokota      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
42421Syokota      ftab[j]++;
42421Syokota      quadrant[i-1] = 0;
42421Syokota      j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
42421Syokota      ftab[j]++;
42421Syokota      quadrant[i-2] = 0;
42421Syokota      j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
42421Syokota      ftab[j]++;
42421Syokota      quadrant[i-3] = 0;
42421Syokota      j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
42421Syokota      ftab[j]++;
42421Syokota   }
42421Syokota   for (; i >= 0; i--) {
42421Syokota      quadrant[i] = 0;
42421Syokota      j = (j >> 8) | ( ((UInt16)block[i]) << 8);
42421Syokota      ftab[j]++;
42421Syokota   }
42421Syokota
42421Syokota   /*-- (emphasises close relationship of block & quadrant) --*/
42421Syokota   for (i = 0; i < BZ_N_OVERSHOOT; i++) {
42421Syokota      block   [nblock+i] = block[i];
42421Syokota      quadrant[nblock+i] = 0;
42421Syokota   }
42421Syokota
42421Syokota   if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" );
42421Syokota
42421Syokota   /*-- Complete the initial radix sort --*/
42421Syokota   for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
42421Syokota
42421Syokota   s = block[0] << 8;
42421Syokota   i = nblock-1;
42421Syokota   for (; i >= 3; i -= 4) {
42421Syokota      s = (s >> 8) | (block[i] << 8);
42421Syokota      j = ftab[s] -1;
42421Syokota      ftab[s] = j;
42421Syokota      ptr[j] = i;
42421Syokota      s = (s >> 8) | (block[i-1] << 8);
42421Syokota      j = ftab[s] -1;
42421Syokota      ftab[s] = j;
42421Syokota      ptr[j] = i-1;
42421Syokota      s = (s >> 8) | (block[i-2] << 8);
42421Syokota      j = ftab[s] -1;
42421Syokota      ftab[s] = j;
42421Syokota      ptr[j] = i-2;
42421Syokota      s = (s >> 8) | (block[i-3] << 8);
42421Syokota      j = ftab[s] -1;
42421Syokota      ftab[s] = j;
42421Syokota      ptr[j] = i-3;
42421Syokota   }
42421Syokota   for (; i >= 0; i--) {
42421Syokota      s = (s >> 8) | (block[i] << 8);
42421Syokota      j = ftab[s] -1;
42421Syokota      ftab[s] = j;
42421Syokota      ptr[j] = i;
42421Syokota   }
42421Syokota
42421Syokota   /*--
42421Syokota      Now ftab contains the first loc of every small bucket.
42421Syokota      Calculate the running order, from smallest to largest
42421Syokota      big bucket.
42421Syokota   --*/
42421Syokota   for (i = 0; i <= 255; i++) {
42421Syokota      bigDone     [i] = False;
42421Syokota      runningOrder[i] = i;
42421Syokota   }
42421Syokota
42421Syokota   {
42421Syokota      Int32 vv;
42421Syokota      Int32 h = 1;
42421Syokota      do h = 3 * h + 1; while (h <= 256);
42421Syokota      do {
42421Syokota         h = h / 3;
42421Syokota         for (i = h; i <= 255; i++) {
42421Syokota            vv = runningOrder[i];
42421Syokota            j = i;
42421Syokota            while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
42421Syokota               runningOrder[j] = runningOrder[j-h];
42421Syokota               j = j - h;
42421Syokota               if (j <= (h - 1)) goto zero;
42421Syokota            }
42421Syokota            zero:
42421Syokota            runningOrder[j] = vv;
42421Syokota         }
42421Syokota      } while (h != 1);
42421Syokota   }
42421Syokota
42421Syokota   /*--
42421Syokota      The main sorting loop.
42421Syokota   --*/
42421Syokota
42421Syokota   numQSorted = 0;
42421Syokota
42421Syokota   for (i = 0; i <= 255; i++) {
42421Syokota
42421Syokota      /*--
42421Syokota         Process big buckets, starting with the least full.
42421Syokota         Basically this is a 3-step process in which we call
42421Syokota         mainQSort3 to sort the small buckets [ss, j], but
42421Syokota         also make a big effort to avoid the calls if we can.
42421Syokota      --*/
42421Syokota      ss = runningOrder[i];
42421Syokota
42421Syokota      /*--
42421Syokota         Step 1:
42421Syokota         Complete the big bucket [ss] by quicksorting
42421Syokota         any unsorted small buckets [ss, j], for j != ss.
42421Syokota         Hopefully previous pointer-scanning phases have already
42421Syokota         completed many of the small buckets [ss, j], so
42421Syokota         we don't have to sort them at all.
42421Syokota      --*/
42421Syokota      for (j = 0; j <= 255; j++) {
42421Syokota         if (j != ss) {
42421Syokota            sb = (ss << 8) + j;
42421Syokota            if ( ! (ftab[sb] & SETMASK) ) {
42421Syokota               Int32 lo = ftab[sb]   & CLEARMASK;
42421Syokota               Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
42421Syokota               if (hi > lo) {
42421Syokota                  if (verb >= 4)
42421Syokota                     VPrintf4 ( "        qsort [0x%x, 0x%x]   "
42421Syokota                                "done %d   this %d\n",
42421Syokota                                ss, j, numQSorted, hi - lo + 1 );
42421Syokota                  mainQSort3 (
42421Syokota                     ptr, block, quadrant, nblock,
42421Syokota                     lo, hi, BZ_N_RADIX, budget
42421Syokota                  );
42421Syokota                  numQSorted += (hi - lo + 1);
42421Syokota                  if (*budget < 0) return;
42421Syokota               }
42421Syokota            }
42421Syokota            ftab[sb] |= SETMASK;
42421Syokota         }
42421Syokota      }
42421Syokota
42421Syokota      AssertH ( !bigDone[ss], 1006 );
42421Syokota
42421Syokota      /*--
42421Syokota         Step 2:
42421Syokota         Now scan this big bucket [ss] so as to synthesise the
42421Syokota         sorted order for small buckets [t, ss] for all t,
42421Syokota         including, magically, the bucket [ss,ss] too.
42421Syokota         This will avoid doing Real Work in subsequent Step 1's.
42421Syokota      --*/
42421Syokota      {
42421Syokota         for (j = 0; j <= 255; j++) {
42421Syokota            copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK;
42421Syokota            copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
42421Syokota         }
42421Syokota         for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
42421Syokota            k = ptr[j]-1; if (k < 0) k += nblock;
42421Syokota            c1 = block[k];
42421Syokota            if (!bigDone[c1])
42421Syokota               ptr[ copyStart[c1]++ ] = k;
42421Syokota         }
42421Syokota         for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
42421Syokota            k = ptr[j]-1; if (k < 0) k += nblock;
42421Syokota            c1 = block[k];
42421Syokota            if (!bigDone[c1])
42421Syokota               ptr[ copyEnd[c1]-- ] = k;
42421Syokota         }
42421Syokota      }
42421Syokota
42421Syokota      AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 );
42421Syokota
42421Syokota      for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
42421Syokota
42421Syokota      /*--
42421Syokota         Step 3:
42421Syokota         The [ss] big bucket is now done.  Record this fact,
42421Syokota         and update the quadrant descriptors.  Remember to
42421Syokota         update quadrants in the overshoot area too, if
42421Syokota         necessary.  The "if (i < 255)" test merely skips
42421Syokota         this updating for the last bucket processed, since
42421Syokota         updating for the last bucket is pointless.
42421Syokota
42421Syokota         The quadrant array provides a way to incrementally
42421Syokota         cache sort orderings, as they appear, so as to
42421Syokota         make subsequent comparisons in fullGtU() complete
42421Syokota         faster.  For repetitive blocks this makes a big
42421Syokota         difference (but not big enough to be able to avoid
42421Syokota         the fallback sorting mechanism, exponential radix sort).
42421Syokota
42421Syokota         The precise meaning is: at all times:
42421Syokota
42421Syokota            for 0 <= i < nblock and 0 <= j <= nblock
42421Syokota
42421Syokota            if block[i] != block[j],
42421Syokota
42421Syokota               then the relative values of quadrant[i] and
42421Syokota                    quadrant[j] are meaningless.
42421Syokota
42421Syokota               else {
42421Syokota                  if quadrant[i] < quadrant[j]
42421Syokota                     then the string starting at i lexicographically
42421Syokota                     precedes the string starting at j
42421Syokota
42421Syokota                  else if quadrant[i] > quadrant[j]
42421Syokota                     then the string starting at j lexicographically
42421Syokota                     precedes the string starting at i
42421Syokota
42421Syokota                  else
42421Syokota                     the relative ordering of the strings starting
42421Syokota                     at i and j has not yet been determined.
42421Syokota               }
42421Syokota      --*/
42421Syokota      bigDone[ss] = True;
42421Syokota
42421Syokota      if (i < 255) {
42421Syokota         Int32 bbStart  = ftab[ss << 8] & CLEARMASK;
42421Syokota         Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
42421Syokota         Int32 shifts   = 0;
42421Syokota
42421Syokota         while ((bbSize >> shifts) > 65534) shifts++;
42421Syokota
42421Syokota         for (j = bbSize-1; j >= 0; j--) {
42421Syokota            Int32 a2update     = ptr[bbStart + j];
42421Syokota            UInt16 qVal        = (UInt16)(j >> shifts);
42421Syokota            quadrant[a2update] = qVal;
42421Syokota            if (a2update < BZ_N_OVERSHOOT)
42421Syokota               quadrant[a2update + nblock] = qVal;
         }
         AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
      }

   }

   if (verb >= 4)
      VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n",
                 nblock, numQSorted, nblock - numQSorted );
}

#undef BIGFREQ
#undef SETMASK
#undef CLEARMASK


/*---------------------------------------------*/
/* Pre:
      nblock > 0
      arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
      ((UChar*)arr2)  [0 .. nblock-1] holds block
      arr1 exists for [0 .. nblock-1]

   Post:
      ((UChar*)arr2) [0 .. nblock-1] holds block
      All other areas of block destroyed
      ftab [ 0 .. 65536 ] destroyed
      arr1 [0 .. nblock-1] holds sorted order
*/
/* Sorts the block held in *s and stores in s->origPtr the index at
   which position 0 of the block ends up in the sorted order.
   Blocks shorter than 10000 bytes go straight to fallbackSort;
   longer ones try mainSort first and fall back only when its
   work budget runs out. */
void BZ2_blockSort ( EState* s )
{
   UInt32* order       = s->ptr;
   UChar*  data        = s->block;
   UInt32* freqTab     = s->ftab;
   Int32   len         = s->nblock;
   Int32   chatty      = s->verbosity;
   Int32   effort      = s->workFactor;
   Int32   useFallback = (len < 10000);
   Int32   pos;

   if (!useFallback) {
      UInt16* quadrant;
      Int32   budgetInit;
      Int32   budget;
      Int32   qOffset;

      /* quadrant sits directly behind block plus its overshoot
         area; the offset is rounded up to an even number so the
         UInt16 accesses stay 2-byte aligned (this relies on
         block itself being at least 2-byte aligned). */
      qOffset = len + BZ_N_OVERSHOOT;
      if (qOffset & 1) qOffset++;
      quadrant = (UInt16*)(data + qOffset);

      /* Clamp the work factor to 1..100 and turn it into a work
         budget proportional to the block length.  The choice of
         sorter does not affect the compressed stream, only the
         time taken. */
      if (effort < 1) {
         effort = 1;
      } else if (effort > 100) {
         effort = 100;
      }
      budgetInit = len * ((effort-1) / 3);
      budget     = budgetInit;

      mainSort ( order, data, quadrant, freqTab, len, chatty, &budget );

      if (chatty >= 3) {
         Int32 spent   = budgetInit - budget;
         Int32 divisor = (len == 0) ? 1 : len;
         VPrintf3 ( "      %d work, %d block, ratio %5.2f\n",
                    spent,
                    len,
                    (float)spent / (float)divisor );
      }

      /* a negative budget means mainSort gave up part-way */
      useFallback = (budget < 0);
      if (useFallback && chatty >= 2)
         VPrintf0 ( "    too repetitive; using fallback"
                    " sorting algorithm\n" );
   }

   if (useFallback) {
      fallbackSort ( s->arr1, s->arr2, freqTab, len, chatty );
   }

   /* locate the entry of the sorted order that refers to position 0 */
   s->origPtr = -1;
   pos = 0;
   while (pos < len && order[pos] != 0) pos++;
   if (pos < len) s->origPtr = pos;

   AssertH( s->origPtr != -1, 1003 );
}


/*-------------------------------------------------------------*/
/*--- end                                       blocksort.c ---*/
/*-------------------------------------------------------------*/