blocksort.c revision 78556
142421Syokota 242421Syokota/*-------------------------------------------------------------*/ 342421Syokota/*--- Block sorting machinery ---*/ 442421Syokota/*--- blocksort.c ---*/ 542421Syokota/*-------------------------------------------------------------*/ 642421Syokota 742421Syokota/*-- 842421Syokota This file is a part of bzip2 and/or libbzip2, a program and 942421Syokota library for lossless, block-sorting data compression. 1042421Syokota 1142421Syokota Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 1242421Syokota 1342421Syokota Redistribution and use in source and binary forms, with or without 1442421Syokota modification, are permitted provided that the following conditions 1542421Syokota are met: 1642421Syokota 1742421Syokota 1. Redistributions of source code must retain the above copyright 1842421Syokota notice, this list of conditions and the following disclaimer. 1942421Syokota 2042421Syokota 2. The origin of this software must not be misrepresented; you must 2142421Syokota not claim that you wrote the original software. If you use this 2242421Syokota software in a product, an acknowledgment in the product 2342421Syokota documentation would be appreciated but is not required. 2442421Syokota 2542421Syokota 3. Altered source versions must be plainly marked as such, and must 2642421Syokota not be misrepresented as being the original software. 2742421Syokota 2842421Syokota 4. The name of the author may not be used to endorse or promote 2942421Syokota products derived from this software without specific prior written 3050477Speter permission. 3142421Syokota 3242421Syokota THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3342421Syokota OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3442421Syokota WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3542421Syokota ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3642421Syokota DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3742421Syokota DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3858271Syokota GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3942421Syokota INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4042421Syokota WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4158271Syokota NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4258271Syokota SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4358271Syokota 4458271Syokota Julian Seward, Cambridge, UK. 4542421Syokota jseward@acm.org 4642421Syokota bzip2/libbzip2 version 1.0 of 21 March 2000 4742421Syokota 4842421Syokota This program is based on (at least) the work of: 4942421Syokota Mike Burrows 5042421Syokota David Wheeler 5142421Syokota Peter Fenwick 5242421Syokota Alistair Moffat 53102149Speter Radford Neal 5442421Syokota Ian H. Witten 5542421Syokota Robert Sedgewick 5642421Syokota Jon L. Bentley 5742421Syokota 5842421Syokota For more information on these sources, see the manual. 5942421Syokota 6042421Syokota To get some idea how the block sorting algorithms in this file 6142421Syokota work, read my paper 6242421Syokota On the Performance of BWT Sorting Algorithms 6342421Syokota in Proceedings of the IEEE Data Compression Conference 2000, 6442421Syokota Snowbird, Utah, USA, 27-30 March 2000. The main sort in this 6542421Syokota file implements the algorithm called cache in the paper. 6642421Syokota--*/ 6742421Syokota 6842421Syokota 6942421Syokota#include "bzlib_private.h" 7058271Syokota 7158271Syokota/*---------------------------------------------*/ 7258271Syokota/*--- Fallback O(N log(N)^2) sorting ---*/ 7358271Syokota/*--- algorithm, for repetitive blocks ---*/ 7458271Syokota/*---------------------------------------------*/ 7558271Syokota 7658271Syokota/*---------------------------------------------*/ 7742421Syokotastatic 7842421Syokota__inline__ 7942421Syokotavoid fallbackSimpleSort ( UInt32* fmap, 8042421Syokota UInt32* eclass, 8142421Syokota Int32 lo, 8242421Syokota Int32 hi ) 8342421Syokota{ 8442421Syokota Int32 i, j, tmp; 8542421Syokota UInt32 ec_tmp; 8642421Syokota 8742421Syokota if (lo == hi) return; 8842421Syokota 8942421Syokota if (hi - lo > 3) { 9042421Syokota for ( i = hi-4; i >= lo; i-- ) { 9142421Syokota tmp = fmap[i]; 9258271Syokota ec_tmp = eclass[tmp]; 9358271Syokota for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 9442421Syokota fmap[j-4] = fmap[j]; 9542421Syokota fmap[j-4] = tmp; 9642421Syokota } 9742421Syokota } 9842421Syokota 9942421Syokota for ( i = hi-1; i >= lo; i-- ) { 10042421Syokota tmp = fmap[i]; 10142421Syokota ec_tmp = eclass[tmp]; 10242421Syokota for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 10342421Syokota fmap[j-1] = fmap[j]; 10442421Syokota fmap[j-1] = tmp; 10542421Syokota } 10642421Syokota} 10742421Syokota 10842421Syokota 10942421Syokota/*---------------------------------------------*/ 11042421Syokota#define fswap(zz1, zz2) \ 11142421Syokota { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 11242421Syokota 11369781Sdwmalone#define fvswap(zzp1, zzp2, zzn) \ 11442421Syokota{ \ 11542421Syokota Int32 yyp1 = (zzp1); \ 11642421Syokota Int32 yyp2 = (zzp2); \ 11742421Syokota Int32 yyn = (zzn); \ 11842421Syokota while (yyn > 0) { \ 11942421Syokota fswap(fmap[yyp1], fmap[yyp2]); \ 12042421Syokota yyp1++; yyp2++; yyn--; \ 12158271Syokota } \ 12242421Syokota} 12358271Syokota 12447296Syokota 12558271Syokota#define fmin(a,b) ((a) < (b)) ? (a) : (b) 12658271Syokota 12747296Syokota#define fpush(lz,hz) { stackLo[sp] = lz; \ 12847296Syokota stackHi[sp] = hz; \ 12947296Syokota sp++; } 13047296Syokota 13158271Syokota#define fpop(lz,hz) { sp--; \ 13258271Syokota lz = stackLo[sp]; \ 13347296Syokota hz = stackHi[sp]; } 13458271Syokota 13558271Syokota#define FALLBACK_QSORT_SMALL_THRESH 10 13658271Syokota#define FALLBACK_QSORT_STACK_SIZE 100 13742421Syokota 13842421Syokota 13942421Syokotastatic 14042421Syokotavoid fallbackQSort3 ( UInt32* fmap, 14142421Syokota UInt32* eclass, 14242421Syokota Int32 loSt, 14358271Syokota Int32 hiSt ) 14458271Syokota{ 14558271Syokota Int32 unLo, unHi, ltLo, gtHi, n, m; 14658271Syokota Int32 sp, lo, hi; 14758271Syokota UInt32 med, r, r3; 14858271Syokota Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 14958271Syokota Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 15058271Syokota 15158271Syokota r = 0; 15258271Syokota 15358271Syokota sp = 0; 15458271Syokota fpush ( loSt, hiSt ); 15558271Syokota 15658271Syokota while (sp > 0) { 157114930Speter 15858271Syokota AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); 159114930Speter 160114930Speter fpop ( lo, hi ); 16158271Syokota if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 16265176Sdfr fallbackSimpleSort ( fmap, eclass, lo, hi ); 16392661Speter continue; 16492661Speter } 16592661Speter 16692661Speter /* Random partitioning. Median of 3 sometimes fails to 16758271Syokota avoid bad cases. Median of 9 seems to help but 16858271Syokota looks rather expensive. This too seems to work but 16958271Syokota is cheaper. Guidance for the magic constants 17058271Syokota 7621 and 32768 is taken from Sedgewick's algorithms 17158271Syokota book, chapter 35. 17258271Syokota */ 17358271Syokota r = ((r * 7621) + 1) % 32768; 17458271Syokota r3 = r % 3; 17558271Syokota if (r3 == 0) med = eclass[fmap[lo]]; else 17658271Syokota if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 17742421Syokota med = eclass[fmap[hi]]; 17842421Syokota 17942421Syokota unLo = ltLo = lo; 18058271Syokota unHi = gtHi = hi; 18158271Syokota 18242421Syokota while (1) { 18358271Syokota while (1) { 18442421Syokota if (unLo > unHi) break; 18542421Syokota n = (Int32)eclass[fmap[unLo]] - (Int32)med; 18642421Syokota if (n == 0) { 18742421Syokota fswap(fmap[unLo], fmap[ltLo]); 18842421Syokota ltLo++; unLo++; 18942421Syokota continue; 19042421Syokota }; 19142421Syokota if (n > 0) break; 19242421Syokota unLo++; 19342421Syokota } 19442421Syokota while (1) { 19542421Syokota if (unLo > unHi) break; 19658271Syokota n = (Int32)eclass[fmap[unHi]] - (Int32)med; 19758271Syokota if (n == 0) { 19858271Syokota fswap(fmap[unHi], fmap[gtHi]); 19942421Syokota gtHi--; unHi--; 20042421Syokota continue; 20142421Syokota }; 20258271Syokota if (n < 0) break; 20342421Syokota unHi--; 20458271Syokota } 20542421Syokota if (unLo > unHi) break; 20658271Syokota fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 20758271Syokota } 20858271Syokota 20958271Syokota AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 21058271Syokota 21158271Syokota if (gtHi < ltLo) continue; 21258271Syokota 21342421Syokota n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 21442421Syokota m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 21542421Syokota 21642421Syokota n = lo + unLo - ltLo - 1; 21742421Syokota m = hi - (gtHi - unHi) + 1; 21842421Syokota 21942421Syokota if (n - lo > hi - m) { 22042421Syokota fpush ( lo, n ); 22142421Syokota fpush ( m, hi ); 22242421Syokota } else { 22393279Smurray fpush ( m, hi ); 22442421Syokota fpush ( lo, n ); 22542421Syokota } 22693279Smurray } 22742421Syokota} 22842421Syokota 22942421Syokota#undef fmin 23042421Syokota#undef fpush 23142421Syokota#undef fpop 23242421Syokota#undef fswap 23342421Syokota#undef fvswap 23442421Syokota#undef FALLBACK_QSORT_SMALL_THRESH 23542421Syokota#undef FALLBACK_QSORT_STACK_SIZE 23642421Syokota 23742421Syokota 23842421Syokota/*---------------------------------------------*/ 23942421Syokota/* Pre: 24042421Syokota nblock > 0 24142421Syokota eclass exists for [0 .. nblock-1] 24242421Syokota ((UChar*)eclass) [0 .. nblock-1] holds block 24342421Syokota ptr exists for [0 .. nblock-1] 24442421Syokota 24593279Smurray Post: 24693279Smurray ((UChar*)eclass) [0 .. nblock-1] holds block 24742421Syokota All other areas of eclass destroyed 24893279Smurray fmap [0 .. nblock-1] holds sorted order 24993279Smurray bhtab [ 0 .. 2+(nblock/32) ] destroyed 25093279Smurray*/ 25142421Syokota 25242421Syokota#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 25342421Syokota#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 25442421Syokota#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 25542421Syokota#define WORD_BH(zz) bhtab[(zz) >> 5] 25642421Syokota#define UNALIGNED_BH(zz) ((zz) & 0x01f) 25742421Syokota 25842421Syokotastatic 25942421Syokotavoid fallbackSort ( UInt32* fmap, 26042421Syokota UInt32* eclass, 26142421Syokota UInt32* bhtab, 26242421Syokota Int32 nblock, 26342421Syokota Int32 verb ) 26442421Syokota{ 26542421Syokota Int32 ftab[257]; 26642421Syokota Int32 ftabCopy[256]; 26742421Syokota Int32 H, i, j, k, l, r, cc, cc1; 26842421Syokota Int32 nNotDone; 26942421Syokota Int32 nBhtab; 27042421Syokota UChar* eclass8 = (UChar*)eclass; 27158271Syokota 27242421Syokota /*-- 27342421Syokota Initial 1-char radix sort to generate 27442421Syokota initial fmap and initial BH bits. 27542421Syokota --*/ 27642421Syokota if (verb >= 4) 27742421Syokota VPrintf0 ( " bucket sorting ...\n" ); 27842421Syokota for (i = 0; i < 257; i++) ftab[i] = 0; 27942421Syokota for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 28042421Syokota for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 28142421Syokota for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 28242421Syokota 28342421Syokota for (i = 0; i < nblock; i++) { 28442421Syokota j = eclass8[i]; 28542421Syokota k = ftab[j] - 1; 28642421Syokota ftab[j] = k; 28742421Syokota fmap[k] = i; 28842421Syokota } 28942421Syokota 29042421Syokota nBhtab = 2 + (nblock / 32); 29142421Syokota for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 29242421Syokota for (i = 0; i < 256; i++) SET_BH(ftab[i]); 29342421Syokota 29442421Syokota /*-- 29542421Syokota Inductively refine the buckets. Kind-of an 29642421Syokota "exponential radix sort" (!), inspired by the 29742421Syokota Manber-Myers suffix array construction algorithm. 29842421Syokota --*/ 29942421Syokota 30042421Syokota /*-- set sentinel bits for block-end detection --*/ 30142421Syokota for (i = 0; i < 32; i++) { 30242421Syokota SET_BH(nblock + 2*i); 30342421Syokota CLEAR_BH(nblock + 2*i + 1); 30442421Syokota } 30542421Syokota 30642421Syokota /*-- the log(N) loop --*/ 30742421Syokota H = 1; 30842421Syokota while (1) { 30942421Syokota 31042421Syokota if (verb >= 4) 31142421Syokota VPrintf1 ( " depth %6d has ", H ); 31242421Syokota 31342421Syokota j = 0; 31442421Syokota for (i = 0; i < nblock; i++) { 31542421Syokota if (ISSET_BH(i)) j = i; 31642421Syokota k = fmap[i] - H; if (k < 0) k += nblock; 31742421Syokota eclass[k] = j; 31842421Syokota } 31958271Syokota 32042421Syokota nNotDone = 0; 32142421Syokota r = -1; 32258271Syokota while (1) { 32342421Syokota 32442421Syokota /*-- find the next non-singleton bucket --*/ 32558271Syokota k = r + 1; 32642421Syokota while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 32742421Syokota if (ISSET_BH(k)) { 32842421Syokota while (WORD_BH(k) == 0xffffffff) k += 32; 32942421Syokota while (ISSET_BH(k)) k++; 33042421Syokota } 33142421Syokota l = k - 1; 33242421Syokota if (l >= nblock) break; 33342421Syokota while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 33442421Syokota if (!ISSET_BH(k)) { 33542421Syokota while (WORD_BH(k) == 0x00000000) k += 32; 33642421Syokota while (!ISSET_BH(k)) k++; 33742421Syokota } 33842421Syokota r = k - 1; 33942421Syokota if (r >= nblock) break; 34042421Syokota 34142421Syokota /*-- now [l, r] bracket current bucket --*/ 34242421Syokota if (r > l) { 34342421Syokota nNotDone += (r - l + 1); 34442421Syokota fallbackQSort3 ( fmap, eclass, l, r ); 34558271Syokota 34642421Syokota /*-- scan bucket and generate header bits-- */ 34742421Syokota cc = -1; 34842421Syokota for (i = l; i <= r; i++) { 34942421Syokota cc1 = eclass[fmap[i]]; 35042421Syokota if (cc != cc1) { SET_BH(i); cc = cc1; }; 35142421Syokota } 35242421Syokota } 35342421Syokota } 35442421Syokota 35542421Syokota if (verb >= 4) 35642421Syokota VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 35742421Syokota 35842421Syokota H *= 2; 35942421Syokota if (H > nblock || nNotDone == 0) break; 36042421Syokota } 36142421Syokota 36258271Syokota /*-- 36342421Syokota Reconstruct the original block in 36442421Syokota eclass8 [0 .. nblock-1], since the 36542421Syokota previous phase destroyed it. 36658271Syokota --*/ 36742421Syokota if (verb >= 4) 36842421Syokota VPrintf0 ( " reconstructing block ...\n" ); 36942421Syokota j = 0; 37042421Syokota for (i = 0; i < nblock; i++) { 37142421Syokota while (ftabCopy[j] == 0) j++; 37242421Syokota ftabCopy[j]--; 37342421Syokota eclass8[fmap[i]] = (UChar)j; 37442421Syokota } 37542421Syokota AssertH ( j < 256, 1005 ); 37642421Syokota} 37742421Syokota 37842421Syokota#undef SET_BH 37942421Syokota#undef CLEAR_BH 38042421Syokota#undef ISSET_BH 38142421Syokota#undef WORD_BH 38242421Syokota#undef UNALIGNED_BH 38342421Syokota 38442421Syokota 38542421Syokota/*---------------------------------------------*/ 38642421Syokota/*--- The main, O(N^2 log(N)) sorting ---*/ 38742421Syokota/*--- algorithm. Faster for "normal" ---*/ 38842421Syokota/*--- non-repetitive blocks. ---*/ 38958271Syokota/*---------------------------------------------*/ 39042421Syokota 39158271Syokota/*---------------------------------------------*/ 39242421Syokotastatic 39342421Syokota__inline__ 39442421SyokotaBool mainGtU ( UInt32 i1, 39542421Syokota UInt32 i2, 39642421Syokota UChar* block, 39742421Syokota UInt16* quadrant, 39842421Syokota UInt32 nblock, 39942421Syokota Int32* budget ) 40042421Syokota{ 40142421Syokota Int32 k; 40242421Syokota UChar c1, c2; 40342421Syokota UInt16 s1, s2; 40442421Syokota 40542421Syokota AssertD ( i1 != i2, "mainGtU" ); 40642421Syokota /* 1 */ 40742421Syokota c1 = block[i1]; c2 = block[i2]; 40842421Syokota if (c1 != c2) return (c1 > c2); 40942421Syokota i1++; i2++; 41042421Syokota /* 2 */ 41142421Syokota c1 = block[i1]; c2 = block[i2]; 41242421Syokota if (c1 != c2) return (c1 > c2); 41342421Syokota i1++; i2++; 41458271Syokota /* 3 */ 41542421Syokota c1 = block[i1]; c2 = block[i2]; 41642421Syokota if (c1 != c2) return (c1 > c2); 41742421Syokota i1++; i2++; 41858271Syokota /* 4 */ 41942421Syokota c1 = block[i1]; c2 = block[i2]; 42042421Syokota if (c1 != c2) return (c1 > c2); 42142421Syokota i1++; i2++; 42242421Syokota /* 5 */ 42342421Syokota c1 = block[i1]; c2 = block[i2]; 42442421Syokota if (c1 != c2) return (c1 > c2); 42542421Syokota i1++; i2++; 42642421Syokota /* 6 */ 42742421Syokota c1 = block[i1]; c2 = block[i2]; 42842421Syokota if (c1 != c2) return (c1 > c2); 42942421Syokota i1++; i2++; 43042421Syokota /* 7 */ 43142421Syokota c1 = block[i1]; c2 = block[i2]; 43242421Syokota if (c1 != c2) return (c1 > c2); 43342421Syokota i1++; i2++; 43442421Syokota /* 8 */ 43542421Syokota c1 = block[i1]; c2 = block[i2]; 43642421Syokota if (c1 != c2) return (c1 > c2); 43742421Syokota i1++; i2++; 43842421Syokota /* 9 */ 43942421Syokota c1 = block[i1]; c2 = block[i2]; 44042421Syokota if (c1 != c2) return (c1 > c2); 44158271Syokota i1++; i2++; 44242421Syokota /* 10 */ 44358271Syokota c1 = block[i1]; c2 = block[i2]; 44442421Syokota if (c1 != c2) return (c1 > c2); 44542421Syokota i1++; i2++; 44642421Syokota /* 11 */ 44742421Syokota c1 = block[i1]; c2 = block[i2]; 44842421Syokota if (c1 != c2) return (c1 > c2); 44942421Syokota i1++; i2++; 45042421Syokota /* 12 */ 45142421Syokota c1 = block[i1]; c2 = block[i2]; 45242421Syokota if (c1 != c2) return (c1 > c2); 45342421Syokota i1++; i2++; 45442421Syokota 45542421Syokota k = nblock + 8; 45642421Syokota 45742421Syokota do { 45842421Syokota /* 1 */ 45942421Syokota c1 = block[i1]; c2 = block[i2]; 46042421Syokota if (c1 != c2) return (c1 > c2); 46142421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 46242421Syokota if (s1 != s2) return (s1 > s2); 46342421Syokota i1++; i2++; 46458271Syokota /* 2 */ 46542421Syokota c1 = block[i1]; c2 = block[i2]; 46642421Syokota if (c1 != c2) return (c1 > c2); 46742421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 46842421Syokota if (s1 != s2) return (s1 > s2); 46942421Syokota i1++; i2++; 47042421Syokota /* 3 */ 47142421Syokota c1 = block[i1]; c2 = block[i2]; 47242421Syokota if (c1 != c2) return (c1 > c2); 47342421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 47458271Syokota if (s1 != s2) return (s1 > s2); 47542421Syokota i1++; i2++; 47642421Syokota /* 4 */ 47742421Syokota c1 = block[i1]; c2 = block[i2]; 47842421Syokota if (c1 != c2) return (c1 > c2); 47942421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 48042421Syokota if (s1 != s2) return (s1 > s2); 48142421Syokota i1++; i2++; 48242421Syokota /* 5 */ 48342421Syokota c1 = block[i1]; c2 = block[i2]; 48458271Syokota if (c1 != c2) return (c1 > c2); 48542421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 48642421Syokota if (s1 != s2) return (s1 > s2); 48742421Syokota i1++; i2++; 48842421Syokota /* 6 */ 48942421Syokota c1 = block[i1]; c2 = block[i2]; 49042421Syokota if (c1 != c2) return (c1 > c2); 49142421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 49242421Syokota if (s1 != s2) return (s1 > s2); 49342421Syokota i1++; i2++; 49442421Syokota /* 7 */ 49542421Syokota c1 = block[i1]; c2 = block[i2]; 49642421Syokota if (c1 != c2) return (c1 > c2); 49742421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 49842421Syokota if (s1 != s2) return (s1 > s2); 49942421Syokota i1++; i2++; 50042421Syokota /* 8 */ 50142421Syokota c1 = block[i1]; c2 = block[i2]; 50242421Syokota if (c1 != c2) return (c1 > c2); 50342421Syokota s1 = quadrant[i1]; s2 = quadrant[i2]; 50442421Syokota if (s1 != s2) return (s1 > s2); 50542421Syokota i1++; i2++; 50642421Syokota 50742421Syokota if (i1 >= nblock) i1 -= nblock; 50842421Syokota if (i2 >= nblock) i2 -= nblock; 50942421Syokota 51042421Syokota k -= 8; 51142421Syokota (*budget)--; 51242421Syokota } 51342421Syokota while (k >= 0); 51442421Syokota 51542421Syokota return False; 51642421Syokota} 51742421Syokota 51842421Syokota 51942421Syokota/*---------------------------------------------*/ 52042421Syokota/*-- 52142421Syokota Knuth's increments seem to work better 52242421Syokota than Incerpi-Sedgewick here. Possibly 52342421Syokota because the number of elems to sort is 52442421Syokota usually small, typically <= 20. 52542421Syokota--*/ 52642421Syokotastatic 52742421SyokotaInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 52842421Syokota 9841, 29524, 88573, 265720, 52942421Syokota 797161, 2391484 }; 53042421Syokota 53142421Syokotastatic 53242421Syokotavoid mainSimpleSort ( UInt32* ptr, 53342421Syokota UChar* block, 53442421Syokota UInt16* quadrant, 53542421Syokota Int32 nblock, 53642421Syokota Int32 lo, 53742421Syokota Int32 hi, 53842421Syokota Int32 d, 53942421Syokota Int32* budget ) 54042421Syokota{ 54142421Syokota Int32 i, j, h, bigN, hp; 54242421Syokota UInt32 v; 54342421Syokota 54442421Syokota bigN = hi - lo + 1; 54542421Syokota if (bigN < 2) return; 54642421Syokota 54742421Syokota hp = 0; 54842421Syokota while (incs[hp] < bigN) hp++; 54942421Syokota hp--; 55042421Syokota 55142421Syokota for (; hp >= 0; hp--) { 55242421Syokota h = incs[hp]; 55342421Syokota 55442421Syokota i = lo + h; 55542421Syokota while (True) { 55642421Syokota 55742421Syokota /*-- copy 1 --*/ 55842421Syokota if (i > hi) break; 55942421Syokota v = ptr[i]; 56042421Syokota j = i; 56142421Syokota while ( mainGtU ( 56242421Syokota ptr[j-h]+d, v+d, block, quadrant, nblock, budget 56342421Syokota ) ) { 56442421Syokota ptr[j] = ptr[j-h]; 56542421Syokota j = j - h; 56642421Syokota if (j <= (lo + h - 1)) break; 56742421Syokota } 56842421Syokota ptr[j] = v; 56942421Syokota i++; 57042421Syokota 57142421Syokota /*-- copy 2 --*/ 57242421Syokota if (i > hi) break; 57342421Syokota v = ptr[i]; 57442421Syokota j = i; 57542421Syokota while ( mainGtU ( 57642421Syokota ptr[j-h]+d, v+d, block, quadrant, nblock, budget 57742421Syokota ) ) { 57842421Syokota ptr[j] = ptr[j-h]; 57942421Syokota j = j - h; 58042421Syokota if (j <= (lo + h - 1)) break; 58142421Syokota } 58242421Syokota ptr[j] = v; 58342421Syokota i++; 58442421Syokota 58542421Syokota /*-- copy 3 --*/ 58642421Syokota if (i > hi) break; 58742421Syokota v = ptr[i]; 58842421Syokota j = i; 58942421Syokota while ( mainGtU ( 59042421Syokota ptr[j-h]+d, v+d, block, quadrant, nblock, budget 59142421Syokota ) ) { 59242421Syokota ptr[j] = ptr[j-h]; 59342421Syokota j = j - h; 59442421Syokota if (j <= (lo + h - 1)) break; 59542421Syokota } 59642421Syokota ptr[j] = v; 59742421Syokota i++; 59842421Syokota 59942421Syokota if (*budget < 0) return; 60042421Syokota } 60142421Syokota } 60242421Syokota} 60342421Syokota 60442421Syokota 60542421Syokota/*---------------------------------------------*/ 60642421Syokota/*-- 60742421Syokota The following is an implementation of 60842421Syokota an elegant 3-way quicksort for strings, 60942421Syokota described in a paper "Fast Algorithms for 61042421Syokota Sorting and Searching Strings", by Robert 61142421Syokota Sedgewick and Jon L. Bentley. 61242421Syokota--*/ 61358271Syokota 61442421Syokota#define mswap(zz1, zz2) \ 61542421Syokota { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 61642421Syokota 61742421Syokota#define mvswap(zzp1, zzp2, zzn) \ 61842421Syokota{ \ 61942421Syokota Int32 yyp1 = (zzp1); \ 62042421Syokota Int32 yyp2 = (zzp2); \ 62142421Syokota Int32 yyn = (zzn); \ 62242421Syokota while (yyn > 0) { \ 62342421Syokota mswap(ptr[yyp1], ptr[yyp2]); \ 62442421Syokota yyp1++; yyp2++; yyn--; \ 62542421Syokota } \ 62642421Syokota} 62742421Syokota 62842421Syokotastatic 62942421Syokota__inline__ 63042421SyokotaUChar mmed3 ( UChar a, UChar b, UChar c ) 63142421Syokota{ 63242421Syokota UChar t; 63342421Syokota if (a > b) { t = a; a = b; b = t; }; 63442421Syokota if (b > c) { 63542421Syokota b = c; 63642421Syokota if (a > b) b = a; 63742421Syokota } 63858271Syokota return b; 63942421Syokota} 64042421Syokota 64142421Syokota#define mmin(a,b) ((a) < (b)) ? (a) : (b) 64242421Syokota 64342421Syokota#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 64442421Syokota stackHi[sp] = hz; \ 64542421Syokota stackD [sp] = dz; \ 64642421Syokota sp++; } 64742421Syokota 64842421Syokota#define mpop(lz,hz,dz) { sp--; \ 64942421Syokota lz = stackLo[sp]; \ 65042421Syokota hz = stackHi[sp]; \ 65142421Syokota dz = stackD [sp]; } 65242421Syokota 65342421Syokota 65442421Syokota#define mnextsize(az) (nextHi[az]-nextLo[az]) 65542421Syokota 65642421Syokota#define mnextswap(az,bz) \ 65742421Syokota { Int32 tz; \ 65842421Syokota tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 65942421Syokota tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 66042421Syokota tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 66158271Syokota 66242421Syokota 66342421Syokota#define MAIN_QSORT_SMALL_THRESH 20 66458271Syokota#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 66558271Syokota#define MAIN_QSORT_STACK_SIZE 100 66642421Syokota 66742421Syokotastatic 66842421Syokotavoid mainQSort3 ( UInt32* ptr, 66958271Syokota UChar* block, 67042421Syokota UInt16* quadrant, 67142421Syokota Int32 nblock, 67242421Syokota Int32 loSt, 67342421Syokota Int32 hiSt, 67442421Syokota Int32 dSt, 67542421Syokota Int32* budget ) 67642421Syokota{ 67742421Syokota Int32 unLo, unHi, ltLo, gtHi, n, m, med; 67842421Syokota Int32 sp, lo, hi, d; 67942421Syokota 68042421Syokota Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 68142421Syokota Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 68258271Syokota Int32 stackD [MAIN_QSORT_STACK_SIZE]; 68342421Syokota 68442421Syokota Int32 nextLo[3]; 68542421Syokota Int32 nextHi[3]; 68642421Syokota Int32 nextD [3]; 68742421Syokota 68842421Syokota sp = 0; 68942421Syokota mpush ( loSt, hiSt, dSt ); 69042421Syokota 69142421Syokota while (sp > 0) { 69242421Syokota 69342421Syokota AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); 69442421Syokota 69558271Syokota mpop ( lo, hi, d ); 69642421Syokota if (hi - lo < MAIN_QSORT_SMALL_THRESH || 69742421Syokota d > MAIN_QSORT_DEPTH_THRESH) { 69858271Syokota mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 69958271Syokota if (*budget < 0) return; 70042421Syokota continue; 70142421Syokota } 70242421Syokota 70358271Syokota med = (Int32) 70442421Syokota mmed3 ( block[ptr[ lo ]+d], 70542421Syokota block[ptr[ hi ]+d], 70642421Syokota block[ptr[ (lo+hi)>>1 ]+d] ); 70742421Syokota 70842421Syokota unLo = ltLo = lo; 70942421Syokota unHi = gtHi = hi; 71042421Syokota 71142421Syokota while (True) { 71242421Syokota while (True) { 71342421Syokota if (unLo > unHi) break; 71442421Syokota n = ((Int32)block[ptr[unLo]+d]) - med; 71542421Syokota if (n == 0) { 71642421Syokota mswap(ptr[unLo], ptr[ltLo]); 71742421Syokota ltLo++; unLo++; continue; 71842421Syokota }; 71942421Syokota if (n > 0) break; 72042421Syokota unLo++; 72142421Syokota } 72258271Syokota while (True) { 72342421Syokota if (unLo > unHi) break; 72458271Syokota n = ((Int32)block[ptr[unHi]+d]) - med; 72542421Syokota if (n == 0) { 72642421Syokota mswap(ptr[unHi], ptr[gtHi]); 72742421Syokota gtHi--; unHi--; continue; 72842421Syokota }; 72942421Syokota if (n < 0) break; 73042421Syokota unHi--; 73142421Syokota } 73242421Syokota if (unLo > unHi) break; 73342421Syokota mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 73442421Syokota } 73542421Syokota 73642421Syokota AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 73742421Syokota 73842421Syokota if (gtHi < ltLo) { 73942421Syokota mpush(lo, hi, d+1 ); 74042421Syokota continue; 74142421Syokota } 74242421Syokota 74342421Syokota n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 74442421Syokota m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 74542421Syokota 74642421Syokota n = lo + unLo - ltLo - 1; 74742421Syokota m = hi - (gtHi - unHi) + 1; 74842421Syokota 74942421Syokota nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 75042421Syokota nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 75142421Syokota nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 75242421Syokota 75342421Syokota if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 75442421Syokota if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 75542421Syokota if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 75642421Syokota 75742421Syokota AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 75842421Syokota AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 75942421Syokota 76042421Syokota mpush (nextLo[0], nextHi[0], nextD[0]); 76158271Syokota mpush (nextLo[1], nextHi[1], nextD[1]); 76242421Syokota mpush (nextLo[2], nextHi[2], nextD[2]); 76358271Syokota } 76442421Syokota} 76542421Syokota 76642421Syokota#undef mswap 76742421Syokota#undef mvswap 76842421Syokota#undef mpush 76942421Syokota#undef mpop 77042421Syokota#undef mmin 77142421Syokota#undef mnextsize 77242421Syokota#undef mnextswap 77342421Syokota#undef MAIN_QSORT_SMALL_THRESH 77442421Syokota#undef MAIN_QSORT_DEPTH_THRESH 77542421Syokota#undef MAIN_QSORT_STACK_SIZE 77642421Syokota 77742421Syokota 77842421Syokota/*---------------------------------------------*/ 77942421Syokota/* Pre: 78042421Syokota nblock > N_OVERSHOOT 78142421Syokota block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 78242421Syokota ((UChar*)block32) [0 .. nblock-1] holds block 78342421Syokota ptr exists for [0 .. nblock-1] 78442421Syokota 78542421Syokota Post: 78642421Syokota ((UChar*)block32) [0 .. nblock-1] holds block 78742421Syokota All other areas of block32 destroyed 78842421Syokota ftab [0 .. 65536 ] destroyed 78942421Syokota ptr [0 .. nblock-1] holds sorted order 79042421Syokota if (*budget < 0), sorting was abandoned 79142421Syokota*/ 79242421Syokota 79342421Syokota#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 79442421Syokota#define SETMASK (1 << 21) 79542421Syokota#define CLEARMASK (~(SETMASK)) 79642421Syokota 79742421Syokotastatic 79842421Syokotavoid mainSort ( UInt32* ptr, 79958271Syokota UChar* block, 80042421Syokota UInt16* quadrant, 80158271Syokota UInt32* ftab, 80242421Syokota Int32 nblock, 80342421Syokota Int32 verb, 80442421Syokota Int32* budget ) 80542421Syokota{ 80642421Syokota Int32 i, j, k, ss, sb; 80742421Syokota Int32 runningOrder[256]; 80842421Syokota Bool bigDone[256]; 80942421Syokota Int32 copyStart[256]; 81042421Syokota Int32 copyEnd [256]; 81142421Syokota UChar c1; 81242421Syokota Int32 numQSorted; 81342421Syokota UInt16 s; 81442421Syokota if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 81542421Syokota 81642421Syokota /*-- set up the 2-byte frequency table --*/ 81742421Syokota for (i = 65536; i >= 0; i--) ftab[i] = 0; 81842421Syokota 81942421Syokota j = block[0] << 8; 82042421Syokota i = nblock-1; 82142421Syokota for (; i >= 3; i -= 4) { 82242421Syokota quadrant[i] = 0; 82342421Syokota j = (j >> 8) | ( ((UInt16)block[i]) << 8); 82442421Syokota ftab[j]++; 82542421Syokota quadrant[i-1] = 0; 82642421Syokota j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 82742421Syokota ftab[j]++; 82842421Syokota quadrant[i-2] = 0; 82942421Syokota j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 83042421Syokota ftab[j]++; 83142421Syokota quadrant[i-3] = 0; 83242421Syokota j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 83342421Syokota ftab[j]++; 83442421Syokota } 83542421Syokota for (; i >= 0; i--) { 83642421Syokota quadrant[i] = 0; 83742421Syokota j = (j >> 8) | ( ((UInt16)block[i]) << 8); 83842421Syokota ftab[j]++; 83942421Syokota } 84042421Syokota 84142421Syokota /*-- (emphasises close relationship of block & quadrant) --*/ 84242421Syokota for (i = 0; i < BZ_N_OVERSHOOT; i++) { 84342421Syokota block [nblock+i] = block[i]; 84442421Syokota quadrant[nblock+i] = 0; 84542421Syokota } 84642421Syokota 84742421Syokota if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 84842421Syokota 84942421Syokota /*-- Complete the initial radix sort --*/ 85042421Syokota for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 85142421Syokota 85242421Syokota s = block[0] << 8; 85342421Syokota i = nblock-1; 85442421Syokota for (; i >= 3; i -= 4) { 85542421Syokota s = (s >> 8) | (block[i] << 8); 85642421Syokota j = ftab[s] -1; 85742421Syokota ftab[s] = j; 85842421Syokota ptr[j] = i; 85942421Syokota s = (s >> 8) | (block[i-1] << 8); 86042421Syokota j = ftab[s] -1; 86142421Syokota ftab[s] = j; 86242421Syokota ptr[j] = i-1; 86342421Syokota s = (s >> 8) | (block[i-2] << 8); 86442421Syokota j = ftab[s] -1; 86542421Syokota ftab[s] = j; 86642421Syokota ptr[j] = i-2; 86742421Syokota s = (s >> 8) | (block[i-3] << 8); 86842421Syokota j = ftab[s] -1; 86942421Syokota ftab[s] = j; 87042421Syokota ptr[j] = i-3; 87142421Syokota } 87242421Syokota for (; i >= 0; i--) { 87342421Syokota s = (s >> 8) | (block[i] << 8); 87442421Syokota j = ftab[s] -1; 87542421Syokota ftab[s] = j; 87642421Syokota ptr[j] = i; 87742421Syokota } 87842421Syokota 87942421Syokota /*-- 88042421Syokota Now ftab contains the first loc of every small bucket. 88142421Syokota Calculate the running order, from smallest to largest 88242421Syokota big bucket. 88342421Syokota --*/ 88442421Syokota for (i = 0; i <= 255; i++) { 88542421Syokota bigDone [i] = False; 88642421Syokota runningOrder[i] = i; 88742421Syokota } 88842421Syokota 88942421Syokota { 89042421Syokota Int32 vv; 89142421Syokota Int32 h = 1; 89242421Syokota do h = 3 * h + 1; while (h <= 256); 89342421Syokota do { 89442421Syokota h = h / 3; 89542421Syokota for (i = h; i <= 255; i++) { 89642421Syokota vv = runningOrder[i]; 89742421Syokota j = i; 89842421Syokota while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 89942421Syokota runningOrder[j] = runningOrder[j-h]; 90042421Syokota j = j - h; 90142421Syokota if (j <= (h - 1)) goto zero; 90242421Syokota } 90342421Syokota zero: 90442421Syokota runningOrder[j] = vv; 90542421Syokota } 90642421Syokota } while (h != 1); 90742421Syokota } 90842421Syokota 90942421Syokota /*-- 91042421Syokota The main sorting loop. 91142421Syokota --*/ 91242421Syokota 91342421Syokota numQSorted = 0; 91442421Syokota 91542421Syokota for (i = 0; i <= 255; i++) { 91642421Syokota 91742421Syokota /*-- 91842421Syokota Process big buckets, starting with the least full. 91942421Syokota Basically this is a 3-step process in which we call 92042421Syokota mainQSort3 to sort the small buckets [ss, j], but 92142421Syokota also make a big effort to avoid the calls if we can. 92242421Syokota --*/ 92342421Syokota ss = runningOrder[i]; 92442421Syokota 92542421Syokota /*-- 92642421Syokota Step 1: 92742421Syokota Complete the big bucket [ss] by quicksorting 92842421Syokota any unsorted small buckets [ss, j], for j != ss. 92942421Syokota Hopefully previous pointer-scanning phases have already 93042421Syokota completed many of the small buckets [ss, j], so 93142421Syokota we don't have to sort them at all. 93242421Syokota --*/ 93342421Syokota for (j = 0; j <= 255; j++) { 93442421Syokota if (j != ss) { 93542421Syokota sb = (ss << 8) + j; 93642421Syokota if ( ! (ftab[sb] & SETMASK) ) { 93742421Syokota Int32 lo = ftab[sb] & CLEARMASK; 93842421Syokota Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 93942421Syokota if (hi > lo) { 94042421Syokota if (verb >= 4) 94142421Syokota VPrintf4 ( " qsort [0x%x, 0x%x] " 94242421Syokota "done %d this %d\n", 94342421Syokota ss, j, numQSorted, hi - lo + 1 ); 94442421Syokota mainQSort3 ( 94542421Syokota ptr, block, quadrant, nblock, 94642421Syokota lo, hi, BZ_N_RADIX, budget 94742421Syokota ); 94842421Syokota numQSorted += (hi - lo + 1); 94942421Syokota if (*budget < 0) return; 95042421Syokota } 95142421Syokota } 95242421Syokota ftab[sb] |= SETMASK; 95342421Syokota } 95442421Syokota } 95542421Syokota 95642421Syokota AssertH ( !bigDone[ss], 1006 ); 95742421Syokota 95842421Syokota /*-- 95942421Syokota Step 2: 96042421Syokota Now scan this big bucket [ss] so as to synthesise the 96142421Syokota sorted order for small buckets [t, ss] for all t, 96242421Syokota including, magically, the bucket [ss,ss] too. 96342421Syokota This will avoid doing Real Work in subsequent Step 1's. 96442421Syokota --*/ 96542421Syokota { 96642421Syokota for (j = 0; j <= 255; j++) { 96742421Syokota copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 96842421Syokota copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 96942421Syokota } 97042421Syokota for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 97142421Syokota k = ptr[j]-1; if (k < 0) k += nblock; 97242421Syokota c1 = block[k]; 97342421Syokota if (!bigDone[c1]) 97442421Syokota ptr[ copyStart[c1]++ ] = k; 97542421Syokota } 97642421Syokota for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 97742421Syokota k = ptr[j]-1; if (k < 0) k += nblock; 97842421Syokota c1 = block[k]; 97942421Syokota if (!bigDone[c1]) 98042421Syokota ptr[ copyEnd[c1]-- ] = k; 98142421Syokota } 98242421Syokota } 98342421Syokota 98442421Syokota AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 ); 98542421Syokota 98642421Syokota for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 98742421Syokota 98842421Syokota /*-- 98942421Syokota Step 3: 99042421Syokota The [ss] big bucket is now done. Record this fact, 99142421Syokota and update the quadrant descriptors. Remember to 99242421Syokota update quadrants in the overshoot area too, if 99342421Syokota necessary. The "if (i < 255)" test merely skips 99442421Syokota this updating for the last bucket processed, since 99542421Syokota updating for the last bucket is pointless. 99642421Syokota 99742421Syokota The quadrant array provides a way to incrementally 99842421Syokota cache sort orderings, as they appear, so as to 99942421Syokota make subsequent comparisons in fullGtU() complete 100042421Syokota faster. For repetitive blocks this makes a big 100142421Syokota difference (but not big enough to be able to avoid 100242421Syokota the fallback sorting mechanism, exponential radix sort). 100342421Syokota 100442421Syokota The precise meaning is: at all times: 100542421Syokota 100642421Syokota for 0 <= i < nblock and 0 <= j <= nblock 100742421Syokota 100842421Syokota if block[i] != block[j], 100942421Syokota 101042421Syokota then the relative values of quadrant[i] and 101142421Syokota quadrant[j] are meaningless. 101242421Syokota 101342421Syokota else { 101442421Syokota if quadrant[i] < quadrant[j] 101542421Syokota then the string starting at i lexicographically 101642421Syokota precedes the string starting at j 101742421Syokota 101842421Syokota else if quadrant[i] > quadrant[j] 101942421Syokota then the string starting at j lexicographically 102042421Syokota precedes the string starting at i 102142421Syokota 102242421Syokota else 102342421Syokota the relative ordering of the strings starting 102442421Syokota at i and j has not yet been determined. 102542421Syokota } 102642421Syokota --*/ 102742421Syokota bigDone[ss] = True; 102842421Syokota 102942421Syokota if (i < 255) { 103042421Syokota Int32 bbStart = ftab[ss << 8] & CLEARMASK; 103142421Syokota Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 103242421Syokota Int32 shifts = 0; 103342421Syokota 103442421Syokota while ((bbSize >> shifts) > 65534) shifts++; 103542421Syokota 103642421Syokota for (j = bbSize-1; j >= 0; j--) { 103742421Syokota Int32 a2update = ptr[bbStart + j]; 103842421Syokota UInt16 qVal = (UInt16)(j >> shifts); 103942421Syokota quadrant[a2update] = qVal; 104042421Syokota if (a2update < BZ_N_OVERSHOOT) 104142421Syokota quadrant[a2update + nblock] = qVal; 1042 } 1043 AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 1044 } 1045 1046 } 1047 1048 if (verb >= 4) 1049 VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 1050 nblock, numQSorted, nblock - numQSorted ); 1051} 1052 1053#undef BIGFREQ 1054#undef SETMASK 1055#undef CLEARMASK 1056 1057 1058/*---------------------------------------------*/ 1059/* Pre: 1060 nblock > 0 1061 arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 1062 ((UChar*)arr2) [0 .. nblock-1] holds block 1063 arr1 exists for [0 .. nblock-1] 1064 1065 Post: 1066 ((UChar*)arr2) [0 .. nblock-1] holds block 1067 All other areas of block destroyed 1068 ftab [ 0 .. 65536 ] destroyed 1069 arr1 [0 .. nblock-1] holds sorted order 1070*/ 1071void BZ2_blockSort ( EState* s ) 1072{ 1073 UInt32* ptr = s->ptr; 1074 UChar* block = s->block; 1075 UInt32* ftab = s->ftab; 1076 Int32 nblock = s->nblock; 1077 Int32 verb = s->verbosity; 1078 Int32 wfact = s->workFactor; 1079 UInt16* quadrant; 1080 Int32 budget; 1081 Int32 budgetInit; 1082 Int32 i; 1083 1084 if (nblock < 10000) { 1085 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1086 } else { 1087 /* Calculate the location for quadrant, remembering to get 1088 the alignment right. Assumes that &(block[0]) is at least 1089 2-byte aligned -- this should be ok since block is really 1090 the first section of arr2. 1091 */ 1092 i = nblock+BZ_N_OVERSHOOT; 1093 if (i & 1) i++; 1094 quadrant = (UInt16*)(&(block[i])); 1095 1096 /* (wfact-1) / 3 puts the default-factor-30 1097 transition point at very roughly the same place as 1098 with v0.1 and v0.9.0. 1099 Not that it particularly matters any more, since the 1100 resulting compressed stream is now the same regardless 1101 of whether or not we use the main sort or fallback sort. 1102 */ 1103 if (wfact < 1 ) wfact = 1; 1104 if (wfact > 100) wfact = 100; 1105 budgetInit = nblock * ((wfact-1) / 3); 1106 budget = budgetInit; 1107 1108 mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 1109 if (verb >= 3) 1110 VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 1111 budgetInit - budget, 1112 nblock, 1113 (float)(budgetInit - budget) / 1114 (float)(nblock==0 ? 1 : nblock) ); 1115 if (budget < 0) { 1116 if (verb >= 2) 1117 VPrintf0 ( " too repetitive; using fallback" 1118 " sorting algorithm\n" ); 1119 fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1120 } 1121 } 1122 1123 s->origPtr = -1; 1124 for (i = 0; i < s->nblock; i++) 1125 if (ptr[i] == 0) 1126 { s->origPtr = i; break; }; 1127 1128 AssertH( s->origPtr != -1, 1003 ); 1129} 1130 1131 1132/*-------------------------------------------------------------*/ 1133/*--- end blocksort.c ---*/ 1134/*-------------------------------------------------------------*/ 1135