blocksort.c revision 167974
126175Sfenner 275107Sfenner/*-------------------------------------------------------------*/ 326175Sfenner/*--- Block sorting machinery ---*/ 475107Sfenner/*--- blocksort.c ---*/ 575107Sfenner/*-------------------------------------------------------------*/ 6127664Sbms 775107Sfenner/* ------------------------------------------------------------------ 8127664Sbms This file is part of bzip2/libbzip2, a program and library for 975107Sfenner lossless, block-sorting data compression. 1075107Sfenner 1175107Sfenner bzip2/libbzip2 version 1.0.4 of 20 December 2006 12127664Sbms Copyright (C) 1996-2006 Julian Seward <jseward@bzip.org> 1375107Sfenner 1475107Sfenner Please read the WARNING, DISCLAIMER and PATENTS sections in the 1575107Sfenner README file. 1675107Sfenner 1775107Sfenner This program is released under the terms of the license contained 1875107Sfenner in the file LICENSE. 1975107Sfenner ------------------------------------------------------------------ */ 2075107Sfenner 2175107Sfenner 22127664Sbms#include "bzlib_private.h" 2375107Sfenner 2475107Sfenner/*---------------------------------------------*/ 2575107Sfenner/*--- Fallback O(N log(N)^2) sorting ---*/ 2626175Sfenner/*--- algorithm, for repetitive blocks ---*/ 27127664Sbms/*---------------------------------------------*/ 2826175Sfenner 29127664Sbms/*---------------------------------------------*/ 30172677Smlaierstatic 3126175Sfenner__inline__ 3226175Sfennervoid fallbackSimpleSort ( UInt32* fmap, 3375107Sfenner UInt32* eclass, 3475107Sfenner Int32 lo, 3575107Sfenner Int32 hi ) 3675107Sfenner{ 3775107Sfenner Int32 i, j, tmp; 3875107Sfenner UInt32 ec_tmp; 3975107Sfenner 4075107Sfenner if (lo == hi) return; 4175107Sfenner 4275107Sfenner if (hi - lo > 3) { 4375107Sfenner for ( i = hi-4; i >= lo; i-- ) { 4475107Sfenner tmp = fmap[i]; 4575107Sfenner ec_tmp = eclass[tmp]; 4675107Sfenner for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 4775107Sfenner fmap[j-4] = fmap[j]; 4875107Sfenner fmap[j-4] = tmp; 4975107Sfenner } 5075107Sfenner } 5175107Sfenner 5275107Sfenner for ( i = hi-1; i >= lo; i-- ) { 5375107Sfenner tmp = fmap[i]; 5475107Sfenner ec_tmp = eclass[tmp]; 5598530Sfenner for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 5698530Sfenner fmap[j-1] = fmap[j]; 5798530Sfenner fmap[j-1] = tmp; 5898530Sfenner } 5998530Sfenner} 6098530Sfenner 6198530Sfenner 6298530Sfenner/*---------------------------------------------*/ 6398530Sfenner#define fswap(zz1, zz2) \ 6498530Sfenner { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 6598530Sfenner 6698530Sfenner#define fvswap(zzp1, zzp2, zzn) \ 6798530Sfenner{ \ 6898530Sfenner Int32 yyp1 = (zzp1); \ 6998530Sfenner Int32 yyp2 = (zzp2); \ 7098530Sfenner Int32 yyn = (zzn); \ 7198530Sfenner while (yyn > 0) { \ 7298530Sfenner fswap(fmap[yyp1], fmap[yyp2]); \ 7375107Sfenner yyp1++; yyp2++; yyn--; \ 7426175Sfenner } \ 7575107Sfenner} 7675107Sfenner 7775107Sfenner 7839291Sfenner#define fmin(a,b) ((a) < (b)) ? (a) : (b) 7926175Sfenner 8075107Sfenner#define fpush(lz,hz) { stackLo[sp] = lz; \ 8175107Sfenner stackHi[sp] = hz; \ 8226175Sfenner sp++; } 83127664Sbms 84127664Sbms#define fpop(lz,hz) { sp--; \ 85127664Sbms lz = stackLo[sp]; \ 86147894Ssam hz = stackHi[sp]; } 87147894Ssam 88147894Ssam#define FALLBACK_QSORT_SMALL_THRESH 10 89147894Ssam#define FALLBACK_QSORT_STACK_SIZE 100 90127664Sbms 9126175Sfenner 9226175Sfennerstatic 9375107Sfennervoid fallbackQSort3 ( UInt32* fmap, 9475107Sfenner UInt32* eclass, 9526175Sfenner Int32 loSt, 9675107Sfenner Int32 hiSt ) 9775107Sfenner{ 9875107Sfenner Int32 unLo, unHi, ltLo, gtHi, n, m; 9975107Sfenner Int32 sp, lo, hi; 10075107Sfenner UInt32 med, r, r3; 10175107Sfenner Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 10275107Sfenner Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 10326175Sfenner 10498530Sfenner r = 0; 10598530Sfenner 10698530Sfenner sp = 0; 10798530Sfenner fpush ( loSt, hiSt ); 10898530Sfenner 10998530Sfenner while (sp > 0) { 11098530Sfenner 11198530Sfenner AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 11298530Sfenner 11398530Sfenner fpop ( lo, hi ); 11498530Sfenner if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 11598530Sfenner fallbackSimpleSort ( fmap, eclass, lo, hi ); 11698530Sfenner continue; 11798530Sfenner } 11898530Sfenner 11998530Sfenner /* Random partitioning. Median of 3 sometimes fails to 12098530Sfenner avoid bad cases. Median of 9 seems to help but 12198530Sfenner looks rather expensive. This too seems to work but 12298530Sfenner is cheaper. Guidance for the magic constants 12398530Sfenner 7621 and 32768 is taken from Sedgewick's algorithms 12498530Sfenner book, chapter 35. 12598530Sfenner */ 12698530Sfenner r = ((r * 7621) + 1) % 32768; 12798530Sfenner r3 = r % 3; 12898530Sfenner if (r3 == 0) med = eclass[fmap[lo]]; else 129127664Sbms if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 13098530Sfenner med = eclass[fmap[hi]]; 13126175Sfenner 13275107Sfenner unLo = ltLo = lo; 13398530Sfenner unHi = gtHi = hi; 13498530Sfenner 13598530Sfenner while (1) { 13675107Sfenner while (1) { 13775107Sfenner if (unLo > unHi) break; 13875107Sfenner n = (Int32)eclass[fmap[unLo]] - (Int32)med; 13975107Sfenner if (n == 0) { 14075107Sfenner fswap(fmap[unLo], fmap[ltLo]); 14175107Sfenner ltLo++; unLo++; 14275107Sfenner continue; 14375107Sfenner }; 14475107Sfenner if (n > 0) break; 14598530Sfenner unLo++; 14675107Sfenner } 14775107Sfenner while (1) { 14875107Sfenner if (unLo > unHi) break; 14975107Sfenner n = (Int32)eclass[fmap[unHi]] - (Int32)med; 15026175Sfenner if (n == 0) { 15126175Sfenner fswap(fmap[unHi], fmap[gtHi]); 15275107Sfenner gtHi--; unHi--; 15375107Sfenner continue; 15475107Sfenner }; 15526175Sfenner if (n < 0) break; 15675107Sfenner unHi--; 15798530Sfenner } 15898530Sfenner if (unLo > unHi) break; 15998530Sfenner fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 16098530Sfenner } 16198530Sfenner 16298530Sfenner AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 16398530Sfenner 16498530Sfenner if (gtHi < ltLo) continue; 16598530Sfenner 16675107Sfenner n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 16775107Sfenner m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 168127664Sbms 169127664Sbms n = lo + unLo - ltLo - 1; 170127664Sbms m = hi - (gtHi - unHi) + 1; 171127664Sbms 172127664Sbms if (n - lo > hi - m) { 173127664Sbms fpush ( lo, n ); 174127664Sbms fpush ( m, hi ); 175127664Sbms } else { 176127664Sbms fpush ( m, hi ); 177127664Sbms fpush ( lo, n ); 17875107Sfenner } 17975107Sfenner } 180127664Sbms} 181127664Sbms 18275107Sfenner#undef fmin 18375107Sfenner#undef fpush 18475107Sfenner#undef fpop 18575107Sfenner#undef fswap 18675107Sfenner#undef fvswap 18775107Sfenner#undef FALLBACK_QSORT_SMALL_THRESH 18875107Sfenner#undef FALLBACK_QSORT_STACK_SIZE 18975107Sfenner 190127664Sbms 191127664Sbms/*---------------------------------------------*/ 192127664Sbms/* Pre: 193127664Sbms nblock > 0 19475107Sfenner eclass exists for [0 .. nblock-1] 195146768Ssam ((UChar*)eclass) [0 .. nblock-1] holds block 196127664Sbms ptr exists for [0 .. nblock-1] 197127664Sbms 198162012Ssam Post: 199127664Sbms ((UChar*)eclass) [0 .. nblock-1] holds block 20075107Sfenner All other areas of eclass destroyed 20175107Sfenner fmap [0 .. nblock-1] holds sorted order 20275107Sfenner bhtab [ 0 .. 2+(nblock/32) ] destroyed 20375107Sfenner*/ 20475107Sfenner 20575107Sfenner#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 20675107Sfenner#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 20775107Sfenner#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 20875107Sfenner#define WORD_BH(zz) bhtab[(zz) >> 5] 20975107Sfenner#define UNALIGNED_BH(zz) ((zz) & 0x01f) 21075107Sfenner 21175107Sfennerstatic 21275107Sfennervoid fallbackSort ( UInt32* fmap, 21375107Sfenner UInt32* eclass, 21475107Sfenner UInt32* bhtab, 21575107Sfenner Int32 nblock, 21675107Sfenner Int32 verb ) 21798530Sfenner{ 21898530Sfenner Int32 ftab[257]; 21998530Sfenner Int32 ftabCopy[256]; 22098530Sfenner Int32 H, i, j, k, l, r, cc, cc1; 22198530Sfenner Int32 nNotDone; 22298530Sfenner Int32 nBhtab; 22398530Sfenner UChar* eclass8 = (UChar*)eclass; 22475107Sfenner 22575107Sfenner /*-- 22675107Sfenner Initial 1-char radix sort to generate 227127664Sbms initial fmap and initial BH bits. 228127664Sbms --*/ 22975107Sfenner if (verb >= 4) 230127664Sbms VPrintf0 ( " bucket sorting ...\n" ); 23175107Sfenner for (i = 0; i < 257; i++) ftab[i] = 0; 23275107Sfenner for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 233127664Sbms for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 23475107Sfenner for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 23575107Sfenner 23675107Sfenner for (i = 0; i < nblock; i++) { 237127664Sbms j = eclass8[i]; 238127664Sbms k = ftab[j] - 1; 23926175Sfenner ftab[j] = k; 24098530Sfenner fmap[k] = i; 24198530Sfenner } 242127664Sbms 243127664Sbms nBhtab = 2 + (nblock / 32); 24498530Sfenner for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 24598530Sfenner for (i = 0; i < 256; i++) SET_BH(ftab[i]); 246127664Sbms 247127664Sbms /*-- 248127664Sbms Inductively refine the buckets. Kind-of an 249127664Sbms "exponential radix sort" (!), inspired by the 250127664Sbms Manber-Myers suffix array construction algorithm. 251127664Sbms --*/ 252147894Ssam 253147894Ssam /*-- set sentinel bits for block-end detection --*/ 254147894Ssam for (i = 0; i < 32; i++) { 255147894Ssam SET_BH(nblock + 2*i); 256147894Ssam CLEAR_BH(nblock + 2*i + 1); 25726175Sfenner } 258147894Ssam 259147894Ssam /*-- the log(N) loop --*/ 26098530Sfenner H = 1; 26175107Sfenner while (1) { 26275107Sfenner 26375107Sfenner if (verb >= 4) 26475107Sfenner VPrintf1 ( " depth %6d has ", H ); 26575107Sfenner 26675107Sfenner j = 0; 26775107Sfenner for (i = 0; i < nblock; i++) { 26875107Sfenner if (ISSET_BH(i)) j = i; 26975107Sfenner k = fmap[i] - H; if (k < 0) k += nblock; 27075107Sfenner eclass[k] = j; 27175107Sfenner } 27275107Sfenner 27375107Sfenner nNotDone = 0; 274127664Sbms r = -1; 27575107Sfenner while (1) { 27675107Sfenner 27775107Sfenner /*-- find the next non-singleton bucket --*/ 27875107Sfenner k = r + 1; 27975107Sfenner while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 28098530Sfenner if (ISSET_BH(k)) { 28198530Sfenner while (WORD_BH(k) == 0xffffffff) k += 32; 28298530Sfenner while (ISSET_BH(k)) k++; 28398530Sfenner } 28498530Sfenner l = k - 1; 28598530Sfenner if (l >= nblock) break; 286127664Sbms while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 28775107Sfenner if (!ISSET_BH(k)) { 28875107Sfenner while (WORD_BH(k) == 0x00000000) k += 32; 28975107Sfenner while (!ISSET_BH(k)) k++; 29075107Sfenner } 29175107Sfenner r = k - 1; 29275107Sfenner if (r >= nblock) break; 29375107Sfenner 29475107Sfenner /*-- now [l, r] bracket current bucket --*/ 29575107Sfenner if (r > l) { 29675107Sfenner nNotDone += (r - l + 1); 297127664Sbms fallbackQSort3 ( fmap, eclass, l, r ); 298127664Sbms 299127664Sbms /*-- scan bucket and generate header bits-- */ 300127664Sbms cc = -1; 30175107Sfenner for (i = l; i <= r; i++) { 30275107Sfenner cc1 = eclass[fmap[i]]; 303127664Sbms if (cc != cc1) { SET_BH(i); cc = cc1; }; 30475107Sfenner } 30575107Sfenner } 30675107Sfenner } 307127664Sbms 308127664Sbms if (verb >= 4) 309127664Sbms VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 310127664Sbms 311127664Sbms H *= 2; 312127664Sbms if (H > nblock || nNotDone == 0) break; 313127664Sbms } 314127664Sbms 315127664Sbms /*-- 31675107Sfenner Reconstruct the original block in 31775107Sfenner eclass8 [0 .. nblock-1], since the 318127664Sbms previous phase destroyed it. 31975107Sfenner --*/ 32075107Sfenner if (verb >= 4) 321127664Sbms VPrintf0 ( " reconstructing block ...\n" ); 322127664Sbms j = 0; 32375107Sfenner for (i = 0; i < nblock; i++) { 32475107Sfenner while (ftabCopy[j] == 0) j++; 32575107Sfenner ftabCopy[j]--; 32675107Sfenner eclass8[fmap[i]] = (UChar)j; 32798530Sfenner } 32898530Sfenner AssertH ( j < 256, 1005 ); 32998530Sfenner} 33098530Sfenner 33198530Sfenner#undef SET_BH 33298530Sfenner#undef CLEAR_BH 33398530Sfenner#undef ISSET_BH 33498530Sfenner#undef WORD_BH 33598530Sfenner#undef UNALIGNED_BH 33698530Sfenner 33798530Sfenner 33898530Sfenner/*---------------------------------------------*/ 33998530Sfenner/*--- The main, O(N^2 log(N)) sorting ---*/ 34098530Sfenner/*--- algorithm. Faster for "normal" ---*/ 34198530Sfenner/*--- non-repetitive blocks. ---*/ 34298530Sfenner/*---------------------------------------------*/ 34398530Sfenner 34498530Sfenner/*---------------------------------------------*/ 34598530Sfennerstatic 34698530Sfenner__inline__ 34798530SfennerBool mainGtU ( UInt32 i1, 34898530Sfenner UInt32 i2, 34998530Sfenner UChar* block, 35098530Sfenner UInt16* quadrant, 35198530Sfenner UInt32 nblock, 35298530Sfenner Int32* budget ) 35398530Sfenner{ 35498530Sfenner Int32 k; 35598530Sfenner UChar c1, c2; 35698530Sfenner UInt16 s1, s2; 35798530Sfenner 35898530Sfenner AssertD ( i1 != i2, "mainGtU" ); 35998530Sfenner /* 1 */ 36098530Sfenner c1 = block[i1]; c2 = block[i2]; 36198530Sfenner if (c1 != c2) return (c1 > c2); 36298530Sfenner i1++; i2++; 36398530Sfenner /* 2 */ 36498530Sfenner c1 = block[i1]; c2 = block[i2]; 36598530Sfenner if (c1 != c2) return (c1 > c2); 36698530Sfenner i1++; i2++; 36798530Sfenner /* 3 */ 36898530Sfenner c1 = block[i1]; c2 = block[i2]; 36998530Sfenner if (c1 != c2) return (c1 > c2); 37098530Sfenner i1++; i2++; 37198530Sfenner /* 4 */ 37298530Sfenner c1 = block[i1]; c2 = block[i2]; 37398530Sfenner if (c1 != c2) return (c1 > c2); 37498530Sfenner i1++; i2++; 37598530Sfenner /* 5 */ 37698530Sfenner c1 = block[i1]; c2 = block[i2]; 37798530Sfenner if (c1 != c2) return (c1 > c2); 37898530Sfenner i1++; i2++; 37998530Sfenner /* 6 */ 380127664Sbms c1 = block[i1]; c2 = block[i2]; 38198530Sfenner if (c1 != c2) return (c1 > c2); 38298530Sfenner i1++; i2++; 38398530Sfenner /* 7 */ 38498530Sfenner c1 = block[i1]; c2 = block[i2]; 38598530Sfenner if (c1 != c2) return (c1 > c2); 38698530Sfenner i1++; i2++; 38798530Sfenner /* 8 */ 38898530Sfenner c1 = block[i1]; c2 = block[i2]; 38998530Sfenner if (c1 != c2) return (c1 > c2); 39098530Sfenner i1++; i2++; 39198530Sfenner /* 9 */ 39298530Sfenner c1 = block[i1]; c2 = block[i2]; 39398530Sfenner if (c1 != c2) return (c1 > c2); 39498530Sfenner i1++; i2++; 39598530Sfenner /* 10 */ 39698530Sfenner c1 = block[i1]; c2 = block[i2]; 397172677Smlaier if (c1 != c2) return (c1 > c2); 398172677Smlaier i1++; i2++; 399172677Smlaier /* 11 */ 400172677Smlaier c1 = block[i1]; c2 = block[i2]; 401172677Smlaier if (c1 != c2) return (c1 > c2); 40298530Sfenner i1++; i2++; 403172677Smlaier /* 12 */ 404172677Smlaier c1 = block[i1]; c2 = block[i2]; 405172677Smlaier if (c1 != c2) return (c1 > c2); 406172677Smlaier i1++; i2++; 40798530Sfenner 40898530Sfenner k = nblock + 8; 40998530Sfenner 41098530Sfenner do { 41198530Sfenner /* 1 */ 41298530Sfenner c1 = block[i1]; c2 = block[i2]; 41398530Sfenner if (c1 != c2) return (c1 > c2); 41498530Sfenner s1 = quadrant[i1]; s2 = quadrant[i2]; 41598530Sfenner if (s1 != s2) return (s1 > s2); 416127664Sbms i1++; i2++; 41798530Sfenner /* 2 */ 41898530Sfenner c1 = block[i1]; c2 = block[i2]; 41998530Sfenner if (c1 != c2) return (c1 > c2); 42098530Sfenner s1 = quadrant[i1]; s2 = quadrant[i2]; 421127664Sbms if (s1 != s2) return (s1 > s2); 422127664Sbms i1++; i2++; 423127664Sbms /* 3 */ 424127664Sbms c1 = block[i1]; c2 = block[i2]; 425127664Sbms if (c1 != c2) return (c1 > c2); 426127664Sbms s1 = quadrant[i1]; s2 = quadrant[i2]; 427127664Sbms if (s1 != s2) return (s1 > s2); 428146768Ssam i1++; i2++; 429127664Sbms /* 4 */ 430147894Ssam c1 = block[i1]; c2 = block[i2]; 431127664Sbms if (c1 != c2) return (c1 > c2); 432127664Sbms s1 = quadrant[i1]; s2 = quadrant[i2]; 433127664Sbms if (s1 != s2) return (s1 > s2); 434127664Sbms i1++; i2++; 435127664Sbms /* 5 */ 436127664Sbms c1 = block[i1]; c2 = block[i2]; 43775107Sfenner if (c1 != c2) return (c1 > c2); 43826175Sfenner s1 = quadrant[i1]; s2 = quadrant[i2]; 43926175Sfenner if (s1 != s2) return (s1 > s2); 44075107Sfenner i1++; i2++; 44175107Sfenner /* 6 */ 44275107Sfenner c1 = block[i1]; c2 = block[i2]; 443127664Sbms if (c1 != c2) return (c1 > c2); 44475107Sfenner s1 = quadrant[i1]; s2 = quadrant[i2]; 445127664Sbms if (s1 != s2) return (s1 > s2); 446127664Sbms i1++; i2++; 44726175Sfenner /* 7 */ 44875107Sfenner c1 = block[i1]; c2 = block[i2]; 44975107Sfenner if (c1 != c2) return (c1 > c2); 45075107Sfenner s1 = quadrant[i1]; s2 = quadrant[i2]; 45175107Sfenner if (s1 != s2) return (s1 > s2); 45275107Sfenner i1++; i2++; 45375107Sfenner /* 8 */ 45426175Sfenner c1 = block[i1]; c2 = block[i2]; 45575107Sfenner if (c1 != c2) return (c1 > c2); 456127664Sbms s1 = quadrant[i1]; s2 = quadrant[i2]; 45775107Sfenner if (s1 != s2) return (s1 > s2); 45875107Sfenner i1++; i2++; 45975107Sfenner 46075107Sfenner if (i1 >= nblock) i1 -= nblock; 46175107Sfenner if (i2 >= nblock) i2 -= nblock; 46275107Sfenner 46398530Sfenner k -= 8; 46475107Sfenner (*budget)--; 46575107Sfenner } 46675107Sfenner while (k >= 0); 46775107Sfenner 46875107Sfenner return False; 46975107Sfenner} 47075107Sfenner 47175107Sfenner 47275107Sfenner/*---------------------------------------------*/ 47375107Sfenner/*-- 47426175Sfenner Knuth's increments seem to work better 47575107Sfenner than Incerpi-Sedgewick here. Possibly 47675107Sfenner because the number of elems to sort is 47775107Sfenner usually small, typically <= 20. 47875107Sfenner--*/ 47975107Sfennerstatic 48075107SfennerInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 48175107Sfenner 9841, 29524, 88573, 265720, 48275107Sfenner 797161, 2391484 }; 48375107Sfenner 48475107Sfennerstatic 48575107Sfennervoid mainSimpleSort ( UInt32* ptr, 48675107Sfenner UChar* block, 48775107Sfenner UInt16* quadrant, 48875107Sfenner Int32 nblock, 48975107Sfenner Int32 lo, 49075107Sfenner Int32 hi, 49175107Sfenner Int32 d, 49275107Sfenner Int32* budget ) 49375107Sfenner{ 49498530Sfenner Int32 i, j, h, bigN, hp; 49526175Sfenner UInt32 v; 496127664Sbms 497127664Sbms bigN = hi - lo + 1; 498127664Sbms if (bigN < 2) return; 499127664Sbms 500127664Sbms hp = 0; 501127664Sbms while (incs[hp] < bigN) hp++; 502127664Sbms hp--; 503127664Sbms 504127664Sbms for (; hp >= 0; hp--) { 505127664Sbms h = incs[hp]; 506127664Sbms 507127664Sbms i = lo + h; 50826175Sfenner while (True) { 509127664Sbms 51098530Sfenner /*-- copy 1 --*/ 511127664Sbms if (i > hi) break; 51275107Sfenner v = ptr[i]; 51375107Sfenner j = i; 51426175Sfenner while ( mainGtU ( 51575107Sfenner ptr[j-h]+d, v+d, block, quadrant, nblock, budget 51626175Sfenner ) ) { 51775107Sfenner ptr[j] = ptr[j-h]; 51875107Sfenner j = j - h; 51975107Sfenner if (j <= (lo + h - 1)) break; 52075107Sfenner } 52175107Sfenner ptr[j] = v; 52275107Sfenner i++; 52375107Sfenner 52426175Sfenner /*-- copy 2 --*/ 52575107Sfenner if (i > hi) break; 52626175Sfenner v = ptr[i]; 52775107Sfenner j = i; 528147894Ssam while ( mainGtU ( 529147894Ssam ptr[j-h]+d, v+d, block, quadrant, nblock, budget 530172677Smlaier ) ) { 531172677Smlaier ptr[j] = ptr[j-h]; 532172677Smlaier j = j - h; 533172677Smlaier if (j <= (lo + h - 1)) break; 534172677Smlaier } 535172677Smlaier ptr[j] = v; 536172677Smlaier i++; 537172677Smlaier 538172677Smlaier /*-- copy 3 --*/ 539172677Smlaier if (i > hi) break; 540172677Smlaier v = ptr[i]; 541172677Smlaier j = i; 542172677Smlaier while ( mainGtU ( 543172677Smlaier ptr[j-h]+d, v+d, block, quadrant, nblock, budget 544172677Smlaier ) ) { 545172677Smlaier ptr[j] = ptr[j-h]; 546147894Ssam j = j - h; 547147894Ssam if (j <= (lo + h - 1)) break; 548147894Ssam } 549147894Ssam ptr[j] = v; 550147894Ssam i++; 551147894Ssam 552147894Ssam if (*budget < 0) return; 553147894Ssam } 554147894Ssam } 555147894Ssam} 556147894Ssam 557147894Ssam 558147894Ssam/*---------------------------------------------*/ 559147894Ssam/*-- 560147894Ssam The following is an implementation of 561147894Ssam an elegant 3-way quicksort for strings, 562147894Ssam described in a paper "Fast Algorithms for 563147894Ssam Sorting and Searching Strings", by Robert 564162012Ssam Sedgewick and Jon L. Bentley. 565147894Ssam--*/ 566147894Ssam 567147894Ssam#define mswap(zz1, zz2) \ 568147894Ssam { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 569147894Ssam 570147894Ssam#define mvswap(zzp1, zzp2, zzn) \ 571162012Ssam{ \ 572147894Ssam Int32 yyp1 = (zzp1); \ 573147894Ssam Int32 yyp2 = (zzp2); \ 574147894Ssam Int32 yyn = (zzn); \ 57575107Sfenner while (yyn > 0) { \ 57626175Sfenner mswap(ptr[yyp1], ptr[yyp2]); \ 57775107Sfenner yyp1++; yyp2++; yyn--; \ 57875107Sfenner } \ 57975107Sfenner} 58075107Sfenner 58175107Sfennerstatic 58275107Sfenner__inline__ 58375107SfennerUChar mmed3 ( UChar a, UChar b, UChar c ) 58475107Sfenner{ 58575107Sfenner UChar t; 58675107Sfenner if (a > b) { t = a; a = b; b = t; }; 58726175Sfenner if (b > c) { 58898530Sfenner b = c; 58926175Sfenner if (a > b) b = a; 59075107Sfenner } 59175107Sfenner return b; 59275107Sfenner} 59375107Sfenner 59475107Sfenner#define mmin(a,b) ((a) < (b)) ? (a) : (b) 59575107Sfenner 59675107Sfenner#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 59775107Sfenner stackHi[sp] = hz; \ 59875107Sfenner stackD [sp] = dz; \ 59926175Sfenner sp++; } 60075107Sfenner 60175107Sfenner#define mpop(lz,hz,dz) { sp--; \ 60275107Sfenner lz = stackLo[sp]; \ 60326175Sfenner hz = stackHi[sp]; \ 60475107Sfenner dz = stackD [sp]; } 60575107Sfenner 60675107Sfenner 60775107Sfenner#define mnextsize(az) (nextHi[az]-nextLo[az]) 60875107Sfenner 60975107Sfenner#define mnextswap(az,bz) \ 61075107Sfenner { Int32 tz; \ 61175107Sfenner tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 61275107Sfenner tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 61375107Sfenner tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 61475107Sfenner 61575107Sfenner 61675107Sfenner#define MAIN_QSORT_SMALL_THRESH 20 61775107Sfenner#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 61875107Sfenner#define MAIN_QSORT_STACK_SIZE 100 61975107Sfenner 62075107Sfennerstatic 62175107Sfennervoid mainQSort3 ( UInt32* ptr, 62275107Sfenner UChar* block, 62326175Sfenner UInt16* quadrant, 62475107Sfenner Int32 nblock, 62575107Sfenner Int32 loSt, 62675107Sfenner Int32 hiSt, 62775107Sfenner Int32 dSt, 62875107Sfenner Int32* budget ) 62975107Sfenner{ 63075107Sfenner Int32 unLo, unHi, ltLo, gtHi, n, m, med; 63175107Sfenner Int32 sp, lo, hi, d; 63226175Sfenner 63375107Sfenner Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 63475107Sfenner Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 63575107Sfenner Int32 stackD [MAIN_QSORT_STACK_SIZE]; 636127664Sbms 637127664Sbms Int32 nextLo[3]; 63875107Sfenner Int32 nextHi[3]; 639127664Sbms Int32 nextD [3]; 64075107Sfenner 64175107Sfenner sp = 0; 64275107Sfenner mpush ( loSt, hiSt, dSt ); 643127664Sbms 64475107Sfenner while (sp > 0) { 645127664Sbms 64675107Sfenner AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 64775107Sfenner 64875107Sfenner mpop ( lo, hi, d ); 64975107Sfenner if (hi - lo < MAIN_QSORT_SMALL_THRESH || 65075107Sfenner d > MAIN_QSORT_DEPTH_THRESH) { 65175107Sfenner mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 65275107Sfenner if (*budget < 0) return; 65375107Sfenner continue; 65475107Sfenner } 655127664Sbms 65675107Sfenner med = (Int32) 657127664Sbms mmed3 ( block[ptr[ lo ]+d], 658127664Sbms block[ptr[ hi ]+d], 65975107Sfenner block[ptr[ (lo+hi)>>1 ]+d] ); 66075107Sfenner 66175107Sfenner unLo = ltLo = lo; 66275107Sfenner unHi = gtHi = hi; 66375107Sfenner 66475107Sfenner while (True) { 66575107Sfenner while (True) { 66675107Sfenner if (unLo > unHi) break; 66775107Sfenner n = ((Int32)block[ptr[unLo]+d]) - med; 66875107Sfenner if (n == 0) { 66975107Sfenner mswap(ptr[unLo], ptr[ltLo]); 67075107Sfenner ltLo++; unLo++; continue; 67175107Sfenner }; 67275107Sfenner if (n > 0) break; 67398530Sfenner unLo++; 67475107Sfenner } 67575107Sfenner while (True) { 67675107Sfenner if (unLo > unHi) break; 67775107Sfenner n = ((Int32)block[ptr[unHi]+d]) - med; 67875107Sfenner if (n == 0) { 67975107Sfenner mswap(ptr[unHi], ptr[gtHi]); 68075107Sfenner gtHi--; unHi--; continue; 68175107Sfenner }; 68275107Sfenner if (n < 0) break; 68375107Sfenner unHi--; 68475107Sfenner } 685172677Smlaier if (unLo > unHi) break; 68675107Sfenner mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 68775107Sfenner } 68875107Sfenner 68975107Sfenner AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 69075107Sfenner 69198530Sfenner if (gtHi < ltLo) { 69298530Sfenner mpush(lo, hi, d+1 ); 69398530Sfenner continue; 69498530Sfenner } 69598530Sfenner 69698530Sfenner n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 69798530Sfenner m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 69898530Sfenner 69998530Sfenner n = lo + unLo - ltLo - 1; 70098530Sfenner m = hi - (gtHi - unHi) + 1; 70198530Sfenner 70298530Sfenner nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 70398530Sfenner nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 70498530Sfenner nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 70598530Sfenner 70698530Sfenner if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 70798530Sfenner if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 70898530Sfenner if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 70998530Sfenner 71098530Sfenner AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 71198530Sfenner AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 71298530Sfenner 71398530Sfenner mpush (nextLo[0], nextHi[0], nextD[0]); 71498530Sfenner mpush (nextLo[1], nextHi[1], nextD[1]); 71598530Sfenner mpush (nextLo[2], nextHi[2], nextD[2]); 71698530Sfenner } 71798530Sfenner} 71898530Sfenner 71998530Sfenner#undef mswap 72098530Sfenner#undef mvswap 72198530Sfenner#undef mpush 72298530Sfenner#undef mpop 723172677Smlaier#undef mmin 724172677Smlaier#undef mnextsize 725172677Smlaier#undef mnextswap 726172677Smlaier#undef MAIN_QSORT_SMALL_THRESH 727172677Smlaier#undef MAIN_QSORT_DEPTH_THRESH 728172677Smlaier#undef MAIN_QSORT_STACK_SIZE 729172677Smlaier 730172677Smlaier 731172677Smlaier/*---------------------------------------------*/ 732172677Smlaier/* Pre: 73398530Sfenner nblock > N_OVERSHOOT 734172677Smlaier block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 73575107Sfenner ((UChar*)block32) [0 .. nblock-1] holds block 73698530Sfenner ptr exists for [0 .. nblock-1] 73798530Sfenner 73898530Sfenner Post: 73975107Sfenner ((UChar*)block32) [0 .. nblock-1] holds block 74026175Sfenner All other areas of block32 destroyed 74126175Sfenner ftab [0 .. 65536 ] destroyed 742146768Ssam ptr [0 .. nblock-1] holds sorted order 743146768Ssam if (*budget < 0), sorting was abandoned 744146768Ssam*/ 745146768Ssam 746146768Ssam#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 747146768Ssam#define SETMASK (1 << 21) 748146768Ssam#define CLEARMASK (~(SETMASK)) 749146768Ssam 750146768Ssamstatic 751146768Ssamvoid mainSort ( UInt32* ptr, 752146768Ssam UChar* block, 753146768Ssam UInt16* quadrant, 754146768Ssam UInt32* ftab, 755146768Ssam Int32 nblock, 756146768Ssam Int32 verb, 757146768Ssam Int32* budget ) 758146768Ssam{ 759146768Ssam Int32 i, j, k, ss, sb; 760146768Ssam Int32 runningOrder[256]; 761146768Ssam Bool bigDone[256]; 762146768Ssam Int32 copyStart[256]; 763146768Ssam Int32 copyEnd [256]; 764146768Ssam UChar c1; 765146768Ssam Int32 numQSorted; 766146768Ssam UInt16 s; 767146768Ssam if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 768146768Ssam 769146768Ssam /*-- set up the 2-byte frequency table --*/ 770146768Ssam for (i = 65536; i >= 0; i--) ftab[i] = 0; 771146768Ssam 772146768Ssam j = block[0] << 8; 773146768Ssam i = nblock-1; 774146768Ssam for (; i >= 3; i -= 4) { 775146768Ssam quadrant[i] = 0; 776146768Ssam j = (j >> 8) | ( ((UInt16)block[i]) << 8); 777146768Ssam ftab[j]++; 778146768Ssam quadrant[i-1] = 0; 779146768Ssam j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 780146768Ssam ftab[j]++; 781146768Ssam quadrant[i-2] = 0; 782146768Ssam j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 783146768Ssam ftab[j]++; 784146768Ssam quadrant[i-3] = 0; 78575107Sfenner j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 78675107Sfenner ftab[j]++; 78798530Sfenner } 78898530Sfenner for (; i >= 0; i--) { 78998530Sfenner quadrant[i] = 0; 79098530Sfenner j = (j >> 8) | ( ((UInt16)block[i]) << 8); 79198530Sfenner ftab[j]++; 79275107Sfenner } 793127664Sbms 794127664Sbms /*-- (emphasises close relationship of block & quadrant) --*/ 79526175Sfenner for (i = 0; i < BZ_N_OVERSHOOT; i++) { 79698530Sfenner block [nblock+i] = block[i]; 79798530Sfenner quadrant[nblock+i] = 0; 79898530Sfenner } 799127664Sbms 80098530Sfenner if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 801127664Sbms 80298530Sfenner /*-- Complete the initial radix sort --*/ 80398530Sfenner for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 80498530Sfenner 80598530Sfenner s = block[0] << 8; 80698530Sfenner i = nblock-1; 80798530Sfenner for (; i >= 3; i -= 4) { 808172677Smlaier s = (s >> 8) | (block[i] << 8); 809172677Smlaier j = ftab[s] -1; 810172677Smlaier ftab[s] = j; 811172677Smlaier ptr[j] = i; 812172677Smlaier s = (s >> 8) | (block[i-1] << 8); 813172677Smlaier j = ftab[s] -1; 814172677Smlaier ftab[s] = j; 815172677Smlaier ptr[j] = i-1; 816172677Smlaier s = (s >> 8) | (block[i-2] << 8); 817172677Smlaier j = ftab[s] -1; 818172677Smlaier ftab[s] = j; 819172677Smlaier ptr[j] = i-2; 820172677Smlaier s = (s >> 8) | (block[i-3] << 8); 821172677Smlaier j = ftab[s] -1; 822172677Smlaier ftab[s] = j; 823172677Smlaier ptr[j] = i-3; 824172677Smlaier } 82598530Sfenner for (; i >= 0; i--) { 82698530Sfenner s = (s >> 8) | (block[i] << 8); 82798530Sfenner j = ftab[s] -1; 82898530Sfenner ftab[s] = j; 82998530Sfenner ptr[j] = i; 83098530Sfenner } 83198530Sfenner 83298530Sfenner /*-- 83398530Sfenner Now ftab contains the first loc of every small bucket. 83498530Sfenner Calculate the running order, from smallest to largest 83598530Sfenner big bucket. 83698530Sfenner --*/ 83798530Sfenner for (i = 0; i <= 255; i++) { 83898530Sfenner bigDone [i] = False; 83998530Sfenner runningOrder[i] = i; 84098530Sfenner } 84198530Sfenner 84298530Sfenner { 84398530Sfenner Int32 vv; 84498530Sfenner Int32 h = 1; 84598530Sfenner do h = 3 * h + 1; while (h <= 256); 84698530Sfenner do { 847146768Ssam h = h / 3; 848146768Ssam for (i = h; i <= 255; i++) { 849146768Ssam vv = runningOrder[i]; 850146768Ssam j = i; 85198530Sfenner while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 852146768Ssam runningOrder[j] = runningOrder[j-h]; 853146768Ssam j = j - h; 854172677Smlaier if (j <= (h - 1)) goto zero; 855172677Smlaier } 85698530Sfenner zero: 85798530Sfenner runningOrder[j] = vv; 85898530Sfenner } 85998530Sfenner } while (h != 1); 86098530Sfenner } 86198530Sfenner 86298530Sfenner /*-- 86398530Sfenner The main sorting loop. 86498530Sfenner --*/ 86598530Sfenner 86698530Sfenner numQSorted = 0; 86798530Sfenner 86898530Sfenner for (i = 0; i <= 255; i++) { 86998530Sfenner 87098530Sfenner /*-- 87198530Sfenner Process big buckets, starting with the least full. 87298530Sfenner Basically this is a 3-step process in which we call 87398530Sfenner mainQSort3 to sort the small buckets [ss, j], but 87498530Sfenner also make a big effort to avoid the calls if we can. 87598530Sfenner --*/ 87698530Sfenner ss = runningOrder[i]; 87798530Sfenner 87898530Sfenner /*-- 87998530Sfenner Step 1: 88098530Sfenner Complete the big bucket [ss] by quicksorting 88198530Sfenner any unsorted small buckets [ss, j], for j != ss. 88298530Sfenner Hopefully previous pointer-scanning phases have already 88398530Sfenner completed many of the small buckets [ss, j], so 88498530Sfenner we don't have to sort them at all. 88598530Sfenner --*/ 886172677Smlaier for (j = 0; j <= 255; j++) { 887172677Smlaier if (j != ss) { 888172677Smlaier sb = (ss << 8) + j; 889172677Smlaier if ( ! (ftab[sb] & SETMASK) ) { 890172677Smlaier Int32 lo = ftab[sb] & CLEARMASK; 89198530Sfenner Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 892172677Smlaier if (hi > lo) { 893172677Smlaier if (verb >= 4) 89475107Sfenner VPrintf4 ( " qsort [0x%x, 0x%x] " 89575107Sfenner "done %d this %d\n", 89626175Sfenner ss, j, numQSorted, hi - lo + 1 ); 89775107Sfenner mainQSort3 ( 898127664Sbms ptr, block, quadrant, nblock, 89975107Sfenner lo, hi, BZ_N_RADIX, budget 900127664Sbms ); 901127664Sbms numQSorted += (hi - lo + 1); 90275107Sfenner if (*budget < 0) return; 903127664Sbms } 90475107Sfenner } 905127664Sbms ftab[sb] |= SETMASK; 906127664Sbms } 907127664Sbms } 908127664Sbms 909127664Sbms AssertH ( !bigDone[ss], 1006 ); 910127664Sbms 911127664Sbms /*-- 912127664Sbms Step 2: 913147894Ssam Now scan this big bucket [ss] so as to synthesise the 914147894Ssam sorted order for small buckets [t, ss] for all t, 915147894Ssam including, magically, the bucket [ss,ss] too. 916147894Ssam This will avoid doing Real Work in subsequent Step 1's. 917147894Ssam --*/ 918127664Sbms { 919127664Sbms for (j = 0; j <= 255; j++) { 920127664Sbms copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 921127664Sbms copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 922127664Sbms } 923127664Sbms for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 924127664Sbms k = ptr[j]-1; if (k < 0) k += nblock; 925127664Sbms c1 = block[k]; 926127664Sbms if (!bigDone[c1]) 92775107Sfenner ptr[ copyStart[c1]++ ] = k; 92875107Sfenner } 92975107Sfenner for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 930127664Sbms k = ptr[j]-1; if (k < 0) k += nblock; 93175107Sfenner c1 = block[k]; 93275107Sfenner if (!bigDone[c1]) 93375107Sfenner ptr[ copyEnd[c1]-- ] = k; 93475107Sfenner } 93575107Sfenner } 93675107Sfenner 93775107Sfenner AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 93875107Sfenner || 93926175Sfenner /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 94026175Sfenner Necessity for this case is demonstrated by compressing 94175107Sfenner a sequence of approximately 48.5 million of character 94275107Sfenner 251; 1.0.0/1.0.1 will then die here. */ 943127664Sbms (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 944127664Sbms 1007 ) 94575107Sfenner 94639291Sfenner for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 947127664Sbms 948127664Sbms /*-- 949127664Sbms Step 3: 95075107Sfenner The [ss] big bucket is now done. Record this fact, 95175107Sfenner and update the quadrant descriptors. Remember to 95275107Sfenner update quadrants in the overshoot area too, if 95375107Sfenner necessary. The "if (i < 255)" test merely skips 95475107Sfenner this updating for the last bucket processed, since 95575107Sfenner updating for the last bucket is pointless. 95675107Sfenner 95775107Sfenner The quadrant array provides a way to incrementally 95875107Sfenner cache sort orderings, as they appear, so as to 959127664Sbms make subsequent comparisons in fullGtU() complete 96075107Sfenner faster. For repetitive blocks this makes a big 96175107Sfenner difference (but not big enough to be able to avoid 96275107Sfenner the fallback sorting mechanism, exponential radix sort). 96375107Sfenner 96475107Sfenner The precise meaning is: at all times: 965172677Smlaier 96675107Sfenner for 0 <= i < nblock and 0 <= j <= nblock 96775107Sfenner 96875107Sfenner if block[i] != block[j], 96975107Sfenner 97075107Sfenner then the relative values of quadrant[i] and 97175107Sfenner quadrant[j] are meaningless. 97275107Sfenner 97375107Sfenner else { 97475107Sfenner if quadrant[i] < quadrant[j] 97575107Sfenner then the string starting at i lexicographically 97675107Sfenner precedes the string starting at j 97775107Sfenner 97875107Sfenner else if quadrant[i] > quadrant[j] 97975107Sfenner then the string starting at j lexicographically 98075107Sfenner precedes the string starting at i 98175107Sfenner 98275107Sfenner else 98375107Sfenner the relative ordering of the strings starting 98475107Sfenner at i and j has not yet been determined. 98575107Sfenner } 98675107Sfenner --*/ 98775107Sfenner bigDone[ss] = True; 98875107Sfenner 98975107Sfenner if (i < 255) { 99075107Sfenner Int32 bbStart = ftab[ss << 8] & CLEARMASK; 99175107Sfenner Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 99275107Sfenner Int32 shifts = 0; 99375107Sfenner 99475107Sfenner while ((bbSize >> shifts) > 65534) shifts++; 99575107Sfenner 99675107Sfenner for (j = bbSize-1; j >= 0; j--) { 99775107Sfenner Int32 a2update = ptr[bbStart + j]; 99875107Sfenner UInt16 qVal = (UInt16)(j >> shifts); 99975107Sfenner quadrant[a2update] = qVal; 100075107Sfenner if (a2update < BZ_N_OVERSHOOT) 100175107Sfenner quadrant[a2update + nblock] = qVal; 100275107Sfenner } 100375107Sfenner AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 100475107Sfenner } 100575107Sfenner 100675107Sfenner } 100775107Sfenner 100875107Sfenner if (verb >= 4) 100975107Sfenner VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 101039291Sfenner nblock, numQSorted, nblock - numQSorted ); 101139291Sfenner} 101275107Sfenner 1013127664Sbms#undef BIGFREQ 101475107Sfenner#undef SETMASK 101575107Sfenner#undef CLEARMASK 101675107Sfenner 101775107Sfenner 1018127664Sbms/*---------------------------------------------*/ 101975107Sfenner/* Pre: 1020127664Sbms nblock > 0 102175107Sfenner arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 102275107Sfenner ((UChar*)arr2) [0 .. nblock-1] holds block 102375107Sfenner arr1 exists for [0 .. nblock-1] 102475107Sfenner 102575107Sfenner Post: 102675107Sfenner ((UChar*)arr2) [0 .. nblock-1] holds block 1027127664Sbms All other areas of block destroyed 102875107Sfenner ftab [ 0 .. 65536 ] destroyed 102975107Sfenner arr1 [0 .. nblock-1] holds sorted order 103075107Sfenner*/ 103139291Sfennervoid BZ2_blockSort ( EState* s ) 103239291Sfenner{ 103375107Sfenner UInt32* ptr = s->ptr; 103498530Sfenner UChar* block = s->block; 103598530Sfenner UInt32* ftab = s->ftab; 103698530Sfenner Int32 nblock = s->nblock; 103798530Sfenner Int32 verb = s->verbosity; 103898530Sfenner Int32 wfact = s->workFactor; 103998530Sfenner UInt16* quadrant; 104098530Sfenner Int32 budget; 104198530Sfenner Int32 budgetInit; 104298530Sfenner Int32 i; 104398530Sfenner 104498530Sfenner if (nblock < 10000) { 104575107Sfenner fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 104675107Sfenner } else { 104775107Sfenner /* Calculate the location for quadrant, remembering to get 104875107Sfenner the alignment right. Assumes that &(block[0]) is at least 1049127664Sbms 2-byte aligned -- this should be ok since block is really 1050127664Sbms the first section of arr2. 1051127664Sbms */ 1052127664Sbms i = nblock+BZ_N_OVERSHOOT; 105375107Sfenner if (i & 1) i++; 105475107Sfenner quadrant = (UInt16*)(&(block[i])); 105575107Sfenner 105675107Sfenner /* (wfact-1) / 3 puts the default-factor-30 105775107Sfenner transition point at very roughly the same place as 105875107Sfenner with v0.1 and v0.9.0. 1059147894Ssam Not that it particularly matters any more, since the 1060147894Ssam resulting compressed stream is now the same regardless 1061147894Ssam of whether or not we use the main sort or fallback sort. 1062147894Ssam */ 1063162012Ssam if (wfact < 1 ) wfact = 1; 1064147894Ssam if (wfact > 100) wfact = 100; 1065147894Ssam budgetInit = nblock * ((wfact-1) / 3); 1066147894Ssam budget = budgetInit; 1067147894Ssam 1068147894Ssam mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 1069147894Ssam if (verb >= 3) 1070147894Ssam VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 1071147894Ssam budgetInit - budget, 1072147894Ssam nblock, 1073147894Ssam (float)(budgetInit - budget) / 1074147894Ssam (float)(nblock==0 ? 1 : nblock) ); 1075147894Ssam if (budget < 0) { 1076147894Ssam if (verb >= 2) 1077147894Ssam VPrintf0 ( " too repetitive; using fallback" 1078147894Ssam " sorting algorithm\n" ); 1079147894Ssam fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 1080147894Ssam } 1081127664Sbms } 1082127664Sbms 108398530Sfenner s->origPtr = -1; 108498530Sfenner for (i = 0; i < s->nblock; i++) 108598530Sfenner if (ptr[i] == 0) 108698530Sfenner { s->origPtr = i; break; }; 108798530Sfenner 108898530Sfenner AssertH( s->origPtr != -1, 1003 ); 108998530Sfenner} 109098530Sfenner 1091127664Sbms 1092127664Sbms/*-------------------------------------------------------------*/ 1093127664Sbms/*--- end blocksort.c ---*/ 1094127664Sbms/*-------------------------------------------------------------*/ 1095127664Sbms