178556Sobrien 278556Sobrien/*-------------------------------------------------------------*/ 378556Sobrien/*--- Block sorting machinery ---*/ 478556Sobrien/*--- blocksort.c ---*/ 578556Sobrien/*-------------------------------------------------------------*/ 678556Sobrien 7167974Sdelphij/* ------------------------------------------------------------------ 8167974Sdelphij This file is part of bzip2/libbzip2, a program and library for 9167974Sdelphij lossless, block-sorting data compression. 1078556Sobrien 11215041Sobrien bzip2/libbzip2 version 1.0.6 of 6 September 2010 12215041Sobrien Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org> 1378556Sobrien 14167974Sdelphij Please read the WARNING, DISCLAIMER and PATENTS sections in the 15167974Sdelphij README file. 1678556Sobrien 17167974Sdelphij This program is released under the terms of the license contained 18167974Sdelphij in the file LICENSE. 19167974Sdelphij ------------------------------------------------------------------ */ 2078556Sobrien 2178556Sobrien 2278556Sobrien#include "bzlib_private.h" 2378556Sobrien 2478556Sobrien/*---------------------------------------------*/ 2578556Sobrien/*--- Fallback O(N log(N)^2) sorting ---*/ 2678556Sobrien/*--- algorithm, for repetitive blocks ---*/ 2778556Sobrien/*---------------------------------------------*/ 2878556Sobrien 2978556Sobrien/*---------------------------------------------*/ 3078556Sobrienstatic 3178556Sobrien__inline__ 3278556Sobrienvoid fallbackSimpleSort ( UInt32* fmap, 3378556Sobrien UInt32* eclass, 3478556Sobrien Int32 lo, 3578556Sobrien Int32 hi ) 3678556Sobrien{ 3778556Sobrien Int32 i, j, tmp; 3878556Sobrien UInt32 ec_tmp; 3978556Sobrien 4078556Sobrien if (lo == hi) return; 4178556Sobrien 4278556Sobrien if (hi - lo > 3) { 4378556Sobrien for ( i = hi-4; i >= lo; i-- ) { 4478556Sobrien tmp = fmap[i]; 4578556Sobrien ec_tmp = eclass[tmp]; 4678556Sobrien for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 4778556Sobrien fmap[j-4] = fmap[j]; 4878556Sobrien fmap[j-4] = tmp; 4978556Sobrien } 5078556Sobrien } 5178556Sobrien 5278556Sobrien for ( i = hi-1; i >= lo; i-- ) { 5378556Sobrien tmp = fmap[i]; 5478556Sobrien ec_tmp = eclass[tmp]; 5578556Sobrien for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 5678556Sobrien fmap[j-1] = fmap[j]; 5778556Sobrien fmap[j-1] = tmp; 5878556Sobrien } 5978556Sobrien} 6078556Sobrien 6178556Sobrien 6278556Sobrien/*---------------------------------------------*/ 6378556Sobrien#define fswap(zz1, zz2) \ 6478556Sobrien { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 6578556Sobrien 6678556Sobrien#define fvswap(zzp1, zzp2, zzn) \ 6778556Sobrien{ \ 6878556Sobrien Int32 yyp1 = (zzp1); \ 6978556Sobrien Int32 yyp2 = (zzp2); \ 7078556Sobrien Int32 yyn = (zzn); \ 7178556Sobrien while (yyn > 0) { \ 7278556Sobrien fswap(fmap[yyp1], fmap[yyp2]); \ 7378556Sobrien yyp1++; yyp2++; yyn--; \ 7478556Sobrien } \ 7578556Sobrien} 7678556Sobrien 7778556Sobrien 7878556Sobrien#define fmin(a,b) ((a) < (b)) ? (a) : (b) 7978556Sobrien 8078556Sobrien#define fpush(lz,hz) { stackLo[sp] = lz; \ 8178556Sobrien stackHi[sp] = hz; \ 8278556Sobrien sp++; } 8378556Sobrien 8478556Sobrien#define fpop(lz,hz) { sp--; \ 8578556Sobrien lz = stackLo[sp]; \ 8678556Sobrien hz = stackHi[sp]; } 8778556Sobrien 8878556Sobrien#define FALLBACK_QSORT_SMALL_THRESH 10 8978556Sobrien#define FALLBACK_QSORT_STACK_SIZE 100 9078556Sobrien 9178556Sobrien 9278556Sobrienstatic 9378556Sobrienvoid fallbackQSort3 ( UInt32* fmap, 9478556Sobrien UInt32* eclass, 9578556Sobrien Int32 loSt, 9678556Sobrien Int32 hiSt ) 9778556Sobrien{ 9878556Sobrien Int32 unLo, unHi, ltLo, gtHi, n, m; 9978556Sobrien Int32 sp, lo, hi; 10078556Sobrien UInt32 med, r, r3; 10178556Sobrien Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 10278556Sobrien Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 10378556Sobrien 10478556Sobrien r = 0; 10578556Sobrien 10678556Sobrien sp = 0; 10778556Sobrien fpush ( loSt, hiSt ); 10878556Sobrien 10978556Sobrien while (sp > 0) { 11078556Sobrien 111167974Sdelphij AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); 11278556Sobrien 11378556Sobrien fpop ( lo, hi ); 11478556Sobrien if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 11578556Sobrien fallbackSimpleSort ( fmap, eclass, lo, hi ); 11678556Sobrien continue; 11778556Sobrien } 11878556Sobrien 11978556Sobrien /* Random partitioning. Median of 3 sometimes fails to 12078556Sobrien avoid bad cases. Median of 9 seems to help but 12178556Sobrien looks rather expensive. This too seems to work but 12278556Sobrien is cheaper. Guidance for the magic constants 12378556Sobrien 7621 and 32768 is taken from Sedgewick's algorithms 12478556Sobrien book, chapter 35. 12578556Sobrien */ 12678556Sobrien r = ((r * 7621) + 1) % 32768; 12778556Sobrien r3 = r % 3; 12878556Sobrien if (r3 == 0) med = eclass[fmap[lo]]; else 12978556Sobrien if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 13078556Sobrien med = eclass[fmap[hi]]; 13178556Sobrien 13278556Sobrien unLo = ltLo = lo; 13378556Sobrien unHi = gtHi = hi; 13478556Sobrien 13578556Sobrien while (1) { 13678556Sobrien while (1) { 13778556Sobrien if (unLo > unHi) break; 13878556Sobrien n = (Int32)eclass[fmap[unLo]] - (Int32)med; 13978556Sobrien if (n == 0) { 14078556Sobrien fswap(fmap[unLo], fmap[ltLo]); 14178556Sobrien ltLo++; unLo++; 14278556Sobrien continue; 14378556Sobrien }; 14478556Sobrien if (n > 0) break; 14578556Sobrien unLo++; 14678556Sobrien } 14778556Sobrien while (1) { 14878556Sobrien if (unLo > unHi) break; 14978556Sobrien n = (Int32)eclass[fmap[unHi]] - (Int32)med; 15078556Sobrien if (n == 0) { 15178556Sobrien fswap(fmap[unHi], fmap[gtHi]); 15278556Sobrien gtHi--; unHi--; 15378556Sobrien continue; 15478556Sobrien }; 15578556Sobrien if (n < 0) break; 15678556Sobrien unHi--; 15778556Sobrien } 15878556Sobrien if (unLo > unHi) break; 15978556Sobrien fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 16078556Sobrien } 16178556Sobrien 16278556Sobrien AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 16378556Sobrien 16478556Sobrien if (gtHi < ltLo) continue; 16578556Sobrien 16678556Sobrien n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 16778556Sobrien m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 16878556Sobrien 16978556Sobrien n = lo + unLo - ltLo - 1; 17078556Sobrien m = hi - (gtHi - unHi) + 1; 17178556Sobrien 17278556Sobrien if (n - lo > hi - m) { 17378556Sobrien fpush ( lo, n ); 17478556Sobrien fpush ( m, hi ); 17578556Sobrien } else { 17678556Sobrien fpush ( m, hi ); 17778556Sobrien fpush ( lo, n ); 17878556Sobrien } 17978556Sobrien } 18078556Sobrien} 18178556Sobrien 18278556Sobrien#undef fmin 18378556Sobrien#undef fpush 18478556Sobrien#undef fpop 18578556Sobrien#undef fswap 18678556Sobrien#undef fvswap 18778556Sobrien#undef FALLBACK_QSORT_SMALL_THRESH 18878556Sobrien#undef FALLBACK_QSORT_STACK_SIZE 18978556Sobrien 19078556Sobrien 19178556Sobrien/*---------------------------------------------*/ 19278556Sobrien/* Pre: 19378556Sobrien nblock > 0 19478556Sobrien eclass exists for [0 .. nblock-1] 19578556Sobrien ((UChar*)eclass) [0 .. nblock-1] holds block 19678556Sobrien ptr exists for [0 .. nblock-1] 19778556Sobrien 19878556Sobrien Post: 19978556Sobrien ((UChar*)eclass) [0 .. nblock-1] holds block 20078556Sobrien All other areas of eclass destroyed 20178556Sobrien fmap [0 .. nblock-1] holds sorted order 20278556Sobrien bhtab [ 0 .. 2+(nblock/32) ] destroyed 20378556Sobrien*/ 20478556Sobrien 20578556Sobrien#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 20678556Sobrien#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 20778556Sobrien#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 20878556Sobrien#define WORD_BH(zz) bhtab[(zz) >> 5] 20978556Sobrien#define UNALIGNED_BH(zz) ((zz) & 0x01f) 21078556Sobrien 21178556Sobrienstatic 21278556Sobrienvoid fallbackSort ( UInt32* fmap, 21378556Sobrien UInt32* eclass, 21478556Sobrien UInt32* bhtab, 21578556Sobrien Int32 nblock, 21678556Sobrien Int32 verb ) 21778556Sobrien{ 21878556Sobrien Int32 ftab[257]; 21978556Sobrien Int32 ftabCopy[256]; 22078556Sobrien Int32 H, i, j, k, l, r, cc, cc1; 22178556Sobrien Int32 nNotDone; 22278556Sobrien Int32 nBhtab; 22378556Sobrien UChar* eclass8 = (UChar*)eclass; 22478556Sobrien 22578556Sobrien /*-- 22678556Sobrien Initial 1-char radix sort to generate 22778556Sobrien initial fmap and initial BH bits. 22878556Sobrien --*/ 22978556Sobrien if (verb >= 4) 23078556Sobrien VPrintf0 ( " bucket sorting ...\n" ); 23178556Sobrien for (i = 0; i < 257; i++) ftab[i] = 0; 23278556Sobrien for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 23378556Sobrien for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 23478556Sobrien for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 23578556Sobrien 23678556Sobrien for (i = 0; i < nblock; i++) { 23778556Sobrien j = eclass8[i]; 23878556Sobrien k = ftab[j] - 1; 23978556Sobrien ftab[j] = k; 24078556Sobrien fmap[k] = i; 24178556Sobrien } 24278556Sobrien 24378556Sobrien nBhtab = 2 + (nblock / 32); 24478556Sobrien for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 24578556Sobrien for (i = 0; i < 256; i++) SET_BH(ftab[i]); 24678556Sobrien 24778556Sobrien /*-- 24878556Sobrien Inductively refine the buckets. Kind-of an 24978556Sobrien "exponential radix sort" (!), inspired by the 25078556Sobrien Manber-Myers suffix array construction algorithm. 25178556Sobrien --*/ 25278556Sobrien 25378556Sobrien /*-- set sentinel bits for block-end detection --*/ 25478556Sobrien for (i = 0; i < 32; i++) { 25578556Sobrien SET_BH(nblock + 2*i); 25678556Sobrien CLEAR_BH(nblock + 2*i + 1); 25778556Sobrien } 25878556Sobrien 25978556Sobrien /*-- the log(N) loop --*/ 26078556Sobrien H = 1; 26178556Sobrien while (1) { 26278556Sobrien 26378556Sobrien if (verb >= 4) 26478556Sobrien VPrintf1 ( " depth %6d has ", H ); 26578556Sobrien 26678556Sobrien j = 0; 26778556Sobrien for (i = 0; i < nblock; i++) { 26878556Sobrien if (ISSET_BH(i)) j = i; 26978556Sobrien k = fmap[i] - H; if (k < 0) k += nblock; 27078556Sobrien eclass[k] = j; 27178556Sobrien } 27278556Sobrien 27378556Sobrien nNotDone = 0; 27478556Sobrien r = -1; 27578556Sobrien while (1) { 27678556Sobrien 27778556Sobrien /*-- find the next non-singleton bucket --*/ 27878556Sobrien k = r + 1; 27978556Sobrien while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 28078556Sobrien if (ISSET_BH(k)) { 28178556Sobrien while (WORD_BH(k) == 0xffffffff) k += 32; 28278556Sobrien while (ISSET_BH(k)) k++; 28378556Sobrien } 28478556Sobrien l = k - 1; 28578556Sobrien if (l >= nblock) break; 28678556Sobrien while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 28778556Sobrien if (!ISSET_BH(k)) { 28878556Sobrien while (WORD_BH(k) == 0x00000000) k += 32; 28978556Sobrien while (!ISSET_BH(k)) k++; 29078556Sobrien } 29178556Sobrien r = k - 1; 29278556Sobrien if (r >= nblock) break; 29378556Sobrien 29478556Sobrien /*-- now [l, r] bracket current bucket --*/ 29578556Sobrien if (r > l) { 29678556Sobrien nNotDone += (r - l + 1); 29778556Sobrien fallbackQSort3 ( fmap, eclass, l, r ); 29878556Sobrien 29978556Sobrien /*-- scan bucket and generate header bits-- */ 30078556Sobrien cc = -1; 30178556Sobrien for (i = l; i <= r; i++) { 30278556Sobrien cc1 = eclass[fmap[i]]; 30378556Sobrien if (cc != cc1) { SET_BH(i); cc = cc1; }; 30478556Sobrien } 30578556Sobrien } 30678556Sobrien } 30778556Sobrien 30878556Sobrien if (verb >= 4) 30978556Sobrien VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 31078556Sobrien 31178556Sobrien H *= 2; 31278556Sobrien if (H > nblock || nNotDone == 0) break; 31378556Sobrien } 31478556Sobrien 31578556Sobrien /*-- 31678556Sobrien Reconstruct the original block in 31778556Sobrien eclass8 [0 .. nblock-1], since the 31878556Sobrien previous phase destroyed it. 31978556Sobrien --*/ 32078556Sobrien if (verb >= 4) 32178556Sobrien VPrintf0 ( " reconstructing block ...\n" ); 32278556Sobrien j = 0; 32378556Sobrien for (i = 0; i < nblock; i++) { 32478556Sobrien while (ftabCopy[j] == 0) j++; 32578556Sobrien ftabCopy[j]--; 32678556Sobrien eclass8[fmap[i]] = (UChar)j; 32778556Sobrien } 32878556Sobrien AssertH ( j < 256, 1005 ); 32978556Sobrien} 33078556Sobrien 33178556Sobrien#undef SET_BH 33278556Sobrien#undef CLEAR_BH 33378556Sobrien#undef ISSET_BH 33478556Sobrien#undef WORD_BH 33578556Sobrien#undef UNALIGNED_BH 33678556Sobrien 33778556Sobrien 33878556Sobrien/*---------------------------------------------*/ 33978556Sobrien/*--- The main, O(N^2 log(N)) sorting ---*/ 34078556Sobrien/*--- algorithm. Faster for "normal" ---*/ 34178556Sobrien/*--- non-repetitive blocks. ---*/ 34278556Sobrien/*---------------------------------------------*/ 34378556Sobrien 34478556Sobrien/*---------------------------------------------*/ 34578556Sobrienstatic 34678556Sobrien__inline__ 34778556SobrienBool mainGtU ( UInt32 i1, 34878556Sobrien UInt32 i2, 34978556Sobrien UChar* block, 35078556Sobrien UInt16* quadrant, 35178556Sobrien UInt32 nblock, 35278556Sobrien Int32* budget ) 35378556Sobrien{ 35478556Sobrien Int32 k; 35578556Sobrien UChar c1, c2; 35678556Sobrien UInt16 s1, s2; 35778556Sobrien 35878556Sobrien AssertD ( i1 != i2, "mainGtU" ); 35978556Sobrien /* 1 */ 36078556Sobrien c1 = block[i1]; c2 = block[i2]; 36178556Sobrien if (c1 != c2) return (c1 > c2); 36278556Sobrien i1++; i2++; 36378556Sobrien /* 2 */ 36478556Sobrien c1 = block[i1]; c2 = block[i2]; 36578556Sobrien if (c1 != c2) return (c1 > c2); 36678556Sobrien i1++; i2++; 36778556Sobrien /* 3 */ 36878556Sobrien c1 = block[i1]; c2 = block[i2]; 36978556Sobrien if (c1 != c2) return (c1 > c2); 37078556Sobrien i1++; i2++; 37178556Sobrien /* 4 */ 37278556Sobrien c1 = block[i1]; c2 = block[i2]; 37378556Sobrien if (c1 != c2) return (c1 > c2); 37478556Sobrien i1++; i2++; 37578556Sobrien /* 5 */ 37678556Sobrien c1 = block[i1]; c2 = block[i2]; 37778556Sobrien if (c1 != c2) return (c1 > c2); 37878556Sobrien i1++; i2++; 37978556Sobrien /* 6 */ 38078556Sobrien c1 = block[i1]; c2 = block[i2]; 38178556Sobrien if (c1 != c2) return (c1 > c2); 38278556Sobrien i1++; i2++; 38378556Sobrien /* 7 */ 38478556Sobrien c1 = block[i1]; c2 = block[i2]; 38578556Sobrien if (c1 != c2) return (c1 > c2); 38678556Sobrien i1++; i2++; 38778556Sobrien /* 8 */ 38878556Sobrien c1 = block[i1]; c2 = block[i2]; 38978556Sobrien if (c1 != c2) return (c1 > c2); 39078556Sobrien i1++; i2++; 39178556Sobrien /* 9 */ 39278556Sobrien c1 = block[i1]; c2 = block[i2]; 39378556Sobrien if (c1 != c2) return (c1 > c2); 39478556Sobrien i1++; i2++; 39578556Sobrien /* 10 */ 39678556Sobrien c1 = block[i1]; c2 = block[i2]; 39778556Sobrien if (c1 != c2) return (c1 > c2); 39878556Sobrien i1++; i2++; 39978556Sobrien /* 11 */ 40078556Sobrien c1 = block[i1]; c2 = block[i2]; 40178556Sobrien if (c1 != c2) return (c1 > c2); 40278556Sobrien i1++; i2++; 40378556Sobrien /* 12 */ 40478556Sobrien c1 = block[i1]; c2 = block[i2]; 40578556Sobrien if (c1 != c2) return (c1 > c2); 40678556Sobrien i1++; i2++; 40778556Sobrien 40878556Sobrien k = nblock + 8; 40978556Sobrien 41078556Sobrien do { 41178556Sobrien /* 1 */ 41278556Sobrien c1 = block[i1]; c2 = block[i2]; 41378556Sobrien if (c1 != c2) return (c1 > c2); 41478556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 41578556Sobrien if (s1 != s2) return (s1 > s2); 41678556Sobrien i1++; i2++; 41778556Sobrien /* 2 */ 41878556Sobrien c1 = block[i1]; c2 = block[i2]; 41978556Sobrien if (c1 != c2) return (c1 > c2); 42078556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 42178556Sobrien if (s1 != s2) return (s1 > s2); 42278556Sobrien i1++; i2++; 42378556Sobrien /* 3 */ 42478556Sobrien c1 = block[i1]; c2 = block[i2]; 42578556Sobrien if (c1 != c2) return (c1 > c2); 42678556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 42778556Sobrien if (s1 != s2) return (s1 > s2); 42878556Sobrien i1++; i2++; 42978556Sobrien /* 4 */ 43078556Sobrien c1 = block[i1]; c2 = block[i2]; 43178556Sobrien if (c1 != c2) return (c1 > c2); 43278556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 43378556Sobrien if (s1 != s2) return (s1 > s2); 43478556Sobrien i1++; i2++; 43578556Sobrien /* 5 */ 43678556Sobrien c1 = block[i1]; c2 = block[i2]; 43778556Sobrien if (c1 != c2) return (c1 > c2); 43878556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 43978556Sobrien if (s1 != s2) return (s1 > s2); 44078556Sobrien i1++; i2++; 44178556Sobrien /* 6 */ 44278556Sobrien c1 = block[i1]; c2 = block[i2]; 44378556Sobrien if (c1 != c2) return (c1 > c2); 44478556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 44578556Sobrien if (s1 != s2) return (s1 > s2); 44678556Sobrien i1++; i2++; 44778556Sobrien /* 7 */ 44878556Sobrien c1 = block[i1]; c2 = block[i2]; 44978556Sobrien if (c1 != c2) return (c1 > c2); 45078556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 45178556Sobrien if (s1 != s2) return (s1 > s2); 45278556Sobrien i1++; i2++; 45378556Sobrien /* 8 */ 45478556Sobrien c1 = block[i1]; c2 = block[i2]; 45578556Sobrien if (c1 != c2) return (c1 > c2); 45678556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 45778556Sobrien if (s1 != s2) return (s1 > s2); 45878556Sobrien i1++; i2++; 45978556Sobrien 46078556Sobrien if (i1 >= nblock) i1 -= nblock; 46178556Sobrien if (i2 >= nblock) i2 -= nblock; 46278556Sobrien 46378556Sobrien k -= 8; 46478556Sobrien (*budget)--; 46578556Sobrien } 46678556Sobrien while (k >= 0); 46778556Sobrien 46878556Sobrien return False; 46978556Sobrien} 47078556Sobrien 47178556Sobrien 47278556Sobrien/*---------------------------------------------*/ 47378556Sobrien/*-- 47478556Sobrien Knuth's increments seem to work better 47578556Sobrien than Incerpi-Sedgewick here. Possibly 47678556Sobrien because the number of elems to sort is 47778556Sobrien usually small, typically <= 20. 47878556Sobrien--*/ 47978556Sobrienstatic 48078556SobrienInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 48178556Sobrien 9841, 29524, 88573, 265720, 48278556Sobrien 797161, 2391484 }; 48378556Sobrien 48478556Sobrienstatic 48578556Sobrienvoid mainSimpleSort ( UInt32* ptr, 48678556Sobrien UChar* block, 48778556Sobrien UInt16* quadrant, 48878556Sobrien Int32 nblock, 48978556Sobrien Int32 lo, 49078556Sobrien Int32 hi, 49178556Sobrien Int32 d, 49278556Sobrien Int32* budget ) 49378556Sobrien{ 49478556Sobrien Int32 i, j, h, bigN, hp; 49578556Sobrien UInt32 v; 49678556Sobrien 49778556Sobrien bigN = hi - lo + 1; 49878556Sobrien if (bigN < 2) return; 49978556Sobrien 50078556Sobrien hp = 0; 50178556Sobrien while (incs[hp] < bigN) hp++; 50278556Sobrien hp--; 50378556Sobrien 50478556Sobrien for (; hp >= 0; hp--) { 50578556Sobrien h = incs[hp]; 50678556Sobrien 50778556Sobrien i = lo + h; 50878556Sobrien while (True) { 50978556Sobrien 51078556Sobrien /*-- copy 1 --*/ 51178556Sobrien if (i > hi) break; 51278556Sobrien v = ptr[i]; 51378556Sobrien j = i; 51478556Sobrien while ( mainGtU ( 51578556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 51678556Sobrien ) ) { 51778556Sobrien ptr[j] = ptr[j-h]; 51878556Sobrien j = j - h; 51978556Sobrien if (j <= (lo + h - 1)) break; 52078556Sobrien } 52178556Sobrien ptr[j] = v; 52278556Sobrien i++; 52378556Sobrien 52478556Sobrien /*-- copy 2 --*/ 52578556Sobrien if (i > hi) break; 52678556Sobrien v = ptr[i]; 52778556Sobrien j = i; 52878556Sobrien while ( mainGtU ( 52978556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 53078556Sobrien ) ) { 53178556Sobrien ptr[j] = ptr[j-h]; 53278556Sobrien j = j - h; 53378556Sobrien if (j <= (lo + h - 1)) break; 53478556Sobrien } 53578556Sobrien ptr[j] = v; 53678556Sobrien i++; 53778556Sobrien 53878556Sobrien /*-- copy 3 --*/ 53978556Sobrien if (i > hi) break; 54078556Sobrien v = ptr[i]; 54178556Sobrien j = i; 54278556Sobrien while ( mainGtU ( 54378556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 54478556Sobrien ) ) { 54578556Sobrien ptr[j] = ptr[j-h]; 54678556Sobrien j = j - h; 54778556Sobrien if (j <= (lo + h - 1)) break; 54878556Sobrien } 54978556Sobrien ptr[j] = v; 55078556Sobrien i++; 55178556Sobrien 55278556Sobrien if (*budget < 0) return; 55378556Sobrien } 55478556Sobrien } 55578556Sobrien} 55678556Sobrien 55778556Sobrien 55878556Sobrien/*---------------------------------------------*/ 55978556Sobrien/*-- 56078556Sobrien The following is an implementation of 56178556Sobrien an elegant 3-way quicksort for strings, 56278556Sobrien described in a paper "Fast Algorithms for 56378556Sobrien Sorting and Searching Strings", by Robert 56478556Sobrien Sedgewick and Jon L. Bentley. 56578556Sobrien--*/ 56678556Sobrien 56778556Sobrien#define mswap(zz1, zz2) \ 56878556Sobrien { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 56978556Sobrien 57078556Sobrien#define mvswap(zzp1, zzp2, zzn) \ 57178556Sobrien{ \ 57278556Sobrien Int32 yyp1 = (zzp1); \ 57378556Sobrien Int32 yyp2 = (zzp2); \ 57478556Sobrien Int32 yyn = (zzn); \ 57578556Sobrien while (yyn > 0) { \ 57678556Sobrien mswap(ptr[yyp1], ptr[yyp2]); \ 57778556Sobrien yyp1++; yyp2++; yyn--; \ 57878556Sobrien } \ 57978556Sobrien} 58078556Sobrien 58178556Sobrienstatic 58278556Sobrien__inline__ 58378556SobrienUChar mmed3 ( UChar a, UChar b, UChar c ) 58478556Sobrien{ 58578556Sobrien UChar t; 58678556Sobrien if (a > b) { t = a; a = b; b = t; }; 58778556Sobrien if (b > c) { 58878556Sobrien b = c; 58978556Sobrien if (a > b) b = a; 59078556Sobrien } 59178556Sobrien return b; 59278556Sobrien} 59378556Sobrien 59478556Sobrien#define mmin(a,b) ((a) < (b)) ? (a) : (b) 59578556Sobrien 59678556Sobrien#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 59778556Sobrien stackHi[sp] = hz; \ 59878556Sobrien stackD [sp] = dz; \ 59978556Sobrien sp++; } 60078556Sobrien 60178556Sobrien#define mpop(lz,hz,dz) { sp--; \ 60278556Sobrien lz = stackLo[sp]; \ 60378556Sobrien hz = stackHi[sp]; \ 60478556Sobrien dz = stackD [sp]; } 60578556Sobrien 60678556Sobrien 60778556Sobrien#define mnextsize(az) (nextHi[az]-nextLo[az]) 60878556Sobrien 60978556Sobrien#define mnextswap(az,bz) \ 61078556Sobrien { Int32 tz; \ 61178556Sobrien tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 61278556Sobrien tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 61378556Sobrien tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 61478556Sobrien 61578556Sobrien 61678556Sobrien#define MAIN_QSORT_SMALL_THRESH 20 61778556Sobrien#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 61878556Sobrien#define MAIN_QSORT_STACK_SIZE 100 61978556Sobrien 62078556Sobrienstatic 62178556Sobrienvoid mainQSort3 ( UInt32* ptr, 62278556Sobrien UChar* block, 62378556Sobrien UInt16* quadrant, 62478556Sobrien Int32 nblock, 62578556Sobrien Int32 loSt, 62678556Sobrien Int32 hiSt, 62778556Sobrien Int32 dSt, 62878556Sobrien Int32* budget ) 62978556Sobrien{ 63078556Sobrien Int32 unLo, unHi, ltLo, gtHi, n, m, med; 63178556Sobrien Int32 sp, lo, hi, d; 63278556Sobrien 63378556Sobrien Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 63478556Sobrien Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 63578556Sobrien Int32 stackD [MAIN_QSORT_STACK_SIZE]; 63678556Sobrien 63778556Sobrien Int32 nextLo[3]; 63878556Sobrien Int32 nextHi[3]; 63978556Sobrien Int32 nextD [3]; 64078556Sobrien 64178556Sobrien sp = 0; 64278556Sobrien mpush ( loSt, hiSt, dSt ); 64378556Sobrien 64478556Sobrien while (sp > 0) { 64578556Sobrien 646167974Sdelphij AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); 64778556Sobrien 64878556Sobrien mpop ( lo, hi, d ); 64978556Sobrien if (hi - lo < MAIN_QSORT_SMALL_THRESH || 65078556Sobrien d > MAIN_QSORT_DEPTH_THRESH) { 65178556Sobrien mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 65278556Sobrien if (*budget < 0) return; 65378556Sobrien continue; 65478556Sobrien } 65578556Sobrien 65678556Sobrien med = (Int32) 65778556Sobrien mmed3 ( block[ptr[ lo ]+d], 65878556Sobrien block[ptr[ hi ]+d], 65978556Sobrien block[ptr[ (lo+hi)>>1 ]+d] ); 66078556Sobrien 66178556Sobrien unLo = ltLo = lo; 66278556Sobrien unHi = gtHi = hi; 66378556Sobrien 66478556Sobrien while (True) { 66578556Sobrien while (True) { 66678556Sobrien if (unLo > unHi) break; 66778556Sobrien n = ((Int32)block[ptr[unLo]+d]) - med; 66878556Sobrien if (n == 0) { 66978556Sobrien mswap(ptr[unLo], ptr[ltLo]); 67078556Sobrien ltLo++; unLo++; continue; 67178556Sobrien }; 67278556Sobrien if (n > 0) break; 67378556Sobrien unLo++; 67478556Sobrien } 67578556Sobrien while (True) { 67678556Sobrien if (unLo > unHi) break; 67778556Sobrien n = ((Int32)block[ptr[unHi]+d]) - med; 67878556Sobrien if (n == 0) { 67978556Sobrien mswap(ptr[unHi], ptr[gtHi]); 68078556Sobrien gtHi--; unHi--; continue; 68178556Sobrien }; 68278556Sobrien if (n < 0) break; 68378556Sobrien unHi--; 68478556Sobrien } 68578556Sobrien if (unLo > unHi) break; 68678556Sobrien mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 68778556Sobrien } 68878556Sobrien 68978556Sobrien AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 69078556Sobrien 69178556Sobrien if (gtHi < ltLo) { 69278556Sobrien mpush(lo, hi, d+1 ); 69378556Sobrien continue; 69478556Sobrien } 69578556Sobrien 69678556Sobrien n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 69778556Sobrien m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 69878556Sobrien 69978556Sobrien n = lo + unLo - ltLo - 1; 70078556Sobrien m = hi - (gtHi - unHi) + 1; 70178556Sobrien 70278556Sobrien nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 70378556Sobrien nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 70478556Sobrien nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 70578556Sobrien 70678556Sobrien if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 70778556Sobrien if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 70878556Sobrien if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 70978556Sobrien 71078556Sobrien AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 71178556Sobrien AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 71278556Sobrien 71378556Sobrien mpush (nextLo[0], nextHi[0], nextD[0]); 71478556Sobrien mpush (nextLo[1], nextHi[1], nextD[1]); 71578556Sobrien mpush (nextLo[2], nextHi[2], nextD[2]); 71678556Sobrien } 71778556Sobrien} 71878556Sobrien 71978556Sobrien#undef mswap 72078556Sobrien#undef mvswap 72178556Sobrien#undef mpush 72278556Sobrien#undef mpop 72378556Sobrien#undef mmin 72478556Sobrien#undef mnextsize 72578556Sobrien#undef mnextswap 72678556Sobrien#undef MAIN_QSORT_SMALL_THRESH 72778556Sobrien#undef MAIN_QSORT_DEPTH_THRESH 72878556Sobrien#undef MAIN_QSORT_STACK_SIZE 72978556Sobrien 73078556Sobrien 73178556Sobrien/*---------------------------------------------*/ 73278556Sobrien/* Pre: 73378556Sobrien nblock > N_OVERSHOOT 73478556Sobrien block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 73578556Sobrien ((UChar*)block32) [0 .. nblock-1] holds block 73678556Sobrien ptr exists for [0 .. nblock-1] 73778556Sobrien 73878556Sobrien Post: 73978556Sobrien ((UChar*)block32) [0 .. nblock-1] holds block 74078556Sobrien All other areas of block32 destroyed 74178556Sobrien ftab [0 .. 65536 ] destroyed 74278556Sobrien ptr [0 .. nblock-1] holds sorted order 74378556Sobrien if (*budget < 0), sorting was abandoned 74478556Sobrien*/ 74578556Sobrien 74678556Sobrien#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 74778556Sobrien#define SETMASK (1 << 21) 74878556Sobrien#define CLEARMASK (~(SETMASK)) 74978556Sobrien 75078556Sobrienstatic 75178556Sobrienvoid mainSort ( UInt32* ptr, 75278556Sobrien UChar* block, 75378556Sobrien UInt16* quadrant, 75478556Sobrien UInt32* ftab, 75578556Sobrien Int32 nblock, 75678556Sobrien Int32 verb, 75778556Sobrien Int32* budget ) 75878556Sobrien{ 75978556Sobrien Int32 i, j, k, ss, sb; 76078556Sobrien Int32 runningOrder[256]; 76178556Sobrien Bool bigDone[256]; 76278556Sobrien Int32 copyStart[256]; 76378556Sobrien Int32 copyEnd [256]; 76478556Sobrien UChar c1; 76578556Sobrien Int32 numQSorted; 76678556Sobrien UInt16 s; 76778556Sobrien if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 76878556Sobrien 76978556Sobrien /*-- set up the 2-byte frequency table --*/ 77078556Sobrien for (i = 65536; i >= 0; i--) ftab[i] = 0; 77178556Sobrien 77278556Sobrien j = block[0] << 8; 77378556Sobrien i = nblock-1; 77478556Sobrien for (; i >= 3; i -= 4) { 77578556Sobrien quadrant[i] = 0; 77678556Sobrien j = (j >> 8) | ( ((UInt16)block[i]) << 8); 77778556Sobrien ftab[j]++; 77878556Sobrien quadrant[i-1] = 0; 77978556Sobrien j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 78078556Sobrien ftab[j]++; 78178556Sobrien quadrant[i-2] = 0; 78278556Sobrien j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 78378556Sobrien ftab[j]++; 78478556Sobrien quadrant[i-3] = 0; 78578556Sobrien j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 78678556Sobrien ftab[j]++; 78778556Sobrien } 78878556Sobrien for (; i >= 0; i--) { 78978556Sobrien quadrant[i] = 0; 79078556Sobrien j = (j >> 8) | ( ((UInt16)block[i]) << 8); 79178556Sobrien ftab[j]++; 79278556Sobrien } 79378556Sobrien 79478556Sobrien /*-- (emphasises close relationship of block & quadrant) --*/ 79578556Sobrien for (i = 0; i < BZ_N_OVERSHOOT; i++) { 79678556Sobrien block [nblock+i] = block[i]; 79778556Sobrien quadrant[nblock+i] = 0; 79878556Sobrien } 79978556Sobrien 80078556Sobrien if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 80178556Sobrien 80278556Sobrien /*-- Complete the initial radix sort --*/ 80378556Sobrien for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 80478556Sobrien 80578556Sobrien s = block[0] << 8; 80678556Sobrien i = nblock-1; 80778556Sobrien for (; i >= 3; i -= 4) { 80878556Sobrien s = (s >> 8) | (block[i] << 8); 80978556Sobrien j = ftab[s] -1; 81078556Sobrien ftab[s] = j; 81178556Sobrien ptr[j] = i; 81278556Sobrien s = (s >> 8) | (block[i-1] << 8); 81378556Sobrien j = ftab[s] -1; 81478556Sobrien ftab[s] = j; 81578556Sobrien ptr[j] = i-1; 81678556Sobrien s = (s >> 8) | (block[i-2] << 8); 81778556Sobrien j = ftab[s] -1; 81878556Sobrien ftab[s] = j; 81978556Sobrien ptr[j] = i-2; 82078556Sobrien s = (s >> 8) | (block[i-3] << 8); 82178556Sobrien j = ftab[s] -1; 82278556Sobrien ftab[s] = j; 82378556Sobrien ptr[j] = i-3; 82478556Sobrien } 82578556Sobrien for (; i >= 0; i--) { 82678556Sobrien s = (s >> 8) | (block[i] << 8); 82778556Sobrien j = ftab[s] -1; 82878556Sobrien ftab[s] = j; 82978556Sobrien ptr[j] = i; 83078556Sobrien } 83178556Sobrien 83278556Sobrien /*-- 83378556Sobrien Now ftab contains the first loc of every small bucket. 83478556Sobrien Calculate the running order, from smallest to largest 83578556Sobrien big bucket. 83678556Sobrien --*/ 83778556Sobrien for (i = 0; i <= 255; i++) { 83878556Sobrien bigDone [i] = False; 83978556Sobrien runningOrder[i] = i; 84078556Sobrien } 84178556Sobrien 84278556Sobrien { 84378556Sobrien Int32 vv; 84478556Sobrien Int32 h = 1; 84578556Sobrien do h = 3 * h + 1; while (h <= 256); 84678556Sobrien do { 84778556Sobrien h = h / 3; 84878556Sobrien for (i = h; i <= 255; i++) { 84978556Sobrien vv = runningOrder[i]; 85078556Sobrien j = i; 85178556Sobrien while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 85278556Sobrien runningOrder[j] = runningOrder[j-h]; 85378556Sobrien j = j - h; 85478556Sobrien if (j <= (h - 1)) goto zero; 85578556Sobrien } 85678556Sobrien zero: 85778556Sobrien runningOrder[j] = vv; 85878556Sobrien } 85978556Sobrien } while (h != 1); 86078556Sobrien } 86178556Sobrien 86278556Sobrien /*-- 86378556Sobrien The main sorting loop. 86478556Sobrien --*/ 86578556Sobrien 86678556Sobrien numQSorted = 0; 86778556Sobrien 86878556Sobrien for (i = 0; i <= 255; i++) { 86978556Sobrien 87078556Sobrien /*-- 87178556Sobrien Process big buckets, starting with the least full. 87278556Sobrien Basically this is a 3-step process in which we call 87378556Sobrien mainQSort3 to sort the small buckets [ss, j], but 87478556Sobrien also make a big effort to avoid the calls if we can. 87578556Sobrien --*/ 87678556Sobrien ss = runningOrder[i]; 87778556Sobrien 87878556Sobrien /*-- 87978556Sobrien Step 1: 88078556Sobrien Complete the big bucket [ss] by quicksorting 88178556Sobrien any unsorted small buckets [ss, j], for j != ss. 88278556Sobrien Hopefully previous pointer-scanning phases have already 88378556Sobrien completed many of the small buckets [ss, j], so 88478556Sobrien we don't have to sort them at all. 88578556Sobrien --*/ 88678556Sobrien for (j = 0; j <= 255; j++) { 88778556Sobrien if (j != ss) { 88878556Sobrien sb = (ss << 8) + j; 88978556Sobrien if ( ! (ftab[sb] & SETMASK) ) { 89078556Sobrien Int32 lo = ftab[sb] & CLEARMASK; 89178556Sobrien Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 89278556Sobrien if (hi > lo) { 89378556Sobrien if (verb >= 4) 89478556Sobrien VPrintf4 ( " qsort [0x%x, 0x%x] " 89578556Sobrien "done %d this %d\n", 89678556Sobrien ss, j, numQSorted, hi - lo + 1 ); 89778556Sobrien mainQSort3 ( 89878556Sobrien ptr, block, quadrant, nblock, 89978556Sobrien lo, hi, BZ_N_RADIX, budget 90078556Sobrien ); 90178556Sobrien numQSorted += (hi - lo + 1); 90278556Sobrien if (*budget < 0) return; 90378556Sobrien } 90478556Sobrien } 90578556Sobrien ftab[sb] |= SETMASK; 90678556Sobrien } 90778556Sobrien } 90878556Sobrien 90978556Sobrien AssertH ( !bigDone[ss], 1006 ); 91078556Sobrien 91178556Sobrien /*-- 91278556Sobrien Step 2: 91378556Sobrien Now scan this big bucket [ss] so as to synthesise the 91478556Sobrien sorted order for small buckets [t, ss] for all t, 91578556Sobrien including, magically, the bucket [ss,ss] too. 91678556Sobrien This will avoid doing Real Work in subsequent Step 1's. 91778556Sobrien --*/ 91878556Sobrien { 91978556Sobrien for (j = 0; j <= 255; j++) { 92078556Sobrien copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 92178556Sobrien copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 92278556Sobrien } 92378556Sobrien for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 92478556Sobrien k = ptr[j]-1; if (k < 0) k += nblock; 92578556Sobrien c1 = block[k]; 92678556Sobrien if (!bigDone[c1]) 92778556Sobrien ptr[ copyStart[c1]++ ] = k; 92878556Sobrien } 92978556Sobrien for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 93078556Sobrien k = ptr[j]-1; if (k < 0) k += nblock; 93178556Sobrien c1 = block[k]; 93278556Sobrien if (!bigDone[c1]) 93378556Sobrien ptr[ copyEnd[c1]-- ] = k; 93478556Sobrien } 93578556Sobrien } 93678556Sobrien 93790067Ssobomax AssertH ( (copyStart[ss]-1 == copyEnd[ss]) 93890067Ssobomax || 93990067Ssobomax /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. 94090067Ssobomax Necessity for this case is demonstrated by compressing 94190067Ssobomax a sequence of approximately 48.5 million of character 94290067Ssobomax 251; 1.0.0/1.0.1 will then die here. */ 94390067Ssobomax (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 94490067Ssobomax 1007 ) 94578556Sobrien 94678556Sobrien for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 94778556Sobrien 94878556Sobrien /*-- 94978556Sobrien Step 3: 95078556Sobrien The [ss] big bucket is now done. Record this fact, 95178556Sobrien and update the quadrant descriptors. Remember to 95278556Sobrien update quadrants in the overshoot area too, if 95378556Sobrien necessary. The "if (i < 255)" test merely skips 95478556Sobrien this updating for the last bucket processed, since 95578556Sobrien updating for the last bucket is pointless. 95678556Sobrien 95778556Sobrien The quadrant array provides a way to incrementally 95878556Sobrien cache sort orderings, as they appear, so as to 95978556Sobrien make subsequent comparisons in fullGtU() complete 96078556Sobrien faster. For repetitive blocks this makes a big 96178556Sobrien difference (but not big enough to be able to avoid 96278556Sobrien the fallback sorting mechanism, exponential radix sort). 96378556Sobrien 96478556Sobrien The precise meaning is: at all times: 96578556Sobrien 96678556Sobrien for 0 <= i < nblock and 0 <= j <= nblock 96778556Sobrien 96878556Sobrien if block[i] != block[j], 96978556Sobrien 97078556Sobrien then the relative values of quadrant[i] and 97178556Sobrien quadrant[j] are meaningless. 97278556Sobrien 97378556Sobrien else { 97478556Sobrien if quadrant[i] < quadrant[j] 97578556Sobrien then the string starting at i lexicographically 97678556Sobrien precedes the string starting at j 97778556Sobrien 97878556Sobrien else if quadrant[i] > quadrant[j] 97978556Sobrien then the string starting at j lexicographically 98078556Sobrien precedes the string starting at i 98178556Sobrien 98278556Sobrien else 98378556Sobrien the relative ordering of the strings starting 98478556Sobrien at i and j has not yet been determined. 98578556Sobrien } 98678556Sobrien --*/ 98778556Sobrien bigDone[ss] = True; 98878556Sobrien 98978556Sobrien if (i < 255) { 99078556Sobrien Int32 bbStart = ftab[ss << 8] & CLEARMASK; 99178556Sobrien Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 99278556Sobrien Int32 shifts = 0; 99378556Sobrien 99478556Sobrien while ((bbSize >> shifts) > 65534) shifts++; 99578556Sobrien 99678556Sobrien for (j = bbSize-1; j >= 0; j--) { 99778556Sobrien Int32 a2update = ptr[bbStart + j]; 99878556Sobrien UInt16 qVal = (UInt16)(j >> shifts); 99978556Sobrien quadrant[a2update] = qVal; 100078556Sobrien if (a2update < BZ_N_OVERSHOOT) 100178556Sobrien quadrant[a2update + nblock] = qVal; 100278556Sobrien } 100378556Sobrien AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 100478556Sobrien } 100578556Sobrien 100678556Sobrien } 100778556Sobrien 100878556Sobrien if (verb >= 4) 100978556Sobrien VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 101078556Sobrien nblock, numQSorted, nblock - numQSorted ); 101178556Sobrien} 101278556Sobrien 101378556Sobrien#undef BIGFREQ 101478556Sobrien#undef SETMASK 101578556Sobrien#undef CLEARMASK 101678556Sobrien 101778556Sobrien 101878556Sobrien/*---------------------------------------------*/ 101978556Sobrien/* Pre: 102078556Sobrien nblock > 0 102178556Sobrien arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 102278556Sobrien ((UChar*)arr2) [0 .. nblock-1] holds block 102378556Sobrien arr1 exists for [0 .. nblock-1] 102478556Sobrien 102578556Sobrien Post: 102678556Sobrien ((UChar*)arr2) [0 .. nblock-1] holds block 102778556Sobrien All other areas of block destroyed 102878556Sobrien ftab [ 0 .. 65536 ] destroyed 102978556Sobrien arr1 [0 .. nblock-1] holds sorted order 103078556Sobrien*/ 103178556Sobrienvoid BZ2_blockSort ( EState* s ) 103278556Sobrien{ 103378556Sobrien UInt32* ptr = s->ptr; 103478556Sobrien UChar* block = s->block; 103578556Sobrien UInt32* ftab = s->ftab; 103678556Sobrien Int32 nblock = s->nblock; 103778556Sobrien Int32 verb = s->verbosity; 103878556Sobrien Int32 wfact = s->workFactor; 103978556Sobrien UInt16* quadrant; 104078556Sobrien Int32 budget; 104178556Sobrien Int32 budgetInit; 104278556Sobrien Int32 i; 104378556Sobrien 104478556Sobrien if (nblock < 10000) { 104578556Sobrien fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 104678556Sobrien } else { 104778556Sobrien /* Calculate the location for quadrant, remembering to get 104878556Sobrien the alignment right. Assumes that &(block[0]) is at least 104978556Sobrien 2-byte aligned -- this should be ok since block is really 105078556Sobrien the first section of arr2. 105178556Sobrien */ 105278556Sobrien i = nblock+BZ_N_OVERSHOOT; 105378556Sobrien if (i & 1) i++; 105478556Sobrien quadrant = (UInt16*)(&(block[i])); 105578556Sobrien 105678556Sobrien /* (wfact-1) / 3 puts the default-factor-30 105778556Sobrien transition point at very roughly the same place as 105878556Sobrien with v0.1 and v0.9.0. 105978556Sobrien Not that it particularly matters any more, since the 106078556Sobrien resulting compressed stream is now the same regardless 106178556Sobrien of whether or not we use the main sort or fallback sort. 106278556Sobrien */ 106378556Sobrien if (wfact < 1 ) wfact = 1; 106478556Sobrien if (wfact > 100) wfact = 100; 106578556Sobrien budgetInit = nblock * ((wfact-1) / 3); 106678556Sobrien budget = budgetInit; 106778556Sobrien 106878556Sobrien mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 106978556Sobrien if (verb >= 3) 107078556Sobrien VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 107178556Sobrien budgetInit - budget, 107278556Sobrien nblock, 107378556Sobrien (float)(budgetInit - budget) / 107478556Sobrien (float)(nblock==0 ? 1 : nblock) ); 107578556Sobrien if (budget < 0) { 107678556Sobrien if (verb >= 2) 107778556Sobrien VPrintf0 ( " too repetitive; using fallback" 107878556Sobrien " sorting algorithm\n" ); 107978556Sobrien fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 108078556Sobrien } 108178556Sobrien } 108278556Sobrien 108378556Sobrien s->origPtr = -1; 108478556Sobrien for (i = 0; i < s->nblock; i++) 108578556Sobrien if (ptr[i] == 0) 108678556Sobrien { s->origPtr = i; break; }; 108778556Sobrien 108878556Sobrien AssertH( s->origPtr != -1, 1003 ); 108978556Sobrien} 109078556Sobrien 109178556Sobrien 109278556Sobrien/*-------------------------------------------------------------*/ 109378556Sobrien/*--- end blocksort.c ---*/ 109478556Sobrien/*-------------------------------------------------------------*/ 1095