blocksort.c revision 78556
178556Sobrien 278556Sobrien/*-------------------------------------------------------------*/ 378556Sobrien/*--- Block sorting machinery ---*/ 478556Sobrien/*--- blocksort.c ---*/ 578556Sobrien/*-------------------------------------------------------------*/ 678556Sobrien 778556Sobrien/*-- 878556Sobrien This file is a part of bzip2 and/or libbzip2, a program and 978556Sobrien library for lossless, block-sorting data compression. 1078556Sobrien 1178556Sobrien Copyright (C) 1996-2000 Julian R Seward. All rights reserved. 1278556Sobrien 1378556Sobrien Redistribution and use in source and binary forms, with or without 1478556Sobrien modification, are permitted provided that the following conditions 1578556Sobrien are met: 1678556Sobrien 1778556Sobrien 1. Redistributions of source code must retain the above copyright 1878556Sobrien notice, this list of conditions and the following disclaimer. 1978556Sobrien 2078556Sobrien 2. The origin of this software must not be misrepresented; you must 2178556Sobrien not claim that you wrote the original software. If you use this 2278556Sobrien software in a product, an acknowledgment in the product 2378556Sobrien documentation would be appreciated but is not required. 2478556Sobrien 2578556Sobrien 3. Altered source versions must be plainly marked as such, and must 2678556Sobrien not be misrepresented as being the original software. 2778556Sobrien 2878556Sobrien 4. The name of the author may not be used to endorse or promote 2978556Sobrien products derived from this software without specific prior written 3078556Sobrien permission. 3178556Sobrien 3278556Sobrien THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS 3378556Sobrien OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 3478556Sobrien WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 3578556Sobrien ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 3678556Sobrien DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 3778556Sobrien DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE 3878556Sobrien GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 3978556Sobrien INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 4078556Sobrien WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 4178556Sobrien NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 4278556Sobrien SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 4378556Sobrien 4478556Sobrien Julian Seward, Cambridge, UK. 4578556Sobrien jseward@acm.org 4678556Sobrien bzip2/libbzip2 version 1.0 of 21 March 2000 4778556Sobrien 4878556Sobrien This program is based on (at least) the work of: 4978556Sobrien Mike Burrows 5078556Sobrien David Wheeler 5178556Sobrien Peter Fenwick 5278556Sobrien Alistair Moffat 5378556Sobrien Radford Neal 5478556Sobrien Ian H. Witten 5578556Sobrien Robert Sedgewick 5678556Sobrien Jon L. Bentley 5778556Sobrien 5878556Sobrien For more information on these sources, see the manual. 5978556Sobrien 6078556Sobrien To get some idea how the block sorting algorithms in this file 6178556Sobrien work, read my paper 6278556Sobrien On the Performance of BWT Sorting Algorithms 6378556Sobrien in Proceedings of the IEEE Data Compression Conference 2000, 6478556Sobrien Snowbird, Utah, USA, 27-30 March 2000. The main sort in this 6578556Sobrien file implements the algorithm called cache in the paper. 6678556Sobrien--*/ 6778556Sobrien 6878556Sobrien 6978556Sobrien#include "bzlib_private.h" 7078556Sobrien 7178556Sobrien/*---------------------------------------------*/ 7278556Sobrien/*--- Fallback O(N log(N)^2) sorting ---*/ 7378556Sobrien/*--- algorithm, for repetitive blocks ---*/ 7478556Sobrien/*---------------------------------------------*/ 7578556Sobrien 7678556Sobrien/*---------------------------------------------*/ 7778556Sobrienstatic 7878556Sobrien__inline__ 7978556Sobrienvoid fallbackSimpleSort ( UInt32* fmap, 8078556Sobrien UInt32* eclass, 8178556Sobrien Int32 lo, 8278556Sobrien Int32 hi ) 8378556Sobrien{ 8478556Sobrien Int32 i, j, tmp; 8578556Sobrien UInt32 ec_tmp; 8678556Sobrien 8778556Sobrien if (lo == hi) return; 8878556Sobrien 8978556Sobrien if (hi - lo > 3) { 9078556Sobrien for ( i = hi-4; i >= lo; i-- ) { 9178556Sobrien tmp = fmap[i]; 9278556Sobrien ec_tmp = eclass[tmp]; 9378556Sobrien for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) 9478556Sobrien fmap[j-4] = fmap[j]; 9578556Sobrien fmap[j-4] = tmp; 9678556Sobrien } 9778556Sobrien } 9878556Sobrien 9978556Sobrien for ( i = hi-1; i >= lo; i-- ) { 10078556Sobrien tmp = fmap[i]; 10178556Sobrien ec_tmp = eclass[tmp]; 10278556Sobrien for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) 10378556Sobrien fmap[j-1] = fmap[j]; 10478556Sobrien fmap[j-1] = tmp; 10578556Sobrien } 10678556Sobrien} 10778556Sobrien 10878556Sobrien 10978556Sobrien/*---------------------------------------------*/ 11078556Sobrien#define fswap(zz1, zz2) \ 11178556Sobrien { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 11278556Sobrien 11378556Sobrien#define fvswap(zzp1, zzp2, zzn) \ 11478556Sobrien{ \ 11578556Sobrien Int32 yyp1 = (zzp1); \ 11678556Sobrien Int32 yyp2 = (zzp2); \ 11778556Sobrien Int32 yyn = (zzn); \ 11878556Sobrien while (yyn > 0) { \ 11978556Sobrien fswap(fmap[yyp1], fmap[yyp2]); \ 12078556Sobrien yyp1++; yyp2++; yyn--; \ 12178556Sobrien } \ 12278556Sobrien} 12378556Sobrien 12478556Sobrien 12578556Sobrien#define fmin(a,b) ((a) < (b)) ? (a) : (b) 12678556Sobrien 12778556Sobrien#define fpush(lz,hz) { stackLo[sp] = lz; \ 12878556Sobrien stackHi[sp] = hz; \ 12978556Sobrien sp++; } 13078556Sobrien 13178556Sobrien#define fpop(lz,hz) { sp--; \ 13278556Sobrien lz = stackLo[sp]; \ 13378556Sobrien hz = stackHi[sp]; } 13478556Sobrien 13578556Sobrien#define FALLBACK_QSORT_SMALL_THRESH 10 13678556Sobrien#define FALLBACK_QSORT_STACK_SIZE 100 13778556Sobrien 13878556Sobrien 13978556Sobrienstatic 14078556Sobrienvoid fallbackQSort3 ( UInt32* fmap, 14178556Sobrien UInt32* eclass, 14278556Sobrien Int32 loSt, 14378556Sobrien Int32 hiSt ) 14478556Sobrien{ 14578556Sobrien Int32 unLo, unHi, ltLo, gtHi, n, m; 14678556Sobrien Int32 sp, lo, hi; 14778556Sobrien UInt32 med, r, r3; 14878556Sobrien Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; 14978556Sobrien Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; 15078556Sobrien 15178556Sobrien r = 0; 15278556Sobrien 15378556Sobrien sp = 0; 15478556Sobrien fpush ( loSt, hiSt ); 15578556Sobrien 15678556Sobrien while (sp > 0) { 15778556Sobrien 15878556Sobrien AssertH ( sp < FALLBACK_QSORT_STACK_SIZE, 1004 ); 15978556Sobrien 16078556Sobrien fpop ( lo, hi ); 16178556Sobrien if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { 16278556Sobrien fallbackSimpleSort ( fmap, eclass, lo, hi ); 16378556Sobrien continue; 16478556Sobrien } 16578556Sobrien 16678556Sobrien /* Random partitioning. Median of 3 sometimes fails to 16778556Sobrien avoid bad cases. Median of 9 seems to help but 16878556Sobrien looks rather expensive. This too seems to work but 16978556Sobrien is cheaper. Guidance for the magic constants 17078556Sobrien 7621 and 32768 is taken from Sedgewick's algorithms 17178556Sobrien book, chapter 35. 17278556Sobrien */ 17378556Sobrien r = ((r * 7621) + 1) % 32768; 17478556Sobrien r3 = r % 3; 17578556Sobrien if (r3 == 0) med = eclass[fmap[lo]]; else 17678556Sobrien if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else 17778556Sobrien med = eclass[fmap[hi]]; 17878556Sobrien 17978556Sobrien unLo = ltLo = lo; 18078556Sobrien unHi = gtHi = hi; 18178556Sobrien 18278556Sobrien while (1) { 18378556Sobrien while (1) { 18478556Sobrien if (unLo > unHi) break; 18578556Sobrien n = (Int32)eclass[fmap[unLo]] - (Int32)med; 18678556Sobrien if (n == 0) { 18778556Sobrien fswap(fmap[unLo], fmap[ltLo]); 18878556Sobrien ltLo++; unLo++; 18978556Sobrien continue; 19078556Sobrien }; 19178556Sobrien if (n > 0) break; 19278556Sobrien unLo++; 19378556Sobrien } 19478556Sobrien while (1) { 19578556Sobrien if (unLo > unHi) break; 19678556Sobrien n = (Int32)eclass[fmap[unHi]] - (Int32)med; 19778556Sobrien if (n == 0) { 19878556Sobrien fswap(fmap[unHi], fmap[gtHi]); 19978556Sobrien gtHi--; unHi--; 20078556Sobrien continue; 20178556Sobrien }; 20278556Sobrien if (n < 0) break; 20378556Sobrien unHi--; 20478556Sobrien } 20578556Sobrien if (unLo > unHi) break; 20678556Sobrien fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; 20778556Sobrien } 20878556Sobrien 20978556Sobrien AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); 21078556Sobrien 21178556Sobrien if (gtHi < ltLo) continue; 21278556Sobrien 21378556Sobrien n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); 21478556Sobrien m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); 21578556Sobrien 21678556Sobrien n = lo + unLo - ltLo - 1; 21778556Sobrien m = hi - (gtHi - unHi) + 1; 21878556Sobrien 21978556Sobrien if (n - lo > hi - m) { 22078556Sobrien fpush ( lo, n ); 22178556Sobrien fpush ( m, hi ); 22278556Sobrien } else { 22378556Sobrien fpush ( m, hi ); 22478556Sobrien fpush ( lo, n ); 22578556Sobrien } 22678556Sobrien } 22778556Sobrien} 22878556Sobrien 22978556Sobrien#undef fmin 23078556Sobrien#undef fpush 23178556Sobrien#undef fpop 23278556Sobrien#undef fswap 23378556Sobrien#undef fvswap 23478556Sobrien#undef FALLBACK_QSORT_SMALL_THRESH 23578556Sobrien#undef FALLBACK_QSORT_STACK_SIZE 23678556Sobrien 23778556Sobrien 23878556Sobrien/*---------------------------------------------*/ 23978556Sobrien/* Pre: 24078556Sobrien nblock > 0 24178556Sobrien eclass exists for [0 .. nblock-1] 24278556Sobrien ((UChar*)eclass) [0 .. nblock-1] holds block 24378556Sobrien ptr exists for [0 .. nblock-1] 24478556Sobrien 24578556Sobrien Post: 24678556Sobrien ((UChar*)eclass) [0 .. nblock-1] holds block 24778556Sobrien All other areas of eclass destroyed 24878556Sobrien fmap [0 .. nblock-1] holds sorted order 24978556Sobrien bhtab [ 0 .. 2+(nblock/32) ] destroyed 25078556Sobrien*/ 25178556Sobrien 25278556Sobrien#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) 25378556Sobrien#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) 25478556Sobrien#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) 25578556Sobrien#define WORD_BH(zz) bhtab[(zz) >> 5] 25678556Sobrien#define UNALIGNED_BH(zz) ((zz) & 0x01f) 25778556Sobrien 25878556Sobrienstatic 25978556Sobrienvoid fallbackSort ( UInt32* fmap, 26078556Sobrien UInt32* eclass, 26178556Sobrien UInt32* bhtab, 26278556Sobrien Int32 nblock, 26378556Sobrien Int32 verb ) 26478556Sobrien{ 26578556Sobrien Int32 ftab[257]; 26678556Sobrien Int32 ftabCopy[256]; 26778556Sobrien Int32 H, i, j, k, l, r, cc, cc1; 26878556Sobrien Int32 nNotDone; 26978556Sobrien Int32 nBhtab; 27078556Sobrien UChar* eclass8 = (UChar*)eclass; 27178556Sobrien 27278556Sobrien /*-- 27378556Sobrien Initial 1-char radix sort to generate 27478556Sobrien initial fmap and initial BH bits. 27578556Sobrien --*/ 27678556Sobrien if (verb >= 4) 27778556Sobrien VPrintf0 ( " bucket sorting ...\n" ); 27878556Sobrien for (i = 0; i < 257; i++) ftab[i] = 0; 27978556Sobrien for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; 28078556Sobrien for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; 28178556Sobrien for (i = 1; i < 257; i++) ftab[i] += ftab[i-1]; 28278556Sobrien 28378556Sobrien for (i = 0; i < nblock; i++) { 28478556Sobrien j = eclass8[i]; 28578556Sobrien k = ftab[j] - 1; 28678556Sobrien ftab[j] = k; 28778556Sobrien fmap[k] = i; 28878556Sobrien } 28978556Sobrien 29078556Sobrien nBhtab = 2 + (nblock / 32); 29178556Sobrien for (i = 0; i < nBhtab; i++) bhtab[i] = 0; 29278556Sobrien for (i = 0; i < 256; i++) SET_BH(ftab[i]); 29378556Sobrien 29478556Sobrien /*-- 29578556Sobrien Inductively refine the buckets. Kind-of an 29678556Sobrien "exponential radix sort" (!), inspired by the 29778556Sobrien Manber-Myers suffix array construction algorithm. 29878556Sobrien --*/ 29978556Sobrien 30078556Sobrien /*-- set sentinel bits for block-end detection --*/ 30178556Sobrien for (i = 0; i < 32; i++) { 30278556Sobrien SET_BH(nblock + 2*i); 30378556Sobrien CLEAR_BH(nblock + 2*i + 1); 30478556Sobrien } 30578556Sobrien 30678556Sobrien /*-- the log(N) loop --*/ 30778556Sobrien H = 1; 30878556Sobrien while (1) { 30978556Sobrien 31078556Sobrien if (verb >= 4) 31178556Sobrien VPrintf1 ( " depth %6d has ", H ); 31278556Sobrien 31378556Sobrien j = 0; 31478556Sobrien for (i = 0; i < nblock; i++) { 31578556Sobrien if (ISSET_BH(i)) j = i; 31678556Sobrien k = fmap[i] - H; if (k < 0) k += nblock; 31778556Sobrien eclass[k] = j; 31878556Sobrien } 31978556Sobrien 32078556Sobrien nNotDone = 0; 32178556Sobrien r = -1; 32278556Sobrien while (1) { 32378556Sobrien 32478556Sobrien /*-- find the next non-singleton bucket --*/ 32578556Sobrien k = r + 1; 32678556Sobrien while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; 32778556Sobrien if (ISSET_BH(k)) { 32878556Sobrien while (WORD_BH(k) == 0xffffffff) k += 32; 32978556Sobrien while (ISSET_BH(k)) k++; 33078556Sobrien } 33178556Sobrien l = k - 1; 33278556Sobrien if (l >= nblock) break; 33378556Sobrien while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; 33478556Sobrien if (!ISSET_BH(k)) { 33578556Sobrien while (WORD_BH(k) == 0x00000000) k += 32; 33678556Sobrien while (!ISSET_BH(k)) k++; 33778556Sobrien } 33878556Sobrien r = k - 1; 33978556Sobrien if (r >= nblock) break; 34078556Sobrien 34178556Sobrien /*-- now [l, r] bracket current bucket --*/ 34278556Sobrien if (r > l) { 34378556Sobrien nNotDone += (r - l + 1); 34478556Sobrien fallbackQSort3 ( fmap, eclass, l, r ); 34578556Sobrien 34678556Sobrien /*-- scan bucket and generate header bits-- */ 34778556Sobrien cc = -1; 34878556Sobrien for (i = l; i <= r; i++) { 34978556Sobrien cc1 = eclass[fmap[i]]; 35078556Sobrien if (cc != cc1) { SET_BH(i); cc = cc1; }; 35178556Sobrien } 35278556Sobrien } 35378556Sobrien } 35478556Sobrien 35578556Sobrien if (verb >= 4) 35678556Sobrien VPrintf1 ( "%6d unresolved strings\n", nNotDone ); 35778556Sobrien 35878556Sobrien H *= 2; 35978556Sobrien if (H > nblock || nNotDone == 0) break; 36078556Sobrien } 36178556Sobrien 36278556Sobrien /*-- 36378556Sobrien Reconstruct the original block in 36478556Sobrien eclass8 [0 .. nblock-1], since the 36578556Sobrien previous phase destroyed it. 36678556Sobrien --*/ 36778556Sobrien if (verb >= 4) 36878556Sobrien VPrintf0 ( " reconstructing block ...\n" ); 36978556Sobrien j = 0; 37078556Sobrien for (i = 0; i < nblock; i++) { 37178556Sobrien while (ftabCopy[j] == 0) j++; 37278556Sobrien ftabCopy[j]--; 37378556Sobrien eclass8[fmap[i]] = (UChar)j; 37478556Sobrien } 37578556Sobrien AssertH ( j < 256, 1005 ); 37678556Sobrien} 37778556Sobrien 37878556Sobrien#undef SET_BH 37978556Sobrien#undef CLEAR_BH 38078556Sobrien#undef ISSET_BH 38178556Sobrien#undef WORD_BH 38278556Sobrien#undef UNALIGNED_BH 38378556Sobrien 38478556Sobrien 38578556Sobrien/*---------------------------------------------*/ 38678556Sobrien/*--- The main, O(N^2 log(N)) sorting ---*/ 38778556Sobrien/*--- algorithm. Faster for "normal" ---*/ 38878556Sobrien/*--- non-repetitive blocks. ---*/ 38978556Sobrien/*---------------------------------------------*/ 39078556Sobrien 39178556Sobrien/*---------------------------------------------*/ 39278556Sobrienstatic 39378556Sobrien__inline__ 39478556SobrienBool mainGtU ( UInt32 i1, 39578556Sobrien UInt32 i2, 39678556Sobrien UChar* block, 39778556Sobrien UInt16* quadrant, 39878556Sobrien UInt32 nblock, 39978556Sobrien Int32* budget ) 40078556Sobrien{ 40178556Sobrien Int32 k; 40278556Sobrien UChar c1, c2; 40378556Sobrien UInt16 s1, s2; 40478556Sobrien 40578556Sobrien AssertD ( i1 != i2, "mainGtU" ); 40678556Sobrien /* 1 */ 40778556Sobrien c1 = block[i1]; c2 = block[i2]; 40878556Sobrien if (c1 != c2) return (c1 > c2); 40978556Sobrien i1++; i2++; 41078556Sobrien /* 2 */ 41178556Sobrien c1 = block[i1]; c2 = block[i2]; 41278556Sobrien if (c1 != c2) return (c1 > c2); 41378556Sobrien i1++; i2++; 41478556Sobrien /* 3 */ 41578556Sobrien c1 = block[i1]; c2 = block[i2]; 41678556Sobrien if (c1 != c2) return (c1 > c2); 41778556Sobrien i1++; i2++; 41878556Sobrien /* 4 */ 41978556Sobrien c1 = block[i1]; c2 = block[i2]; 42078556Sobrien if (c1 != c2) return (c1 > c2); 42178556Sobrien i1++; i2++; 42278556Sobrien /* 5 */ 42378556Sobrien c1 = block[i1]; c2 = block[i2]; 42478556Sobrien if (c1 != c2) return (c1 > c2); 42578556Sobrien i1++; i2++; 42678556Sobrien /* 6 */ 42778556Sobrien c1 = block[i1]; c2 = block[i2]; 42878556Sobrien if (c1 != c2) return (c1 > c2); 42978556Sobrien i1++; i2++; 43078556Sobrien /* 7 */ 43178556Sobrien c1 = block[i1]; c2 = block[i2]; 43278556Sobrien if (c1 != c2) return (c1 > c2); 43378556Sobrien i1++; i2++; 43478556Sobrien /* 8 */ 43578556Sobrien c1 = block[i1]; c2 = block[i2]; 43678556Sobrien if (c1 != c2) return (c1 > c2); 43778556Sobrien i1++; i2++; 43878556Sobrien /* 9 */ 43978556Sobrien c1 = block[i1]; c2 = block[i2]; 44078556Sobrien if (c1 != c2) return (c1 > c2); 44178556Sobrien i1++; i2++; 44278556Sobrien /* 10 */ 44378556Sobrien c1 = block[i1]; c2 = block[i2]; 44478556Sobrien if (c1 != c2) return (c1 > c2); 44578556Sobrien i1++; i2++; 44678556Sobrien /* 11 */ 44778556Sobrien c1 = block[i1]; c2 = block[i2]; 44878556Sobrien if (c1 != c2) return (c1 > c2); 44978556Sobrien i1++; i2++; 45078556Sobrien /* 12 */ 45178556Sobrien c1 = block[i1]; c2 = block[i2]; 45278556Sobrien if (c1 != c2) return (c1 > c2); 45378556Sobrien i1++; i2++; 45478556Sobrien 45578556Sobrien k = nblock + 8; 45678556Sobrien 45778556Sobrien do { 45878556Sobrien /* 1 */ 45978556Sobrien c1 = block[i1]; c2 = block[i2]; 46078556Sobrien if (c1 != c2) return (c1 > c2); 46178556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 46278556Sobrien if (s1 != s2) return (s1 > s2); 46378556Sobrien i1++; i2++; 46478556Sobrien /* 2 */ 46578556Sobrien c1 = block[i1]; c2 = block[i2]; 46678556Sobrien if (c1 != c2) return (c1 > c2); 46778556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 46878556Sobrien if (s1 != s2) return (s1 > s2); 46978556Sobrien i1++; i2++; 47078556Sobrien /* 3 */ 47178556Sobrien c1 = block[i1]; c2 = block[i2]; 47278556Sobrien if (c1 != c2) return (c1 > c2); 47378556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 47478556Sobrien if (s1 != s2) return (s1 > s2); 47578556Sobrien i1++; i2++; 47678556Sobrien /* 4 */ 47778556Sobrien c1 = block[i1]; c2 = block[i2]; 47878556Sobrien if (c1 != c2) return (c1 > c2); 47978556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 48078556Sobrien if (s1 != s2) return (s1 > s2); 48178556Sobrien i1++; i2++; 48278556Sobrien /* 5 */ 48378556Sobrien c1 = block[i1]; c2 = block[i2]; 48478556Sobrien if (c1 != c2) return (c1 > c2); 48578556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 48678556Sobrien if (s1 != s2) return (s1 > s2); 48778556Sobrien i1++; i2++; 48878556Sobrien /* 6 */ 48978556Sobrien c1 = block[i1]; c2 = block[i2]; 49078556Sobrien if (c1 != c2) return (c1 > c2); 49178556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 49278556Sobrien if (s1 != s2) return (s1 > s2); 49378556Sobrien i1++; i2++; 49478556Sobrien /* 7 */ 49578556Sobrien c1 = block[i1]; c2 = block[i2]; 49678556Sobrien if (c1 != c2) return (c1 > c2); 49778556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 49878556Sobrien if (s1 != s2) return (s1 > s2); 49978556Sobrien i1++; i2++; 50078556Sobrien /* 8 */ 50178556Sobrien c1 = block[i1]; c2 = block[i2]; 50278556Sobrien if (c1 != c2) return (c1 > c2); 50378556Sobrien s1 = quadrant[i1]; s2 = quadrant[i2]; 50478556Sobrien if (s1 != s2) return (s1 > s2); 50578556Sobrien i1++; i2++; 50678556Sobrien 50778556Sobrien if (i1 >= nblock) i1 -= nblock; 50878556Sobrien if (i2 >= nblock) i2 -= nblock; 50978556Sobrien 51078556Sobrien k -= 8; 51178556Sobrien (*budget)--; 51278556Sobrien } 51378556Sobrien while (k >= 0); 51478556Sobrien 51578556Sobrien return False; 51678556Sobrien} 51778556Sobrien 51878556Sobrien 51978556Sobrien/*---------------------------------------------*/ 52078556Sobrien/*-- 52178556Sobrien Knuth's increments seem to work better 52278556Sobrien than Incerpi-Sedgewick here. Possibly 52378556Sobrien because the number of elems to sort is 52478556Sobrien usually small, typically <= 20. 52578556Sobrien--*/ 52678556Sobrienstatic 52778556SobrienInt32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, 52878556Sobrien 9841, 29524, 88573, 265720, 52978556Sobrien 797161, 2391484 }; 53078556Sobrien 53178556Sobrienstatic 53278556Sobrienvoid mainSimpleSort ( UInt32* ptr, 53378556Sobrien UChar* block, 53478556Sobrien UInt16* quadrant, 53578556Sobrien Int32 nblock, 53678556Sobrien Int32 lo, 53778556Sobrien Int32 hi, 53878556Sobrien Int32 d, 53978556Sobrien Int32* budget ) 54078556Sobrien{ 54178556Sobrien Int32 i, j, h, bigN, hp; 54278556Sobrien UInt32 v; 54378556Sobrien 54478556Sobrien bigN = hi - lo + 1; 54578556Sobrien if (bigN < 2) return; 54678556Sobrien 54778556Sobrien hp = 0; 54878556Sobrien while (incs[hp] < bigN) hp++; 54978556Sobrien hp--; 55078556Sobrien 55178556Sobrien for (; hp >= 0; hp--) { 55278556Sobrien h = incs[hp]; 55378556Sobrien 55478556Sobrien i = lo + h; 55578556Sobrien while (True) { 55678556Sobrien 55778556Sobrien /*-- copy 1 --*/ 55878556Sobrien if (i > hi) break; 55978556Sobrien v = ptr[i]; 56078556Sobrien j = i; 56178556Sobrien while ( mainGtU ( 56278556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 56378556Sobrien ) ) { 56478556Sobrien ptr[j] = ptr[j-h]; 56578556Sobrien j = j - h; 56678556Sobrien if (j <= (lo + h - 1)) break; 56778556Sobrien } 56878556Sobrien ptr[j] = v; 56978556Sobrien i++; 57078556Sobrien 57178556Sobrien /*-- copy 2 --*/ 57278556Sobrien if (i > hi) break; 57378556Sobrien v = ptr[i]; 57478556Sobrien j = i; 57578556Sobrien while ( mainGtU ( 57678556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 57778556Sobrien ) ) { 57878556Sobrien ptr[j] = ptr[j-h]; 57978556Sobrien j = j - h; 58078556Sobrien if (j <= (lo + h - 1)) break; 58178556Sobrien } 58278556Sobrien ptr[j] = v; 58378556Sobrien i++; 58478556Sobrien 58578556Sobrien /*-- copy 3 --*/ 58678556Sobrien if (i > hi) break; 58778556Sobrien v = ptr[i]; 58878556Sobrien j = i; 58978556Sobrien while ( mainGtU ( 59078556Sobrien ptr[j-h]+d, v+d, block, quadrant, nblock, budget 59178556Sobrien ) ) { 59278556Sobrien ptr[j] = ptr[j-h]; 59378556Sobrien j = j - h; 59478556Sobrien if (j <= (lo + h - 1)) break; 59578556Sobrien } 59678556Sobrien ptr[j] = v; 59778556Sobrien i++; 59878556Sobrien 59978556Sobrien if (*budget < 0) return; 60078556Sobrien } 60178556Sobrien } 60278556Sobrien} 60378556Sobrien 60478556Sobrien 60578556Sobrien/*---------------------------------------------*/ 60678556Sobrien/*-- 60778556Sobrien The following is an implementation of 60878556Sobrien an elegant 3-way quicksort for strings, 60978556Sobrien described in a paper "Fast Algorithms for 61078556Sobrien Sorting and Searching Strings", by Robert 61178556Sobrien Sedgewick and Jon L. Bentley. 61278556Sobrien--*/ 61378556Sobrien 61478556Sobrien#define mswap(zz1, zz2) \ 61578556Sobrien { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } 61678556Sobrien 61778556Sobrien#define mvswap(zzp1, zzp2, zzn) \ 61878556Sobrien{ \ 61978556Sobrien Int32 yyp1 = (zzp1); \ 62078556Sobrien Int32 yyp2 = (zzp2); \ 62178556Sobrien Int32 yyn = (zzn); \ 62278556Sobrien while (yyn > 0) { \ 62378556Sobrien mswap(ptr[yyp1], ptr[yyp2]); \ 62478556Sobrien yyp1++; yyp2++; yyn--; \ 62578556Sobrien } \ 62678556Sobrien} 62778556Sobrien 62878556Sobrienstatic 62978556Sobrien__inline__ 63078556SobrienUChar mmed3 ( UChar a, UChar b, UChar c ) 63178556Sobrien{ 63278556Sobrien UChar t; 63378556Sobrien if (a > b) { t = a; a = b; b = t; }; 63478556Sobrien if (b > c) { 63578556Sobrien b = c; 63678556Sobrien if (a > b) b = a; 63778556Sobrien } 63878556Sobrien return b; 63978556Sobrien} 64078556Sobrien 64178556Sobrien#define mmin(a,b) ((a) < (b)) ? (a) : (b) 64278556Sobrien 64378556Sobrien#define mpush(lz,hz,dz) { stackLo[sp] = lz; \ 64478556Sobrien stackHi[sp] = hz; \ 64578556Sobrien stackD [sp] = dz; \ 64678556Sobrien sp++; } 64778556Sobrien 64878556Sobrien#define mpop(lz,hz,dz) { sp--; \ 64978556Sobrien lz = stackLo[sp]; \ 65078556Sobrien hz = stackHi[sp]; \ 65178556Sobrien dz = stackD [sp]; } 65278556Sobrien 65378556Sobrien 65478556Sobrien#define mnextsize(az) (nextHi[az]-nextLo[az]) 65578556Sobrien 65678556Sobrien#define mnextswap(az,bz) \ 65778556Sobrien { Int32 tz; \ 65878556Sobrien tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ 65978556Sobrien tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ 66078556Sobrien tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } 66178556Sobrien 66278556Sobrien 66378556Sobrien#define MAIN_QSORT_SMALL_THRESH 20 66478556Sobrien#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) 66578556Sobrien#define MAIN_QSORT_STACK_SIZE 100 66678556Sobrien 66778556Sobrienstatic 66878556Sobrienvoid mainQSort3 ( UInt32* ptr, 66978556Sobrien UChar* block, 67078556Sobrien UInt16* quadrant, 67178556Sobrien Int32 nblock, 67278556Sobrien Int32 loSt, 67378556Sobrien Int32 hiSt, 67478556Sobrien Int32 dSt, 67578556Sobrien Int32* budget ) 67678556Sobrien{ 67778556Sobrien Int32 unLo, unHi, ltLo, gtHi, n, m, med; 67878556Sobrien Int32 sp, lo, hi, d; 67978556Sobrien 68078556Sobrien Int32 stackLo[MAIN_QSORT_STACK_SIZE]; 68178556Sobrien Int32 stackHi[MAIN_QSORT_STACK_SIZE]; 68278556Sobrien Int32 stackD [MAIN_QSORT_STACK_SIZE]; 68378556Sobrien 68478556Sobrien Int32 nextLo[3]; 68578556Sobrien Int32 nextHi[3]; 68678556Sobrien Int32 nextD [3]; 68778556Sobrien 68878556Sobrien sp = 0; 68978556Sobrien mpush ( loSt, hiSt, dSt ); 69078556Sobrien 69178556Sobrien while (sp > 0) { 69278556Sobrien 69378556Sobrien AssertH ( sp < MAIN_QSORT_STACK_SIZE, 1001 ); 69478556Sobrien 69578556Sobrien mpop ( lo, hi, d ); 69678556Sobrien if (hi - lo < MAIN_QSORT_SMALL_THRESH || 69778556Sobrien d > MAIN_QSORT_DEPTH_THRESH) { 69878556Sobrien mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); 69978556Sobrien if (*budget < 0) return; 70078556Sobrien continue; 70178556Sobrien } 70278556Sobrien 70378556Sobrien med = (Int32) 70478556Sobrien mmed3 ( block[ptr[ lo ]+d], 70578556Sobrien block[ptr[ hi ]+d], 70678556Sobrien block[ptr[ (lo+hi)>>1 ]+d] ); 70778556Sobrien 70878556Sobrien unLo = ltLo = lo; 70978556Sobrien unHi = gtHi = hi; 71078556Sobrien 71178556Sobrien while (True) { 71278556Sobrien while (True) { 71378556Sobrien if (unLo > unHi) break; 71478556Sobrien n = ((Int32)block[ptr[unLo]+d]) - med; 71578556Sobrien if (n == 0) { 71678556Sobrien mswap(ptr[unLo], ptr[ltLo]); 71778556Sobrien ltLo++; unLo++; continue; 71878556Sobrien }; 71978556Sobrien if (n > 0) break; 72078556Sobrien unLo++; 72178556Sobrien } 72278556Sobrien while (True) { 72378556Sobrien if (unLo > unHi) break; 72478556Sobrien n = ((Int32)block[ptr[unHi]+d]) - med; 72578556Sobrien if (n == 0) { 72678556Sobrien mswap(ptr[unHi], ptr[gtHi]); 72778556Sobrien gtHi--; unHi--; continue; 72878556Sobrien }; 72978556Sobrien if (n < 0) break; 73078556Sobrien unHi--; 73178556Sobrien } 73278556Sobrien if (unLo > unHi) break; 73378556Sobrien mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; 73478556Sobrien } 73578556Sobrien 73678556Sobrien AssertD ( unHi == unLo-1, "mainQSort3(2)" ); 73778556Sobrien 73878556Sobrien if (gtHi < ltLo) { 73978556Sobrien mpush(lo, hi, d+1 ); 74078556Sobrien continue; 74178556Sobrien } 74278556Sobrien 74378556Sobrien n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); 74478556Sobrien m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); 74578556Sobrien 74678556Sobrien n = lo + unLo - ltLo - 1; 74778556Sobrien m = hi - (gtHi - unHi) + 1; 74878556Sobrien 74978556Sobrien nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; 75078556Sobrien nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; 75178556Sobrien nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; 75278556Sobrien 75378556Sobrien if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 75478556Sobrien if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); 75578556Sobrien if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); 75678556Sobrien 75778556Sobrien AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); 75878556Sobrien AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); 75978556Sobrien 76078556Sobrien mpush (nextLo[0], nextHi[0], nextD[0]); 76178556Sobrien mpush (nextLo[1], nextHi[1], nextD[1]); 76278556Sobrien mpush (nextLo[2], nextHi[2], nextD[2]); 76378556Sobrien } 76478556Sobrien} 76578556Sobrien 76678556Sobrien#undef mswap 76778556Sobrien#undef mvswap 76878556Sobrien#undef mpush 76978556Sobrien#undef mpop 77078556Sobrien#undef mmin 77178556Sobrien#undef mnextsize 77278556Sobrien#undef mnextswap 77378556Sobrien#undef MAIN_QSORT_SMALL_THRESH 77478556Sobrien#undef MAIN_QSORT_DEPTH_THRESH 77578556Sobrien#undef MAIN_QSORT_STACK_SIZE 77678556Sobrien 77778556Sobrien 77878556Sobrien/*---------------------------------------------*/ 77978556Sobrien/* Pre: 78078556Sobrien nblock > N_OVERSHOOT 78178556Sobrien block32 exists for [0 .. nblock-1 +N_OVERSHOOT] 78278556Sobrien ((UChar*)block32) [0 .. nblock-1] holds block 78378556Sobrien ptr exists for [0 .. nblock-1] 78478556Sobrien 78578556Sobrien Post: 78678556Sobrien ((UChar*)block32) [0 .. nblock-1] holds block 78778556Sobrien All other areas of block32 destroyed 78878556Sobrien ftab [0 .. 65536 ] destroyed 78978556Sobrien ptr [0 .. nblock-1] holds sorted order 79078556Sobrien if (*budget < 0), sorting was abandoned 79178556Sobrien*/ 79278556Sobrien 79378556Sobrien#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) 79478556Sobrien#define SETMASK (1 << 21) 79578556Sobrien#define CLEARMASK (~(SETMASK)) 79678556Sobrien 79778556Sobrienstatic 79878556Sobrienvoid mainSort ( UInt32* ptr, 79978556Sobrien UChar* block, 80078556Sobrien UInt16* quadrant, 80178556Sobrien UInt32* ftab, 80278556Sobrien Int32 nblock, 80378556Sobrien Int32 verb, 80478556Sobrien Int32* budget ) 80578556Sobrien{ 80678556Sobrien Int32 i, j, k, ss, sb; 80778556Sobrien Int32 runningOrder[256]; 80878556Sobrien Bool bigDone[256]; 80978556Sobrien Int32 copyStart[256]; 81078556Sobrien Int32 copyEnd [256]; 81178556Sobrien UChar c1; 81278556Sobrien Int32 numQSorted; 81378556Sobrien UInt16 s; 81478556Sobrien if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" ); 81578556Sobrien 81678556Sobrien /*-- set up the 2-byte frequency table --*/ 81778556Sobrien for (i = 65536; i >= 0; i--) ftab[i] = 0; 81878556Sobrien 81978556Sobrien j = block[0] << 8; 82078556Sobrien i = nblock-1; 82178556Sobrien for (; i >= 3; i -= 4) { 82278556Sobrien quadrant[i] = 0; 82378556Sobrien j = (j >> 8) | ( ((UInt16)block[i]) << 8); 82478556Sobrien ftab[j]++; 82578556Sobrien quadrant[i-1] = 0; 82678556Sobrien j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); 82778556Sobrien ftab[j]++; 82878556Sobrien quadrant[i-2] = 0; 82978556Sobrien j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); 83078556Sobrien ftab[j]++; 83178556Sobrien quadrant[i-3] = 0; 83278556Sobrien j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); 83378556Sobrien ftab[j]++; 83478556Sobrien } 83578556Sobrien for (; i >= 0; i--) { 83678556Sobrien quadrant[i] = 0; 83778556Sobrien j = (j >> 8) | ( ((UInt16)block[i]) << 8); 83878556Sobrien ftab[j]++; 83978556Sobrien } 84078556Sobrien 84178556Sobrien /*-- (emphasises close relationship of block & quadrant) --*/ 84278556Sobrien for (i = 0; i < BZ_N_OVERSHOOT; i++) { 84378556Sobrien block [nblock+i] = block[i]; 84478556Sobrien quadrant[nblock+i] = 0; 84578556Sobrien } 84678556Sobrien 84778556Sobrien if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" ); 84878556Sobrien 84978556Sobrien /*-- Complete the initial radix sort --*/ 85078556Sobrien for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; 85178556Sobrien 85278556Sobrien s = block[0] << 8; 85378556Sobrien i = nblock-1; 85478556Sobrien for (; i >= 3; i -= 4) { 85578556Sobrien s = (s >> 8) | (block[i] << 8); 85678556Sobrien j = ftab[s] -1; 85778556Sobrien ftab[s] = j; 85878556Sobrien ptr[j] = i; 85978556Sobrien s = (s >> 8) | (block[i-1] << 8); 86078556Sobrien j = ftab[s] -1; 86178556Sobrien ftab[s] = j; 86278556Sobrien ptr[j] = i-1; 86378556Sobrien s = (s >> 8) | (block[i-2] << 8); 86478556Sobrien j = ftab[s] -1; 86578556Sobrien ftab[s] = j; 86678556Sobrien ptr[j] = i-2; 86778556Sobrien s = (s >> 8) | (block[i-3] << 8); 86878556Sobrien j = ftab[s] -1; 86978556Sobrien ftab[s] = j; 87078556Sobrien ptr[j] = i-3; 87178556Sobrien } 87278556Sobrien for (; i >= 0; i--) { 87378556Sobrien s = (s >> 8) | (block[i] << 8); 87478556Sobrien j = ftab[s] -1; 87578556Sobrien ftab[s] = j; 87678556Sobrien ptr[j] = i; 87778556Sobrien } 87878556Sobrien 87978556Sobrien /*-- 88078556Sobrien Now ftab contains the first loc of every small bucket. 88178556Sobrien Calculate the running order, from smallest to largest 88278556Sobrien big bucket. 88378556Sobrien --*/ 88478556Sobrien for (i = 0; i <= 255; i++) { 88578556Sobrien bigDone [i] = False; 88678556Sobrien runningOrder[i] = i; 88778556Sobrien } 88878556Sobrien 88978556Sobrien { 89078556Sobrien Int32 vv; 89178556Sobrien Int32 h = 1; 89278556Sobrien do h = 3 * h + 1; while (h <= 256); 89378556Sobrien do { 89478556Sobrien h = h / 3; 89578556Sobrien for (i = h; i <= 255; i++) { 89678556Sobrien vv = runningOrder[i]; 89778556Sobrien j = i; 89878556Sobrien while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { 89978556Sobrien runningOrder[j] = runningOrder[j-h]; 90078556Sobrien j = j - h; 90178556Sobrien if (j <= (h - 1)) goto zero; 90278556Sobrien } 90378556Sobrien zero: 90478556Sobrien runningOrder[j] = vv; 90578556Sobrien } 90678556Sobrien } while (h != 1); 90778556Sobrien } 90878556Sobrien 90978556Sobrien /*-- 91078556Sobrien The main sorting loop. 91178556Sobrien --*/ 91278556Sobrien 91378556Sobrien numQSorted = 0; 91478556Sobrien 91578556Sobrien for (i = 0; i <= 255; i++) { 91678556Sobrien 91778556Sobrien /*-- 91878556Sobrien Process big buckets, starting with the least full. 91978556Sobrien Basically this is a 3-step process in which we call 92078556Sobrien mainQSort3 to sort the small buckets [ss, j], but 92178556Sobrien also make a big effort to avoid the calls if we can. 92278556Sobrien --*/ 92378556Sobrien ss = runningOrder[i]; 92478556Sobrien 92578556Sobrien /*-- 92678556Sobrien Step 1: 92778556Sobrien Complete the big bucket [ss] by quicksorting 92878556Sobrien any unsorted small buckets [ss, j], for j != ss. 92978556Sobrien Hopefully previous pointer-scanning phases have already 93078556Sobrien completed many of the small buckets [ss, j], so 93178556Sobrien we don't have to sort them at all. 93278556Sobrien --*/ 93378556Sobrien for (j = 0; j <= 255; j++) { 93478556Sobrien if (j != ss) { 93578556Sobrien sb = (ss << 8) + j; 93678556Sobrien if ( ! (ftab[sb] & SETMASK) ) { 93778556Sobrien Int32 lo = ftab[sb] & CLEARMASK; 93878556Sobrien Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; 93978556Sobrien if (hi > lo) { 94078556Sobrien if (verb >= 4) 94178556Sobrien VPrintf4 ( " qsort [0x%x, 0x%x] " 94278556Sobrien "done %d this %d\n", 94378556Sobrien ss, j, numQSorted, hi - lo + 1 ); 94478556Sobrien mainQSort3 ( 94578556Sobrien ptr, block, quadrant, nblock, 94678556Sobrien lo, hi, BZ_N_RADIX, budget 94778556Sobrien ); 94878556Sobrien numQSorted += (hi - lo + 1); 94978556Sobrien if (*budget < 0) return; 95078556Sobrien } 95178556Sobrien } 95278556Sobrien ftab[sb] |= SETMASK; 95378556Sobrien } 95478556Sobrien } 95578556Sobrien 95678556Sobrien AssertH ( !bigDone[ss], 1006 ); 95778556Sobrien 95878556Sobrien /*-- 95978556Sobrien Step 2: 96078556Sobrien Now scan this big bucket [ss] so as to synthesise the 96178556Sobrien sorted order for small buckets [t, ss] for all t, 96278556Sobrien including, magically, the bucket [ss,ss] too. 96378556Sobrien This will avoid doing Real Work in subsequent Step 1's. 96478556Sobrien --*/ 96578556Sobrien { 96678556Sobrien for (j = 0; j <= 255; j++) { 96778556Sobrien copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; 96878556Sobrien copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; 96978556Sobrien } 97078556Sobrien for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { 97178556Sobrien k = ptr[j]-1; if (k < 0) k += nblock; 97278556Sobrien c1 = block[k]; 97378556Sobrien if (!bigDone[c1]) 97478556Sobrien ptr[ copyStart[c1]++ ] = k; 97578556Sobrien } 97678556Sobrien for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { 97778556Sobrien k = ptr[j]-1; if (k < 0) k += nblock; 97878556Sobrien c1 = block[k]; 97978556Sobrien if (!bigDone[c1]) 98078556Sobrien ptr[ copyEnd[c1]-- ] = k; 98178556Sobrien } 98278556Sobrien } 98378556Sobrien 98478556Sobrien AssertH ( copyStart[ss]-1 == copyEnd[ss], 1007 ); 98578556Sobrien 98678556Sobrien for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; 98778556Sobrien 98878556Sobrien /*-- 98978556Sobrien Step 3: 99078556Sobrien The [ss] big bucket is now done. Record this fact, 99178556Sobrien and update the quadrant descriptors. Remember to 99278556Sobrien update quadrants in the overshoot area too, if 99378556Sobrien necessary. The "if (i < 255)" test merely skips 99478556Sobrien this updating for the last bucket processed, since 99578556Sobrien updating for the last bucket is pointless. 99678556Sobrien 99778556Sobrien The quadrant array provides a way to incrementally 99878556Sobrien cache sort orderings, as they appear, so as to 99978556Sobrien make subsequent comparisons in fullGtU() complete 100078556Sobrien faster. For repetitive blocks this makes a big 100178556Sobrien difference (but not big enough to be able to avoid 100278556Sobrien the fallback sorting mechanism, exponential radix sort). 100378556Sobrien 100478556Sobrien The precise meaning is: at all times: 100578556Sobrien 100678556Sobrien for 0 <= i < nblock and 0 <= j <= nblock 100778556Sobrien 100878556Sobrien if block[i] != block[j], 100978556Sobrien 101078556Sobrien then the relative values of quadrant[i] and 101178556Sobrien quadrant[j] are meaningless. 101278556Sobrien 101378556Sobrien else { 101478556Sobrien if quadrant[i] < quadrant[j] 101578556Sobrien then the string starting at i lexicographically 101678556Sobrien precedes the string starting at j 101778556Sobrien 101878556Sobrien else if quadrant[i] > quadrant[j] 101978556Sobrien then the string starting at j lexicographically 102078556Sobrien precedes the string starting at i 102178556Sobrien 102278556Sobrien else 102378556Sobrien the relative ordering of the strings starting 102478556Sobrien at i and j has not yet been determined. 102578556Sobrien } 102678556Sobrien --*/ 102778556Sobrien bigDone[ss] = True; 102878556Sobrien 102978556Sobrien if (i < 255) { 103078556Sobrien Int32 bbStart = ftab[ss << 8] & CLEARMASK; 103178556Sobrien Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; 103278556Sobrien Int32 shifts = 0; 103378556Sobrien 103478556Sobrien while ((bbSize >> shifts) > 65534) shifts++; 103578556Sobrien 103678556Sobrien for (j = bbSize-1; j >= 0; j--) { 103778556Sobrien Int32 a2update = ptr[bbStart + j]; 103878556Sobrien UInt16 qVal = (UInt16)(j >> shifts); 103978556Sobrien quadrant[a2update] = qVal; 104078556Sobrien if (a2update < BZ_N_OVERSHOOT) 104178556Sobrien quadrant[a2update + nblock] = qVal; 104278556Sobrien } 104378556Sobrien AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); 104478556Sobrien } 104578556Sobrien 104678556Sobrien } 104778556Sobrien 104878556Sobrien if (verb >= 4) 104978556Sobrien VPrintf3 ( " %d pointers, %d sorted, %d scanned\n", 105078556Sobrien nblock, numQSorted, nblock - numQSorted ); 105178556Sobrien} 105278556Sobrien 105378556Sobrien#undef BIGFREQ 105478556Sobrien#undef SETMASK 105578556Sobrien#undef CLEARMASK 105678556Sobrien 105778556Sobrien 105878556Sobrien/*---------------------------------------------*/ 105978556Sobrien/* Pre: 106078556Sobrien nblock > 0 106178556Sobrien arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] 106278556Sobrien ((UChar*)arr2) [0 .. nblock-1] holds block 106378556Sobrien arr1 exists for [0 .. nblock-1] 106478556Sobrien 106578556Sobrien Post: 106678556Sobrien ((UChar*)arr2) [0 .. nblock-1] holds block 106778556Sobrien All other areas of block destroyed 106878556Sobrien ftab [ 0 .. 65536 ] destroyed 106978556Sobrien arr1 [0 .. nblock-1] holds sorted order 107078556Sobrien*/ 107178556Sobrienvoid BZ2_blockSort ( EState* s ) 107278556Sobrien{ 107378556Sobrien UInt32* ptr = s->ptr; 107478556Sobrien UChar* block = s->block; 107578556Sobrien UInt32* ftab = s->ftab; 107678556Sobrien Int32 nblock = s->nblock; 107778556Sobrien Int32 verb = s->verbosity; 107878556Sobrien Int32 wfact = s->workFactor; 107978556Sobrien UInt16* quadrant; 108078556Sobrien Int32 budget; 108178556Sobrien Int32 budgetInit; 108278556Sobrien Int32 i; 108378556Sobrien 108478556Sobrien if (nblock < 10000) { 108578556Sobrien fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 108678556Sobrien } else { 108778556Sobrien /* Calculate the location for quadrant, remembering to get 108878556Sobrien the alignment right. Assumes that &(block[0]) is at least 108978556Sobrien 2-byte aligned -- this should be ok since block is really 109078556Sobrien the first section of arr2. 109178556Sobrien */ 109278556Sobrien i = nblock+BZ_N_OVERSHOOT; 109378556Sobrien if (i & 1) i++; 109478556Sobrien quadrant = (UInt16*)(&(block[i])); 109578556Sobrien 109678556Sobrien /* (wfact-1) / 3 puts the default-factor-30 109778556Sobrien transition point at very roughly the same place as 109878556Sobrien with v0.1 and v0.9.0. 109978556Sobrien Not that it particularly matters any more, since the 110078556Sobrien resulting compressed stream is now the same regardless 110178556Sobrien of whether or not we use the main sort or fallback sort. 110278556Sobrien */ 110378556Sobrien if (wfact < 1 ) wfact = 1; 110478556Sobrien if (wfact > 100) wfact = 100; 110578556Sobrien budgetInit = nblock * ((wfact-1) / 3); 110678556Sobrien budget = budgetInit; 110778556Sobrien 110878556Sobrien mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); 110978556Sobrien if (verb >= 3) 111078556Sobrien VPrintf3 ( " %d work, %d block, ratio %5.2f\n", 111178556Sobrien budgetInit - budget, 111278556Sobrien nblock, 111378556Sobrien (float)(budgetInit - budget) / 111478556Sobrien (float)(nblock==0 ? 1 : nblock) ); 111578556Sobrien if (budget < 0) { 111678556Sobrien if (verb >= 2) 111778556Sobrien VPrintf0 ( " too repetitive; using fallback" 111878556Sobrien " sorting algorithm\n" ); 111978556Sobrien fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); 112078556Sobrien } 112178556Sobrien } 112278556Sobrien 112378556Sobrien s->origPtr = -1; 112478556Sobrien for (i = 0; i < s->nblock; i++) 112578556Sobrien if (ptr[i] == 0) 112678556Sobrien { s->origPtr = i; break; }; 112778556Sobrien 112878556Sobrien AssertH( s->origPtr != -1, 1003 ); 112978556Sobrien} 113078556Sobrien 113178556Sobrien 113278556Sobrien/*-------------------------------------------------------------*/ 113378556Sobrien/*--- end blocksort.c ---*/ 113478556Sobrien/*-------------------------------------------------------------*/ 1135