|  |  | 
|  | /*-------------------------------------------------------------*/ | 
|  | /*--- Block sorting machinery                               ---*/ | 
|  | /*---                                           blocksort.c ---*/ | 
|  | /*-------------------------------------------------------------*/ | 
|  |  | 
|  | /* ------------------------------------------------------------------ | 
|  | This file is part of bzip2/libbzip2, a program and library for | 
|  | lossless, block-sorting data compression. | 
|  |  | 
|  | bzip2/libbzip2 version 1.0.5 of 10 December 2007 | 
|  | Copyright (C) 1996-2007 Julian Seward <[email protected]> | 
|  |  | 
|  | Please read the WARNING, DISCLAIMER and PATENTS sections in the | 
|  | README file. | 
|  |  | 
|  | This program is released under the terms of the license contained | 
|  | in the file LICENSE. | 
|  | ------------------------------------------------------------------ */ | 
|  |  | 
|  |  | 
|  | #include "bzlib_private.h" | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /*--- Fallback O(N log(N)^2) sorting        ---*/ | 
|  | /*--- algorithm, for repetitive blocks      ---*/ | 
|  | /*---------------------------------------------*/ | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | static | 
|  | __inline__ | 
|  | void fallbackSimpleSort ( UInt32* fmap, | 
|  | UInt32* eclass, | 
|  | Int32   lo, | 
|  | Int32   hi ) | 
|  | { | 
|  | Int32 i, j, tmp; | 
|  | UInt32 ec_tmp; | 
|  |  | 
|  | if (lo == hi) return; | 
|  |  | 
|  | if (hi - lo > 3) { | 
|  | for ( i = hi-4; i >= lo; i-- ) { | 
|  | tmp = fmap[i]; | 
|  | ec_tmp = eclass[tmp]; | 
|  | for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 ) | 
|  | fmap[j-4] = fmap[j]; | 
|  | fmap[j-4] = tmp; | 
|  | } | 
|  | } | 
|  |  | 
|  | for ( i = hi-1; i >= lo; i-- ) { | 
|  | tmp = fmap[i]; | 
|  | ec_tmp = eclass[tmp]; | 
|  | for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ ) | 
|  | fmap[j-1] = fmap[j]; | 
|  | fmap[j-1] = tmp; | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | #define fswap(zz1, zz2) \ | 
|  | { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } | 
|  |  | 
|  | #define fvswap(zzp1, zzp2, zzn)       \ | 
|  | {                                     \ | 
|  | Int32 yyp1 = (zzp1);               \ | 
|  | Int32 yyp2 = (zzp2);               \ | 
|  | Int32 yyn  = (zzn);                \ | 
|  | while (yyn > 0) {                  \ | 
|  | fswap(fmap[yyp1], fmap[yyp2]);  \ | 
|  | yyp1++; yyp2++; yyn--;          \ | 
|  | }                                  \ | 
|  | } | 
|  |  | 
|  |  | 
|  | #define fmin(a,b) ((a) < (b)) ? (a) : (b) | 
|  |  | 
|  | #define fpush(lz,hz) { stackLo[sp] = lz; \ | 
|  | stackHi[sp] = hz; \ | 
|  | sp++; } | 
|  |  | 
|  | #define fpop(lz,hz) { sp--;              \ | 
|  | lz = stackLo[sp];  \ | 
|  | hz = stackHi[sp]; } | 
|  |  | 
|  | #define FALLBACK_QSORT_SMALL_THRESH 10 | 
|  | #define FALLBACK_QSORT_STACK_SIZE   100 | 
|  |  | 
|  |  | 
|  | static | 
|  | void fallbackQSort3 ( UInt32* fmap, | 
|  | UInt32* eclass, | 
|  | Int32   loSt, | 
|  | Int32   hiSt ) | 
|  | { | 
|  | Int32 unLo, unHi, ltLo, gtHi, n, m; | 
|  | Int32 sp, lo, hi; | 
|  | UInt32 med, r, r3; | 
|  | Int32 stackLo[FALLBACK_QSORT_STACK_SIZE]; | 
|  | Int32 stackHi[FALLBACK_QSORT_STACK_SIZE]; | 
|  |  | 
|  | r = 0; | 
|  |  | 
|  | sp = 0; | 
|  | fpush ( loSt, hiSt ); | 
|  |  | 
|  | while (sp > 0) { | 
|  |  | 
|  | AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 ); | 
|  |  | 
|  | fpop ( lo, hi ); | 
|  | if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { | 
|  | fallbackSimpleSort ( fmap, eclass, lo, hi ); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | /* Random partitioning.  Median of 3 sometimes fails to | 
|  | avoid bad cases.  Median of 9 seems to help but | 
|  | looks rather expensive.  This too seems to work but | 
|  | is cheaper.  Guidance for the magic constants | 
|  | 7621 and 32768 is taken from Sedgewick's algorithms | 
|  | book, chapter 35. | 
|  | */ | 
|  | r = ((r * 7621) + 1) % 32768; | 
|  | r3 = r % 3; | 
|  | if (r3 == 0) med = eclass[fmap[lo]]; else | 
|  | if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else | 
|  | med = eclass[fmap[hi]]; | 
|  |  | 
|  | unLo = ltLo = lo; | 
|  | unHi = gtHi = hi; | 
|  |  | 
|  | while (1) { | 
|  | while (1) { | 
|  | if (unLo > unHi) break; | 
|  | n = (Int32)eclass[fmap[unLo]] - (Int32)med; | 
|  | if (n == 0) { | 
|  | fswap(fmap[unLo], fmap[ltLo]); | 
|  | ltLo++; unLo++; | 
|  | continue; | 
|  | }; | 
|  | if (n > 0) break; | 
|  | unLo++; | 
|  | } | 
|  | while (1) { | 
|  | if (unLo > unHi) break; | 
|  | n = (Int32)eclass[fmap[unHi]] - (Int32)med; | 
|  | if (n == 0) { | 
|  | fswap(fmap[unHi], fmap[gtHi]); | 
|  | gtHi--; unHi--; | 
|  | continue; | 
|  | }; | 
|  | if (n < 0) break; | 
|  | unHi--; | 
|  | } | 
|  | if (unLo > unHi) break; | 
|  | fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; | 
|  | } | 
|  |  | 
|  | AssertD ( unHi == unLo-1, "fallbackQSort3(2)" ); | 
|  |  | 
|  | if (gtHi < ltLo) continue; | 
|  |  | 
|  | n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n); | 
|  | m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m); | 
|  |  | 
|  | n = lo + unLo - ltLo - 1; | 
|  | m = hi - (gtHi - unHi) + 1; | 
|  |  | 
|  | if (n - lo > hi - m) { | 
|  | fpush ( lo, n ); | 
|  | fpush ( m, hi ); | 
|  | } else { | 
|  | fpush ( m, hi ); | 
|  | fpush ( lo, n ); | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | #undef fmin | 
|  | #undef fpush | 
|  | #undef fpop | 
|  | #undef fswap | 
|  | #undef fvswap | 
|  | #undef FALLBACK_QSORT_SMALL_THRESH | 
|  | #undef FALLBACK_QSORT_STACK_SIZE | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /* Pre: | 
|  | nblock > 0 | 
|  | eclass exists for [0 .. nblock-1] | 
|  | ((UChar*)eclass) [0 .. nblock-1] holds block | 
|  | ptr exists for [0 .. nblock-1] | 
|  |  | 
|  | Post: | 
|  | ((UChar*)eclass) [0 .. nblock-1] holds block | 
|  | All other areas of eclass destroyed | 
|  | fmap [0 .. nblock-1] holds sorted order | 
|  | bhtab [ 0 .. 2+(nblock/32) ] destroyed | 
|  | */ | 
|  |  | 
|  | #define       SET_BH(zz)  bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) | 
|  | #define     CLEAR_BH(zz)  bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) | 
|  | #define     ISSET_BH(zz)  (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) | 
|  | #define      WORD_BH(zz)  bhtab[(zz) >> 5] | 
|  | #define UNALIGNED_BH(zz)  ((zz) & 0x01f) | 
|  |  | 
|  | static | 
|  | void fallbackSort ( UInt32* fmap, | 
|  | UInt32* eclass, | 
|  | UInt32* bhtab, | 
|  | Int32   nblock, | 
|  | Int32   verb ) | 
|  | { | 
|  | Int32 ftab[257]; | 
|  | Int32 ftabCopy[256]; | 
|  | Int32 H, i, j, k, l, r, cc, cc1; | 
|  | Int32 nNotDone; | 
|  | Int32 nBhtab; | 
|  | UChar* eclass8 = (UChar*)eclass; | 
|  |  | 
|  | /*-- | 
|  | Initial 1-char radix sort to generate | 
|  | initial fmap and initial BH bits. | 
|  | --*/ | 
|  | if (verb >= 4) | 
|  | VPrintf0 ( "        bucket sorting ...\n" ); | 
|  | for (i = 0; i < 257;    i++) ftab[i] = 0; | 
|  | for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; | 
|  | for (i = 0; i < 256;    i++) ftabCopy[i] = ftab[i]; | 
|  | for (i = 1; i < 257;    i++) ftab[i] += ftab[i-1]; | 
|  |  | 
|  | for (i = 0; i < nblock; i++) { | 
|  | j = eclass8[i]; | 
|  | k = ftab[j] - 1; | 
|  | ftab[j] = k; | 
|  | fmap[k] = i; | 
|  | } | 
|  |  | 
|  | nBhtab = 2 + (nblock / 32); | 
|  | for (i = 0; i < nBhtab; i++) bhtab[i] = 0; | 
|  | for (i = 0; i < 256; i++) SET_BH(ftab[i]); | 
|  |  | 
|  | /*-- | 
|  | Inductively refine the buckets.  Kind-of an | 
|  | "exponential radix sort" (!), inspired by the | 
|  | Manber-Myers suffix array construction algorithm. | 
|  | --*/ | 
|  |  | 
|  | /*-- set sentinel bits for block-end detection --*/ | 
|  | for (i = 0; i < 32; i++) { | 
|  | SET_BH(nblock + 2*i); | 
|  | CLEAR_BH(nblock + 2*i + 1); | 
|  | } | 
|  |  | 
|  | /*-- the log(N) loop --*/ | 
|  | H = 1; | 
|  | while (1) { | 
|  |  | 
|  | if (verb >= 4) | 
|  | VPrintf1 ( "        depth %6d has ", H ); | 
|  |  | 
|  | j = 0; | 
|  | for (i = 0; i < nblock; i++) { | 
|  | if (ISSET_BH(i)) j = i; | 
|  | k = fmap[i] - H; if (k < 0) k += nblock; | 
|  | eclass[k] = j; | 
|  | } | 
|  |  | 
|  | nNotDone = 0; | 
|  | r = -1; | 
|  | while (1) { | 
|  |  | 
|  | /*-- find the next non-singleton bucket --*/ | 
|  | k = r + 1; | 
|  | while (ISSET_BH(k) && UNALIGNED_BH(k)) k++; | 
|  | if (ISSET_BH(k)) { | 
|  | while (WORD_BH(k) == 0xffffffff) k += 32; | 
|  | while (ISSET_BH(k)) k++; | 
|  | } | 
|  | l = k - 1; | 
|  | if (l >= nblock) break; | 
|  | while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++; | 
|  | if (!ISSET_BH(k)) { | 
|  | while (WORD_BH(k) == 0x00000000) k += 32; | 
|  | while (!ISSET_BH(k)) k++; | 
|  | } | 
|  | r = k - 1; | 
|  | if (r >= nblock) break; | 
|  |  | 
|  | /*-- now [l, r] bracket current bucket --*/ | 
|  | if (r > l) { | 
|  | nNotDone += (r - l + 1); | 
|  | fallbackQSort3 ( fmap, eclass, l, r ); | 
|  |  | 
|  | /*-- scan bucket and generate header bits-- */ | 
|  | cc = -1; | 
|  | for (i = l; i <= r; i++) { | 
|  | cc1 = eclass[fmap[i]]; | 
|  | if (cc != cc1) { SET_BH(i); cc = cc1; }; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if (verb >= 4) | 
|  | VPrintf1 ( "%6d unresolved strings\n", nNotDone ); | 
|  |  | 
|  | H *= 2; | 
|  | if (H > nblock || nNotDone == 0) break; | 
|  | } | 
|  |  | 
|  | /*-- | 
|  | Reconstruct the original block in | 
|  | eclass8 [0 .. nblock-1], since the | 
|  | previous phase destroyed it. | 
|  | --*/ | 
|  | if (verb >= 4) | 
|  | VPrintf0 ( "        reconstructing block ...\n" ); | 
|  | j = 0; | 
|  | for (i = 0; i < nblock; i++) { | 
|  | while (ftabCopy[j] == 0) j++; | 
|  | ftabCopy[j]--; | 
|  | eclass8[fmap[i]] = (UChar)j; | 
|  | } | 
|  | AssertH ( j < 256, 1005 ); | 
|  | } | 
|  |  | 
|  | #undef       SET_BH | 
|  | #undef     CLEAR_BH | 
|  | #undef     ISSET_BH | 
|  | #undef      WORD_BH | 
|  | #undef UNALIGNED_BH | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /*--- The main, O(N^2 log(N)) sorting       ---*/ | 
|  | /*--- algorithm.  Faster for "normal"       ---*/ | 
|  | /*--- non-repetitive blocks.                ---*/ | 
|  | /*---------------------------------------------*/ | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | static | 
|  | __inline__ | 
|  | Bool mainGtU ( UInt32  i1, | 
|  | UInt32  i2, | 
|  | UChar*  block, | 
|  | UInt16* quadrant, | 
|  | UInt32  nblock, | 
|  | Int32*  budget ) | 
|  | { | 
|  | Int32  k; | 
|  | UChar  c1, c2; | 
|  | UInt16 s1, s2; | 
|  |  | 
|  | AssertD ( i1 != i2, "mainGtU" ); | 
|  | /* 1 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 2 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 3 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 4 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 5 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 6 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 7 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 8 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 9 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 10 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 11 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  | /* 12 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | i1++; i2++; | 
|  |  | 
|  | k = nblock + 8; | 
|  |  | 
|  | do { | 
|  | /* 1 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 2 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 3 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 4 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 5 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 6 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 7 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  | /* 8 */ | 
|  | c1 = block[i1]; c2 = block[i2]; | 
|  | if (c1 != c2) return (c1 > c2); | 
|  | s1 = quadrant[i1]; s2 = quadrant[i2]; | 
|  | if (s1 != s2) return (s1 > s2); | 
|  | i1++; i2++; | 
|  |  | 
|  | if (i1 >= nblock) i1 -= nblock; | 
|  | if (i2 >= nblock) i2 -= nblock; | 
|  |  | 
|  | k -= 8; | 
|  | (*budget)--; | 
|  | } | 
|  | while (k >= 0); | 
|  |  | 
|  | return False; | 
|  | } | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /*-- | 
|  | Knuth's increments seem to work better | 
|  | than Incerpi-Sedgewick here.  Possibly | 
|  | because the number of elems to sort is | 
|  | usually small, typically <= 20. | 
|  | --*/ | 
|  | static | 
|  | Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, | 
|  | 9841, 29524, 88573, 265720, | 
|  | 797161, 2391484 }; | 
|  |  | 
|  | static | 
|  | void mainSimpleSort ( UInt32* ptr, | 
|  | UChar*  block, | 
|  | UInt16* quadrant, | 
|  | Int32   nblock, | 
|  | Int32   lo, | 
|  | Int32   hi, | 
|  | Int32   d, | 
|  | Int32*  budget ) | 
|  | { | 
|  | Int32 i, j, h, bigN, hp; | 
|  | UInt32 v; | 
|  |  | 
|  | bigN = hi - lo + 1; | 
|  | if (bigN < 2) return; | 
|  |  | 
|  | hp = 0; | 
|  | while (incs[hp] < bigN) hp++; | 
|  | hp--; | 
|  |  | 
|  | for (; hp >= 0; hp--) { | 
|  | h = incs[hp]; | 
|  |  | 
|  | i = lo + h; | 
|  | while (True) { | 
|  |  | 
|  | /*-- copy 1 --*/ | 
|  | if (i > hi) break; | 
|  | v = ptr[i]; | 
|  | j = i; | 
|  | while ( mainGtU ( | 
|  | ptr[j-h]+d, v+d, block, quadrant, nblock, budget | 
|  | ) ) { | 
|  | ptr[j] = ptr[j-h]; | 
|  | j = j - h; | 
|  | if (j <= (lo + h - 1)) break; | 
|  | } | 
|  | ptr[j] = v; | 
|  | i++; | 
|  |  | 
|  | /*-- copy 2 --*/ | 
|  | if (i > hi) break; | 
|  | v = ptr[i]; | 
|  | j = i; | 
|  | while ( mainGtU ( | 
|  | ptr[j-h]+d, v+d, block, quadrant, nblock, budget | 
|  | ) ) { | 
|  | ptr[j] = ptr[j-h]; | 
|  | j = j - h; | 
|  | if (j <= (lo + h - 1)) break; | 
|  | } | 
|  | ptr[j] = v; | 
|  | i++; | 
|  |  | 
|  | /*-- copy 3 --*/ | 
|  | if (i > hi) break; | 
|  | v = ptr[i]; | 
|  | j = i; | 
|  | while ( mainGtU ( | 
|  | ptr[j-h]+d, v+d, block, quadrant, nblock, budget | 
|  | ) ) { | 
|  | ptr[j] = ptr[j-h]; | 
|  | j = j - h; | 
|  | if (j <= (lo + h - 1)) break; | 
|  | } | 
|  | ptr[j] = v; | 
|  | i++; | 
|  |  | 
|  | if (*budget < 0) return; | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /*-- | 
|  | The following is an implementation of | 
|  | an elegant 3-way quicksort for strings, | 
|  | described in a paper "Fast Algorithms for | 
|  | Sorting and Searching Strings", by Robert | 
|  | Sedgewick and Jon L. Bentley. | 
|  | --*/ | 
|  |  | 
|  | #define mswap(zz1, zz2) \ | 
|  | { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; } | 
|  |  | 
|  | #define mvswap(zzp1, zzp2, zzn)       \ | 
|  | {                                     \ | 
|  | Int32 yyp1 = (zzp1);               \ | 
|  | Int32 yyp2 = (zzp2);               \ | 
|  | Int32 yyn  = (zzn);                \ | 
|  | while (yyn > 0) {                  \ | 
|  | mswap(ptr[yyp1], ptr[yyp2]);    \ | 
|  | yyp1++; yyp2++; yyn--;          \ | 
|  | }                                  \ | 
|  | } | 
|  |  | 
|  | static | 
|  | __inline__ | 
|  | UChar mmed3 ( UChar a, UChar b, UChar c ) | 
|  | { | 
|  | UChar t; | 
|  | if (a > b) { t = a; a = b; b = t; }; | 
|  | if (b > c) { | 
|  | b = c; | 
|  | if (a > b) b = a; | 
|  | } | 
|  | return b; | 
|  | } | 
|  |  | 
|  | #define mmin(a,b) ((a) < (b)) ? (a) : (b) | 
|  |  | 
|  | #define mpush(lz,hz,dz) { stackLo[sp] = lz; \ | 
|  | stackHi[sp] = hz; \ | 
|  | stackD [sp] = dz; \ | 
|  | sp++; } | 
|  |  | 
|  | #define mpop(lz,hz,dz) { sp--;             \ | 
|  | lz = stackLo[sp]; \ | 
|  | hz = stackHi[sp]; \ | 
|  | dz = stackD [sp]; } | 
|  |  | 
|  |  | 
|  | #define mnextsize(az) (nextHi[az]-nextLo[az]) | 
|  |  | 
|  | #define mnextswap(az,bz)                                        \ | 
|  | { Int32 tz;                                                  \ | 
|  | tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ | 
|  | tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ | 
|  | tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; } | 
|  |  | 
|  |  | 
|  | #define MAIN_QSORT_SMALL_THRESH 20 | 
|  | #define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) | 
|  | #define MAIN_QSORT_STACK_SIZE 100 | 
|  |  | 
|  | static | 
|  | void mainQSort3 ( UInt32* ptr, | 
|  | UChar*  block, | 
|  | UInt16* quadrant, | 
|  | Int32   nblock, | 
|  | Int32   loSt, | 
|  | Int32   hiSt, | 
|  | Int32   dSt, | 
|  | Int32*  budget ) | 
|  | { | 
|  | Int32 unLo, unHi, ltLo, gtHi, n, m, med; | 
|  | Int32 sp, lo, hi, d; | 
|  |  | 
|  | Int32 stackLo[MAIN_QSORT_STACK_SIZE]; | 
|  | Int32 stackHi[MAIN_QSORT_STACK_SIZE]; | 
|  | Int32 stackD [MAIN_QSORT_STACK_SIZE]; | 
|  |  | 
|  | Int32 nextLo[3]; | 
|  | Int32 nextHi[3]; | 
|  | Int32 nextD [3]; | 
|  |  | 
|  | sp = 0; | 
|  | mpush ( loSt, hiSt, dSt ); | 
|  |  | 
|  | while (sp > 0) { | 
|  |  | 
|  | AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 ); | 
|  |  | 
|  | mpop ( lo, hi, d ); | 
|  | if (hi - lo < MAIN_QSORT_SMALL_THRESH || | 
|  | d > MAIN_QSORT_DEPTH_THRESH) { | 
|  | mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget ); | 
|  | if (*budget < 0) return; | 
|  | continue; | 
|  | } | 
|  |  | 
|  | med = (Int32) | 
|  | mmed3 ( block[ptr[ lo         ]+d], | 
|  | block[ptr[ hi         ]+d], | 
|  | block[ptr[ (lo+hi)>>1 ]+d] ); | 
|  |  | 
|  | unLo = ltLo = lo; | 
|  | unHi = gtHi = hi; | 
|  |  | 
|  | while (True) { | 
|  | while (True) { | 
|  | if (unLo > unHi) break; | 
|  | n = ((Int32)block[ptr[unLo]+d]) - med; | 
|  | if (n == 0) { | 
|  | mswap(ptr[unLo], ptr[ltLo]); | 
|  | ltLo++; unLo++; continue; | 
|  | }; | 
|  | if (n >  0) break; | 
|  | unLo++; | 
|  | } | 
|  | while (True) { | 
|  | if (unLo > unHi) break; | 
|  | n = ((Int32)block[ptr[unHi]+d]) - med; | 
|  | if (n == 0) { | 
|  | mswap(ptr[unHi], ptr[gtHi]); | 
|  | gtHi--; unHi--; continue; | 
|  | }; | 
|  | if (n <  0) break; | 
|  | unHi--; | 
|  | } | 
|  | if (unLo > unHi) break; | 
|  | mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--; | 
|  | } | 
|  |  | 
|  | AssertD ( unHi == unLo-1, "mainQSort3(2)" ); | 
|  |  | 
|  | if (gtHi < ltLo) { | 
|  | mpush(lo, hi, d+1 ); | 
|  | continue; | 
|  | } | 
|  |  | 
|  | n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n); | 
|  | m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m); | 
|  |  | 
|  | n = lo + unLo - ltLo - 1; | 
|  | m = hi - (gtHi - unHi) + 1; | 
|  |  | 
|  | nextLo[0] = lo;  nextHi[0] = n;   nextD[0] = d; | 
|  | nextLo[1] = m;   nextHi[1] = hi;  nextD[1] = d; | 
|  | nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; | 
|  |  | 
|  | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); | 
|  | if (mnextsize(1) < mnextsize(2)) mnextswap(1,2); | 
|  | if (mnextsize(0) < mnextsize(1)) mnextswap(0,1); | 
|  |  | 
|  | AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" ); | 
|  | AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" ); | 
|  |  | 
|  | mpush (nextLo[0], nextHi[0], nextD[0]); | 
|  | mpush (nextLo[1], nextHi[1], nextD[1]); | 
|  | mpush (nextLo[2], nextHi[2], nextD[2]); | 
|  | } | 
|  | } | 
|  |  | 
|  | #undef mswap | 
|  | #undef mvswap | 
|  | #undef mpush | 
|  | #undef mpop | 
|  | #undef mmin | 
|  | #undef mnextsize | 
|  | #undef mnextswap | 
|  | #undef MAIN_QSORT_SMALL_THRESH | 
|  | #undef MAIN_QSORT_DEPTH_THRESH | 
|  | #undef MAIN_QSORT_STACK_SIZE | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /* Pre: | 
|  | nblock > N_OVERSHOOT | 
|  | block32 exists for [0 .. nblock-1 +N_OVERSHOOT] | 
|  | ((UChar*)block32) [0 .. nblock-1] holds block | 
|  | ptr exists for [0 .. nblock-1] | 
|  |  | 
|  | Post: | 
|  | ((UChar*)block32) [0 .. nblock-1] holds block | 
|  | All other areas of block32 destroyed | 
|  | ftab [0 .. 65536 ] destroyed | 
|  | ptr [0 .. nblock-1] holds sorted order | 
|  | if (*budget < 0), sorting was abandoned | 
|  | */ | 
|  |  | 
|  | #define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) | 
|  | #define SETMASK (1 << 21) | 
|  | #define CLEARMASK (~(SETMASK)) | 
|  |  | 
|  | static | 
|  | void mainSort ( UInt32* ptr, | 
|  | UChar*  block, | 
|  | UInt16* quadrant, | 
|  | UInt32* ftab, | 
|  | Int32   nblock, | 
|  | Int32   verb, | 
|  | Int32*  budget ) | 
|  | { | 
|  | Int32  i, j, k, ss, sb; | 
|  | Int32  runningOrder[256]; | 
|  | Bool   bigDone[256]; | 
|  | Int32  copyStart[256]; | 
|  | Int32  copyEnd  [256]; | 
|  | UChar  c1; | 
|  | Int32  numQSorted; | 
|  | UInt16 s; | 
|  | if (verb >= 4) VPrintf0 ( "        main sort initialise ...\n" ); | 
|  |  | 
|  | /*-- set up the 2-byte frequency table --*/ | 
|  | for (i = 65536; i >= 0; i--) ftab[i] = 0; | 
|  |  | 
|  | j = block[0] << 8; | 
|  | i = nblock-1; | 
|  | for (; i >= 3; i -= 4) { | 
|  | quadrant[i] = 0; | 
|  | j = (j >> 8) | ( ((UInt16)block[i]) << 8); | 
|  | ftab[j]++; | 
|  | quadrant[i-1] = 0; | 
|  | j = (j >> 8) | ( ((UInt16)block[i-1]) << 8); | 
|  | ftab[j]++; | 
|  | quadrant[i-2] = 0; | 
|  | j = (j >> 8) | ( ((UInt16)block[i-2]) << 8); | 
|  | ftab[j]++; | 
|  | quadrant[i-3] = 0; | 
|  | j = (j >> 8) | ( ((UInt16)block[i-3]) << 8); | 
|  | ftab[j]++; | 
|  | } | 
|  | for (; i >= 0; i--) { | 
|  | quadrant[i] = 0; | 
|  | j = (j >> 8) | ( ((UInt16)block[i]) << 8); | 
|  | ftab[j]++; | 
|  | } | 
|  |  | 
|  | /*-- (emphasises close relationship of block & quadrant) --*/ | 
|  | for (i = 0; i < BZ_N_OVERSHOOT; i++) { | 
|  | block   [nblock+i] = block[i]; | 
|  | quadrant[nblock+i] = 0; | 
|  | } | 
|  |  | 
|  | if (verb >= 4) VPrintf0 ( "        bucket sorting ...\n" ); | 
|  |  | 
|  | /*-- Complete the initial radix sort --*/ | 
|  | for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1]; | 
|  |  | 
|  | s = block[0] << 8; | 
|  | i = nblock-1; | 
|  | for (; i >= 3; i -= 4) { | 
|  | s = (s >> 8) | (block[i] << 8); | 
|  | j = ftab[s] -1; | 
|  | ftab[s] = j; | 
|  | ptr[j] = i; | 
|  | s = (s >> 8) | (block[i-1] << 8); | 
|  | j = ftab[s] -1; | 
|  | ftab[s] = j; | 
|  | ptr[j] = i-1; | 
|  | s = (s >> 8) | (block[i-2] << 8); | 
|  | j = ftab[s] -1; | 
|  | ftab[s] = j; | 
|  | ptr[j] = i-2; | 
|  | s = (s >> 8) | (block[i-3] << 8); | 
|  | j = ftab[s] -1; | 
|  | ftab[s] = j; | 
|  | ptr[j] = i-3; | 
|  | } | 
|  | for (; i >= 0; i--) { | 
|  | s = (s >> 8) | (block[i] << 8); | 
|  | j = ftab[s] -1; | 
|  | ftab[s] = j; | 
|  | ptr[j] = i; | 
|  | } | 
|  |  | 
|  | /*-- | 
|  | Now ftab contains the first loc of every small bucket. | 
|  | Calculate the running order, from smallest to largest | 
|  | big bucket. | 
|  | --*/ | 
|  | for (i = 0; i <= 255; i++) { | 
|  | bigDone     [i] = False; | 
|  | runningOrder[i] = i; | 
|  | } | 
|  |  | 
|  | { | 
|  | Int32 vv; | 
|  | Int32 h = 1; | 
|  | do h = 3 * h + 1; while (h <= 256); | 
|  | do { | 
|  | h = h / 3; | 
|  | for (i = h; i <= 255; i++) { | 
|  | vv = runningOrder[i]; | 
|  | j = i; | 
|  | while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) { | 
|  | runningOrder[j] = runningOrder[j-h]; | 
|  | j = j - h; | 
|  | if (j <= (h - 1)) goto zero; | 
|  | } | 
|  | zero: | 
|  | runningOrder[j] = vv; | 
|  | } | 
|  | } while (h != 1); | 
|  | } | 
|  |  | 
|  | /*-- | 
|  | The main sorting loop. | 
|  | --*/ | 
|  |  | 
|  | numQSorted = 0; | 
|  |  | 
|  | for (i = 0; i <= 255; i++) { | 
|  |  | 
|  | /*-- | 
|  | Process big buckets, starting with the least full. | 
|  | Basically this is a 3-step process in which we call | 
|  | mainQSort3 to sort the small buckets [ss, j], but | 
|  | also make a big effort to avoid the calls if we can. | 
|  | --*/ | 
|  | ss = runningOrder[i]; | 
|  |  | 
|  | /*-- | 
|  | Step 1: | 
|  | Complete the big bucket [ss] by quicksorting | 
|  | any unsorted small buckets [ss, j], for j != ss. | 
|  | Hopefully previous pointer-scanning phases have already | 
|  | completed many of the small buckets [ss, j], so | 
|  | we don't have to sort them at all. | 
|  | --*/ | 
|  | for (j = 0; j <= 255; j++) { | 
|  | if (j != ss) { | 
|  | sb = (ss << 8) + j; | 
|  | if ( ! (ftab[sb] & SETMASK) ) { | 
|  | Int32 lo = ftab[sb]   & CLEARMASK; | 
|  | Int32 hi = (ftab[sb+1] & CLEARMASK) - 1; | 
|  | if (hi > lo) { | 
|  | if (verb >= 4) | 
|  | VPrintf4 ( "        qsort [0x%x, 0x%x]   " | 
|  | "done %d   this %d\n", | 
|  | ss, j, numQSorted, hi - lo + 1 ); | 
|  | mainQSort3 ( | 
|  | ptr, block, quadrant, nblock, | 
|  | lo, hi, BZ_N_RADIX, budget | 
|  | ); | 
|  | numQSorted += (hi - lo + 1); | 
|  | if (*budget < 0) return; | 
|  | } | 
|  | } | 
|  | ftab[sb] |= SETMASK; | 
|  | } | 
|  | } | 
|  |  | 
|  | AssertH ( !bigDone[ss], 1006 ); | 
|  |  | 
|  | /*-- | 
|  | Step 2: | 
|  | Now scan this big bucket [ss] so as to synthesise the | 
|  | sorted order for small buckets [t, ss] for all t, | 
|  | including, magically, the bucket [ss,ss] too. | 
|  | This will avoid doing Real Work in subsequent Step 1's. | 
|  | --*/ | 
|  | { | 
|  | for (j = 0; j <= 255; j++) { | 
|  | copyStart[j] =  ftab[(j << 8) + ss]     & CLEARMASK; | 
|  | copyEnd  [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; | 
|  | } | 
|  | for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { | 
|  | k = ptr[j]-1; if (k < 0) k += nblock; | 
|  | c1 = block[k]; | 
|  | if (!bigDone[c1]) | 
|  | ptr[ copyStart[c1]++ ] = k; | 
|  | } | 
|  | for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { | 
|  | k = ptr[j]-1; if (k < 0) k += nblock; | 
|  | c1 = block[k]; | 
|  | if (!bigDone[c1]) | 
|  | ptr[ copyEnd[c1]-- ] = k; | 
|  | } | 
|  | } | 
|  |  | 
|  | AssertH ( (copyStart[ss]-1 == copyEnd[ss]) | 
|  | || | 
|  | /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. | 
|  | Necessity for this case is demonstrated by compressing | 
|  | a sequence of approximately 48.5 million of character | 
|  | 251; 1.0.0/1.0.1 will then die here. */ | 
|  | (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), | 
|  | 1007 ) | 
|  |  | 
|  | for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK; | 
|  |  | 
|  | /*-- | 
|  | Step 3: | 
|  | The [ss] big bucket is now done.  Record this fact, | 
|  | and update the quadrant descriptors.  Remember to | 
|  | update quadrants in the overshoot area too, if | 
|  | necessary.  The "if (i < 255)" test merely skips | 
|  | this updating for the last bucket processed, since | 
|  | updating for the last bucket is pointless. | 
|  |  | 
|  | The quadrant array provides a way to incrementally | 
|  | cache sort orderings, as they appear, so as to | 
|  | make subsequent comparisons in fullGtU() complete | 
|  | faster.  For repetitive blocks this makes a big | 
|  | difference (but not big enough to be able to avoid | 
|  | the fallback sorting mechanism, exponential radix sort). | 
|  |  | 
|  | The precise meaning is: at all times: | 
|  |  | 
|  | for 0 <= i < nblock and 0 <= j <= nblock | 
|  |  | 
|  | if block[i] != block[j], | 
|  |  | 
|  | then the relative values of quadrant[i] and | 
|  | quadrant[j] are meaningless. | 
|  |  | 
|  | else { | 
|  | if quadrant[i] < quadrant[j] | 
|  | then the string starting at i lexicographically | 
|  | precedes the string starting at j | 
|  |  | 
|  | else if quadrant[i] > quadrant[j] | 
|  | then the string starting at j lexicographically | 
|  | precedes the string starting at i | 
|  |  | 
|  | else | 
|  | the relative ordering of the strings starting | 
|  | at i and j has not yet been determined. | 
|  | } | 
|  | --*/ | 
|  | bigDone[ss] = True; | 
|  |  | 
|  | if (i < 255) { | 
|  | Int32 bbStart  = ftab[ss << 8] & CLEARMASK; | 
|  | Int32 bbSize   = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; | 
|  | Int32 shifts   = 0; | 
|  |  | 
|  | while ((bbSize >> shifts) > 65534) shifts++; | 
|  |  | 
|  | for (j = bbSize-1; j >= 0; j--) { | 
|  | Int32 a2update     = ptr[bbStart + j]; | 
|  | UInt16 qVal        = (UInt16)(j >> shifts); | 
|  | quadrant[a2update] = qVal; | 
|  | if (a2update < BZ_N_OVERSHOOT) | 
|  | quadrant[a2update + nblock] = qVal; | 
|  | } | 
|  | AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 ); | 
|  | } | 
|  |  | 
|  | } | 
|  |  | 
|  | if (verb >= 4) | 
|  | VPrintf3 ( "        %d pointers, %d sorted, %d scanned\n", | 
|  | nblock, numQSorted, nblock - numQSorted ); | 
|  | } | 
|  |  | 
|  | #undef BIGFREQ | 
|  | #undef SETMASK | 
|  | #undef CLEARMASK | 
|  |  | 
|  |  | 
|  | /*---------------------------------------------*/ | 
|  | /* Pre: | 
|  | nblock > 0 | 
|  | arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] | 
|  | ((UChar*)arr2)  [0 .. nblock-1] holds block | 
|  | arr1 exists for [0 .. nblock-1] | 
|  |  | 
|  | Post: | 
|  | ((UChar*)arr2) [0 .. nblock-1] holds block | 
|  | All other areas of block destroyed | 
|  | ftab [ 0 .. 65536 ] destroyed | 
|  | arr1 [0 .. nblock-1] holds sorted order | 
|  | */ | 
|  | void BZ2_blockSort ( EState* s ) | 
|  | { | 
|  | UInt32* ptr    = s->ptr; | 
|  | UChar*  block  = s->block; | 
|  | UInt32* ftab   = s->ftab; | 
|  | Int32   nblock = s->nblock; | 
|  | Int32   verb   = s->verbosity; | 
|  | Int32   wfact  = s->workFactor; | 
|  | UInt16* quadrant; | 
|  | Int32   budget; | 
|  | Int32   budgetInit; | 
|  | Int32   i; | 
|  |  | 
|  | if (nblock < 10000) { | 
|  | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); | 
|  | } else { | 
|  | /* Calculate the location for quadrant, remembering to get | 
|  | the alignment right.  Assumes that &(block[0]) is at least | 
|  | 2-byte aligned -- this should be ok since block is really | 
|  | the first section of arr2. | 
|  | */ | 
|  | i = nblock+BZ_N_OVERSHOOT; | 
|  | if (i & 1) i++; | 
|  | quadrant = (UInt16*)(&(block[i])); | 
|  |  | 
|  | /* (wfact-1) / 3 puts the default-factor-30 | 
|  | transition point at very roughly the same place as | 
|  | with v0.1 and v0.9.0. | 
|  | Not that it particularly matters any more, since the | 
|  | resulting compressed stream is now the same regardless | 
|  | of whether or not we use the main sort or fallback sort. | 
|  | */ | 
|  | if (wfact < 1  ) wfact = 1; | 
|  | if (wfact > 100) wfact = 100; | 
|  | budgetInit = nblock * ((wfact-1) / 3); | 
|  | budget = budgetInit; | 
|  |  | 
|  | mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget ); | 
|  | if (verb >= 3) | 
|  | VPrintf3 ( "      %d work, %d block, ratio %5.2f\n", | 
|  | budgetInit - budget, | 
|  | nblock, | 
|  | (float)(budgetInit - budget) / | 
|  | (float)(nblock==0 ? 1 : nblock) ); | 
|  | if (budget < 0) { | 
|  | if (verb >= 2) | 
|  | VPrintf0 ( "    too repetitive; using fallback" | 
|  | " sorting algorithm\n" ); | 
|  | fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb ); | 
|  | } | 
|  | } | 
|  |  | 
|  | s->origPtr = -1; | 
|  | for (i = 0; i < s->nblock; i++) | 
|  | if (ptr[i] == 0) | 
|  | { s->origPtr = i; break; }; | 
|  |  | 
|  | AssertH( s->origPtr != -1, 1003 ); | 
|  | } | 
|  |  | 
|  |  | 
|  | /*-------------------------------------------------------------*/ | 
|  | /*--- end                                       blocksort.c ---*/ | 
|  | /*-------------------------------------------------------------*/ |