--- /dev/null
+++ b/blocksort.c
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery ---*/
+/*--- blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+ This file is a part of bzip2 and/or libbzip2, a program and
+ library for lossless, block-sorting data compression.
+
+ Copyright (C) 1996-2010 Julian R Seward. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Julian Seward, Cambridge, UK.
+ jseward@bzip.org
+ bzip2/libbzip2 version 1.0.6 of 6 September 2010
+ Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
+
+ This program is based on (at least) the work of:
+ Mike Burrows
+ David Wheeler
+ Peter Fenwick
+ Alistair Moffat
+ Radford Neal
+ Ian H. Witten
+ Robert Sedgewick
+ Jon L. Bentley
+
+ For more information on these sources, see the manual.
+--*/
+
+#include "bzlib_private.h"
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting ---*/
+/*--- algorithm, for repetitive blocks ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+void fallbackSimpleSort ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 lo,
+ Int32 hi )
+{
+ Int32 i, j, tmp;
+ UInt32 ec_tmp;
+
+ if (lo == hi) return;
+
+ if (hi - lo > 3) {
+ for ( i = hi-4; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+ fmap[j-4] = fmap[j];
+ fmap[j-4] = tmp;
+ }
+ }
+
+ for ( i = hi-1; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+ fmap[j-1] = fmap[j];
+ fmap[j-1] = tmp;
+ }
+}
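+
+/*--
+ A note on the routine above: it is a two-pass shellsort (gap 4,
+ then gap 1) on fmap, keyed by eclass[]. It only ever sees tiny
+ ranges, since fallbackQSort3 hands over any interval smaller than
+ FALLBACK_QSORT_SMALL_THRESH (10) elements.
+--*/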
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define fvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ fswap(fmap[yyp1], fmap[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+
+#define fmin(a,b) (((a) < (b)) ? (a) : (b))
+
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ sp++; }
+
+#define fpop(lz,hz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; }
+
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE 100
+
+
+static
+void fallbackQSort3 ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 loSt,
+ Int32 hiSt )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
+ Int32 sp, lo, hi;
+ UInt32 med, r, r3;
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+ r = 0;
+
+ sp = 0;
+ fpush ( loSt, hiSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004 );
+
+ fpop ( lo, hi );
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
+ continue;
+ }
+
+ /* Random partitioning. Median of 3 sometimes fails to
+ avoid bad cases. Median of 9 seems to help but
+ looks rather expensive. This too seems to work but
+ is cheaper. Guidance for the magic constants
+ 7621 and 32768 is taken from Sedgewick's algorithms
+ book, chapter 35.
+ */
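+ /* In effect r is a small linear congruential generator,
+    r <- (7621*r + 1) mod 32768, and r % 3 picks the pivot from the
+    low, middle, or high element: pseudo-random but deterministic, so
+    repeated runs sort identically. */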
+ r = ((r * 7621) + 1) % 32768;
+ r3 = r % 3;
+ if (r3 == 0) med = eclass[fmap[lo]]; else
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+ med = eclass[fmap[hi]];
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (1) {
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unLo], fmap[ltLo]);
+ ltLo++; unLo++;
+ continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unHi], fmap[gtHi]);
+ gtHi--; unHi--;
+ continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+ }
+
+ AssertD ( unHi == unLo-1, "fallbackQSort3(2)" );
+
+ if (gtHi < ltLo) continue;
+
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ if (n - lo > hi - m) {
+ fpush ( lo, n );
+ fpush ( m, hi );
+ } else {
+ fpush ( m, hi );
+ fpush ( lo, n );
+ }
+ }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ eclass exists for [0 .. nblock-1]
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ All other areas of eclass destroyed
+ fmap [0 .. nblock-1] holds sorted order
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
+#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
+#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
+#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
+#define WORD_BH(zz) bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz) ((zz) & 0x01f)
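+
+/*--
+ Example of the bit addressing: for zz = 37, 37 >> 5 = 1 and
+ 37 & 31 = 5, so SET_BH(37) sets bit 5 of bhtab[1]. bhtab is thus a
+ plain bitmap of bucket-boundary flags, 32 per word, which lets the
+ scanning loops below skip over runs a whole word at a time via
+ WORD_BH.
+--*/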
+
+static
+void fallbackSort ( UInt32* fmap,
+ UInt32* eclass,
+ UInt32* bhtab,
+ Int32 nblock,
+ Int32 verb )
+{
+ Int32 ftab[257];
+ Int32 ftabCopy[256];
+ Int32 H, i, j, k, l, r, cc, cc1;
+ Int32 nNotDone;
+ Int32 nBhtab;
+ UChar* eclass8 = (UChar*)eclass;
+
+ /*--
+ Initial 1-char radix sort to generate
+ initial fmap and initial BH bits.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " bucket sorting ...\n" );
+ for (i = 0; i < 257; i++) ftab[i] = 0;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
+
+ for (i = 0; i < nblock; i++) {
+ j = eclass8[i];
+ k = ftab[j] - 1;
+ ftab[j] = k;
+ fmap[k] = i;
+ }
+
+ nBhtab = 2 + (nblock / 32);
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+ /*--
+ Inductively refine the buckets. Kind-of an
+ "exponential radix sort" (!), inspired by the
+ Manber-Myers suffix array construction algorithm.
+ --*/
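+
+ /*--
+ Sketch of the induction: assume fmap is sorted on the first H
+ characters of every rotation, with bucket starts flagged in bhtab.
+ The H-loop below stores eclass[k] = start-of-bucket of the rotation
+ at k+H, i.e. a rank of characters [H, 2H) of the rotation at k.
+ Sorting each unresolved bucket on that single integer key therefore
+ extends the sort to 2H characters; H doubles every pass, giving the
+ log(N) factor.
+ --*/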
+
+ /*-- set sentinel bits for block-end detection --*/
+ for (i = 0; i < 32; i++) {
+ SET_BH(nblock + 2*i);
+ CLEAR_BH(nblock + 2*i + 1);
+ }
+
+ /*-- the log(N) loop --*/
+ H = 1;
+ while (1) {
+
+ if (verb >= 4)
+ VPrintf1 ( " depth %6d has ", H );
+
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ if (ISSET_BH(i)) j = i;
+ k = fmap[i] - H; if (k < 0) k += nblock;
+ eclass[k] = j;
+ }
+
+ nNotDone = 0;
+ r = -1;
+ while (1) {
+
+ /*-- find the next non-singleton bucket --*/
+ k = r + 1;
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (ISSET_BH(k)) {
+ while (WORD_BH(k) == 0xffffffff) k += 32;
+ while (ISSET_BH(k)) k++;
+ }
+ l = k - 1;
+ if (l >= nblock) break;
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (!ISSET_BH(k)) {
+ while (WORD_BH(k) == 0x00000000) k += 32;
+ while (!ISSET_BH(k)) k++;
+ }
+ r = k - 1;
+ if (r >= nblock) break;
+
+ /*-- now [l, r] bracket current bucket --*/
+ if (r > l) {
+ nNotDone += (r - l + 1);
+ fallbackQSort3 ( fmap, eclass, l, r );
+
+ /*-- scan bucket and generate header bits --*/
+ cc = -1;
+ for (i = l; i <= r; i++) {
+ cc1 = eclass[fmap[i]];
+ if (cc != cc1) { SET_BH(i); cc = cc1; };
+ }
+ }
+ }
+
+ if (verb >= 4)
+ VPrintf1 ( "%6d unresolved strings\n", nNotDone );
+
+ H *= 2;
+ if (H > nblock || nNotDone == 0) break;
+ }
+
+ /*--
+ Reconstruct the original block in
+ eclass8 [0 .. nblock-1], since the
+ previous phase destroyed it.
+ --*/
+ if (verb >= 4)
+ VPrintf0 ( " reconstructing block ...\n" );
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ while (ftabCopy[j] == 0) j++;
+ ftabCopy[j]--;
+ eclass8[fmap[i]] = (UChar)j;
+ }
+ AssertH ( j < 256, 1005 );
+}
+
+#undef SET_BH
+#undef CLEAR_BH
+#undef ISSET_BH
+#undef WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting ---*/
+/*--- algorithm. Faster for "normal" ---*/
+/*--- non-repetitive blocks. ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+static
+__inline__
+Bool mainGtU ( UInt32 i1,
+ UInt32 i2,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32 nblock,
+ Int32* budget )
+{
+ Int32 k;
+ UChar c1, c2;
+ UInt16 s1, s2;
+
+ AssertD ( i1 != i2, "mainGtU" );
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+
+ k = nblock + 8;
+
+ do {
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+
+ if (i1 >= nblock) i1 -= nblock;
+ if (i2 >= nblock) i2 -= nblock;
+
+ k -= 8;
+ (*budget)--;
+ }
+ while (k >= 0);
+
+ return False;
+}
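+
+/*--
+ A note on the budget: each pass of the loop above compares 8 further
+ character pairs (plus their quadrant ranks) and costs one unit of
+ *budget; since k starts at nblock + 8 and drops by 8 per pass, two
+ rotations are compared for at most roughly one full cycle of the
+ block before being declared equal.
+--*/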
+
+
+/*---------------------------------------------*/
+/*--
+ Knuth's increments seem to work better
+ than Incerpi-Sedgewick here. Possibly
+ because the number of elems to sort is
+ usually small, typically <= 20.
+--*/
+static
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
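+
+/* These are h(k+1) = 3*h(k) + 1, i.e. (3^k - 1) / 2. Fourteen of
+   them suffice: a bzip2 block is at most 900000 bytes, and
+   incs[13] = 2391484 already exceeds that. */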
+
+static
+void mainSimpleSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 lo,
+ Int32 hi,
+ Int32 d,
+ Int32* budget )
+{
+ Int32 i, j, h, bigN, hp;
+ UInt32 v;
+
+ bigN = hi - lo + 1;
+ if (bigN < 2) return;
+
+ hp = 0;
+ while (incs[hp] < bigN) hp++;
+ hp--;
+
+ for (; hp >= 0; hp--) {
+ h = incs[hp];
+
+ i = lo + h;
+ while (True) {
+
+ /*-- copy 1 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 2 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 3 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ if (*budget < 0) return;
+ }
+ }
+}
+
+
+/*---------------------------------------------*/
+/*--
+ The following is an implementation of
+ an elegant 3-way quicksort for strings,
+ described in a paper "Fast Algorithms for
+ Sorting and Searching Strings", by Robert
+ Sedgewick and Jon L. Bentley.
+--*/
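+
+/*--
+ The partition loop below maintains the Bentley-McIlroy invariant:
+ keys equal to the pivot are parked at both ends and swapped into the
+ middle afterwards. At all times
+
+    [lo, ltLo)    equal to pivot
+    [ltLo, unLo)  less than pivot
+    [unLo, unHi]  not yet examined
+    (unHi, gtHi]  greater than pivot
+    (gtHi, hi]    equal to pivot
+--*/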
+
+#define mswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ mswap(ptr[yyp1], ptr[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+static
+__inline__
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+ UChar t;
+ if (a > b) { t = a; a = b; b = t; };
+ if (b > c) {
+ b = c;
+ if (a > b) b = a;
+ }
+ return b;
+}
+
+#define mmin(a,b) (((a) < (b)) ? (a) : (b))
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ stackD [sp] = dz; \
+ sp++; }
+
+#define mpop(lz,hz,dz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; \
+ dz = stackD [sp]; }
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+
+#define mnextswap(az,bz) \
+ { Int32 tz; \
+ tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+ tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+ tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+
+static
+void mainQSort3 ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 loSt,
+ Int32 hiSt,
+ Int32 dSt,
+ Int32* budget )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+ Int32 sp, lo, hi, d;
+
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+ Int32 nextLo[3];
+ Int32 nextHi[3];
+ Int32 nextD [3];
+
+ sp = 0;
+ mpush ( loSt, hiSt, dSt );
+
+ while (sp > 0) {
+
+ AssertH ( sp < MAIN_QSORT_STACK_SIZE - 2, 1001 );
+
+ mpop ( lo, hi, d );
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
+ d > MAIN_QSORT_DEPTH_THRESH) {
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+ if (*budget < 0) return;
+ continue;
+ }
+
+ med = (Int32)
+ mmed3 ( block[ptr[ lo ]+d],
+ block[ptr[ hi ]+d],
+ block[ptr[ (lo+hi)>>1 ]+d] );
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (True) {
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unLo]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unLo], ptr[ltLo]);
+ ltLo++; unLo++; continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unHi]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unHi], ptr[gtHi]);
+ gtHi--; unHi--; continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+ }
+
+ AssertD ( unHi == unLo-1, "mainQSort3(2)" );
+
+ if (gtHi < ltLo) {
+ mpush(lo, hi, d+1 );
+ continue;
+ }
+
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+ AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)" );
+ AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)" );
+
+ mpush (nextLo[0], nextHi[0], nextD[0]);
+ mpush (nextLo[1], nextHi[1], nextD[1]);
+ mpush (nextLo[2], nextHi[2], nextD[2]);
+ }
+}
+
+#undef mswap
+#undef mvswap
+#undef mpush
+#undef mpop
+#undef mmin
+#undef mnextsize
+#undef mnextswap
+#undef MAIN_QSORT_SMALL_THRESH
+#undef MAIN_QSORT_DEPTH_THRESH
+#undef MAIN_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > N_OVERSHOOT
+ block32 exists for [0 .. nblock-1 +N_OVERSHOOT]
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)block32) [0 .. nblock-1] holds block
+ All other areas of block32 destroyed
+ ftab [0 .. 65536 ] destroyed
+ ptr [0 .. nblock-1] holds sorted order
+ if (*budget < 0), sorting was abandoned
+*/
+
+#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8])
+#define SETMASK (1 << 21)
+#define CLEARMASK (~(SETMASK))
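+
+/*--
+ ftab[] entries are bucket start indices, at most 900000, which fits
+ in 20 bits; bit 21 (SETMASK) is therefore free, and mainSort uses it
+ to mark a small bucket as already sorted, CLEARMASK recovering the
+ plain index.
+--*/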
+
+static
+void mainSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32* ftab,
+ Int32 nblock,
+ Int32 verb,
+ Int32* budget )
+{
+ Int32 i, j, k, ss, sb;
+ Int32 runningOrder[256];
+ Bool bigDone[256];
+ Int32 copyStart[256];
+ Int32 copyEnd [256];
+ UChar c1;
+ Int32 numQSorted;
+ UInt16 s;
+ if (verb >= 4) VPrintf0 ( " main sort initialise ...\n" );
+
+ /*-- set up the 2-byte frequency table --*/
+ for (i = 65536; i >= 0; i--) ftab[i] = 0;
+
+ j = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ quadrant[i-1] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-1]) << 8);
+ ftab[j]++;
+ quadrant[i-2] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-2]) << 8);
+ ftab[j]++;
+ quadrant[i-3] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i-3]) << 8);
+ ftab[j]++;
+ }
+ for (; i >= 0; i--) {
+ quadrant[i] = 0;
+ j = (j >> 8) | ( ((UInt16)block[i]) << 8);
+ ftab[j]++;
+ }
+
+ /*-- (emphasises close relationship of block & quadrant) --*/
+ for (i = 0; i < BZ_N_OVERSHOOT; i++) {
+ block [nblock+i] = block[i];
+ quadrant[nblock+i] = 0;
+ }
+
+ if (verb >= 4) VPrintf0 ( " bucket sorting ...\n" );
+
+ /*-- Complete the initial radix sort --*/
+ for (i = 1; i <= 65536; i++) ftab[i] += ftab[i-1];
+
+ s = block[0] << 8;
+ i = nblock-1;
+ for (; i >= 3; i -= 4) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ s = (s >> 8) | (block[i-1] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-1;
+ s = (s >> 8) | (block[i-2] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-2;
+ s = (s >> 8) | (block[i-3] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i-3;
+ }
+ for (; i >= 0; i--) {
+ s = (s >> 8) | (block[i] << 8);
+ j = ftab[s] -1;
+ ftab[s] = j;
+ ptr[j] = i;
+ }
+
+ /*--
+ Now ftab contains the first loc of every small bucket.
+ Calculate the running order, from smallest to largest
+ big bucket.
+ --*/
+ for (i = 0; i <= 255; i++) {
+ bigDone [i] = False;
+ runningOrder[i] = i;
+ }
+
+ {
+ Int32 vv;
+ Int32 h = 1;
+ do h = 3 * h + 1; while (h <= 256);
+ do {
+ h = h / 3;
+ for (i = h; i <= 255; i++) {
+ vv = runningOrder[i];
+ j = i;
+ while ( BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv) ) {
+ runningOrder[j] = runningOrder[j-h];
+ j = j - h;
+ if (j <= (h - 1)) goto zero;
+ }
+ zero:
+ runningOrder[j] = vv;
+ }
+ } while (h != 1);
+ }
+
+ /*--
+ The main sorting loop.
+ --*/
+
+ numQSorted = 0;
+
+ for (i = 0; i <= 255; i++) {
+
+ /*--
+ Process big buckets, starting with the least full.
+ Basically this is a 3-step process in which we call
+ mainQSort3 to sort the small buckets [ss, j], but
+ also make a big effort to avoid the calls if we can.
+ --*/
+ ss = runningOrder[i];
+
+ /*--
+ Step 1:
+ Complete the big bucket [ss] by quicksorting
+ any unsorted small buckets [ss, j], for j != ss.
+ Hopefully previous pointer-scanning phases have already
+ completed many of the small buckets [ss, j], so
+ we don't have to sort them at all.
+ --*/
+ for (j = 0; j <= 255; j++) {
+ if (j != ss) {
+ sb = (ss << 8) + j;
+ if ( ! (ftab[sb] & SETMASK) ) {
+ Int32 lo = ftab[sb] & CLEARMASK;
+ Int32 hi = (ftab[sb+1] & CLEARMASK) - 1;
+ if (hi > lo) {
+ if (verb >= 4)
+ VPrintf4 ( " qsort [0x%x, 0x%x] "
+ "done %d this %d\n",
+ ss, j, numQSorted, hi - lo + 1 );
+ mainQSort3 (
+ ptr, block, quadrant, nblock,
+ lo, hi, BZ_N_RADIX, budget
+ );
+ numQSorted += (hi - lo + 1);
+ if (*budget < 0) return;
+ }
+ }
+ ftab[sb] |= SETMASK;
+ }
+ }
+
+ AssertH ( !bigDone[ss], 1006 );
+
+ /*--
+ Step 2:
+ Now scan this big bucket [ss] so as to synthesise the
+ sorted order for small buckets [t, ss] for all t,
+ including, magically, the bucket [ss,ss] too.
+ This will avoid doing Real Work in subsequent Step 1's.
+ --*/
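+      /* The trick: big bucket [ss] is scanned in sorted order; for a
+         rotation at k+1 in it, the rotation at k is block[k] followed
+         by an already-ranked string, so appending k at
+         copyStart[block[k]]++ (or, scanning from the right, at
+         copyEnd[block[k]]--) emits every small bucket [t, ss] in
+         sorted order in one pass over the big bucket. */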
+ {
+ for (j = 0; j <= 255; j++) {
+ copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK;
+ copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1;
+ }
+ for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyStart[c1]++ ] = k;
+ }
+ for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) {
+ k = ptr[j]-1; if (k < 0) k += nblock;
+ c1 = block[k];
+ if (!bigDone[c1])
+ ptr[ copyEnd[c1]-- ] = k;
+ }
+ }
+
+ AssertH ( (copyStart[ss]-1 == copyEnd[ss])
+ ||
+ /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1.
+ Necessity for this case is demonstrated by compressing
+ a sequence of approximately 48.5 million copies of character
+ 251; 1.0.0/1.0.1 will then die here. */
+ (copyStart[ss] == 0 && copyEnd[ss] == nblock-1),
+ 1007 )
+
+ for (j = 0; j <= 255; j++) ftab[(j << 8) + ss] |= SETMASK;
+
+ /*--
+ Step 3:
+ The [ss] big bucket is now done. Record this fact,
+ and update the quadrant descriptors. Remember to
+ update quadrants in the overshoot area too, if
+ necessary. The "if (i < 255)" test merely skips
+ this updating for the last bucket processed, since
+ updating for the last bucket is pointless.
+
+ The quadrant array provides a way to incrementally
+ cache sort orderings, as they appear, so as to
+ make subsequent comparisons in mainGtU() complete
+ faster. For repetitive blocks this makes a big
+ difference (but not big enough to be able to avoid
+ the fallback sorting mechanism, exponential radix sort).
+
+ The precise meaning is: at all times:
+
+ for 0 <= i < nblock and 0 <= j <= nblock
+
+ if block[i] != block[j],
+
+ then the relative values of quadrant[i] and
+ quadrant[j] are meaningless.
+
+ else {
+ if quadrant[i] < quadrant[j]
+ then the string starting at i lexicographically
+ precedes the string starting at j
+
+ else if quadrant[i] > quadrant[j]
+ then the string starting at j lexicographically
+ precedes the string starting at i
+
+ else
+ the relative ordering of the strings starting
+ at i and j has not yet been determined.
+ }
+ --*/
+ bigDone[ss] = True;
+
+ if (i < 255) {
+ Int32 bbStart = ftab[ss << 8] & CLEARMASK;
+ Int32 bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart;
+ Int32 shifts = 0;
+
+ while ((bbSize >> shifts) > 65534) shifts++;
+
+ for (j = bbSize-1; j >= 0; j--) {
+ Int32 a2update = ptr[bbStart + j];
+ UInt16 qVal = (UInt16)(j >> shifts);
+ quadrant[a2update] = qVal;
+ if (a2update < BZ_N_OVERSHOOT)
+ quadrant[a2update + nblock] = qVal;
+ }
+ AssertH ( ((bbSize-1) >> shifts) <= 65535, 1002 );
+ }
+
+ }
+
+ if (verb >= 4)
+ VPrintf3 ( " %d pointers, %d sorted, %d scanned\n",
+ nblock, numQSorted, nblock - numQSorted );
+}
+
+#undef BIGFREQ
+#undef SETMASK
+#undef CLEARMASK
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ arr2 exists for [0 .. nblock-1 +N_OVERSHOOT]
+ ((UChar*)arr2) [0 .. nblock-1] holds block
+ arr1 exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)arr2) [0 .. nblock-1] holds block
+ All other areas of block destroyed
+ ftab [ 0 .. 65536 ] destroyed
+ arr1 [0 .. nblock-1] holds sorted order
+*/
+void BZ2_blockSort ( EState* s )
+{
+ UInt32* ptr = s->ptr;
+ UChar* block = s->block;
+ UInt32* ftab = s->ftab;
+ Int32 nblock = s->nblock;
+ Int32 verb = s->verbosity;
+ Int32 wfact = s->workFactor;
+ UInt16* quadrant;
+ Int32 budget;
+ Int32 budgetInit;
+ Int32 i;
+
+ if (nblock < 10000) {
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+ } else {
+ /* Calculate the location for quadrant, remembering to get
+ the alignment right. Assumes that &(block[0]) is at least
+ 2-byte aligned -- this should be ok since block is really
+ the first section of arr2.
+ */
+ i = nblock+BZ_N_OVERSHOOT;
+ if (i & 1) i++;
+ quadrant = (UInt16*)(&(block[i]));
+
+ /* (wfact-1) / 3 puts the default-factor-30
+ transition point at very roughly the same place as
+ with v0.1 and v0.9.0.
+ Not that it particularly matters any more, since the
+ resulting compressed stream is now the same regardless
+ of whether or not we use the main sort or fallback sort.
+ */
+ if (wfact < 1 ) wfact = 1;
+ if (wfact > 100) wfact = 100;
+ budgetInit = nblock * ((wfact-1) / 3);
+ budget = budgetInit;
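+      /* With the default workFactor of 30 this gives budgetInit =
+         nblock * 9; mainGtU() charges one budget unit per unrolled
+         8-character comparison step, so the main algorithm gets about
+         9 * nblock such steps before we give up and fall back. */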
+
+ mainSort ( ptr, block, quadrant, ftab, nblock, verb, &budget );
+ if (verb >= 3)
+ VPrintf3 ( " %d work, %d block, ratio %5.2f\n",
+ budgetInit - budget,
+ nblock,
+ (float)(budgetInit - budget) /
+ (float)(nblock==0 ? 1 : nblock) );
+ if (budget < 0) {
+ if (verb >= 2)
+ VPrintf0 ( " too repetitive; using fallback"
+ " sorting algorithm\n" );
+ fallbackSort ( s->arr1, s->arr2, ftab, nblock, verb );
+ }
+ }
+
+ s->origPtr = -1;
+ for (i = 0; i < s->nblock; i++)
+ if (ptr[i] == 0)
+ { s->origPtr = i; break; };
+
+ AssertH( s->origPtr != -1, 1003 );
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end blocksort.c ---*/
+/*-------------------------------------------------------------*/
--- /dev/null
+++ b/compress.c
+
+/*-------------------------------------------------------------*/
+/*--- Compression machinery (not incl block sorting) ---*/
+/*--- compress.c ---*/
+/*-------------------------------------------------------------*/
+
+/*--
+ This file is a part of bzip2 and/or libbzip2, a program and
+ library for lossless, block-sorting data compression.
+
+ Copyright (C) 1996-2010 Julian R Seward. All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ 1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ 2. The origin of this software must not be misrepresented; you must
+ not claim that you wrote the original software. If you use this
+ software in a product, an acknowledgment in the product
+ documentation would be appreciated but is not required.
+
+ 3. Altered source versions must be plainly marked as such, and must
+ not be misrepresented as being the original software.
+
+ 4. The name of the author may not be used to endorse or promote
+ products derived from this software without specific prior written
+ permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
+ OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+ GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ Julian Seward, Cambridge, UK.
+ jseward@bzip.org
+ bzip2/libbzip2 version 1.0.6 of 6 September 2010
+ Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
+
+ This program is based on (at least) the work of:
+ Mike Burrows
+ David Wheeler
+ Peter Fenwick
+ Alistair Moffat
+ Radford Neal
+ Ian H. Witten
+ Robert Sedgewick
+ Jon L. Bentley
+
+ For more information on these sources, see the manual.
+--*/
+
+/* CHANGES
+ 0.9.0 -- original version.
+ 0.9.0a/b -- no changes in this file.
+ 0.9.0c -- changed setting of nGroups in sendMTFValues()
+ so as to do a bit better on small files
+*/
+
+#include "bzlib_private.h"
+
+
+/*---------------------------------------------------*/
+/*--- Bit stream I/O ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+void BZ2_bsInitWrite ( EState* s )
+{
+ s->bsLive = 0;
+ s->bsBuff = 0;
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsFinishWrite ( EState* s )
+{
+ while (s->bsLive > 0) {
+ s->zbits[s->numZ] = (UChar)(s->bsBuff >> 24);
+ s->numZ++;
+ s->bsBuff <<= 8;
+ s->bsLive -= 8;
+ }
+}
+
+
+/*---------------------------------------------------*/
+#define bsNEEDW(nz) \
+{ \
+ while (s->bsLive >= 8) { \
+ s->zbits[s->numZ] \
+ = (UChar)(s->bsBuff >> 24); \
+ s->numZ++; \
+ s->bsBuff <<= 8; \
+ s->bsLive -= 8; \
+ } \
+}
+
+
+/*---------------------------------------------------*/
+static
+__inline__
+void bsW ( EState* s, Int32 n, UInt32 v )
+{
+ bsNEEDW ( n );
+ s->bsBuff |= (v << (32 - s->bsLive - n));
+ s->bsLive += n;
+}
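+
+/*--
+ Worked example: with bsLive = 3 bits already pending, bsW(s, 5, v)
+ flushes nothing (bsLive < 8) and ORs the 5 bits of v in at shift
+ 32 - 3 - 5 = 24, just below the pending bits; bsLive becomes 8, so
+ the next call's bsNEEDW emits the completed top byte of bsBuff.
+--*/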
+
+
+/*---------------------------------------------------*/
+static
+void bsPutUInt32 ( EState* s, UInt32 u )
+{
+ bsW ( s, 8, (u >> 24) & 0xffL );
+ bsW ( s, 8, (u >> 16) & 0xffL );
+ bsW ( s, 8, (u >> 8) & 0xffL );
+ bsW ( s, 8, u & 0xffL );
+}
+
+
+/*---------------------------------------------------*/
+static
+void bsPutUChar ( EState* s, UChar c )
+{
+ bsW( s, 8, (UInt32)c );
+}
+
+
+/*---------------------------------------------------*/
+/*--- The back end proper ---*/
+/*---------------------------------------------------*/
+
+/*---------------------------------------------------*/
+static
+void makeMaps_e ( EState* s )
+{
+ Int32 i;
+ s->nInUse = 0;
+ for (i = 0; i < 256; i++)
+ if (s->inUse[i]) {
+ s->unseqToSeq[i] = s->nInUse;
+ s->nInUse++;
+ }
+}
+
+
+/*---------------------------------------------------*/
+static
+void generateMTFValues ( EState* s )
+{
+ UChar yy[256];
+ Int32 i, j;
+ Int32 zPend;
+ Int32 wr;
+ Int32 EOB;
+
+ /*
+ After sorting (e.g., here),
+ s->arr1 [ 0 .. s->nblock-1 ] holds sorted order,
+ and
+ ((UChar*)s->arr2) [ 0 .. s->nblock-1 ]
+ holds the original block data.
+
+ The first thing to do is generate the MTF values,
+ and put them in
+ ((UInt16*)s->arr1) [ 0 .. s->nblock-1 ].
+ Because there are never more MTF values than block
+ values, ptr values in this area are overwritten
+ with MTF values only when they are no longer needed.
+
+ The final compressed bitstream is generated into the
+ area starting at
+ (UChar*) (&((UChar*)s->arr2)[s->nblock])
+
+ These storage aliases are set up in bzCompressInit(),
+ except for the last one, which is arranged in
+ compressBlock().
+ */
+ UInt32* ptr = s->ptr;
+ UChar* block = s->block;
+ UInt16* mtfv = s->mtfv;
+
+ makeMaps_e ( s );
+ EOB = s->nInUse+1;
+
+ for (i = 0; i <= EOB; i++) s->mtfFreq[i] = 0;
+
+ wr = 0;
+ zPend = 0;
+ for (i = 0; i < s->nInUse; i++) yy[i] = (UChar) i;
+
+ for (i = 0; i < s->nblock; i++) {
+ UChar ll_i;
+ AssertD ( wr <= i, "generateMTFValues(1)" );
+ j = ptr[i]-1; if (j < 0) j += s->nblock;
+ ll_i = s->unseqToSeq[block[j]];
+ AssertD ( ll_i < s->nInUse, "generateMTFValues(2a)" );
+
+ if (yy[0] == ll_i) {
+ zPend++;
+ } else {
+
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
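+         /* The loop above writes the run length in bijective base 2,
+            least significant digit first, with RUNA as digit 1 and
+            RUNB as digit 2: a run of 4 emits RUNB then RUNA, since
+            2*(2^0) + 1*(2^1) = 4. */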
+ {
+ register UChar rtmp;
+ register UChar* ryy_j;
+ register UChar rll_i;
+ rtmp = yy[1];
+ yy[1] = yy[0];
+ ryy_j = &(yy[1]);
+ rll_i = ll_i;
+ while ( rll_i != rtmp ) {
+ register UChar rtmp2;
+ ryy_j++;
+ rtmp2 = rtmp;
+ rtmp = *ryy_j;
+ *ryy_j = rtmp2;
+ };
+ yy[0] = rtmp;
+ j = ryy_j - &(yy[0]);
+ mtfv[wr] = j+1; wr++; s->mtfFreq[j+1]++;
+ }
+
+ }
+ }
+
+ if (zPend > 0) {
+ zPend--;
+ while (True) {
+ if (zPend & 1) {
+ mtfv[wr] = BZ_RUNB; wr++;
+ s->mtfFreq[BZ_RUNB]++;
+ } else {
+ mtfv[wr] = BZ_RUNA; wr++;
+ s->mtfFreq[BZ_RUNA]++;
+ }
+ if (zPend < 2) break;
+ zPend = (zPend - 2) / 2;
+ };
+ zPend = 0;
+ }
+
+ mtfv[wr] = EOB; wr++; s->mtfFreq[EOB]++;
+
+ s->nMTF = wr;
+}
+
+
+/*---------------------------------------------------*/
+#define BZ_LESSER_ICOST 0
+#define BZ_GREATER_ICOST 15
+
+static
+void sendMTFValues ( EState* s )
+{
+ Int32 v, t, i, j, gs, ge, totc, bt, bc, iter;
+ Int32 nSelectors, alphaSize, minLen, maxLen, selCtr;
+ Int32 nGroups, nBytes;
+
+ /*--
+ UChar len [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ is a global since the decoder also needs it.
+
+ Int32 code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ Int32 rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE];
+ are also globals only used in this proc.
+ Made global to keep stack frame size small.
+ --*/
+
+
+ UInt16 cost[BZ_N_GROUPS];
+ Int32 fave[BZ_N_GROUPS];
+
+ UInt16* mtfv = s->mtfv;
+
+ if (s->verbosity >= 3)
+ VPrintf3( " %d in block, %d after MTF & 1-2 coding, "
+ "%d+2 syms in use\n",
+ s->nblock, s->nMTF, s->nInUse );
+
+ alphaSize = s->nInUse+2;
+ for (t = 0; t < BZ_N_GROUPS; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->len[t][v] = BZ_GREATER_ICOST;
+
+ /*--- Decide how many coding tables to use ---*/
+ AssertH ( s->nMTF > 0, 3001 );
+ if (s->nMTF < 200) nGroups = 2; else
+ if (s->nMTF < 600) nGroups = 3; else
+ if (s->nMTF < 1200) nGroups = 4; else
+ if (s->nMTF < 2400) nGroups = 5; else
+ nGroups = 6;
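+ /* These cutoffs are the small-file nGroups tuning referred to in
+    the 0.9.0c entry of the CHANGES note at the top of this file. */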
+
+ /*--- Generate an initial set of coding tables ---*/
+ {
+ Int32 nPart, remF, tFreq, aFreq;
+
+ nPart = nGroups;
+ remF = s->nMTF;
+ gs = 0;
+ while (nPart > 0) {
+ tFreq = remF / nPart;
+ ge = gs-1;
+ aFreq = 0;
+ while (aFreq < tFreq && ge < alphaSize-1) {
+ ge++;
+ aFreq += s->mtfFreq[ge];
+ }
+
+ if (ge > gs
+ && nPart != nGroups && nPart != 1
+ && ((nGroups-nPart) % 2 == 1)) {
+ aFreq -= s->mtfFreq[ge];
+ ge--;
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf5( " initial group %d, [%d .. %d], "
+ "has %d syms (%4.1f%%)\n",
+ nPart, gs, ge, aFreq,
+ (100.0 * (float)aFreq) / (float)(s->nMTF) );
+
+ for (v = 0; v < alphaSize; v++)
+ if (v >= gs && v <= ge)
+ s->len[nPart-1][v] = BZ_LESSER_ICOST; else
+ s->len[nPart-1][v] = BZ_GREATER_ICOST;
+
+ nPart--;
+ gs = ge+1;
+ remF -= aFreq;
+ }
+ }
+
+ /*---
+ Iterate up to BZ_N_ITERS times to improve the tables.
+ ---*/
+ for (iter = 0; iter < BZ_N_ITERS; iter++) {
+
+ for (t = 0; t < nGroups; t++) fave[t] = 0;
+
+ for (t = 0; t < nGroups; t++)
+ for (v = 0; v < alphaSize; v++)
+ s->rfreq[t][v] = 0;
+
+ /*---
+ Set up an auxiliary length table which is used to fast-track
+ the common case (nGroups == 6).
+ ---*/
+ if (nGroups == 6) {
+ for (v = 0; v < alphaSize; v++) {
+ s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v];
+ s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v];
+ s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v];
+ }
+ }
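+      /* len_pack[v] holds two 16-bit table costs per 32-bit word, so
+         the fast path below accumulates the costs of all six tables
+         with three adds per symbol instead of six. */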
+
+ nSelectors = 0;
+ totc = 0;
+ gs = 0;
+ while (True) {
+
+ /*--- Set group start & end marks. --*/
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+
+ /*--
+ Calculate the cost of this group as coded
+ by each of the coding tables.
+ --*/
+ for (t = 0; t < nGroups; t++) cost[t] = 0;
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ register UInt32 cost01, cost23, cost45;
+ register UInt16 icv;
+ cost01 = cost23 = cost45 = 0;
+
+# define BZ_ITER(nn) \
+ icv = mtfv[gs+(nn)]; \
+ cost01 += s->len_pack[icv][0]; \
+ cost23 += s->len_pack[icv][1]; \
+ cost45 += s->len_pack[icv][2]; \
+
+ BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4);
+ BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9);
+ BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14);
+ BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19);
+ BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24);
+ BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29);
+ BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34);
+ BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39);
+ BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44);
+ BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49);
+
+# undef BZ_ITER
+
+ cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16;
+ cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16;
+ cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16;
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ UInt16 icv = mtfv[i];
+ for (t = 0; t < nGroups; t++) cost[t] += s->len[t][icv];
+ }
+ }
+
+ /*--
+ Find the coding table which is best for this group,
+ and record its identity in the selector table.
+ --*/
+ bc = 999999999; bt = -1;
+ for (t = 0; t < nGroups; t++)
+ if (cost[t] < bc) { bc = cost[t]; bt = t; };
+ totc += bc;
+ fave[bt]++;
+ s->selector[nSelectors] = bt;
+ nSelectors++;
+
+ /*--
+ Increment the symbol frequencies for the selected table.
+ --*/
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+
+# define BZ_ITUR(nn) s->rfreq[bt][ mtfv[gs+(nn)] ]++
+
+ BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4);
+ BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9);
+ BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14);
+ BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19);
+ BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24);
+ BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29);
+ BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34);
+ BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39);
+ BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44);
+ BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49);
+
+# undef BZ_ITUR
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++)
+ s->rfreq[bt][ mtfv[i] ]++;
+ }
+
+ gs = ge+1;
+ }
+ if (s->verbosity >= 3) {
+ VPrintf2 ( " pass %d: size is %d, grp uses are ",
+ iter+1, totc/8 );
+ for (t = 0; t < nGroups; t++)
+ VPrintf1 ( "%d ", fave[t] );
+ VPrintf0 ( "\n" );
+ }
+
+ /*--
+ Recompute the tables based on the accumulated frequencies.
+ --*/
+ /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See
+ comment in huffman.c for details. */
+ for (t = 0; t < nGroups; t++)
+ BZ2_hbMakeCodeLengths ( &(s->len[t][0]), &(s->rfreq[t][0]),
+ alphaSize, 17 /*20*/ );
+ }
+
+
+ AssertH( nGroups < 8, 3002 );
+ AssertH( nSelectors < 32768 &&
+ nSelectors <= (2 + (900000 / BZ_G_SIZE)),
+ 3003 );
+
+
+ /*--- Compute MTF values for the selectors. ---*/
+ {
+ UChar pos[BZ_N_GROUPS], ll_i, tmp2, tmp;
+ for (i = 0; i < nGroups; i++) pos[i] = i;
+ for (i = 0; i < nSelectors; i++) {
+ ll_i = s->selector[i];
+ j = 0;
+ tmp = pos[j];
+ while ( ll_i != tmp ) {
+ j++;
+ tmp2 = tmp;
+ tmp = pos[j];
+ pos[j] = tmp2;
+ };
+ pos[0] = tmp;
+ s->selectorMtf[i] = j;
+ }
+ };
+
+ /*--- Assign actual codes for the tables. --*/
+ for (t = 0; t < nGroups; t++) {
+ minLen = 32;
+ maxLen = 0;
+ for (i = 0; i < alphaSize; i++) {
+ if (s->len[t][i] > maxLen) maxLen = s->len[t][i];
+ if (s->len[t][i] < minLen) minLen = s->len[t][i];
+ }
+ AssertH ( !(maxLen > 17 /*20*/ ), 3004 );
+ AssertH ( !(minLen < 1), 3005 );
+ BZ2_hbAssignCodes ( &(s->code[t][0]), &(s->len[t][0]),
+ minLen, maxLen, alphaSize );
+ }
+
+ /*--- Transmit the mapping table. ---*/
+ {
+ Bool inUse16[16];
+ for (i = 0; i < 16; i++) {
+ inUse16[i] = False;
+ for (j = 0; j < 16; j++)
+ if (s->inUse[i * 16 + j]) inUse16[i] = True;
+ }
+
+ nBytes = s->numZ;
+ for (i = 0; i < 16; i++)
+ if (inUse16[i]) bsW(s,1,1); else bsW(s,1,0);
+
+ for (i = 0; i < 16; i++)
+ if (inUse16[i])
+ for (j = 0; j < 16; j++) {
+ if (s->inUse[i * 16 + j]) bsW(s,1,1); else bsW(s,1,0);
+ }
+
+ if (s->verbosity >= 3)
+ VPrintf1( " bytes: mapping %d, ", s->numZ-nBytes );
+ }
+
+ /*--- Now the selectors. ---*/
+ nBytes = s->numZ;
+ bsW ( s, 3, nGroups );
+ bsW ( s, 15, nSelectors );
+ for (i = 0; i < nSelectors; i++) {
+ for (j = 0; j < s->selectorMtf[i]; j++) bsW(s,1,1);
+ bsW(s,1,0);
+ }
+ if (s->verbosity >= 3)
+ VPrintf1( "selectors %d, ", s->numZ-nBytes );
+
+ /*--- Now the coding tables. ---*/
+ nBytes = s->numZ;
+
+ for (t = 0; t < nGroups; t++) {
+ Int32 curr = s->len[t][0];
+ bsW ( s, 5, curr );
+ for (i = 0; i < alphaSize; i++) {
+ while (curr < s->len[t][i]) { bsW(s,2,2); curr++; /* 10 */ };
+ while (curr > s->len[t][i]) { bsW(s,2,3); curr--; /* 11 */ };
+ bsW ( s, 1, 0 );
+ }
+ }
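+
+   /* The loop above delta-codes the lengths, symbol by symbol: "10"
+      bumps curr up by one, "11" bumps it down, and a single 0 bit
+      says the length is settled. E.g. going from curr = 8 to a
+      length of 6 costs "11 11 0". */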
+
+ if (s->verbosity >= 3)
+ VPrintf1 ( "code lengths %d, ", s->numZ-nBytes );
+
+ /*--- And finally, the block data proper ---*/
+ nBytes = s->numZ;
+ selCtr = 0;
+ gs = 0;
+ while (True) {
+ if (gs >= s->nMTF) break;
+ ge = gs + BZ_G_SIZE - 1;
+ if (ge >= s->nMTF) ge = s->nMTF-1;
+ AssertH ( s->selector[selCtr] < nGroups, 3006 );
+
+ if (nGroups == 6 && 50 == ge-gs+1) {
+ /*--- fast track the common case ---*/
+ UInt16 mtfv_i;
+ UChar* s_len_sel_selCtr
+ = &(s->len[s->selector[selCtr]][0]);
+ Int32* s_code_sel_selCtr
+ = &(s->code[s->selector[selCtr]][0]);
+
+# define BZ_ITAH(nn) \
+ mtfv_i = mtfv[gs+(nn)]; \
+ bsW ( s, \
+ s_len_sel_selCtr[mtfv_i], \
+ s_code_sel_selCtr[mtfv_i] )
+
+ BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4);
+ BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9);
+ BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14);
+ BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19);
+ BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24);
+ BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29);
+ BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34);
+ BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39);
+ BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44);
+ BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49);
+
+# undef BZ_ITAH
+
+ } else {
+ /*--- slow version which correctly handles all situations ---*/
+ for (i = gs; i <= ge; i++) {
+ bsW ( s,
+ s->len [s->selector[selCtr]] [mtfv[i]],
+ s->code [s->selector[selCtr]] [mtfv[i]] );
+ }
+ }
+
+
+ gs = ge+1;
+ selCtr++;
+ }
+ AssertH( selCtr == nSelectors, 3007 );
+
+ if (s->verbosity >= 3)
+ VPrintf1( "codes %d\n", s->numZ-nBytes );
+ else /* squash compiler 'used but not set' warning */
+ nBytes = nBytes;
+}
+
+
+/*---------------------------------------------------*/
+void BZ2_compressBlock ( EState* s, Bool is_last_block )
+{
+ if (s->nblock > 0) {
+
+ BZ_FINALISE_CRC ( s->blockCRC );
+ s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31);
+ s->combinedCRC ^= s->blockCRC;
+ if (s->blockNo > 1) s->numZ = 0;
+
+ if (s->verbosity >= 2)
+ VPrintf4( " block %d: crc = 0x%08x, "
+ "combined CRC = 0x%08x, size = %d\n",
+ s->blockNo, s->blockCRC, s->combinedCRC, s->nblock );
+
+ BZ2_blockSort ( s );
+ }
+
+ s->zbits = (UChar*) (&((UChar*)s->arr2)[s->nblock]);
+
+ /*-- If this is the first block, create the stream header. --*/
+ if (s->blockNo == 1) {
+ BZ2_bsInitWrite ( s );
+ bsPutUChar ( s, BZ_HDR_B );
+ bsPutUChar ( s, BZ_HDR_Z );
+ bsPutUChar ( s, BZ_HDR_h );
+ bsPutUChar ( s, (UChar)(BZ_HDR_0 + s->blockSize100k) );
+ }
+
+ if (s->nblock > 0) {
+
+ bsPutUChar ( s, 0x31 ); bsPutUChar ( s, 0x41 );
+ bsPutUChar ( s, 0x59 ); bsPutUChar ( s, 0x26 );
+ bsPutUChar ( s, 0x53 ); bsPutUChar ( s, 0x59 );
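+
+      /* The six bytes above are the block-header magic
+         0x314159265359: pi, rounded to twelve digits, in BCD. */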
+
+ /*-- Now the block's CRC, so it is in a known place. --*/
+ bsPutUInt32 ( s, s->blockCRC );
+
+ /*--
+ Now a single bit indicating (non-)randomisation.
+ As of version 0.9.5, we use a better sorting algorithm
+ which makes randomisation unnecessary. So always set
+ the randomised bit to 'no'. Of course, the decoder
+ still needs to be able to handle randomised blocks
+ so as to maintain backwards compatibility with
+ older versions of bzip2.
+ --*/
+ bsW(s,1,0);
+
+ bsW ( s, 24, s->origPtr );
+ generateMTFValues ( s );
+ sendMTFValues ( s );
+ }
+
+
+ /*-- If this is the last block, add the stream trailer. --*/
+ if (is_last_block) {
+
+ bsPutUChar ( s, 0x17 ); bsPutUChar ( s, 0x72 );
+ bsPutUChar ( s, 0x45 ); bsPutUChar ( s, 0x38 );
+ bsPutUChar ( s, 0x50 ); bsPutUChar ( s, 0x90 );
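+      /* The six bytes above are the end-of-stream magic
+         0x177245385090: sqrt(pi) = 1.77245385090..., in BCD. */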
+ bsPutUInt32 ( s, s->combinedCRC );
+ if (s->verbosity >= 2)
+ VPrintf1( " final combined CRC = 0x%08x\n ", s->combinedCRC );
+ bsFinishWrite ( s );
+ }
+}
+
+
+/*-------------------------------------------------------------*/
+/*--- end compress.c ---*/
+/*-------------------------------------------------------------*/