Coding.java revision 11099:678faa7d1a6a
1/*
2 * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.java.util.jar.pack;
27
28import java.io.IOException;
29import java.io.InputStream;
30import java.io.OutputStream;
31import java.util.HashMap;
32import java.util.Map;
33import static com.sun.java.util.jar.pack.Constants.*;
34/**
35 * Define the conversions between sequences of small integers and raw bytes.
36 * This is a schema of encodings which incorporates varying lengths,
37 * varying degrees of length variability, and varying amounts of signed-ness.
38 * @author John Rose
39 */
40class Coding implements Comparable<Coding>, CodingMethod, Histogram.BitMetric {
41    /*
42      Coding schema for single integers, parameterized by (B,H,S):
43
44      Let B in [1,5], H in [1,256], S in [0,3].
45      (S limit is arbitrary.  B follows the 32-bit limit.  H is byte size.)
46
47      A given (B,H,S) code varies in length from 1 to B bytes.
48
49      The 256 values a byte may take on are divided into L=(256-H) and H
50      values, with all the H values larger than the L values.
51      (That is, the L values are [0,L) and the H are [L,256).)
52
53      The last byte is always either the B-th byte, a byte with "L value"
54      (<L), or both.  There is no other byte that satisfies these conditions.
55      All bytes before the last always have "H values" (>=L).
56
57      Therefore, if L==0, the code always has the full length of B bytes.
58      The coding then becomes a classic B-byte little-endian unsigned integer.
59      (Also, if L==128, the high bit of each byte acts signals the presence
60      of a following byte, up to the maximum length.)
61
62      In the unsigned case (S==0), the coding is compact and monotonic
63      in the ordering of byte sequences defined by appending zero bytes
64      to pad them to a common length B, reversing them, and ordering them
65      lexicographically.  (This agrees with "little-endian" byte order.)
66
67      Therefore, the unsigned value of a byte sequence may be defined as:
68      <pre>
69        U(b0)           == b0
70                           in [0..L)
71                           or [0..256) if B==1 (**)
72
73        U(b0,b1)        == b0 + b1*H
74                           in [L..L*(1+H))
75                           or [L..L*(1+H) + H^2) if B==2
76
77        U(b0,b1,b2)     == b0 + b1*H + b2*H^2
78                           in [L*(1+H)..L*(1+H+H^2))
79                           or [L*(1+H)..L*(1+H+H^2) + H^3) if B==3
80
81        U(b[i]: i<n)    == Sum[i<n]( b[i] * H^i )
82                           up to  L*Sum[i<n]( H^i )
83                           or to  L*Sum[i<n]( H^i ) + H^n if n==B
84      </pre>
85
86      (**) If B==1, the values H,L play no role in the coding.
87      As a convention, we require that any (1,H,S) code must always
88      encode values less than H.  Thus, a simple unsigned byte is coded
89      specifically by the code (1,256,0).
90
91      (Properly speaking, the unsigned case should be parameterized as
92      S==Infinity.  If the schema were regular, the case S==0 would really
93      denote a numbering in which all coded values are negative.)
94
95      If S>0, the unsigned value of a byte sequence is regarded as a binary
96      integer.  If any of the S low-order bits are zero, the corresponding
97      signed value will be non-negative.  If all of the S low-order bits
98      (S>0) are one, the corresponding signed value will be negative.
99
100      The non-negative signed values are compact and monotonically increasing
101      (from 0) in the ordering of the corresponding unsigned values.
102
103      The negative signed values are compact and monotonically decreasing
104      (from -1) in the ordering of the corresponding unsigned values.
105
106      In essence, the low-order S bits function as a collective sign bit
107      for negative signed numbers, and as a low-order base-(2^S-1) digit
108      for non-negative signed numbers.
109
110      Therefore, the signed value corresponding to an unsigned value is:
111      <pre>
112        Sgn(x)  == x                               if S==0
113        Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S),  if S>0, (x % 2^S) < 2^S-1
114        Sgn(x)  == -(x / 2^S)-1,                   if S>0, (x % 2^S) == 2^S-1
115      </pre>
116
117      Finally, the value of a byte sequence, given the coding parameters
118      (B,H,S), is defined as:
119      <pre>
120        V(b[i]: i<n)  == Sgn(U(b[i]: i<n))
121      </pre>
122
123      The extremal positive and negative signed value for a given range
124      of unsigned values may be found by sign-encoding the largest unsigned
125      value which is not 2^S-1 mod 2^S, and that which is, respectively.
126
127      Because B,H,S are variable, this is not a single coding but a schema
128      of codings.  For optimal compression, it is necessary to adaptively
129      select specific codings to the data being compressed.
130
131      For example, if a sequence of values happens never to be negative,
132      S==0 is the best choice.  If the values are equally balanced between
133      negative and positive, S==1.  If negative values are rare, then S>1
134      is more appropriate.
135
136      A (B,H,S) encoding is called a "subrange" if it does not encode
137      the largest 32-bit value, and if the number R of values it does
138      encode can be expressed as a positive 32-bit value.  (Note that
139      B=1 implies R<=256, B=2 implies R<=65536, etc.)
140
141      A delta version of a given (B,H,S) coding encodes an array of integers
142      by writing their successive differences in the (B,H,S) coding.
143      The original integers themselves may be recovered by making a
144      running accumulation of sum of the differences as they are read.
145
146      As a special case, if a (B,H,S) encoding is a subrange, its delta
147      version will only encode arrays of numbers in the coding's unsigned
148      range, [0..R-1].  The coding of deltas is still in the normal signed
149      range, if S!=0.  During delta encoding, all subtraction results are
150      reduced to the signed range, by adding multiples of R.  Likewise,
151.     during encoding, all addition results are reduced to the unsigned range.
152      This special case for subranges allows the benefits of wraparound
153      when encoding correlated sequences of very small positive numbers.
154     */
155
156    // Code-specific limits:
157    private static int saturate32(long x) {
158        if (x > Integer.MAX_VALUE)   return Integer.MAX_VALUE;
159        if (x < Integer.MIN_VALUE)   return Integer.MIN_VALUE;
160        return (int)x;
161    }
162    private static long codeRangeLong(int B, int H) {
163        return codeRangeLong(B, H, B);
164    }
165    private static long codeRangeLong(int B, int H, int nMax) {
166        // Code range for a all (B,H) codes of length <=nMax (<=B).
167        // n < B:   L*Sum[i<n]( H^i )
168        // n == B:  L*Sum[i<B]( H^i ) + H^B
169        assert(nMax >= 0 && nMax <= B);
170        assert(B >= 1 && B <= 5);
171        assert(H >= 1 && H <= 256);
172        if (nMax == 0)  return 0;  // no codes of zero length
173        if (B == 1)     return H;  // special case; see (**) above
174        int L = 256-H;
175        long sum = 0;
176        long H_i = 1;
177        for (int n = 1; n <= nMax; n++) {
178            sum += H_i;
179            H_i *= H;
180        }
181        sum *= L;
182        if (nMax == B)
183            sum += H_i;
184        return sum;
185    }
186    /** Largest int representable by (B,H,S) in up to nMax bytes. */
187    public static int codeMax(int B, int H, int S, int nMax) {
188        //assert(S >= 0 && S <= S_MAX);
189        long range = codeRangeLong(B, H, nMax);
190        if (range == 0)
191            return -1;  // degenerate max value for empty set of codes
192        if (S == 0 || range >= (long)1<<32)
193            return saturate32(range-1);
194        long maxPos = range-1;
195        while (isNegativeCode(maxPos, S)) {
196            --maxPos;
197        }
198        if (maxPos < 0)  return -1;  // No positive codings at all.
199        int smax = decodeSign32(maxPos, S);
200        // check for 32-bit wraparound:
201        if (smax < 0)
202            return Integer.MAX_VALUE;
203        return smax;
204    }
205    /** Smallest int representable by (B,H,S) in up to nMax bytes.
206        Returns Integer.MIN_VALUE if 32-bit wraparound covers
207        the entire negative range.
208     */
209    public static int codeMin(int B, int H, int S, int nMax) {
210        //assert(S >= 0 && S <= S_MAX);
211        long range = codeRangeLong(B, H, nMax);
212        if (range >= (long)1<<32 && nMax == B) {
213            // Can code negative values via 32-bit wraparound.
214            return Integer.MIN_VALUE;
215        }
216        if (S == 0) {
217            return 0;
218        }
219        long maxNeg = range-1;
220        while (!isNegativeCode(maxNeg, S))
221            --maxNeg;
222
223        if (maxNeg < 0)  return 0;  // No negative codings at all.
224        return decodeSign32(maxNeg, S);
225    }
226
227    // Some of the arithmetic below is on unsigned 32-bit integers.
228    // These must be represented in Java as longs in the range [0..2^32-1].
229    // The conversion to a signed int is just the Java cast (int), but
230    // the conversion to an unsigned int is the following little method:
231    private static long toUnsigned32(int sx) {
232        return ((long)sx << 32) >>> 32;
233    }
234
235    // Sign encoding:
236    private static boolean isNegativeCode(long ux, int S) {
237        assert(S > 0);
238        assert(ux >= -1);  // can be out of 32-bit range; who cares
239        int Smask = (1<<S)-1;
240        return (((int)ux+1) & Smask) == 0;
241    }
242    private static boolean hasNegativeCode(int sx, int S) {
243        assert(S > 0);
244        // If S>=2 very low negatives are coded by 32-bit-wrapped positives.
245        // The lowest negative representable by a negative coding is
246        // ~(umax32 >> S), and the next lower number is coded by wrapping
247        // the highest positive:
248        //    CodePos(umax32-1)  ->  (umax32-1)-((umax32-1)>>S)
249        // which simplifies to ~(umax32 >> S)-1.
250        return (0 > sx) && (sx >= ~(-1>>>S));
251    }
252    private static int decodeSign32(long ux, int S) {
253        assert(ux == toUnsigned32((int)ux))  // must be unsigned 32-bit number
254            : (Long.toHexString(ux));
255        if (S == 0) {
256            return (int) ux;  // cast to signed int
257        }
258        int sx;
259        if (isNegativeCode(ux, S)) {
260            // Sgn(x)  == -(x / 2^S)-1
261            sx = ~((int)ux >>> S);
262        } else {
263            // Sgn(x)  == (x / 2^S)*(2^S-1) + (x % 2^S)
264            sx = (int)ux - ((int)ux >>> S);
265        }
266        // Assert special case of S==1:
267        assert(!(S == 1) || sx == (((int)ux >>> 1) ^ -((int)ux & 1)));
268        return sx;
269    }
270    private static long encodeSign32(int sx, int S) {
271        if (S == 0) {
272            return toUnsigned32(sx);  // unsigned 32-bit int
273        }
274        int Smask = (1<<S)-1;
275        long ux;
276        if (!hasNegativeCode(sx, S)) {
277            // InvSgn(sx) = (sx / (2^S-1))*2^S + (sx % (2^S-1))
278            ux = sx + (toUnsigned32(sx) / Smask);
279        } else {
280            // InvSgn(sx) = (-sx-1)*2^S + (2^S-1)
281            ux = (-sx << S) - 1;
282        }
283        ux = toUnsigned32((int)ux);
284        assert(sx == decodeSign32(ux, S))
285            : (Long.toHexString(ux)+" -> "+
286               Integer.toHexString(sx)+" != "+
287               Integer.toHexString(decodeSign32(ux, S)));
288        return ux;
289    }
290
291    // Top-level coding of single integers:
292    public static void writeInt(byte[] out, int[] outpos, int sx, int B, int H, int S) {
293        long ux = encodeSign32(sx, S);
294        assert(ux == toUnsigned32((int)ux));
295        assert(ux < codeRangeLong(B, H))
296            : Long.toHexString(ux);
297        int L = 256-H;
298        long sum = ux;
299        int pos = outpos[0];
300        for (int i = 0; i < B-1; i++) {
301            if (sum < L)
302                break;
303            sum -= L;
304            int b_i = (int)( L + (sum % H) );
305            sum /= H;
306            out[pos++] = (byte)b_i;
307        }
308        out[pos++] = (byte)sum;
309        // Report number of bytes written by updating outpos[0]:
310        outpos[0] = pos;
311        // Check right away for mis-coding.
312        //assert(sx == readInt(out, new int[1], B, H, S));
313    }
314    public static int readInt(byte[] in, int[] inpos, int B, int H, int S) {
315        // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
316        int L = 256-H;
317        long sum = 0;
318        long H_i = 1;
319        int pos = inpos[0];
320        for (int i = 0; i < B; i++) {
321            int b_i = in[pos++] & 0xFF;
322            sum += b_i*H_i;
323            H_i *= H;
324            if (b_i < L)  break;
325        }
326        //assert(sum >= 0 && sum < codeRangeLong(B, H));
327        // Report number of bytes read by updating inpos[0]:
328        inpos[0] = pos;
329        return decodeSign32(sum, S);
330    }
331    // The Stream version doesn't fetch a byte unless it is needed for coding.
332    public static int readIntFrom(InputStream in, int B, int H, int S) throws IOException {
333        // U(b[i]: i<n) == Sum[i<n]( b[i] * H^i )
334        int L = 256-H;
335        long sum = 0;
336        long H_i = 1;
337        for (int i = 0; i < B; i++) {
338            int b_i = in.read();
339            if (b_i < 0)  throw new RuntimeException("unexpected EOF");
340            sum += b_i*H_i;
341            H_i *= H;
342            if (b_i < L)  break;
343        }
344        assert(sum >= 0 && sum < codeRangeLong(B, H));
345        return decodeSign32(sum, S);
346    }
347
348    public static final int B_MAX = 5;    /* B: [1,5] */
349    public static final int H_MAX = 256;  /* H: [1,256] */
350    public static final int S_MAX = 2;    /* S: [0,2] */
351
352    // END OF STATICS.
353
354    private final int B; /*1..5*/       // # bytes (1..5)
355    private final int H; /*1..256*/     // # codes requiring a higher byte
356    private final int L; /*0..255*/     // # codes requiring a higher byte
357    private final int S; /*0..3*/       // # low-order bits representing sign
358    private final int del; /*0..2*/     // type of delta encoding (0 == none)
359    private final int min;              // smallest representable value
360    private final int max;              // largest representable value
361    private final int umin;             // smallest representable uns. value
362    private final int umax;             // largest representable uns. value
363    private final int[] byteMin;        // smallest repr. value, given # bytes
364    private final int[] byteMax;        // largest repr. value, given # bytes
365
366    private Coding(int B, int H, int S) {
367        this(B, H, S, 0);
368    }
369    private Coding(int B, int H, int S, int del) {
370        this.B = B;
371        this.H = H;
372        this.L = 256-H;
373        this.S = S;
374        this.del = del;
375        this.min = codeMin(B, H, S, B);
376        this.max = codeMax(B, H, S, B);
377        this.umin = codeMin(B, H, 0, B);
378        this.umax = codeMax(B, H, 0, B);
379        this.byteMin = new int[B];
380        this.byteMax = new int[B];
381
382        for (int nMax = 1; nMax <= B; nMax++) {
383            byteMin[nMax-1] = codeMin(B, H, S, nMax);
384            byteMax[nMax-1] = codeMax(B, H, S, nMax);
385        }
386    }
387
388    public boolean equals(Object x) {
389        if (!(x instanceof Coding))  return false;
390        Coding that = (Coding) x;
391        if (this.B != that.B)  return false;
392        if (this.H != that.H)  return false;
393        if (this.S != that.S)  return false;
394        if (this.del != that.del)  return false;
395        return true;
396    }
397
398    public int hashCode() {
399        return (del<<14)+(S<<11)+(B<<8)+(H<<0);
400    }
401
402    private static Map<Coding, Coding> codeMap;
403
404    private static synchronized Coding of(int B, int H, int S, int del) {
405        if (codeMap == null)  codeMap = new HashMap<>();
406        Coding x0 = new Coding(B, H, S, del);
407        Coding x1 = codeMap.get(x0);
408        if (x1 == null)  codeMap.put(x0, x1 = x0);
409        return x1;
410    }
411
412    public static Coding of(int B, int H) {
413        return of(B, H, 0, 0);
414    }
415
416    public static Coding of(int B, int H, int S) {
417        return of(B, H, S, 0);
418    }
419
420    public boolean canRepresentValue(int x) {
421        if (isSubrange())
422            return canRepresentUnsigned(x);
423        else
424            return canRepresentSigned(x);
425    }
426    /** Can this coding represent a single value, possibly a delta?
427     *  This ignores the D property.  That is, for delta codings,
428     *  this tests whether a delta value of 'x' can be coded.
429     *  For signed delta codings which produce unsigned end values,
430     *  use canRepresentUnsigned.
431     */
432    public boolean canRepresentSigned(int x) {
433        return (x >= min && x <= max);
434    }
435    /** Can this coding, apart from its S property,
436     *  represent a single value?  (Negative values
437     *  can only be represented via 32-bit overflow,
438     *  so this returns true for negative values
439     *  if isFullRange is true.)
440     */
441    public boolean canRepresentUnsigned(int x) {
442        return (x >= umin && x <= umax);
443    }
444
445    // object-oriented code/decode
446    public int readFrom(byte[] in, int[] inpos) {
447        return readInt(in, inpos, B, H, S);
448    }
449    public void writeTo(byte[] out, int[] outpos, int x) {
450        writeInt(out, outpos, x, B, H, S);
451    }
452
453    // Stream versions
454    public int readFrom(InputStream in) throws IOException {
455        return readIntFrom(in, B, H, S);
456    }
457    public void writeTo(OutputStream out, int x) throws IOException {
458        byte[] buf = new byte[B];
459        int[] pos = new int[1];
460        writeInt(buf, pos, x, B, H, S);
461        out.write(buf, 0, pos[0]);
462    }
463
464    // Stream/array versions
465    public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
466        // %%% use byte[] buffer
467        for (int i = start; i < end; i++)
468            a[i] = readFrom(in);
469
470        for (int dstep = 0; dstep < del; dstep++) {
471            long state = 0;
472            for (int i = start; i < end; i++) {
473                state += a[i];
474                // Reduce array values to the required range.
475                if (isSubrange()) {
476                    state = reduceToUnsignedRange(state);
477                }
478                a[i] = (int) state;
479            }
480        }
481    }
482    public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
483        if (end <= start)  return;
484        for (int dstep = 0; dstep < del; dstep++) {
485            int[] deltas;
486            if (!isSubrange())
487                deltas = makeDeltas(a, start, end, 0, 0);
488            else
489                deltas = makeDeltas(a, start, end, min, max);
490            a = deltas;
491            start = 0;
492            end = deltas.length;
493        }
494        // The following code is a buffered version of this loop:
495        //    for (int i = start; i < end; i++)
496        //        writeTo(out, a[i]);
497        byte[] buf = new byte[1<<8];
498        final int bufmax = buf.length-B;
499        int[] pos = { 0 };
500        for (int i = start; i < end; ) {
501            while (pos[0] <= bufmax) {
502                writeTo(buf, pos, a[i++]);
503                if (i >= end)  break;
504            }
505            out.write(buf, 0, pos[0]);
506            pos[0] = 0;
507        }
508    }
509
510    /** Tell if the range of this coding (number of distinct
511     *  representable values) can be expressed in 32 bits.
512     */
513    boolean isSubrange() {
514        return max < Integer.MAX_VALUE
515            && ((long)max - (long)min + 1) <= Integer.MAX_VALUE;
516    }
517
518    /** Tell if this coding can represent all 32-bit values.
519     *  Note:  Some codings, such as unsigned ones, can be neither
520     *  subranges nor full-range codings.
521     */
522    boolean isFullRange() {
523        return max == Integer.MAX_VALUE && min == Integer.MIN_VALUE;
524    }
525
526    /** Return the number of values this coding (a subrange) can represent. */
527    int getRange() {
528        assert(isSubrange());
529        return (max - min) + 1;  // range includes both min & max
530    }
531
532    Coding setB(int B) { return Coding.of(B, H, S, del); }
533    Coding setH(int H) { return Coding.of(B, H, S, del); }
534    Coding setS(int S) { return Coding.of(B, H, S, del); }
535    Coding setL(int L) { return setH(256-L); }
536    Coding setD(int del) { return Coding.of(B, H, S, del); }
537    Coding getDeltaCoding() { return setD(del+1); }
538
539    /** Return a coding suitable for representing summed, modulo-reduced values. */
540    Coding getValueCoding() {
541        if (isDelta())
542            return Coding.of(B, H, 0, del-1);
543        else
544            return this;
545    }
546
547    /** Reduce the given value to be within this coding's unsigned range,
548     *  by adding or subtracting a multiple of (max-min+1).
549     */
550    int reduceToUnsignedRange(long value) {
551        if (value == (int)value && canRepresentUnsigned((int)value))
552            // already in unsigned range
553            return (int)value;
554        int range = getRange();
555        assert(range > 0);
556        value %= range;
557        if (value < 0)  value += range;
558        assert(canRepresentUnsigned((int)value));
559        return (int)value;
560    }
561
562    int reduceToSignedRange(int value) {
563        if (canRepresentSigned(value))
564            // already in signed range
565            return value;
566        return reduceToSignedRange(value, min, max);
567    }
568    static int reduceToSignedRange(int value, int min, int max) {
569        int range = (max-min+1);
570        assert(range > 0);
571        int value0 = value;
572        value -= min;
573        if (value < 0 && value0 >= 0) {
574            // 32-bit overflow, but the next '%=' op needs to be unsigned
575            value -= range;
576            assert(value >= 0);
577        }
578        value %= range;
579        if (value < 0)  value += range;
580        value += min;
581        assert(min <= value && value <= max);
582        return value;
583    }
584
585    /** Does this coding support at least one negative value?
586        Includes codings that can do so via 32-bit wraparound.
587     */
588    boolean isSigned() {
589        return min < 0;
590    }
591    /** Does this coding code arrays by making successive differences? */
592    boolean isDelta() {
593        return del != 0;
594    }
595
596    public int B() { return B; }
597    public int H() { return H; }
598    public int L() { return L; }
599    public int S() { return S; }
600    public int del() { return del; }
601    public int min() { return min; }
602    public int max() { return max; }
603    public int umin() { return umin; }
604    public int umax() { return umax; }
605    public int byteMin(int b) { return byteMin[b-1]; }
606    public int byteMax(int b) { return byteMax[b-1]; }
607
608    public int compareTo(Coding that) {
609        int dkey = this.del - that.del;
610        if (dkey == 0)
611            dkey = this.B - that.B;
612        if (dkey == 0)
613            dkey = this.H - that.H;
614        if (dkey == 0)
615            dkey = this.S - that.S;
616        return dkey;
617    }
618
619    /** Heuristic measure of the difference between two codings. */
620    public int distanceFrom(Coding that) {
621        int diffdel = this.del - that.del;
622        if (diffdel < 0)  diffdel = -diffdel;
623        int diffS = this.S - that.S;
624        if (diffS < 0)  diffS = -diffS;
625        int diffB = this.B - that.B;
626        if (diffB < 0)  diffB = -diffB;
627        int diffHL;
628        if (this.H == that.H) {
629            diffHL = 0;
630        } else {
631            // Distance in log space of H (<=128) and L (<128).
632            int thisHL = this.getHL();
633            int thatHL = that.getHL();
634            // Double the accuracy of the log:
635            thisHL *= thisHL;
636            thatHL *= thatHL;
637            if (thisHL > thatHL)
638                diffHL = ceil_lg2(1+(thisHL-1)/thatHL);
639            else
640                diffHL = ceil_lg2(1+(thatHL-1)/thisHL);
641        }
642        int norm = 5*(diffdel + diffS + diffB) + diffHL;
643        assert(norm != 0 || this.compareTo(that) == 0);
644        return norm;
645    }
646    private int getHL() {
647        // Follow H in log space by the multiplicative inverse of L.
648        if (H <= 128)  return H;
649        if (L >= 1)    return 128*128/L;
650        return 128*256;
651    }
652
653    /** ceiling(log[2](x)): {1->0, 2->1, 3->2, 4->2, ...} */
654    static int ceil_lg2(int x) {
655        assert(x-1 >= 0);  // x in range (int.MIN_VALUE -> 32)
656        x -= 1;
657        int lg = 0;
658        while (x != 0) {
659            lg++;
660            x >>= 1;
661        }
662        return lg;
663    }
664
665    static private final byte[] byteBitWidths = new byte[0x100];
666    static {
667        for (int b = 0; b < byteBitWidths.length; b++) {
668            byteBitWidths[b] = (byte) ceil_lg2(b + 1);
669        }
670        for (int i = 10; i >= 0; i = (i << 1) - (i >> 3)) {
671            assert(bitWidth(i) == ceil_lg2(i + 1));
672        }
673    }
674
675    /** Number of significant bits in i, not counting sign bits.
676     *  For positive i, it is ceil_lg2(i + 1).
677     */
678    static int bitWidth(int i) {
679        if (i < 0)  i = ~i;  // change sign
680        int w = 0;
681        int lo = i;
682        if (lo < byteBitWidths.length)
683            return byteBitWidths[lo];
684        int hi;
685        hi = (lo >>> 16);
686        if (hi != 0) {
687            lo = hi;
688            w += 16;
689        }
690        hi = (lo >>> 8);
691        if (hi != 0) {
692            lo = hi;
693            w += 8;
694        }
695        w += byteBitWidths[lo];
696        //assert(w == ceil_lg2(i + 1));
697        return w;
698    }
699
700    /** Create an array of successive differences.
701     *  If min==max, accept any and all 32-bit overflow.
702     *  Otherwise, avoid 32-bit overflow, and reduce all differences
703     *  to a value in the given range, by adding or subtracting
704     *  multiples of the range cardinality (max-min+1).
705     *  Also, the values are assumed to be in the range [0..(max-min)].
706     */
707    static int[] makeDeltas(int[] values, int start, int end,
708                            int min, int max) {
709        assert(max >= min);
710        int count = end-start;
711        int[] deltas = new int[count];
712        int state = 0;
713        if (min == max) {
714            for (int i = 0; i < count; i++) {
715                int value = values[start+i];
716                deltas[i] = value - state;
717                state = value;
718            }
719        } else {
720            for (int i = 0; i < count; i++) {
721                int value = values[start+i];
722                assert(value >= 0 && value+min <= max);
723                int delta = value - state;
724                assert(delta == (long)value - (long)state); // no overflow
725                state = value;
726                // Reduce delta values to the required range.
727                delta = reduceToSignedRange(delta, min, max);
728                deltas[i] = delta;
729            }
730        }
731        return deltas;
732    }
733
734    boolean canRepresent(int minValue, int maxValue) {
735        assert(minValue <= maxValue);
736        if (del > 0) {
737            if (isSubrange()) {
738                // We will force the values to reduce to the right subrange.
739                return canRepresentUnsigned(maxValue)
740                    && canRepresentUnsigned(minValue);
741            } else {
742                // Huge range; delta values must assume full 32-bit range.
743                return isFullRange();
744            }
745        }
746        else
747            // final values must be representable
748            return canRepresentSigned(maxValue)
749                && canRepresentSigned(minValue);
750    }
751
752    boolean canRepresent(int[] values, int start, int end) {
753        int len = end-start;
754        if (len == 0)       return true;
755        if (isFullRange())  return true;
756        // Calculate max, min:
757        int lmax = values[start];
758        int lmin = lmax;
759        for (int i = 1; i < len; i++) {
760            int value = values[start+i];
761            if (lmax < value)  lmax = value;
762            if (lmin > value)  lmin = value;
763        }
764        return canRepresent(lmin, lmax);
765    }
766
767    public double getBitLength(int value) {  // implements BitMetric
768        return (double) getLength(value) * 8;
769    }
770
771    /** How many bytes are in the coding of this value?
772     *  Returns Integer.MAX_VALUE if the value has no coding.
773     *  The coding must not be a delta coding, since there is no
774     *  definite size for a single value apart from its context.
775     */
776    public int getLength(int value) {
777        if (isDelta() && isSubrange()) {
778            if (!canRepresentUnsigned(value))
779                return Integer.MAX_VALUE;
780            value = reduceToSignedRange(value);
781        }
782        if (value >= 0) {
783            for (int n = 0; n < B; n++) {
784                if (value <= byteMax[n])  return n+1;
785            }
786        } else {
787            for (int n = 0; n < B; n++) {
788                if (value >= byteMin[n])  return n+1;
789            }
790        }
791        return Integer.MAX_VALUE;
792    }
793
794    public int getLength(int[] values, int start, int end) {
795        int len = end-start;
796        if (B == 1)  return len;
797        if (L == 0)  return len * B;
798        if (isDelta()) {
799            int[] deltas;
800            if (!isSubrange())
801                deltas = makeDeltas(values, start, end, 0, 0);
802            else
803                deltas = makeDeltas(values, start, end, min, max);
804            //return Coding.of(B, H, S).getLength(deltas, 0, len);
805            values = deltas;
806            start = 0;
807        }
808        int sum = len;  // at least 1 byte per
809        // add extra bytes for extra-long values
810        for (int n = 1; n <= B; n++) {
811            // what is the coding interval [min..max] for n bytes?
812            int lmax = byteMax[n-1];
813            int lmin = byteMin[n-1];
814            int longer = 0;  // count of guys longer than n bytes
815            for (int i = 0; i < len; i++) {
816                int value = values[start+i];
817                if (value >= 0) {
818                    if (value > lmax)  longer++;
819                } else {
820                    if (value < lmin)  longer++;
821                }
822            }
823            if (longer == 0)  break;  // no more passes needed
824            if (n == B)  return Integer.MAX_VALUE;  // cannot represent!
825            sum += longer;
826        }
827        return sum;
828    }
829
830    public byte[] getMetaCoding(Coding dflt) {
831        if (dflt == this)  return new byte[]{ (byte) _meta_default };
832        int canonicalIndex = BandStructure.indexOf(this);
833        if (canonicalIndex > 0)
834            return new byte[]{ (byte) canonicalIndex };
835        return new byte[]{
836            (byte)_meta_arb,
837            (byte)(del + 2*S + 8*(B-1)),
838            (byte)(H-1)
839        };
840    }
841    public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
842        int op = bytes[pos++] & 0xFF;
843        if (_meta_canon_min <= op && op <= _meta_canon_max) {
844            Coding c = BandStructure.codingForIndex(op);
845            assert(c != null);
846            res[0] = c;
847            return pos;
848        }
849        if (op == _meta_arb) {
850            int dsb = bytes[pos++] & 0xFF;
851            int H_1 = bytes[pos++] & 0xFF;
852            int del = dsb % 2;
853            int S = (dsb / 2) % 4;
854            int B = (dsb / 8)+1;
855            int H = H_1+1;
856            if (!((1 <= B && B <= B_MAX) &&
857                  (0 <= S && S <= S_MAX) &&
858                  (1 <= H && H <= H_MAX) &&
859                  (0 <= del && del <= 1))
860                || (B == 1 && H != 256)
861                || (B == 5 && H == 256)) {
862                throw new RuntimeException("Bad arb. coding: ("+B+","+H+","+S+","+del);
863            }
864            res[0] = Coding.of(B, H, S, del);
865            return pos;
866        }
867        return pos-1;  // backup
868    }
869
870
871    public String keyString() {
872        return "("+B+","+H+","+S+","+del+")";
873    }
874
875    public String toString() {
876        String str = "Coding"+keyString();
877        // If -ea, print out more informative strings!
878        //assert((str = stringForDebug()) != null);
879        return str;
880    }
881
882    static boolean verboseStringForDebug = false;
883    String stringForDebug() {
884        String minS = (min == Integer.MIN_VALUE ? "min" : ""+min);
885        String maxS = (max == Integer.MAX_VALUE ? "max" : ""+max);
886        String str = keyString()+" L="+L+" r=["+minS+","+maxS+"]";
887        if (isSubrange())
888            str += " subrange";
889        else if (!isFullRange())
890            str += " MIDRANGE";
891        if (verboseStringForDebug) {
892            str += " {";
893            int prev_range = 0;
894            for (int n = 1; n <= B; n++) {
895                int range_n = saturate32((long)byteMax[n-1] - byteMin[n-1] + 1);
896                assert(range_n == saturate32(codeRangeLong(B, H, n)));
897                range_n -= prev_range;
898                prev_range = range_n;
899                String rngS = (range_n == Integer.MAX_VALUE ? "max" : ""+range_n);
900                str += " #"+n+"="+rngS;
901            }
902            str += " }";
903        }
904        return str;
905    }
906}
907