1/*
2 * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26// -*- C++ -*-
27// Small program for unpacking specially compressed Java packages.
28// John R. Rose
29
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include <stdarg.h>
34
35#include "jni_util.h"
36
37#include "defines.h"
38#include "bytes.h"
39#include "utils.h"
40#include "coding.h"
41
42#include "constants.h"
43#include "unpack.h"
44
45extern coding basic_codings[];
46
// Declare locals B, H, L, S and D describing the coding 'spec'.
// B, H, S and D are extracted with the CODING_* accessors; L is derived
// as 256 - H.  This expands to a run of declarations, so it can be used
// at most once per scope.
#define CODING_PRIVATE(spec) \
  int spec_ = (spec); \
  int B = CODING_B(spec_); \
  int H = CODING_H(spec_); \
  int L = 256 - H; \
  int S = CODING_S(spec_); \
  int D = CODING_D(spec_)

// True if the low S bits of codeVal are all ones, i.e. the code stands for
// a negative value under sign convention S.
// The test is done in unsigned arithmetic so that codeVal == 0x7FFFFFFF
// does not overflow a signed int, and S is parenthesized so that any
// expression may be passed for it.
#define IS_NEG_CODE(S, codeVal) \
  ( ((((unsigned int)(codeVal))+1u) & ((1u<<(S))-1u)) == 0 )

// Specialized sign decoding for S == 1: the low bit selects the sign,
// and the remaining bits give the magnitude.
#define DECODE_SIGN_S1(ux) \
  ( ((uint)(ux) >> 1) ^ -((int)(ux) & 1) )
60
61static maybe_inline
62int decode_sign(int S, uint ux) {  // == Coding.decodeSign32
63  assert(S > 0);
64  uint sigbits = (ux >> S);
65  if (IS_NEG_CODE(S, ux))
66    return (int)(    ~sigbits);
67  else
68    return (int)(ux - sigbits);
69  // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
70}
71
72coding* coding::init() {
73  if (umax > 0)  return this;  // already done
74  assert(spec != 0);  // sanity
75
76  // fill in derived fields
77  CODING_PRIVATE(spec);
78
79  // Return null if 'arb(BHSD)' parameter constraints are not met:
80  if (B < 1 || B > B_MAX)  return null;
81  if (H < 1 || H > 256)    return null;
82  if (S < 0 || S > 2)      return null;
83  if (D < 0 || D > 1)      return null;
84  if (B == 1 && H != 256)  return null;  // 1-byte coding must be fixed-size
85  if (B >= 5 && H == 256)  return null;  // no 5-byte fixed-size coding
86
87  // first compute the range of the coding, in 64 bits
88  jlong range = 0;
89  {
90    jlong H_i = 1;
91    for (int i = 0; i < B; i++) {
92      range += H_i;
93      H_i *= H;
94    }
95    range *= L;
96    range += H_i;
97  }
98  assert(range > 0);  // no useless codings, please
99
100  int this_umax;
101
102  // now, compute min and max
103  if (range >= ((jlong)1 << 32)) {
104    this_umax  = INT_MAX_VALUE;
105    this->umin = INT_MIN_VALUE;
106    this->max  = INT_MAX_VALUE;
107    this->min  = INT_MIN_VALUE;
108  } else {
109    this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
110    this->max = this_umax;
111    this->min = this->umin = 0;
112    if (S != 0 && range != 0) {
113      int Smask = (1<<S)-1;
114      jlong maxPosCode = range-1;
115      jlong maxNegCode = range-1;
116      while (IS_NEG_CODE(S,  maxPosCode))  --maxPosCode;
117      while (!IS_NEG_CODE(S, maxNegCode))  --maxNegCode;
118      int maxPos = decode_sign(S, (uint)maxPosCode);
119      if (maxPos < 0)
120        this->max = INT_MAX_VALUE;  // 32-bit wraparound
121      else
122        this->max = maxPos;
123      if (maxNegCode < 0)
124        this->min = 0;  // No negative codings at all.
125      else
126        this->min = decode_sign(S, (uint)maxNegCode);
127    }
128  }
129
130  assert(!(isFullRange | isSigned | isSubrange)); // init
131  if (min < 0)
132    this->isSigned = true;
133  if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
134    this->isSubrange = true;
135  if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
136    this->isFullRange = true;
137
138  // do this last, to reduce MT exposure (should have a membar too)
139  this->umax = this_umax;
140
141  return this;
142}
143
144coding* coding::findBySpec(int spec) {
145  for (coding* scan = &basic_codings[0]; ; scan++) {
146    if (scan->spec == spec)
147      return scan->init();
148    if (scan->spec == 0)
149      break;
150  }
151  coding* ptr = NEW(coding, 1);
152  CHECK_NULL_RETURN(ptr, 0);
153  coding* c = ptr->initFrom(spec);
154  if (c == null) {
155    mtrace('f', ptr, 0);
156    ::free(ptr);
157  } else
158    // else caller should free it...
159    c->isMalloc = true;
160  return c;
161}
162
163coding* coding::findBySpec(int B, int H, int S, int D) {
164  if (B < 1 || B > B_MAX)  return null;
165  if (H < 1 || H > 256)    return null;
166  if (S < 0 || S > 2)      return null;
167  if (D < 0 || D > 1)      return null;
168  return findBySpec(CODING_SPEC(B, H, S, D));
169}
170
171void coding::free() {
172  if (isMalloc) {
173    mtrace('f', this, 0);
174    ::free(this);
175  }
176}
177
178void coding_method::reset(value_stream* state) {
179  assert(state->rp == state->rplimit);  // not in mid-stream, please
180  //assert(this == vs0.cm);
181  state[0] = vs0;
182  if (uValues != null) {
183    uValues->reset(state->helper());
184  }
185}
186
187maybe_inline
188uint coding::parse(byte* &rp, int B, int H) {
189  int L = 256-H;
190  byte* ptr = rp;
191  // hand peel the i==0 part of the loop:
192  uint b_i = *ptr++ & 0xFF;
193  if (B == 1 || b_i < (uint)L)
194    { rp = ptr; return b_i; }
195  uint sum = b_i;
196  uint H_i = H;
197  assert(B <= B_MAX);
198  for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
199    b_i = *ptr++ & 0xFF;
200    sum += b_i * H_i;
201    if (i == B || b_i < (uint)L)
202      { rp = ptr; return sum; }
203    H_i *= H;
204  }
205  assert(false);
206  return 0;
207}
208
209maybe_inline
210uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
211  assert(H == (1<<lgH));
212  int L = 256-(1<<lgH);
213  byte* ptr = rp;
214  // hand peel the i==0 part of the loop:
215  uint b_i = *ptr++ & 0xFF;
216  if (B == 1 || b_i < (uint)L)
217    { rp = ptr; return b_i; }
218  uint sum = b_i;
219  uint lg_H_i = lgH;
220  assert(B <= B_MAX);
221  for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
222    b_i = *ptr++ & 0xFF;
223    sum += b_i << lg_H_i;
224    if (i == B || b_i < (uint)L)
225      { rp = ptr; return sum; }
226    lg_H_i += lgH;
227  }
228  assert(false);
229  return 0;
230}
231
232static const char ERB[] = "EOF reading band";
233
234maybe_inline
235void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
236  if (N < 0) {
237    abort("bad value count");
238    return;
239  }
240  byte* ptr = rp;
241  if (B == 1 || H == 256) {
242    size_t len = (size_t)N*B;
243    if (len / B != (size_t)N || ptr+len > limit) {
244      abort(ERB);
245      return;
246    }
247    rp = ptr+len;
248    return;
249  }
250  // Note:  We assume rp has enough zero-padding.
251  int L = 256-H;
252  int n = B;
253  while (N > 0) {
254    ptr += 1;
255    if (--n == 0) {
256      // end of encoding at B bytes, regardless of byte value
257    } else {
258      int b = (ptr[-1] & 0xFF);
259      if (b >= L) {
260        // keep going, unless we find a byte < L
261        continue;
262      }
263    }
264    // found the last byte
265    N -= 1;
266    n = B;   // reset length counter
267    // do an error check here
268    if (ptr > limit) {
269      abort(ERB);
270      return;
271    }
272  }
273  rp = ptr;
274  return;
275}
276
277bool value_stream::hasHelper() {
278  // If my coding method is a pop-style method,
279  // then I need a second value stream to transmit
280  // unfavored values.
281  // This can be determined by examining fValues.
282  return cm->fValues != null;
283}
284
285void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
286  rp = rp_;
287  rplimit = rplimit_;
288  sum = 0;
289  cm = null;  // no need in the simple case
290  setCoding(defc);
291}
292
293void value_stream::setCoding(coding* defc) {
294  if (defc == null) {
295    unpack_abort("bad coding");
296    defc = coding::findByIndex(_meta_canon_min);  // random pick for recovery
297  }
298
299  c = (*defc);
300
301  // choose cmk
302  cmk = cmk_ERROR;
303  switch (c.spec) {
304  case BYTE1_spec:      cmk = cmk_BYTE1;        break;
305  case CHAR3_spec:      cmk = cmk_CHAR3;        break;
306  case UNSIGNED5_spec:  cmk = cmk_UNSIGNED5;    break;
307  case DELTA5_spec:     cmk = cmk_DELTA5;       break;
308  case BCI5_spec:       cmk = cmk_BCI5;         break;
309  case BRANCH5_spec:    cmk = cmk_BRANCH5;      break;
310  default:
311    if (c.D() == 0) {
312      switch (c.S()) {
313      case 0:  cmk = cmk_BHS0;  break;
314      case 1:  cmk = cmk_BHS1;  break;
315      default: cmk = cmk_BHS;   break;
316      }
317    } else {
318      if (c.S() == 1) {
319        if (c.isFullRange)   cmk = cmk_BHS1D1full;
320        if (c.isSubrange)    cmk = cmk_BHS1D1sub;
321      }
322      if (cmk == cmk_ERROR)  cmk = cmk_BHSD1;
323    }
324  }
325}
326
327static maybe_inline
328int getPopValue(value_stream* self, uint uval) {
329  if (uval > 0) {
330    // note that the initial parse performed a range check
331    assert(uval <= (uint)self->cm->fVlength);
332    return self->cm->fValues[uval-1];
333  } else {
334    // take an unfavored value
335    return self->helper()->getInt();
336  }
337}
338
339maybe_inline
340int coding::sumInUnsignedRange(int x, int y) {
341  assert(isSubrange);
342  int range = (int)(umax+1);
343  assert(range > 0);
344  x += y;
345  if (x != (int)((jlong)(x-y) + (jlong)y)) {
346    // 32-bit overflow interferes with range reduction.
347    // Back off from the overflow by adding a multiple of range:
348    if (x < 0) {
349      x -= range;
350      assert(x >= 0);
351    } else {
352      x += range;
353      assert(x < 0);
354    }
355  }
356  if (x < 0) {
357    x += range;
358    if (x >= 0)  return x;
359  } else if (x >= range) {
360    x -= range;
361    if (x < range)  return x;
362  } else {
363    // in range
364    return x;
365  }
366  // do it the hard way
367  x %= range;
368  if (x < 0)  x += range;
369  return x;
370}
371
372static maybe_inline
373int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
374  assert((uint)(self->c.isSubrange) == (uint)isSubrange);
375  assert(self->c.isSubrange | self->c.isFullRange);
376  if (isSubrange)
377    return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
378  else
379    return self->sum += (int) uval;
380}
381
382bool value_stream::hasValue() {
383  if (rp < rplimit)      return true;
384  if (cm == null)        return false;
385  if (cm->next == null)  return false;
386  cm->next->reset(this);
387  return hasValue();
388}
389
390int value_stream::getInt() {
391  if (rp >= rplimit) {
392    // Advance to next coding segment.
393    if (rp > rplimit || cm == null || cm->next == null) {
394      // Must perform this check and throw an exception on bad input.
395      unpack_abort(ERB);
396      return 0;
397    }
398    cm->next->reset(this);
399    return getInt();
400  }
401
402  CODING_PRIVATE(c.spec);
403  uint uval;
404  enum {
405    B5 = 5,
406    B3 = 3,
407    H128 = 128,
408    H64 = 64,
409    H4 = 4
410  };
411  switch (cmk) {
412  case cmk_BHS:
413    assert(D == 0);
414    uval = coding::parse(rp, B, H);
415    if (S == 0)
416      return (int) uval;
417    return decode_sign(S, uval);
418
419  case cmk_BHS0:
420    assert(S == 0 && D == 0);
421    uval = coding::parse(rp, B, H);
422    return (int) uval;
423
424  case cmk_BHS1:
425    assert(S == 1 && D == 0);
426    uval = coding::parse(rp, B, H);
427    return DECODE_SIGN_S1(uval);
428
429  case cmk_BYTE1:
430    assert(c.spec == BYTE1_spec);
431    assert(B == 1 && H == 256 && S == 0 && D == 0);
432    return *rp++ & 0xFF;
433
434  case cmk_CHAR3:
435    assert(c.spec == CHAR3_spec);
436    assert(B == B3 && H == H128 && S == 0 && D == 0);
437    return coding::parse_lgH(rp, B3, H128, 7);
438
439  case cmk_UNSIGNED5:
440    assert(c.spec == UNSIGNED5_spec);
441    assert(B == B5 && H == H64 && S == 0 && D == 0);
442    return coding::parse_lgH(rp, B5, H64, 6);
443
444  case cmk_BHSD1:
445    assert(D == 1);
446    uval = coding::parse(rp, B, H);
447    if (S != 0)
448      uval = (uint) decode_sign(S, uval);
449    return getDeltaValue(this, uval, (bool)c.isSubrange);
450
451  case cmk_BHS1D1full:
452    assert(S == 1 && D == 1 && c.isFullRange);
453    uval = coding::parse(rp, B, H);
454    uval = (uint) DECODE_SIGN_S1(uval);
455    return getDeltaValue(this, uval, false);
456
457  case cmk_BHS1D1sub:
458    assert(S == 1 && D == 1 && c.isSubrange);
459    uval = coding::parse(rp, B, H);
460    uval = (uint) DECODE_SIGN_S1(uval);
461    return getDeltaValue(this, uval, true);
462
463  case cmk_DELTA5:
464    assert(c.spec == DELTA5_spec);
465    assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
466    uval = coding::parse_lgH(rp, B5, H64, 6);
467    sum += DECODE_SIGN_S1(uval);
468    return sum;
469
470  case cmk_BCI5:
471    assert(c.spec == BCI5_spec);
472    assert(B == B5 && H == H4 && S == 0 && D == 0);
473    return coding::parse_lgH(rp, B5, H4, 2);
474
475  case cmk_BRANCH5:
476    assert(c.spec == BRANCH5_spec);
477    assert(B == B5 && H == H4 && S == 2 && D == 0);
478    uval = coding::parse_lgH(rp, B5, H4, 2);
479    return decode_sign(S, uval);
480
481  case cmk_pop:
482    uval = coding::parse(rp, B, H);
483    if (S != 0) {
484      uval = (uint) decode_sign(S, uval);
485    }
486    if (D != 0) {
487      assert(c.isSubrange | c.isFullRange);
488      if (c.isSubrange)
489        sum = c.sumInUnsignedRange(sum, (int) uval);
490      else
491        sum += (int) uval;
492      uval = (uint) sum;
493    }
494    return getPopValue(this, uval);
495
496  case cmk_pop_BHS0:
497    assert(S == 0 && D == 0);
498    uval = coding::parse(rp, B, H);
499    return getPopValue(this, uval);
500
501  case cmk_pop_BYTE1:
502    assert(c.spec == BYTE1_spec);
503    assert(B == 1 && H == 256 && S == 0 && D == 0);
504    return getPopValue(this, *rp++ & 0xFF);
505
506  default:
507    break;
508  }
509  assert(false);
510  return 0;
511}
512
513static maybe_inline
514int moreCentral(int x, int y) {  // used to find end of Pop.{F}
515  // Suggested implementation from the Pack200 specification:
516  uint kx = (x >> 31) ^ (x << 1);
517  uint ky = (y >> 31) ^ (y << 1);
518  return (kx < ky? x: y);
519}
520//static maybe_inline
521//int moreCentral2(int x, int y, int min) {
522//  // Strict implementation of buggy 150.7 specification.
523//  // The bug is that the spec. says absolute-value ties are broken
524//  // in favor of positive numbers, but the suggested implementation
525//  // (also mentioned in the spec.) breaks ties in favor of negative numbers.
526//  if ((x + y) != 0)
527//    return min;
528//  else
529//    // return the other value, which breaks a tie in the positive direction
530//    return (x > y)? x: y;
531//}
532
533static const byte* no_meta[] = {null};
534#define NO_META (*(byte**)no_meta)
535enum { POP_FAVORED_N = -2 };
536
537// mode bits
538#define DISABLE_RUN  1  // used immediately inside ACodee
539#define DISABLE_POP  2  // used recursively in all pop sub-bands
540
541// This function knows all about meta-coding.
542void coding_method::init(byte* &band_rp, byte* band_limit,
543                         byte* &meta_rp, int mode,
544                         coding* defc, int N,
545                         intlist* valueSink) {
546  assert(N != 0);
547
548  assert(u != null);  // must be pre-initialized
549  //if (u == null)  u = unpacker::current();  // expensive
550
551  int op = (meta_rp == null) ? _meta_default :  (*meta_rp++ & 0xFF);
552  coding* foundc = null;
553  coding* to_free = null;
554
555  if (op == _meta_default) {
556    foundc = defc;
557    // and fall through
558
559  } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
560    foundc = coding::findByIndex(op);
561    // and fall through
562
563  } else if (op == _meta_arb) {
564    int args = (*meta_rp++ & 0xFF);
565    // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
566    int D = ((args >> 0) & 1);
567    int S = ((args >> 1) & 3);
568    int B = ((args >> 3) & -1) + 1;
569    // & (H[1..256]-1)
570    int H = (*meta_rp++ & 0xFF) + 1;
571    foundc = coding::findBySpec(B, H, S, D);
572    to_free = foundc;  // findBySpec may dynamically allocate
573    if (foundc == null) {
574      abort("illegal arb. coding");
575      return;
576    }
577    // and fall through
578
579  } else if (op >= _meta_run && op < _meta_pop) {
580    int args = (op - _meta_run);
581    // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
582    int KX     = ((args >> 0) & 3);
583    int KBFlag = ((args >> 2) & 1);
584    int ABDef  = ((args >> 3) & -1);
585    assert(ABDef <= 2);
586    // & KB: one of [0..255] if KBFlag=1
587    int KB     = (!KBFlag? 3: (*meta_rp++ & 0xFF));
588    int K      = (KB+1) << (KX * 4);
589    int N2 = (N >= 0) ? N-K : N;
590    if (N == 0 || (N2 <= 0 && N2 != N)) {
591      abort("illegal run encoding");
592      return;
593    }
594    if ((mode & DISABLE_RUN) != 0) {
595      abort("illegal nested run encoding");
596      return;
597    }
598
599    // & Enc{ ACode } if ADef=0  (ABDef != 1)
600    // No direct nesting of 'run' in ACode, but in BCode it's OK.
601    int disRun = mode | DISABLE_RUN;
602    if (ABDef == 1) {
603      this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
604    } else {
605      this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
606    }
607    CHECK;
608
609    // & Enc{ BCode } if BDef=0  (ABDef != 2)
610    coding_method* tail = U_NEW(coding_method, 1);
611    CHECK_NULL(tail);
612    tail->u = u;
613
614    // The 'run' codings may be nested indirectly via 'pop' codings.
615    // This means that this->next may already be filled in, if
616    // ACode was of type 'pop' with a 'run' token coding.
617    // No problem:  Just chain the upcoming BCode onto the end.
618    for (coding_method* self = this; ; self = self->next) {
619      if (self->next == null) {
620        self->next = tail;
621        break;
622      }
623    }
624
625    if (ABDef == 2) {
626      tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
627    } else {
628      tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
629    }
630    // Note:  The preceding calls to init should be tail-recursive.
631
632    return;  // done; no falling through
633
634  } else if (op >= _meta_pop && op < _meta_limit) {
635    int args = (op - _meta_pop);
636    // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
637    int FDef  = ((args >> 0) & 1);
638    int UDef  = ((args >> 1) & 1);
639    int TDefL = ((args >> 2) & -1);
640    assert(TDefL <= 11);
641    int TDef  = (TDefL > 0);
642    int TL    = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
643    int TH    = (256-TL);
644    if (N <= 0) {
645      abort("illegal pop encoding");
646      return;
647    }
648    if ((mode & DISABLE_POP) != 0) {
649      abort("illegal nested pop encoding");
650      return;
651    }
652
653    // No indirect nesting of 'pop', but 'run' is OK.
654    int disPop = DISABLE_POP;
655
656    // & Enc{ FCode } if FDef=0
657    int FN = POP_FAVORED_N;
658    assert(valueSink == null);
659    intlist fValueSink; fValueSink.init();
660    coding_method fval;
661    BYTES_OF(fval).clear(); fval.u = u;
662    if (FDef != 0) {
663      fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
664    } else {
665      fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
666    }
667    bytes fvbuf;
668    fValues  = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
669    fVlength = fValueSink.length();  // i.e., the parameter K
670    fValueSink.free();
671    CHECK;
672
673    // Skip the first {F} run in all subsequent passes.
674    // The next call to this->init(...) will set vs0.rp to point after the {F}.
675
676    // & Enc{ TCode } if TDef=0  (TDefL==0)
677    if (TDef != 0) {
678      coding* tcode = coding::findBySpec(1, 256);  // BYTE1
679      // find the most narrowly sufficient code:
680      for (int B = 2; B <= B_MAX; B++) {
681        if (fVlength <= tcode->umax)  break;  // found it
682        tcode->free();
683        tcode = coding::findBySpec(B, TH);
684        CHECK_NULL(tcode);
685      }
686      if (!(fVlength <= tcode->umax)) {
687        abort("pop.L value too small");
688        return;
689      }
690      this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
691      tcode->free();
692    } else {
693      this->init(band_rp, band_limit, meta_rp, disPop,  defc, N, null);
694    }
695    CHECK;
696
697    // Count the number of zero tokens right now.
698    // Also verify that they are in bounds.
699    int UN = 0;   // one {U} for each zero in {T}
700    value_stream vs = vs0;
701    for (int i = 0; i < N; i++) {
702      uint val = vs.getInt();
703      if (val == 0)  UN += 1;
704      if (!(val <= (uint)fVlength)) {
705        abort("pop token out of range");
706        return;
707      }
708    }
709    vs.done();
710
711    // & Enc{ UCode } if UDef=0
712    if (UN != 0) {
713      uValues = U_NEW(coding_method, 1);
714      CHECK_NULL(uValues);
715      uValues->u = u;
716      if (UDef != 0) {
717        uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
718      } else {
719        uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
720      }
721    } else {
722      if (UDef == 0) {
723        int uop = (*meta_rp++ & 0xFF);
724        if (uop > _meta_canon_max)
725          // %%% Spec. requires the more strict (uop != _meta_default).
726          abort("bad meta-coding for empty pop/U");
727      }
728    }
729
730    // Bug fix for 6259542
731    // Last of all, adjust vs0.cmk to the 'pop' flavor
732    for (coding_method* self = this; self != null; self = self->next) {
733        coding_method_kind cmk2 = cmk_pop;
734        switch (self->vs0.cmk) {
735        case cmk_BHS0:   cmk2 = cmk_pop_BHS0;   break;
736        case cmk_BYTE1:  cmk2 = cmk_pop_BYTE1;  break;
737        default: break;
738        }
739        self->vs0.cmk = cmk2;
740        if (self != this) {
741          assert(self->fValues == null); // no double init
742          self->fValues  = this->fValues;
743          self->fVlength = this->fVlength;
744          assert(self->uValues == null); // must stay null
745        }
746    }
747
748    return;  // done; no falling through
749
750  } else {
751    abort("bad meta-coding");
752    return;
753  }
754
755  // Common code here skips a series of values with one coding.
756  assert(foundc != null);
757
758  assert(vs0.cmk == cmk_ERROR);  // no garbage, please
759  assert(vs0.rp == null);  // no garbage, please
760  assert(vs0.rplimit == null);  // no garbage, please
761  assert(vs0.sum == 0);  // no garbage, please
762
763  vs0.init(band_rp, band_limit, foundc);
764
765  // Done with foundc.  Free if necessary.
766  if (to_free != null) {
767    to_free->free();
768    to_free = null;
769  }
770  foundc = null;
771
772  coding& c = vs0.c;
773  CODING_PRIVATE(c.spec);
774  // assert sane N
775  assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
776
777  // Look at the values, or at least skip over them quickly.
778  if (valueSink == null) {
779    // Skip and ignore values in the first pass.
780    c.parseMultiple(band_rp, N, band_limit, B, H);
781  } else if (N >= 0) {
782    // Pop coding, {F} sequence, initial run of values...
783    assert((mode & DISABLE_POP) != 0);
784    value_stream vs = vs0;
785    for (int n = 0; n < N; n++) {
786      int val = vs.getInt();
787      valueSink->add(val);
788    }
789    band_rp = vs.rp;
790  } else {
791    // Pop coding, {F} sequence, final run of values...
792    assert((mode & DISABLE_POP) != 0);
793    assert(N == POP_FAVORED_N);
794    int min = INT_MIN_VALUE;  // farthest from the center
795    // min2 is based on the buggy specification of centrality in version 150.7
796    // no known implementations transmit this value, but just in case...
797    //int min2 = INT_MIN_VALUE;
798    int last = 0;
799    // if there were initial runs, find the potential sentinels in them:
800    for (int i = 0; i < valueSink->length(); i++) {
801      last = valueSink->get(i);
802      min = moreCentral(min, last);
803      //min2 = moreCentral2(min2, last, min);
804    }
805    value_stream vs = vs0;
806    for (;;) {
807      int val = vs.getInt();
808      if (valueSink->length() > 0 &&
809          (val == last || val == min)) //|| val == min2
810        break;
811      valueSink->add(val);
812      CHECK;
813      last = val;
814      min = moreCentral(min, last);
815      //min2 = moreCentral2(min2, last, min);
816    }
817    band_rp = vs.rp;
818  }
819  CHECK;
820
821  // Get an accurate upper limit now.
822  vs0.rplimit = band_rp;
823  vs0.cm = this;
824
825  return; // success
826}
827
828coding basic_codings[] = {
829  // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
830  CODING_INIT(3,128,0,0),
831
832  // Fixed-length codings:
833  CODING_INIT(1,256,0,0),
834  CODING_INIT(1,256,1,0),
835  CODING_INIT(1,256,0,1),
836  CODING_INIT(1,256,1,1),
837  CODING_INIT(2,256,0,0),
838  CODING_INIT(2,256,1,0),
839  CODING_INIT(2,256,0,1),
840  CODING_INIT(2,256,1,1),
841  CODING_INIT(3,256,0,0),
842  CODING_INIT(3,256,1,0),
843  CODING_INIT(3,256,0,1),
844  CODING_INIT(3,256,1,1),
845  CODING_INIT(4,256,0,0),
846  CODING_INIT(4,256,1,0),
847  CODING_INIT(4,256,0,1),
848  CODING_INIT(4,256,1,1),
849
850  // Full-range variable-length codings:
851  CODING_INIT(5,  4,0,0),
852  CODING_INIT(5,  4,1,0),
853  CODING_INIT(5,  4,2,0),
854  CODING_INIT(5, 16,0,0),
855  CODING_INIT(5, 16,1,0),
856  CODING_INIT(5, 16,2,0),
857  CODING_INIT(5, 32,0,0),
858  CODING_INIT(5, 32,1,0),
859  CODING_INIT(5, 32,2,0),
860  CODING_INIT(5, 64,0,0),
861  CODING_INIT(5, 64,1,0),
862  CODING_INIT(5, 64,2,0),
863  CODING_INIT(5,128,0,0),
864  CODING_INIT(5,128,1,0),
865  CODING_INIT(5,128,2,0),
866
867  CODING_INIT(5,  4,0,1),
868  CODING_INIT(5,  4,1,1),
869  CODING_INIT(5,  4,2,1),
870  CODING_INIT(5, 16,0,1),
871  CODING_INIT(5, 16,1,1),
872  CODING_INIT(5, 16,2,1),
873  CODING_INIT(5, 32,0,1),
874  CODING_INIT(5, 32,1,1),
875  CODING_INIT(5, 32,2,1),
876  CODING_INIT(5, 64,0,1),
877  CODING_INIT(5, 64,1,1),
878  CODING_INIT(5, 64,2,1),
879  CODING_INIT(5,128,0,1),
880  CODING_INIT(5,128,1,1),
881  CODING_INIT(5,128,2,1),
882
883  // Variable length subrange codings:
884  CODING_INIT(2,192,0,0),
885  CODING_INIT(2,224,0,0),
886  CODING_INIT(2,240,0,0),
887  CODING_INIT(2,248,0,0),
888  CODING_INIT(2,252,0,0),
889
890  CODING_INIT(2,  8,0,1),
891  CODING_INIT(2,  8,1,1),
892  CODING_INIT(2, 16,0,1),
893  CODING_INIT(2, 16,1,1),
894  CODING_INIT(2, 32,0,1),
895  CODING_INIT(2, 32,1,1),
896  CODING_INIT(2, 64,0,1),
897  CODING_INIT(2, 64,1,1),
898  CODING_INIT(2,128,0,1),
899  CODING_INIT(2,128,1,1),
900  CODING_INIT(2,192,0,1),
901  CODING_INIT(2,192,1,1),
902  CODING_INIT(2,224,0,1),
903  CODING_INIT(2,224,1,1),
904  CODING_INIT(2,240,0,1),
905  CODING_INIT(2,240,1,1),
906  CODING_INIT(2,248,0,1),
907  CODING_INIT(2,248,1,1),
908
909  CODING_INIT(3,192,0,0),
910  CODING_INIT(3,224,0,0),
911  CODING_INIT(3,240,0,0),
912  CODING_INIT(3,248,0,0),
913  CODING_INIT(3,252,0,0),
914
915  CODING_INIT(3,  8,0,1),
916  CODING_INIT(3,  8,1,1),
917  CODING_INIT(3, 16,0,1),
918  CODING_INIT(3, 16,1,1),
919  CODING_INIT(3, 32,0,1),
920  CODING_INIT(3, 32,1,1),
921  CODING_INIT(3, 64,0,1),
922  CODING_INIT(3, 64,1,1),
923  CODING_INIT(3,128,0,1),
924  CODING_INIT(3,128,1,1),
925  CODING_INIT(3,192,0,1),
926  CODING_INIT(3,192,1,1),
927  CODING_INIT(3,224,0,1),
928  CODING_INIT(3,224,1,1),
929  CODING_INIT(3,240,0,1),
930  CODING_INIT(3,240,1,1),
931  CODING_INIT(3,248,0,1),
932  CODING_INIT(3,248,1,1),
933
934  CODING_INIT(4,192,0,0),
935  CODING_INIT(4,224,0,0),
936  CODING_INIT(4,240,0,0),
937  CODING_INIT(4,248,0,0),
938  CODING_INIT(4,252,0,0),
939
940  CODING_INIT(4,  8,0,1),
941  CODING_INIT(4,  8,1,1),
942  CODING_INIT(4, 16,0,1),
943  CODING_INIT(4, 16,1,1),
944  CODING_INIT(4, 32,0,1),
945  CODING_INIT(4, 32,1,1),
946  CODING_INIT(4, 64,0,1),
947  CODING_INIT(4, 64,1,1),
948  CODING_INIT(4,128,0,1),
949  CODING_INIT(4,128,1,1),
950  CODING_INIT(4,192,0,1),
951  CODING_INIT(4,192,1,1),
952  CODING_INIT(4,224,0,1),
953  CODING_INIT(4,224,1,1),
954  CODING_INIT(4,240,0,1),
955  CODING_INIT(4,240,1,1),
956  CODING_INIT(4,248,0,1),
957  CODING_INIT(4,248,1,1),
958  CODING_INIT(0,0,0,0)
959};
960#define BASIC_INDEX_LIMIT \
961        (int)(sizeof(basic_codings)/sizeof(basic_codings[0])-1)
962
963coding* coding::findByIndex(int idx) {
964#ifndef PRODUCT
965  /* Tricky assert here, constants and gcc complains about it without local. */
966  int index_limit = BASIC_INDEX_LIMIT;
967  assert(_meta_canon_min == 1 && _meta_canon_max+1 == index_limit);
968#endif
969  if (idx >= _meta_canon_min && idx <= _meta_canon_max)
970    return basic_codings[idx].init();
971  else
972    return null;
973}
974
975#ifndef PRODUCT
976const char* coding::string() {
977  CODING_PRIVATE(spec);
978  bytes buf;
979  buf.malloc(100);
980  char maxS[20], minS[20];
981  sprintf(maxS, "%d", max);
982  sprintf(minS, "%d", min);
983  if (max == INT_MAX_VALUE)  strcpy(maxS, "max");
984  if (min == INT_MIN_VALUE)  strcpy(minS, "min");
985  sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
986          B,H,S,D,L,minS,maxS);
987  return (const char*) buf.ptr;
988}
989#endif
990