1/* 2 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.java.util.jar.pack; 27 28import java.io.ByteArrayOutputStream; 29import java.io.IOException; 30import java.io.OutputStream; 31import java.util.ArrayList; 32import java.util.Collections; 33import java.util.HashSet; 34import java.util.Iterator; 35import java.util.List; 36import java.util.Random; 37import java.util.Set; 38import java.util.zip.Deflater; 39import java.util.zip.DeflaterOutputStream; 40import static com.sun.java.util.jar.pack.Constants.*; 41/** 42 * Heuristic chooser of basic encodings. 43 * Runs "zip" to measure the apparent information content after coding. 44 * @author John Rose 45 */ 46class CodingChooser { 47 int verbose; 48 int effort; 49 boolean optUseHistogram = true; 50 boolean optUsePopulationCoding = true; 51 boolean optUseAdaptiveCoding = true; 52 boolean disablePopCoding; 53 boolean disableRunCoding; 54 boolean topLevel = true; 55 56 // Derived from effort; >1 (<1) means try more (less) experiments 57 // when looking to beat a best score. 58 double fuzz; 59 60 Coding[] allCodingChoices; 61 Choice[] choices; 62 ByteArrayOutputStream context; 63 CodingChooser popHelper; 64 CodingChooser runHelper; 65 66 Random stress; // If not null, stress mode oracle. 67 68 // Element in sorted set of coding choices: 69 static 70 class Choice { 71 final Coding coding; 72 final int index; // index in choices 73 final int[] distance; // cache of distance 74 Choice(Coding coding, int index, int[] distance) { 75 this.coding = coding; 76 this.index = index; 77 this.distance = distance; 78 } 79 // These variables are reset and reused: 80 int searchOrder; // order in which it is checked 81 int minDistance; // min distance from already-checked choices 82 int zipSize; // size of encoding in sample, zipped output 83 int byteSize; // size of encoding in sample (debug only) 84 int histSize; // size of encoding, according to histogram 85 86 void reset() { 87 searchOrder = Integer.MAX_VALUE; 88 minDistance = Integer.MAX_VALUE; 89 zipSize = byteSize = histSize = -1; 90 } 91 92 boolean isExtra() { 93 return index < 0; 94 } 95 96 public String toString() { 97 return stringForDebug(); 98 } 99 100 private String stringForDebug() { 101 String s = ""; 102 if (searchOrder < Integer.MAX_VALUE) 103 s += " so: "+searchOrder; 104 if (minDistance < Integer.MAX_VALUE) 105 s += " md: "+minDistance; 106 if (zipSize > 0) 107 s += " zs: "+zipSize; 108 if (byteSize > 0) 109 s += " bs: "+byteSize; 110 if (histSize > 0) 111 s += " hs: "+histSize; 112 return "Choice["+index+"] "+s+" "+coding; 113 } 114 } 115 116 CodingChooser(int effort, Coding[] allCodingChoices) { 117 PropMap p200 = Utils.currentPropMap(); 118 if (p200 != null) { 119 this.verbose 120 = Math.max(p200.getInteger(Utils.DEBUG_VERBOSE), 121 p200.getInteger(Utils.COM_PREFIX+"verbose.coding")); 122 this.optUseHistogram 123 = !p200.getBoolean(Utils.COM_PREFIX+"no.histogram"); 124 this.optUsePopulationCoding 125 = !p200.getBoolean(Utils.COM_PREFIX+"no.population.coding"); 126 this.optUseAdaptiveCoding 127 = !p200.getBoolean(Utils.COM_PREFIX+"no.adaptive.coding"); 128 int lstress 129 = p200.getInteger(Utils.COM_PREFIX+"stress.coding"); 130 if (lstress != 0) 131 this.stress = new Random(lstress); 132 } 133 134 this.effort = effort; 135 // The following line "makes sense" but is too much 136 // work for a simple heuristic. 137 //if (effort > 5) zipDef.setLevel(effort); 138 139 this.allCodingChoices = allCodingChoices; 140 141 // If effort = 9, look carefully at any solution 142 // whose initial metrics are within 1% of the best 143 // so far. If effort = 1, look carefully only at 144 // solutions whose initial metrics promise a 1% win. 145 this.fuzz = 1 + (0.0025 * (effort-MID_EFFORT)); 146 147 int nc = 0; 148 for (int i = 0; i < allCodingChoices.length; i++) { 149 if (allCodingChoices[i] == null) continue; 150 nc++; 151 } 152 choices = new Choice[nc]; 153 nc = 0; 154 for (int i = 0; i < allCodingChoices.length; i++) { 155 if (allCodingChoices[i] == null) continue; 156 int[] distance = new int[choices.length]; 157 choices[nc++] = new Choice(allCodingChoices[i], i, distance); 158 } 159 for (int i = 0; i < choices.length; i++) { 160 Coding ci = choices[i].coding; 161 assert(ci.distanceFrom(ci) == 0); 162 for (int j = 0; j < i; j++) { 163 Coding cj = choices[j].coding; 164 int dij = ci.distanceFrom(cj); 165 assert(dij > 0); 166 assert(dij == cj.distanceFrom(ci)); 167 choices[i].distance[j] = dij; 168 choices[j].distance[i] = dij; 169 } 170 } 171 } 172 173 Choice makeExtraChoice(Coding coding) { 174 int[] distance = new int[choices.length]; 175 for (int i = 0; i < distance.length; i++) { 176 Coding ci = choices[i].coding; 177 int dij = coding.distanceFrom(ci); 178 assert(dij > 0); 179 assert(dij == ci.distanceFrom(coding)); 180 distance[i] = dij; 181 } 182 Choice c = new Choice(coding, -1, distance); 183 c.reset(); 184 return c; 185 } 186 187 ByteArrayOutputStream getContext() { 188 if (context == null) 189 context = new ByteArrayOutputStream(1 << 16); 190 return context; 191 } 192 193 // These variables are reset and reused: 194 private int[] values; 195 private int start, end; // slice of values 196 private int[] deltas; 197 private int min, max; 198 private Histogram vHist; 199 private Histogram dHist; 200 private int searchOrder; 201 private Choice regularChoice; 202 private Choice bestChoice; 203 private CodingMethod bestMethod; 204 private int bestByteSize; 205 private int bestZipSize; 206 private int targetSize; // fuzzed target byte size 207 208 private void reset(int[] values, int start, int end) { 209 this.values = values; 210 this.start = start; 211 this.end = end; 212 this.deltas = null; 213 this.min = Integer.MAX_VALUE; 214 this.max = Integer.MIN_VALUE; 215 this.vHist = null; 216 this.dHist = null; 217 this.searchOrder = 0; 218 this.regularChoice = null; 219 this.bestChoice = null; 220 this.bestMethod = null; 221 this.bestZipSize = Integer.MAX_VALUE; 222 this.bestByteSize = Integer.MAX_VALUE; 223 this.targetSize = Integer.MAX_VALUE; 224 } 225 226 public static final int MIN_EFFORT = 1; 227 public static final int MID_EFFORT = 5; 228 public static final int MAX_EFFORT = 9; 229 230 public static final int POP_EFFORT = MID_EFFORT-1; 231 public static final int RUN_EFFORT = MID_EFFORT-2; 232 233 public static final int BYTE_SIZE = 0; 234 public static final int ZIP_SIZE = 1; 235 236 CodingMethod choose(int[] values, int start, int end, Coding regular, int[] sizes) { 237 // Save the value array 238 reset(values, start, end); 239 240 if (effort <= MIN_EFFORT || start >= end) { 241 if (sizes != null) { 242 int[] computed = computeSizePrivate(regular); 243 sizes[BYTE_SIZE] = computed[BYTE_SIZE]; 244 sizes[ZIP_SIZE] = computed[ZIP_SIZE]; 245 } 246 return regular; 247 } 248 249 if (optUseHistogram) { 250 getValueHistogram(); 251 getDeltaHistogram(); 252 } 253 254 for (int i = start; i < end; i++) { 255 int val = values[i]; 256 if (min > val) min = val; 257 if (max < val) max = val; 258 } 259 260 // Find all the preset choices that might be worth looking at: 261 int numChoices = markUsableChoices(regular); 262 263 if (stress != null) { 264 // Make a random choice. 265 int rand = stress.nextInt(numChoices*2 + 4); 266 CodingMethod coding = null; 267 for (int i = 0; i < choices.length; i++) { 268 Choice c = choices[i]; 269 if (c.searchOrder >= 0 && rand-- == 0) { 270 coding = c.coding; 271 break; 272 } 273 } 274 if (coding == null) { 275 if ((rand & 7) != 0) { 276 coding = regular; 277 } else { 278 // Pick a totally random coding 6% of the time. 279 coding = stressCoding(min, max); 280 } 281 } 282 if (!disablePopCoding 283 && optUsePopulationCoding 284 && effort >= POP_EFFORT) { 285 coding = stressPopCoding(coding); 286 } 287 if (!disableRunCoding 288 && optUseAdaptiveCoding 289 && effort >= RUN_EFFORT) { 290 coding = stressAdaptiveCoding(coding); 291 } 292 return coding; 293 } 294 295 double searchScale = 1.0; 296 for (int x = effort; x < MAX_EFFORT; x++) { 297 searchScale /= 1.414; // every 2 effort points doubles work 298 } 299 int searchOrderLimit = (int)Math.ceil( numChoices * searchScale ); 300 301 // Start by evaluating the "regular" choice. 302 bestChoice = regularChoice; 303 evaluate(regularChoice); 304 int maxd = updateDistances(regularChoice); 305 306 // save these first-cut numbers for later 307 int zipSize1 = bestZipSize; 308 int byteSize1 = bestByteSize; 309 310 if (regularChoice.coding == regular && topLevel) { 311 // Give credit for being the default; no band header is needed. 312 // Rather than increasing every other size value by the band 313 // header amount, we decrement this one metric, to give it an edge. 314 // Decreasing zipSize by a byte length is conservatively correct, 315 // especially considering that the escape byte is not likely to 316 // zip well with other bytes in the band. 317 int X = BandStructure.encodeEscapeValue(_meta_canon_max, regular); 318 if (regular.canRepresentSigned(X)) { 319 int Xlen = regular.getLength(X); // band coding header 320 //regularChoice.histSize -= Xlen; // keep exact byteSize 321 //regularChoice.byteSize -= Xlen; // keep exact byteSize 322 regularChoice.zipSize -= Xlen; 323 bestByteSize = regularChoice.byteSize; 324 bestZipSize = regularChoice.zipSize; 325 } 326 } 327 328 int dscale = 1; 329 // Continually select a new choice to evaluate. 330 while (searchOrder < searchOrderLimit) { 331 Choice nextChoice; 332 if (dscale > maxd) dscale = 1; // cycle dscale values! 333 int dhi = maxd / dscale; 334 int dlo = maxd / (dscale *= 2) + 1; 335 nextChoice = findChoiceNear(bestChoice, dhi, dlo); 336 if (nextChoice == null) continue; 337 assert(nextChoice.coding.canRepresent(min, max)); 338 evaluate(nextChoice); 339 int nextMaxd = updateDistances(nextChoice); 340 if (nextChoice == bestChoice) { 341 maxd = nextMaxd; 342 if (verbose > 5) Utils.log.info("maxd = "+maxd); 343 } 344 } 345 346 // Record best "plain coding" choice. 347 Coding plainBest = bestChoice.coding; 348 assert(plainBest == bestMethod); 349 350 if (verbose > 2) { 351 Utils.log.info("chooser: plain result="+bestChoice+" after "+bestChoice.searchOrder+" rounds, "+(regularChoice.zipSize-bestZipSize)+" fewer bytes than regular "+regular); 352 } 353 bestChoice = null; 354 355 if (!disablePopCoding 356 && optUsePopulationCoding 357 && effort >= POP_EFFORT 358 && bestMethod instanceof Coding) { 359 tryPopulationCoding(plainBest); 360 } 361 362 if (!disableRunCoding 363 && optUseAdaptiveCoding 364 && effort >= RUN_EFFORT 365 && bestMethod instanceof Coding) { 366 tryAdaptiveCoding(plainBest); 367 } 368 369 // Pass back the requested information: 370 if (sizes != null) { 371 sizes[BYTE_SIZE] = bestByteSize; 372 sizes[ZIP_SIZE] = bestZipSize; 373 } 374 if (verbose > 1) { 375 Utils.log.info("chooser: result="+bestMethod+" "+ 376 (zipSize1-bestZipSize)+ 377 " fewer bytes than regular "+regular+ 378 "; win="+pct(zipSize1-bestZipSize, zipSize1)); 379 } 380 CodingMethod lbestMethod = this.bestMethod; 381 reset(null, 0, 0); // for GC 382 return lbestMethod; 383 } 384 CodingMethod choose(int[] values, int start, int end, Coding regular) { 385 return choose(values, start, end, regular, null); 386 } 387 CodingMethod choose(int[] values, Coding regular, int[] sizes) { 388 return choose(values, 0, values.length, regular, sizes); 389 } 390 CodingMethod choose(int[] values, Coding regular) { 391 return choose(values, 0, values.length, regular, null); 392 } 393 394 private int markUsableChoices(Coding regular) { 395 int numChoices = 0; 396 for (int i = 0; i < choices.length; i++) { 397 Choice c = choices[i]; 398 c.reset(); 399 if (!c.coding.canRepresent(min, max)) { 400 // Mark as already visited: 401 c.searchOrder = -1; 402 if (verbose > 1 && c.coding == regular) { 403 Utils.log.info("regular coding cannot represent ["+min+".."+max+"]: "+regular); 404 } 405 continue; 406 } 407 if (c.coding == regular) 408 regularChoice = c; 409 numChoices++; 410 } 411 if (regularChoice == null && regular.canRepresent(min, max)) { 412 regularChoice = makeExtraChoice(regular); 413 if (verbose > 1) { 414 Utils.log.info("*** regular choice is extra: "+regularChoice.coding); 415 } 416 } 417 if (regularChoice == null) { 418 for (int i = 0; i < choices.length; i++) { 419 Choice c = choices[i]; 420 if (c.searchOrder != -1) { 421 regularChoice = c; // arbitrary pick 422 break; 423 } 424 } 425 if (verbose > 1) { 426 Utils.log.info("*** regular choice does not apply "+regular); 427 Utils.log.info(" using instead "+regularChoice.coding); 428 } 429 } 430 if (verbose > 2) { 431 Utils.log.info("chooser: #choices="+numChoices+" ["+min+".."+max+"]"); 432 if (verbose > 4) { 433 for (int i = 0; i < choices.length; i++) { 434 Choice c = choices[i]; 435 if (c.searchOrder >= 0) 436 Utils.log.info(" "+c); 437 } 438 } 439 } 440 return numChoices; 441 } 442 443 // Find an arbitrary choice at least dlo away from a previously 444 // evaluated choices, and at most dhi. Try also to regulate its 445 // min distance to all previously evaluated choices, in this range. 446 private Choice findChoiceNear(Choice near, int dhi, int dlo) { 447 if (verbose > 5) 448 Utils.log.info("findChoice "+dhi+".."+dlo+" near: "+near); 449 int[] distance = near.distance; 450 Choice found = null; 451 for (int i = 0; i < choices.length; i++) { 452 Choice c = choices[i]; 453 if (c.searchOrder < searchOrder) 454 continue; // already searched 455 // Distance from "near" guy must be in bounds: 456 if (distance[i] >= dlo && distance[i] <= dhi) { 457 // Try also to keep min-distance from other guys in bounds: 458 if (c.minDistance >= dlo && c.minDistance <= dhi) { 459 if (verbose > 5) 460 Utils.log.info("findChoice => good "+c); 461 return c; 462 } 463 found = c; 464 } 465 } 466 if (verbose > 5) 467 Utils.log.info("findChoice => found "+found); 468 return found; 469 } 470 471 private void evaluate(Choice c) { 472 assert(c.searchOrder == Integer.MAX_VALUE); 473 c.searchOrder = searchOrder++; 474 boolean mustComputeSize; 475 if (c == bestChoice || c.isExtra()) { 476 mustComputeSize = true; 477 } else if (optUseHistogram) { 478 Histogram hist = getHistogram(c.coding.isDelta()); 479 c.histSize = (int)Math.ceil(hist.getBitLength(c.coding) / 8); 480 c.byteSize = c.histSize; 481 mustComputeSize = (c.byteSize <= targetSize); 482 } else { 483 mustComputeSize = true; 484 } 485 if (mustComputeSize) { 486 int[] sizes = computeSizePrivate(c.coding); 487 c.byteSize = sizes[BYTE_SIZE]; 488 c.zipSize = sizes[ZIP_SIZE]; 489 if (noteSizes(c.coding, c.byteSize, c.zipSize)) 490 bestChoice = c; 491 } 492 if (c.histSize >= 0) { 493 assert(c.byteSize == c.histSize); // models should agree 494 } 495 if (verbose > 4) { 496 Utils.log.info("evaluated "+c); 497 } 498 } 499 500 private boolean noteSizes(CodingMethod c, int byteSize, int zipSize) { 501 assert(zipSize > 0 && byteSize > 0); 502 boolean better = (zipSize < bestZipSize); 503 if (verbose > 3) 504 Utils.log.info("computed size "+c+" "+byteSize+"/zs="+zipSize+ 505 ((better && bestMethod != null)? 506 (" better by "+ 507 pct(bestZipSize - zipSize, zipSize)): "")); 508 if (better) { 509 bestMethod = c; 510 bestZipSize = zipSize; 511 bestByteSize = byteSize; 512 targetSize = (int)(byteSize * fuzz); 513 return true; 514 } else { 515 return false; 516 } 517 } 518 519 520 private int updateDistances(Choice c) { 521 // update all minDistance values in still unevaluated choices 522 int[] distance = c.distance; 523 int maxd = 0; // how far is c from everybody else? 524 for (int i = 0; i < choices.length; i++) { 525 Choice c2 = choices[i]; 526 if (c2.searchOrder < searchOrder) 527 continue; 528 int d = distance[i]; 529 if (verbose > 5) 530 Utils.log.info("evaluate dist "+d+" to "+c2); 531 int mind = c2.minDistance; 532 if (mind > d) 533 c2.minDistance = mind = d; 534 if (maxd < d) 535 maxd = d; 536 } 537 // Now maxd has the distance of the farthest outlier 538 // from all evaluated choices. 539 if (verbose > 5) 540 Utils.log.info("evaluate maxd => "+maxd); 541 return maxd; 542 } 543 544 // Compute the coded size of a sequence of values. 545 // The first int is the size in uncompressed bytes. 546 // The second is an estimate of the compressed size of these bytes. 547 public void computeSize(CodingMethod c, int[] values, int start, int end, int[] sizes) { 548 if (end <= start) { 549 sizes[BYTE_SIZE] = sizes[ZIP_SIZE] = 0; 550 return; 551 } 552 try { 553 resetData(); 554 c.writeArrayTo(byteSizer, values, start, end); 555 sizes[BYTE_SIZE] = getByteSize(); 556 sizes[ZIP_SIZE] = getZipSize(); 557 } catch (IOException ee) { 558 throw new RuntimeException(ee); // cannot happen 559 } 560 } 561 public void computeSize(CodingMethod c, int[] values, int[] sizes) { 562 computeSize(c, values, 0, values.length, sizes); 563 } 564 public int[] computeSize(CodingMethod c, int[] values, int start, int end) { 565 int[] sizes = { 0, 0 }; 566 computeSize(c, values, start, end, sizes); 567 return sizes; 568 } 569 public int[] computeSize(CodingMethod c, int[] values) { 570 return computeSize(c, values, 0, values.length); 571 } 572 // This version uses the implicit local arguments 573 private int[] computeSizePrivate(CodingMethod c) { 574 int[] sizes = { 0, 0 }; 575 computeSize(c, values, start, end, sizes); 576 return sizes; 577 } 578 public int computeByteSize(CodingMethod cm, int[] values, int start, int end) { 579 int len = end-start; 580 if (len < 0) { 581 return 0; 582 } 583 if (cm instanceof Coding) { 584 Coding c = (Coding) cm; 585 int size = c.getLength(values, start, end); 586 int size2; 587 assert(size == (size2=countBytesToSizer(cm, values, start, end))) 588 : (cm+" : "+size+" != "+size2); 589 return size; 590 } 591 return countBytesToSizer(cm, values, start, end); 592 } 593 private int countBytesToSizer(CodingMethod cm, int[] values, int start, int end) { 594 try { 595 byteOnlySizer.reset(); 596 cm.writeArrayTo(byteOnlySizer, values, start, end); 597 return byteOnlySizer.getSize(); 598 } catch (IOException ee) { 599 throw new RuntimeException(ee); // cannot happen 600 } 601 } 602 603 int[] getDeltas(int min, int max) { 604 if ((min|max) != 0) 605 return Coding.makeDeltas(values, start, end, min, max); 606 if (deltas == null) { 607 deltas = Coding.makeDeltas(values, start, end, 0, 0); 608 } 609 return deltas; 610 } 611 Histogram getValueHistogram() { 612 if (vHist == null) { 613 vHist = new Histogram(values, start, end); 614 if (verbose > 3) { 615 vHist.print("vHist", System.out); 616 } else if (verbose > 1) { 617 vHist.print("vHist", null, System.out); 618 } 619 } 620 return vHist; 621 } 622 Histogram getDeltaHistogram() { 623 if (dHist == null) { 624 dHist = new Histogram(getDeltas(0, 0)); 625 if (verbose > 3) { 626 dHist.print("dHist", System.out); 627 } else if (verbose > 1) { 628 dHist.print("dHist", null, System.out); 629 } 630 } 631 return dHist; 632 } 633 Histogram getHistogram(boolean isDelta) { 634 return isDelta ? getDeltaHistogram(): getValueHistogram(); 635 } 636 637 private void tryPopulationCoding(Coding plainCoding) { 638 // assert(plainCoding.canRepresent(min, max)); 639 Histogram hist = getValueHistogram(); 640 // Start with "reasonable" default codings. 641 final int approxL = 64; 642 Coding favoredCoding = plainCoding.getValueCoding(); 643 Coding tokenCoding = BandStructure.UNSIGNED5.setL(approxL); 644 Coding unfavoredCoding = plainCoding.getValueCoding(); 645 // There's going to be a band header. Estimate conservatively large. 646 final int BAND_HEADER = 4; 647 // Keep a running model of the predicted sizes of the F/T/U sequences. 648 int currentFSize; 649 int currentTSize; 650 int currentUSize; 651 // Start by assuming a degenerate favored-value length of 0, 652 // which looks like a bunch of zero tokens followed by the 653 // original sequence. 654 // The {F} list ends with a repeated F value; find worst case: 655 currentFSize = 656 BAND_HEADER + Math.max(favoredCoding.getLength(min), 657 favoredCoding.getLength(max)); 658 // The {T} list starts out a bunch of zeros, each of length 1. 659 final int ZERO_LEN = tokenCoding.getLength(0); 660 currentTSize = ZERO_LEN * (end-start); 661 // The {U} list starts out a copy of the plainCoding: 662 currentUSize = (int) Math.ceil(hist.getBitLength(unfavoredCoding) / 8); 663 664 int bestPopSize = (currentFSize + currentTSize + currentUSize); 665 int bestPopFVC = 0; 666 667 // Record all the values, in decreasing order of favor. 668 int[] allFavoredValues = new int[1+hist.getTotalLength()]; 669 //int[] allPopSizes = new int[1+hist.getTotalLength()]; 670 671 // What sizes are "interesting"? 672 int targetLowFVC = -1; 673 int targetHighFVC = -1; 674 675 // For each length, adjust the currentXSize model, and look for a win. 676 int[][] matrix = hist.getMatrix(); 677 int mrow = -1; 678 int mcol = 1; 679 int mrowFreq = 0; 680 for (int fvcount = 1; fvcount <= hist.getTotalLength(); fvcount++) { 681 // The {F} list gets an additional member. 682 // Take it from the end of the current matrix row. 683 // (It's the end, so that we get larger favored values first.) 684 if (mcol == 1) { 685 mrow += 1; 686 mrowFreq = matrix[mrow][0]; 687 mcol = matrix[mrow].length; 688 } 689 int thisValue = matrix[mrow][--mcol]; 690 allFavoredValues[fvcount] = thisValue; 691 int thisVLen = favoredCoding.getLength(thisValue); 692 currentFSize += thisVLen; 693 // The token list replaces occurrences of zero with a new token: 694 int thisVCount = mrowFreq; 695 int thisToken = fvcount; 696 currentTSize += (tokenCoding.getLength(thisToken) 697 - ZERO_LEN) * thisVCount; 698 // The unfavored list loses occurrences of the newly favored value. 699 // (This is the whole point of the exercise!) 700 currentUSize -= thisVLen * thisVCount; 701 int currentSize = (currentFSize + currentTSize + currentUSize); 702 //allPopSizes[fvcount] = currentSize; 703 if (bestPopSize > currentSize) { 704 if (currentSize <= targetSize) { 705 targetHighFVC = fvcount; 706 if (targetLowFVC < 0) 707 targetLowFVC = fvcount; 708 if (verbose > 4) 709 Utils.log.info("better pop-size at fvc="+fvcount+ 710 " by "+pct(bestPopSize-currentSize, 711 bestPopSize)); 712 } 713 bestPopSize = currentSize; 714 bestPopFVC = fvcount; 715 } 716 } 717 if (targetLowFVC < 0) { 718 if (verbose > 1) { 719 // Complete loss. 720 if (verbose > 1) 721 Utils.log.info("no good pop-size; best was "+ 722 bestPopSize+" at "+bestPopFVC+ 723 " worse by "+ 724 pct(bestPopSize-bestByteSize, 725 bestByteSize)); 726 } 727 return; 728 } 729 if (verbose > 1) 730 Utils.log.info("initial best pop-size at fvc="+bestPopFVC+ 731 " in ["+targetLowFVC+".."+targetHighFVC+"]"+ 732 " by "+pct(bestByteSize-bestPopSize, 733 bestByteSize)); 734 int oldZipSize = bestZipSize; 735 // Now close onto a specific coding, testing more rigorously 736 // with the zipSize metric. 737 // Questions to decide: 738 // 1. How many favored values? 739 // 2. What token coding (TC)? 740 // 3. Sort favored values by value within length brackets? 741 // 4. What favored coding? 742 // 5. What unfavored coding? 743 // Steps 1/2/3 are interdependent, and may be iterated. 744 // Steps 4 and 5 may be decided independently afterward. 745 int[] LValuesCoded = PopulationCoding.LValuesCoded; 746 List<Coding> bestFits = new ArrayList<>(); 747 List<Coding> fullFits = new ArrayList<>(); 748 List<Coding> longFits = new ArrayList<>(); 749 final int PACK_TO_MAX_S = 1; 750 if (bestPopFVC <= 255) { 751 bestFits.add(BandStructure.BYTE1); 752 } else { 753 int bestB = Coding.B_MAX; 754 boolean doFullAlso = (effort > POP_EFFORT); 755 if (doFullAlso) 756 fullFits.add(BandStructure.BYTE1.setS(PACK_TO_MAX_S)); 757 for (int i = LValuesCoded.length-1; i >= 1; i--) { 758 int L = LValuesCoded[i]; 759 Coding c0 = PopulationCoding.fitTokenCoding(targetLowFVC, L); 760 Coding c1 = PopulationCoding.fitTokenCoding(bestPopFVC, L); 761 Coding c3 = PopulationCoding.fitTokenCoding(targetHighFVC, L); 762 if (c1 != null) { 763 if (!bestFits.contains(c1)) 764 bestFits.add(c1); 765 if (bestB > c1.B()) 766 bestB = c1.B(); 767 } 768 if (doFullAlso) { 769 if (c3 == null) c3 = c1; 770 for (int B = c0.B(); B <= c3.B(); B++) { 771 if (B == c1.B()) continue; 772 if (B == 1) continue; 773 Coding c2 = c3.setB(B).setS(PACK_TO_MAX_S); 774 if (!fullFits.contains(c2)) 775 fullFits.add(c2); 776 } 777 } 778 } 779 // interleave all B greater than bestB with best and full fits 780 for (Iterator<Coding> i = bestFits.iterator(); i.hasNext(); ) { 781 Coding c = i.next(); 782 if (c.B() > bestB) { 783 i.remove(); 784 longFits.add(0, c); 785 } 786 } 787 } 788 List<Coding> allFits = new ArrayList<>(); 789 for (Iterator<Coding> i = bestFits.iterator(), 790 j = fullFits.iterator(), 791 k = longFits.iterator(); 792 i.hasNext() || j.hasNext() || k.hasNext(); ) { 793 if (i.hasNext()) allFits.add(i.next()); 794 if (j.hasNext()) allFits.add(j.next()); 795 if (k.hasNext()) allFits.add(k.next()); 796 } 797 bestFits.clear(); 798 fullFits.clear(); 799 longFits.clear(); 800 int maxFits = allFits.size(); 801 if (effort == POP_EFFORT) 802 maxFits = 2; 803 else if (maxFits > 4) { 804 maxFits -= 4; 805 maxFits = (maxFits * (effort-POP_EFFORT) 806 ) / (MAX_EFFORT-POP_EFFORT); 807 maxFits += 4; 808 } 809 if (allFits.size() > maxFits) { 810 if (verbose > 4) 811 Utils.log.info("allFits before clip: "+allFits); 812 allFits.subList(maxFits, allFits.size()).clear(); 813 } 814 if (verbose > 3) 815 Utils.log.info("allFits: "+allFits); 816 for (Coding tc : allFits) { 817 boolean packToMax = false; 818 if (tc.S() == PACK_TO_MAX_S) { 819 // Kludge: setS(PACK_TO_MAX_S) means packToMax here. 820 packToMax = true; 821 tc = tc.setS(0); 822 } 823 int fVlen; 824 if (!packToMax) { 825 fVlen = bestPopFVC; 826 assert(tc.umax() >= fVlen); 827 assert(tc.B() == 1 || tc.setB(tc.B()-1).umax() < fVlen); 828 } else { 829 fVlen = Math.min(tc.umax(), targetHighFVC); 830 if (fVlen < targetLowFVC) 831 continue; 832 if (fVlen == bestPopFVC) 833 continue; // redundant test 834 } 835 PopulationCoding pop = new PopulationCoding(); 836 pop.setHistogram(hist); 837 pop.setL(tc.L()); 838 pop.setFavoredValues(allFavoredValues, fVlen); 839 assert(pop.tokenCoding == tc); // predict correctly 840 pop.resortFavoredValues(); 841 int[] tcsizes = 842 computePopSizePrivate(pop, 843 favoredCoding, unfavoredCoding); 844 noteSizes(pop, tcsizes[BYTE_SIZE], BAND_HEADER+tcsizes[ZIP_SIZE]); 845 } 846 if (verbose > 3) { 847 Utils.log.info("measured best pop, size="+bestByteSize+ 848 "/zs="+bestZipSize+ 849 " better by "+ 850 pct(oldZipSize-bestZipSize, oldZipSize)); 851 if (bestZipSize < oldZipSize) { 852 Utils.log.info(">>> POP WINS BY "+ 853 (oldZipSize - bestZipSize)); 854 } 855 } 856 } 857 858 private 859 int[] computePopSizePrivate(PopulationCoding pop, 860 Coding favoredCoding, 861 Coding unfavoredCoding) { 862 if (popHelper == null) { 863 popHelper = new CodingChooser(effort, allCodingChoices); 864 if (stress != null) 865 popHelper.addStressSeed(stress.nextInt()); 866 popHelper.topLevel = false; 867 popHelper.verbose -= 1; 868 popHelper.disablePopCoding = true; 869 popHelper.disableRunCoding = this.disableRunCoding; 870 if (effort < MID_EFFORT) 871 // No nested run codings. 872 popHelper.disableRunCoding = true; 873 } 874 int fVlen = pop.fVlen; 875 if (verbose > 2) { 876 Utils.log.info("computePopSizePrivate fvlen="+fVlen+ 877 " tc="+pop.tokenCoding); 878 Utils.log.info("{ //BEGIN"); 879 } 880 881 // Find good coding choices for the token and unfavored sequences. 882 int[] favoredValues = pop.fValues; 883 int[][] vals = pop.encodeValues(values, start, end); 884 int[] tokens = vals[0]; 885 int[] unfavoredValues = vals[1]; 886 if (verbose > 2) 887 Utils.log.info("-- refine on fv["+fVlen+"] fc="+favoredCoding); 888 pop.setFavoredCoding(popHelper.choose(favoredValues, 1, 1+fVlen, favoredCoding)); 889 if (pop.tokenCoding instanceof Coding && 890 (stress == null || stress.nextBoolean())) { 891 if (verbose > 2) 892 Utils.log.info("-- refine on tv["+tokens.length+"] tc="+pop.tokenCoding); 893 CodingMethod tc = popHelper.choose(tokens, (Coding) pop.tokenCoding); 894 if (tc != pop.tokenCoding) { 895 if (verbose > 2) 896 Utils.log.info(">>> refined tc="+tc); 897 pop.setTokenCoding(tc); 898 } 899 } 900 if (unfavoredValues.length == 0) 901 pop.setUnfavoredCoding(null); 902 else { 903 if (verbose > 2) 904 Utils.log.info("-- refine on uv["+unfavoredValues.length+"] uc="+pop.unfavoredCoding); 905 pop.setUnfavoredCoding(popHelper.choose(unfavoredValues, unfavoredCoding)); 906 } 907 if (verbose > 3) { 908 Utils.log.info("finish computePopSizePrivate fvlen="+fVlen+ 909 " fc="+pop.favoredCoding+ 910 " tc="+pop.tokenCoding+ 911 " uc="+pop.unfavoredCoding); 912 //pop.hist.print("pop-hist", null, System.out); 913 StringBuilder sb = new StringBuilder(); 914 sb.append("fv = {"); 915 for (int i = 1; i <= fVlen; i++) { 916 if ((i % 10) == 0) 917 sb.append('\n'); 918 sb.append(" ").append(favoredValues[i]); 919 } 920 sb.append('\n'); 921 sb.append("}"); 922 Utils.log.info(sb.toString()); 923 } 924 if (verbose > 2) { 925 Utils.log.info("} //END"); 926 } 927 if (stress != null) { 928 return null; // do not bother with size computation 929 } 930 int[] sizes; 931 try { 932 resetData(); 933 // Write the array of favored values. 934 pop.writeSequencesTo(byteSizer, tokens, unfavoredValues); 935 sizes = new int[] { getByteSize(), getZipSize() }; 936 } catch (IOException ee) { 937 throw new RuntimeException(ee); // cannot happen 938 } 939 int[] checkSizes = null; 940 assert((checkSizes = computeSizePrivate(pop)) != null); 941 assert(checkSizes[BYTE_SIZE] == sizes[BYTE_SIZE]) 942 : (checkSizes[BYTE_SIZE]+" != "+sizes[BYTE_SIZE]); 943 return sizes; 944 } 945 946 private void tryAdaptiveCoding(Coding plainCoding) { 947 int oldZipSize = bestZipSize; 948 // Scan the value sequence, determining whether an interesting 949 // run occupies too much space. ("Too much" means, say 5% more 950 // than the average integer size of the band as a whole.) 951 // Try to find a better coding for those segments. 952 int lstart = this.start; 953 int lend = this.end; 954 int[] lvalues = this.values; 955 int len = lend-lstart; 956 if (plainCoding.isDelta()) { 957 lvalues = getDeltas(0,0); //%%% not quite right! 958 lstart = 0; 959 lend = lvalues.length; 960 } 961 int[] sizes = new int[len+1]; 962 int fillp = 0; 963 int totalSize = 0; 964 for (int i = lstart; i < lend; i++) { 965 int val = lvalues[i]; 966 sizes[fillp++] = totalSize; 967 int size = plainCoding.getLength(val); 968 assert(size < Integer.MAX_VALUE); 969 //System.out.println("len "+val+" = "+size); 970 totalSize += size; 971 } 972 sizes[fillp++] = totalSize; 973 assert(fillp == sizes.length); 974 double avgSize = (double)totalSize / len; 975 double sizeFuzz; 976 double sizeFuzz2; 977 double sizeFuzz3; 978 if (effort >= MID_EFFORT) { 979 if (effort > MID_EFFORT+1) 980 sizeFuzz = 1.001; 981 else 982 sizeFuzz = 1.003; 983 } else { 984 if (effort > RUN_EFFORT) 985 sizeFuzz = 1.01; 986 else 987 sizeFuzz = 1.03; 988 } 989 // for now: 990 sizeFuzz *= sizeFuzz; // double the thresh 991 sizeFuzz2 = (sizeFuzz*sizeFuzz); 992 sizeFuzz3 = (sizeFuzz*sizeFuzz*sizeFuzz); 993 // Find some mesh scales we like. 994 double[] dmeshes = new double[1 + (effort-RUN_EFFORT)]; 995 double logLen = Math.log(len); 996 for (int i = 0; i < dmeshes.length; i++) { 997 dmeshes[i] = Math.exp(logLen*(i+1)/(dmeshes.length+1)); 998 } 999 int[] meshes = new int[dmeshes.length]; 1000 int mfillp = 0; 1001 for (int i = 0; i < dmeshes.length; i++) { 1002 int m = (int)Math.round(dmeshes[i]); 1003 m = AdaptiveCoding.getNextK(m-1); 1004 if (m <= 0 || m >= len) continue; 1005 if (mfillp > 0 && m == meshes[mfillp-1]) continue; 1006 meshes[mfillp++] = m; 1007 } 1008 meshes = BandStructure.realloc(meshes, mfillp); 1009 // There's going to be a band header. Estimate conservatively large. 1010 final int BAND_HEADER = 4; // op, KB, A, B 1011 // Threshold values for a "too big" mesh. 1012 int[] threshes = new int[meshes.length]; 1013 double[] fuzzes = new double[meshes.length]; 1014 for (int i = 0; i < meshes.length; i++) { 1015 int mesh = meshes[i]; 1016 double lfuzz; 1017 if (mesh < 10) 1018 lfuzz = sizeFuzz3; 1019 else if (mesh < 100) 1020 lfuzz = sizeFuzz2; 1021 else 1022 lfuzz = sizeFuzz; 1023 fuzzes[i] = lfuzz; 1024 threshes[i] = BAND_HEADER + (int)Math.ceil(mesh * avgSize * lfuzz); 1025 } 1026 if (verbose > 1) { 1027 System.out.print("tryAdaptiveCoding ["+len+"]"+ 1028 " avgS="+avgSize+" fuzz="+sizeFuzz+ 1029 " meshes: {"); 1030 for (int i = 0; i < meshes.length; i++) { 1031 System.out.print(" " + meshes[i] + "(" + threshes[i] + ")"); 1032 } 1033 Utils.log.info(" }"); 1034 } 1035 if (runHelper == null) { 1036 runHelper = new CodingChooser(effort, allCodingChoices); 1037 if (stress != null) 1038 runHelper.addStressSeed(stress.nextInt()); 1039 runHelper.topLevel = false; 1040 runHelper.verbose -= 1; 1041 runHelper.disableRunCoding = true; 1042 runHelper.disablePopCoding = this.disablePopCoding; 1043 if (effort < MID_EFFORT) 1044 // No nested pop codings. 1045 runHelper.disablePopCoding = true; 1046 } 1047 for (int i = 0; i < len; i++) { 1048 i = AdaptiveCoding.getNextK(i-1); 1049 if (i > len) i = len; 1050 for (int j = meshes.length-1; j >= 0; j--) { 1051 int mesh = meshes[j]; 1052 int thresh = threshes[j]; 1053 if (i+mesh > len) continue; 1054 int size = sizes[i+mesh] - sizes[i]; 1055 if (size >= thresh) { 1056 // Found a size bulge. 1057 int bend = i+mesh; 1058 int bsize = size; 1059 double bigSize = avgSize * fuzzes[j]; 1060 while (bend < len && (bend-i) <= len/2) { 1061 int bend0 = bend; 1062 int bsize0 = bsize; 1063 bend += mesh; 1064 bend = i+AdaptiveCoding.getNextK(bend-i-1); 1065 if (bend < 0 || bend > len) 1066 bend = len; 1067 bsize = sizes[bend]-sizes[i]; 1068 if (bsize < BAND_HEADER + (bend-i) * bigSize) { 1069 bsize = bsize0; 1070 bend = bend0; 1071 break; 1072 } 1073 } 1074 int nexti = bend; 1075 if (verbose > 2) { 1076 Utils.log.info("bulge at "+i+"["+(bend-i)+"] of "+ 1077 pct(bsize - avgSize*(bend-i), 1078 avgSize*(bend-i))); 1079 Utils.log.info("{ //BEGIN"); 1080 } 1081 CodingMethod begcm, midcm, endcm; 1082 midcm = runHelper.choose(this.values, 1083 this.start+i, 1084 this.start+bend, 1085 plainCoding); 1086 if (midcm == plainCoding) { 1087 // No use working further. 1088 begcm = plainCoding; 1089 endcm = plainCoding; 1090 } else { 1091 begcm = runHelper.choose(this.values, 1092 this.start, 1093 this.start+i, 1094 plainCoding); 1095 endcm = runHelper.choose(this.values, 1096 this.start+bend, 1097 this.start+len, 1098 plainCoding); 1099 } 1100 if (verbose > 2) 1101 Utils.log.info("} //END"); 1102 if (begcm == midcm && i > 0 && 1103 AdaptiveCoding.isCodableLength(bend)) { 1104 i = 0; 1105 } 1106 if (midcm == endcm && bend < len) { 1107 bend = len; 1108 } 1109 if (begcm != plainCoding || 1110 midcm != plainCoding || 1111 endcm != plainCoding) { 1112 CodingMethod chain; 1113 int hlen = 0; 1114 if (bend == len) { 1115 chain = midcm; 1116 } else { 1117 chain = new AdaptiveCoding(bend-i, midcm, endcm); 1118 hlen += BAND_HEADER; 1119 } 1120 if (i > 0) { 1121 chain = new AdaptiveCoding(i, begcm, chain); 1122 hlen += BAND_HEADER; 1123 } 1124 int[] chainSize = computeSizePrivate(chain); 1125 noteSizes(chain, 1126 chainSize[BYTE_SIZE], 1127 chainSize[ZIP_SIZE]+hlen); 1128 } 1129 i = nexti; 1130 break; 1131 } 1132 } 1133 } 1134 if (verbose > 3) { 1135 if (bestZipSize < oldZipSize) { 1136 Utils.log.info(">>> RUN WINS BY "+ 1137 (oldZipSize - bestZipSize)); 1138 } 1139 } 1140 } 1141 1142 private static 1143 String pct(double num, double den) { 1144 return (Math.round((num / den)*10000)/100.0)+"%"; 1145 } 1146 1147 static 1148 class Sizer extends OutputStream { 1149 final OutputStream out; // if non-null, copy output here also 1150 Sizer(OutputStream out) { 1151 this.out = out; 1152 } 1153 Sizer() { 1154 this(null); 1155 } 1156 private int count; 1157 public void write(int b) throws IOException { 1158 count++; 1159 if (out != null) out.write(b); 1160 } 1161 public void write(byte b[], int off, int len) throws IOException { 1162 count += len; 1163 if (out != null) out.write(b, off, len); 1164 } 1165 public void reset() { 1166 count = 0; 1167 } 1168 public int getSize() { return count; } 1169 1170 public String toString() { 1171 String str = super.toString(); 1172 // If -ea, print out more informative strings! 1173 assert((str = stringForDebug()) != null); 1174 return str; 1175 } 1176 String stringForDebug() { 1177 return "<Sizer "+getSize()+">"; 1178 } 1179 } 1180 1181 private Sizer zipSizer = new Sizer(); 1182 private Deflater zipDef = new Deflater(); 1183 private DeflaterOutputStream zipOut = new DeflaterOutputStream(zipSizer, zipDef); 1184 private Sizer byteSizer = new Sizer(zipOut); 1185 private Sizer byteOnlySizer = new Sizer(); 1186 1187 private void resetData() { 1188 flushData(); 1189 zipDef.reset(); 1190 if (context != null) { 1191 // Prepend given salt to the test output. 1192 try { 1193 context.writeTo(byteSizer); 1194 } catch (IOException ee) { 1195 throw new RuntimeException(ee); // cannot happen 1196 } 1197 } 1198 zipSizer.reset(); 1199 byteSizer.reset(); 1200 } 1201 private void flushData() { 1202 try { 1203 zipOut.finish(); 1204 } catch (IOException ee) { 1205 throw new RuntimeException(ee); // cannot happen 1206 } 1207 } 1208 private int getByteSize() { 1209 return byteSizer.getSize(); 1210 } 1211 private int getZipSize() { 1212 flushData(); 1213 return zipSizer.getSize(); 1214 } 1215 1216 1217 /// Stress-test helpers. 1218 1219 void addStressSeed(int x) { 1220 if (stress == null) return; 1221 stress.setSeed(x + ((long)stress.nextInt() << 32)); 1222 } 1223 1224 // Pick a random pop-coding. 1225 private CodingMethod stressPopCoding(CodingMethod coding) { 1226 assert(stress != null); // this method is only for testing 1227 // Don't turn it into a pop coding if it's already something special. 1228 if (!(coding instanceof Coding)) return coding; 1229 Coding valueCoding = ((Coding)coding).getValueCoding(); 1230 Histogram hist = getValueHistogram(); 1231 int fVlen = stressLen(hist.getTotalLength()); 1232 if (fVlen == 0) return coding; 1233 List<Integer> popvals = new ArrayList<>(); 1234 if (stress.nextBoolean()) { 1235 // Build the population from the value list. 1236 Set<Integer> popset = new HashSet<>(); 1237 for (int i = start; i < end; i++) { 1238 if (popset.add(values[i])) popvals.add(values[i]); 1239 } 1240 } else { 1241 int[][] matrix = hist.getMatrix(); 1242 for (int mrow = 0; mrow < matrix.length; mrow++) { 1243 int[] row = matrix[mrow]; 1244 for (int mcol = 1; mcol < row.length; mcol++) { 1245 popvals.add(row[mcol]); 1246 } 1247 } 1248 } 1249 int reorder = stress.nextInt(); 1250 if ((reorder & 7) <= 2) { 1251 // Lose the order. 1252 Collections.shuffle(popvals, stress); 1253 } else { 1254 // Keep the order, mostly. 1255 if (((reorder >>>= 3) & 7) <= 2) Collections.sort(popvals); 1256 if (((reorder >>>= 3) & 7) <= 2) Collections.reverse(popvals); 1257 if (((reorder >>>= 3) & 7) <= 2) Collections.rotate(popvals, stressLen(popvals.size())); 1258 } 1259 if (popvals.size() > fVlen) { 1260 // Cut the list down. 1261 if (((reorder >>>= 3) & 7) <= 2) { 1262 // Cut at end. 1263 popvals.subList(fVlen, popvals.size()).clear(); 1264 } else { 1265 // Cut at start. 1266 popvals.subList(0, popvals.size()-fVlen).clear(); 1267 } 1268 } 1269 fVlen = popvals.size(); 1270 int[] fvals = new int[1+fVlen]; 1271 for (int i = 0; i < fVlen; i++) { 1272 fvals[1+i] = (popvals.get(i)).intValue(); 1273 } 1274 PopulationCoding pop = new PopulationCoding(); 1275 pop.setFavoredValues(fvals, fVlen); 1276 int[] lvals = PopulationCoding.LValuesCoded; 1277 for (int i = 0; i < lvals.length / 2; i++) { 1278 int popl = lvals[stress.nextInt(lvals.length)]; 1279 if (popl < 0) continue; 1280 if (PopulationCoding.fitTokenCoding(fVlen, popl) != null) { 1281 pop.setL(popl); 1282 break; 1283 } 1284 } 1285 if (pop.tokenCoding == null) { 1286 int lmin = fvals[1], lmax = lmin; 1287 for (int i = 2; i <= fVlen; i++) { 1288 int val = fvals[i]; 1289 if (lmin > val) lmin = val; 1290 if (lmax < val) lmax = val; 1291 } 1292 pop.tokenCoding = stressCoding(lmin, lmax); 1293 } 1294 1295 computePopSizePrivate(pop, valueCoding, valueCoding); 1296 return pop; 1297 } 1298 1299 // Pick a random adaptive coding. 1300 private CodingMethod stressAdaptiveCoding(CodingMethod coding) { 1301 assert(stress != null); // this method is only for testing 1302 // Don't turn it into a run coding if it's already something special. 1303 if (!(coding instanceof Coding)) return coding; 1304 Coding plainCoding = (Coding)coding; 1305 int len = end-start; 1306 if (len < 2) return coding; 1307 // Decide how many spans we'll create. 1308 int spanlen = stressLen(len-1)+1; 1309 if (spanlen == len) return coding; 1310 try { 1311 assert(!disableRunCoding); 1312 disableRunCoding = true; // temporary, while I decide spans 1313 int[] allValues = values.clone(); 1314 CodingMethod result = null; 1315 int scan = this.end; 1316 int lstart = this.start; 1317 for (int split; scan > lstart; scan = split) { 1318 int thisspan; 1319 int rand = (scan - lstart < 100)? -1: stress.nextInt(); 1320 if ((rand & 7) != 0) { 1321 thisspan = (spanlen==1? spanlen: stressLen(spanlen-1)+1); 1322 } else { 1323 // Every so often generate a value based on KX/KB format. 1324 int KX = (rand >>>= 3) & AdaptiveCoding.KX_MAX; 1325 int KB = (rand >>>= 3) & AdaptiveCoding.KB_MAX; 1326 for (;;) { 1327 thisspan = AdaptiveCoding.decodeK(KX, KB); 1328 if (thisspan <= scan - lstart) break; 1329 // Try smaller and smaller codings: 1330 if (KB != AdaptiveCoding.KB_DEFAULT) 1331 KB = AdaptiveCoding.KB_DEFAULT; 1332 else 1333 KX -= 1; 1334 } 1335 //System.out.println("KX="+KX+" KB="+KB+" K="+thisspan); 1336 assert(AdaptiveCoding.isCodableLength(thisspan)); 1337 } 1338 if (thisspan > scan - lstart) thisspan = scan - lstart; 1339 while (!AdaptiveCoding.isCodableLength(thisspan)) { 1340 --thisspan; 1341 } 1342 split = scan - thisspan; 1343 assert(split < scan); 1344 assert(split >= lstart); 1345 // Choose a coding for the span [split..scan). 1346 CodingMethod sc = choose(allValues, split, scan, plainCoding); 1347 if (result == null) { 1348 result = sc; // the caboose 1349 } else { 1350 result = new AdaptiveCoding(scan-split, sc, result); 1351 } 1352 } 1353 return result; 1354 } finally { 1355 disableRunCoding = false; // return to normal value 1356 } 1357 } 1358 1359 // Return a random value in [0..len], gently biased toward extremes. 1360 private Coding stressCoding(int min, int max) { 1361 assert(stress != null); // this method is only for testing 1362 for (int i = 0; i < 100; i++) { 1363 Coding c = Coding.of(stress.nextInt(Coding.B_MAX)+1, 1364 stress.nextInt(Coding.H_MAX)+1, 1365 stress.nextInt(Coding.S_MAX+1)); 1366 if (c.B() == 1) c = c.setH(256); 1367 if (c.H() == 256 && c.B() >= 5) c = c.setB(4); 1368 if (stress.nextBoolean()) { 1369 Coding dc = c.setD(1); 1370 if (dc.canRepresent(min, max)) return dc; 1371 } 1372 if (c.canRepresent(min, max)) return c; 1373 } 1374 return BandStructure.UNSIGNED5; 1375 } 1376 1377 // Return a random value in [0..len], gently biased toward extremes. 1378 private int stressLen(int len) { 1379 assert(stress != null); // this method is only for testing 1380 assert(len >= 0); 1381 int rand = stress.nextInt(100); 1382 if (rand < 20) 1383 return Math.min(len/5, rand); 1384 else if (rand < 40) 1385 return len; 1386 else 1387 return stress.nextInt(len); 1388 } 1389 1390 // For debug only. 1391/* 1392 public static 1393 int[] readValuesFrom(InputStream instr) { 1394 return readValuesFrom(new InputStreamReader(instr)); 1395 } 1396 public static 1397 int[] readValuesFrom(Reader inrdr) { 1398 inrdr = new BufferedReader(inrdr); 1399 final StreamTokenizer in = new StreamTokenizer(inrdr); 1400 final int TT_NOTHING = -99; 1401 in.commentChar('#'); 1402 return readValuesFrom(new Iterator() { 1403 int token = TT_NOTHING; 1404 private int getToken() { 1405 if (token == TT_NOTHING) { 1406 try { 1407 token = in.nextToken(); 1408 assert(token != TT_NOTHING); 1409 } catch (IOException ee) { 1410 throw new RuntimeException(ee); 1411 } 1412 } 1413 return token; 1414 } 1415 public boolean hasNext() { 1416 return getToken() != StreamTokenizer.TT_EOF; 1417 } 1418 public Object next() { 1419 int ntok = getToken(); 1420 token = TT_NOTHING; 1421 switch (ntok) { 1422 case StreamTokenizer.TT_EOF: 1423 throw new NoSuchElementException(); 1424 case StreamTokenizer.TT_NUMBER: 1425 return Integer.valueOf((int) in.nval); 1426 default: 1427 assert(false); 1428 return null; 1429 } 1430 } 1431 public void remove() { 1432 throw new UnsupportedOperationException(); 1433 } 1434 }); 1435 } 1436 public static 1437 int[] readValuesFrom(Iterator iter) { 1438 return readValuesFrom(iter, 0); 1439 } 1440 public static 1441 int[] readValuesFrom(Iterator iter, int initSize) { 1442 int[] na = new int[Math.max(10, initSize)]; 1443 int np = 0; 1444 while (iter.hasNext()) { 1445 Integer val = (Integer) iter.next(); 1446 if (np == na.length) { 1447 na = BandStructure.realloc(na); 1448 } 1449 na[np++] = val.intValue(); 1450 } 1451 if (np != na.length) { 1452 na = BandStructure.realloc(na, np); 1453 } 1454 return na; 1455 } 1456 1457 public static 1458 void main(String[] av) throws IOException { 1459 int effort = MID_EFFORT; 1460 int ap = 0; 1461 if (ap < av.length && av[ap].equals("-e")) { 1462 ap++; 1463 effort = Integer.parseInt(av[ap++]); 1464 } 1465 int verbose = 1; 1466 if (ap < av.length && av[ap].equals("-v")) { 1467 ap++; 1468 verbose = Integer.parseInt(av[ap++]); 1469 } 1470 Coding[] bcs = BandStructure.getBasicCodings(); 1471 CodingChooser cc = new CodingChooser(effort, bcs); 1472 if (ap < av.length && av[ap].equals("-p")) { 1473 ap++; 1474 cc.optUsePopulationCoding = false; 1475 } 1476 if (ap < av.length && av[ap].equals("-a")) { 1477 ap++; 1478 cc.optUseAdaptiveCoding = false; 1479 } 1480 cc.verbose = verbose; 1481 int[] values = readValuesFrom(System.in); 1482 int[] sizes = {0,0}; 1483 CodingMethod cm = cc.choose(values, BandStructure.UNSIGNED5, sizes); 1484 System.out.println("size: "+sizes[BYTE_SIZE]+"/zs="+sizes[ZIP_SIZE]); 1485 System.out.println(cm); 1486 } 1487//*/ 1488 1489} 1490