1/* 2 * reserved comment block 3 * DO NOT REMOVE OR ALTER! 4 */ 5/* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22package com.sun.org.apache.xml.internal.utils; 23 24/** 25 * Bare-bones, unsafe, fast string buffer. No thread-safety, no 26 * parameter range checking, exposed fields. Note that in typical 27 * applications, thread-safety of a StringBuffer is a somewhat 28 * dubious concept in any case. 29 * <p> 30 * Note that Stree and DTM used a single FastStringBuffer as a string pool, 31 * by recording start and length indices within this single buffer. This 32 * minimizes heap overhead, but of course requires more work when retrieving 33 * the data. 34 * <p> 35 * FastStringBuffer operates as a "chunked buffer". Doing so 36 * reduces the need to recopy existing information when an append 37 * exceeds the space available; we just allocate another chunk and 38 * flow across to it. (The array of chunks may need to grow, 39 * admittedly, but that's a much smaller object.) Some excess 40 * recopying may arise when we extract Strings which cross chunk 41 * boundaries; larger chunks make that less frequent. 42 * <p> 43 * The size values are parameterized, to allow tuning this code. In 44 * theory, Result Tree Fragments might want to be tuned differently 45 * from the main document's text. 46 * <p> 47 * %REVIEW% An experiment in self-tuning is 48 * included in the code (using nested FastStringBuffers to achieve 49 * variation in chunk sizes), but this implementation has proven to 50 * be problematic when data may be being copied from the FSB into itself. 51 * We should either re-architect that to make this safe (if possible) 52 * or remove that code and clean up for performance/maintainability reasons. 53 * <p> 54 */ 55public class FastStringBuffer 56{ 57 // If nonzero, forces the inial chunk size. 58 /**/static final int DEBUG_FORCE_INIT_BITS=0; 59 60 // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied 61 // back into the same FSB (variable set from previous variable, for example) 62 // and blocksize changes in mid-copy... there's risk of severe malfunction in 63 // the read process, due to how the resizing code re-jiggers storage. Arggh. 64 // If we want to retain the variable-size-block feature, we need to reconsider 65 // that issue. For now, I have forced us into fixed-size mode. 66 static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true; 67 68 /** Manifest constant: Suppress leading whitespace. 69 * This should be used when normalize-to-SAX is called for the first chunk of a 70 * multi-chunk output, or one following unsuppressed whitespace in a previous 71 * chunk. 72 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 73 */ 74 public static final int SUPPRESS_LEADING_WS=0x01; 75 76 /** Manifest constant: Suppress trailing whitespace. 77 * This should be used when normalize-to-SAX is called for the last chunk of a 78 * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS. 79 */ 80 public static final int SUPPRESS_TRAILING_WS=0x02; 81 82 /** Manifest constant: Suppress both leading and trailing whitespace. 83 * This should be used when normalize-to-SAX is called for a complete string. 84 * (I'm not wild about the name of this one. Ideas welcome.) 85 * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int) 86 */ 87 public static final int SUPPRESS_BOTH 88 = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS; 89 90 /** Manifest constant: Carry trailing whitespace of one chunk as leading 91 * whitespace of the next chunk. Used internally; I don't see any reason 92 * to make it public right now. 93 */ 94 private static final int CARRY_WS=0x04; 95 96 /** 97 * Field m_chunkBits sets our chunking strategy, by saying how many 98 * bits of index can be used within a single chunk before flowing over 99 * to the next chunk. For example, if m_chunkbits is set to 15, each 100 * chunk can contain up to 2^15 (32K) characters 101 */ 102 int m_chunkBits = 15; 103 104 /** 105 * Field m_maxChunkBits affects our chunk-growth strategy, by saying what 106 * the largest permissible chunk size is in this particular FastStringBuffer 107 * hierarchy. 108 */ 109 int m_maxChunkBits = 15; 110 111 /** 112 * Field m_rechunkBits affects our chunk-growth strategy, by saying how 113 * many chunks should be allocated at one size before we encapsulate them 114 * into the first chunk of the next size up. For example, if m_rechunkBits 115 * is set to 3, then after 8 chunks at a given size we will rebundle 116 * them as the first element of a FastStringBuffer using a chunk size 117 * 8 times larger (chunkBits shifted left three bits). 118 */ 119 int m_rebundleBits = 2; 120 121 /** 122 * Field m_chunkSize establishes the maximum size of one chunk of the array 123 * as 2**chunkbits characters. 124 * (Which may also be the minimum size if we aren't tuning for storage) 125 */ 126 int m_chunkSize; // =1<<(m_chunkBits-1); 127 128 /** 129 * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits 130 * worth of low-order '1' bits, useful for shift-and-mask addressing 131 * within the chunks. 132 */ 133 int m_chunkMask; // =m_chunkSize-1; 134 135 /** 136 * Field m_array holds the string buffer's text contents, using an 137 * array-of-arrays. Note that this array, and the arrays it contains, may be 138 * reallocated when necessary in order to allow the buffer to grow; 139 * references to them should be considered to be invalidated after any 140 * append. However, the only time these arrays are directly exposed 141 * is in the sendSAXcharacters call. 142 */ 143 char[][] m_array; 144 145 /** 146 * Field m_lastChunk is an index into m_array[], pointing to the last 147 * chunk of the Chunked Array currently in use. Note that additional 148 * chunks may actually be allocated, eg if the FastStringBuffer had 149 * previously been truncated or if someone issued an ensureSpace request. 150 * <p> 151 * The insertion point for append operations is addressed by the combination 152 * of m_lastChunk and m_firstFree. 153 */ 154 int m_lastChunk = 0; 155 156 /** 157 * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to 158 * the first character in the Chunked Array which is not part of the 159 * FastStringBuffer's current content. Since m_array[][] is zero-based, 160 * the length of that content can be calculated as 161 * (m_lastChunk<<m_chunkBits) + m_firstFree 162 */ 163 int m_firstFree = 0; 164 165 /** 166 * Field m_innerFSB, when non-null, is a FastStringBuffer whose total 167 * length equals m_chunkSize, and which replaces m_array[0]. This allows 168 * building a hierarchy of FastStringBuffers, where early appends use 169 * a smaller chunkSize (for less wasted memory overhead) but later 170 * ones use a larger chunkSize (for less heap activity overhead). 171 */ 172 FastStringBuffer m_innerFSB = null; 173 174 /** 175 * Construct a FastStringBuffer, with allocation policy as per parameters. 176 * <p> 177 * For coding convenience, I've expressed both allocation sizes in terms of 178 * a number of bits. That's needed for the final size of a chunk, 179 * to permit fast and efficient shift-and-mask addressing. It's less critical 180 * for the inital size, and may be reconsidered. 181 * <p> 182 * An alternative would be to accept integer sizes and round to powers of two; 183 * that really doesn't seem to buy us much, if anything. 184 * 185 * @param initChunkBits Length in characters of the initial allocation 186 * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024 187 * characters.) Later chunks will use larger allocation units, to trade off 188 * allocation speed of large document against storage efficiency of small 189 * ones. 190 * @param maxChunkBits Number of character-offset bits that should be used for 191 * addressing within a chunk. Maximum length of a chunk is 2^chunkBits 192 * characters. 193 * @param rebundleBits Number of character-offset bits that addressing should 194 * advance before we attempt to take a step from initChunkBits to maxChunkBits 195 */ 196 public FastStringBuffer(int initChunkBits, int maxChunkBits, 197 int rebundleBits) 198 { 199 if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS; 200 201 // %REVIEW% 202 // Should this force to larger value, or smaller? Smaller less efficient, but if 203 // someone requested variable mode it's because they care about storage space. 204 // On the other hand, given the other changes I'm making, odds are that we should 205 // adopt the larger size. Dither, dither, dither... This is just stopgap workaround 206 // anyway; we need a permanant solution. 207 // 208 if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits; 209 //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits; 210 211 m_array = new char[16][]; 212 213 // Don't bite off more than we're prepared to swallow! 214 if (initChunkBits > maxChunkBits) 215 initChunkBits = maxChunkBits; 216 217 m_chunkBits = initChunkBits; 218 m_maxChunkBits = maxChunkBits; 219 m_rebundleBits = rebundleBits; 220 m_chunkSize = 1 << (initChunkBits); 221 m_chunkMask = m_chunkSize - 1; 222 m_array[0] = new char[m_chunkSize]; 223 } 224 225 /** 226 * Construct a FastStringBuffer, using a default rebundleBits value. 227 * 228 * NEEDSDOC @param initChunkBits 229 * NEEDSDOC @param maxChunkBits 230 */ 231 public FastStringBuffer(int initChunkBits, int maxChunkBits) 232 { 233 this(initChunkBits, maxChunkBits, 2); 234 } 235 236 /** 237 * Construct a FastStringBuffer, using default maxChunkBits and 238 * rebundleBits values. 239 * <p> 240 * ISSUE: Should this call assert initial size, or fixed size? 241 * Now configured as initial, with a default for fixed. 242 * 243 * NEEDSDOC @param initChunkBits 244 */ 245 public FastStringBuffer(int initChunkBits) 246 { 247 this(initChunkBits, 15, 2); 248 } 249 250 /** 251 * Construct a FastStringBuffer, using a default allocation policy. 252 */ 253 public FastStringBuffer() 254 { 255 256 // 10 bits is 1K. 15 bits is 32K. Remember that these are character 257 // counts, so actual memory allocation unit is doubled for UTF-16 chars. 258 // 259 // For reference: In the original FastStringBuffer, we simply 260 // overallocated by blocksize (default 1KB) on each buffer-growth. 261 this(10, 15, 2); 262 } 263 264 /** 265 * Get the length of the list. Synonym for length(). 266 * 267 * @return the number of characters in the FastStringBuffer's content. 268 */ 269 public final int size() 270 { 271 return (m_lastChunk << m_chunkBits) + m_firstFree; 272 } 273 274 /** 275 * Get the length of the list. Synonym for size(). 276 * 277 * @return the number of characters in the FastStringBuffer's content. 278 */ 279 public final int length() 280 { 281 return (m_lastChunk << m_chunkBits) + m_firstFree; 282 } 283 284 /** 285 * Discard the content of the FastStringBuffer, and most of the memory 286 * that was allocated by it, restoring the initial state. Note that this 287 * may eventually be different from setLength(0), which see. 288 */ 289 public final void reset() 290 { 291 292 m_lastChunk = 0; 293 m_firstFree = 0; 294 295 // Recover the original chunk size 296 FastStringBuffer innermost = this; 297 298 while (innermost.m_innerFSB != null) 299 { 300 innermost = innermost.m_innerFSB; 301 } 302 303 m_chunkBits = innermost.m_chunkBits; 304 m_chunkSize = innermost.m_chunkSize; 305 m_chunkMask = innermost.m_chunkMask; 306 307 // Discard the hierarchy 308 m_innerFSB = null; 309 m_array = new char[16][0]; 310 m_array[0] = new char[m_chunkSize]; 311 } 312 313 /** 314 * Directly set how much of the FastStringBuffer's storage is to be 315 * considered part of its content. This is a fast but hazardous 316 * operation. It is not protected against negative values, or values 317 * greater than the amount of storage currently available... and even 318 * if additional storage does exist, its contents are unpredictable. 319 * The only safe use for our setLength() is to truncate the FastStringBuffer 320 * to a shorter string. 321 * 322 * @param l New length. If l<0 or l>=getLength(), this operation will 323 * not report an error but future operations will almost certainly fail. 324 */ 325 public final void setLength(int l) 326 { 327 m_lastChunk = l >>> m_chunkBits; 328 329 if (m_lastChunk == 0 && m_innerFSB != null) 330 { 331 // Replace this FSB with the appropriate inner FSB, truncated 332 m_innerFSB.setLength(l, this); 333 } 334 else 335 { 336 m_firstFree = l & m_chunkMask; 337 338 // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving 339 // us pointing at the start of a chunk which has not yet been allocated. Rather than 340 // pay the cost of dealing with that in the append loops (more scattered and more 341 // inner-loop), we correct it here by moving to the safe side of that 342 // line -- as we would have left the indexes had we appended up to that point. 343 if(m_firstFree==0 && m_lastChunk>0) 344 { 345 --m_lastChunk; 346 m_firstFree=m_chunkSize; 347 } 348 } 349 } 350 351 /** 352 * Subroutine for the public setLength() method. Deals with the fact 353 * that truncation may require restoring one of the innerFSBs 354 * 355 * NEEDSDOC @param l 356 * NEEDSDOC @param rootFSB 357 */ 358 private final void setLength(int l, FastStringBuffer rootFSB) 359 { 360 361 m_lastChunk = l >>> m_chunkBits; 362 363 if (m_lastChunk == 0 && m_innerFSB != null) 364 { 365 m_innerFSB.setLength(l, rootFSB); 366 } 367 else 368 { 369 370 // Undo encapsulation -- pop the innerFSB data back up to root. 371 // Inefficient, but attempts to keep the code simple. 372 rootFSB.m_chunkBits = m_chunkBits; 373 rootFSB.m_maxChunkBits = m_maxChunkBits; 374 rootFSB.m_rebundleBits = m_rebundleBits; 375 rootFSB.m_chunkSize = m_chunkSize; 376 rootFSB.m_chunkMask = m_chunkMask; 377 rootFSB.m_array = m_array; 378 rootFSB.m_innerFSB = m_innerFSB; 379 rootFSB.m_lastChunk = m_lastChunk; 380 381 // Finally, truncate this sucker. 382 rootFSB.m_firstFree = l & m_chunkMask; 383 } 384 } 385 386 /** 387 * Note that this operation has been somewhat deoptimized by the shift to a 388 * chunked array, as there is no factory method to produce a String object 389 * directly from an array of arrays and hence a double copy is needed. 390 * By using ensureCapacity we hope to minimize the heap overhead of building 391 * the intermediate StringBuffer. 392 * <p> 393 * (It really is a pity that Java didn't design String as a final subclass 394 * of MutableString, rather than having StringBuffer be a separate hierarchy. 395 * We'd avoid a <strong>lot</strong> of double-buffering.) 396 * 397 * @return the contents of the FastStringBuffer as a standard Java string. 398 */ 399 public final String toString() 400 { 401 402 int length = (m_lastChunk << m_chunkBits) + m_firstFree; 403 404 return getString(new StringBuffer(length), 0, 0, length).toString(); 405 } 406 407 /** 408 * Append a single character onto the FastStringBuffer, growing the 409 * storage if necessary. 410 * <p> 411 * NOTE THAT after calling append(), previously obtained 412 * references to m_array[][] may no longer be valid.... 413 * though in fact they should be in this instance. 414 * 415 * @param value character to be appended. 416 */ 417 public final void append(char value) 418 { 419 420 char[] chunk; 421 422 // We may have preallocated chunks. If so, all but last should 423 // be at full size. 424 boolean lastchunk = (m_lastChunk + 1 == m_array.length); 425 426 if (m_firstFree < m_chunkSize) // Simplified test single-character-fits 427 chunk = m_array[m_lastChunk]; 428 else 429 { 430 431 // Extend array? 432 int i = m_array.length; 433 434 if (m_lastChunk + 1 == i) 435 { 436 char[][] newarray = new char[i + 16][]; 437 438 System.arraycopy(m_array, 0, newarray, 0, i); 439 440 m_array = newarray; 441 } 442 443 // Advance one chunk 444 chunk = m_array[++m_lastChunk]; 445 446 if (chunk == null) 447 { 448 449 // Hierarchical encapsulation 450 if (m_lastChunk == 1 << m_rebundleBits 451 && m_chunkBits < m_maxChunkBits) 452 { 453 454 // Should do all the work of both encapsulating 455 // existing data and establishing new sizes/offsets 456 m_innerFSB = new FastStringBuffer(this); 457 } 458 459 // Add a chunk. 460 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 461 } 462 463 m_firstFree = 0; 464 } 465 466 // Space exists in the chunk. Append the character. 467 chunk[m_firstFree++] = value; 468 } 469 470 /** 471 * Append the contents of a String onto the FastStringBuffer, 472 * growing the storage if necessary. 473 * <p> 474 * NOTE THAT after calling append(), previously obtained 475 * references to m_array[] may no longer be valid. 476 * 477 * @param value String whose contents are to be appended. 478 */ 479 public final void append(String value) 480 { 481 482 if (value == null) 483 return; 484 int strlen = value.length(); 485 486 if (0 == strlen) 487 return; 488 489 int copyfrom = 0; 490 char[] chunk = m_array[m_lastChunk]; 491 int available = m_chunkSize - m_firstFree; 492 493 // Repeat while data remains to be copied 494 while (strlen > 0) 495 { 496 497 // Copy what fits 498 if (available > strlen) 499 available = strlen; 500 501 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 502 m_firstFree); 503 504 strlen -= available; 505 copyfrom += available; 506 507 // If there's more left, allocate another chunk and continue 508 if (strlen > 0) 509 { 510 511 // Extend array? 512 int i = m_array.length; 513 514 if (m_lastChunk + 1 == i) 515 { 516 char[][] newarray = new char[i + 16][]; 517 518 System.arraycopy(m_array, 0, newarray, 0, i); 519 520 m_array = newarray; 521 } 522 523 // Advance one chunk 524 chunk = m_array[++m_lastChunk]; 525 526 if (chunk == null) 527 { 528 529 // Hierarchical encapsulation 530 if (m_lastChunk == 1 << m_rebundleBits 531 && m_chunkBits < m_maxChunkBits) 532 { 533 534 // Should do all the work of both encapsulating 535 // existing data and establishing new sizes/offsets 536 m_innerFSB = new FastStringBuffer(this); 537 } 538 539 // Add a chunk. 540 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 541 } 542 543 available = m_chunkSize; 544 m_firstFree = 0; 545 } 546 } 547 548 // Adjust the insert point in the last chunk, when we've reached it. 549 m_firstFree += available; 550 } 551 552 /** 553 * Append the contents of a StringBuffer onto the FastStringBuffer, 554 * growing the storage if necessary. 555 * <p> 556 * NOTE THAT after calling append(), previously obtained 557 * references to m_array[] may no longer be valid. 558 * 559 * @param value StringBuffer whose contents are to be appended. 560 */ 561 public final void append(StringBuffer value) 562 { 563 564 if (value == null) 565 return; 566 int strlen = value.length(); 567 568 if (0 == strlen) 569 return; 570 571 int copyfrom = 0; 572 char[] chunk = m_array[m_lastChunk]; 573 int available = m_chunkSize - m_firstFree; 574 575 // Repeat while data remains to be copied 576 while (strlen > 0) 577 { 578 579 // Copy what fits 580 if (available > strlen) 581 available = strlen; 582 583 value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk], 584 m_firstFree); 585 586 strlen -= available; 587 copyfrom += available; 588 589 // If there's more left, allocate another chunk and continue 590 if (strlen > 0) 591 { 592 593 // Extend array? 594 int i = m_array.length; 595 596 if (m_lastChunk + 1 == i) 597 { 598 char[][] newarray = new char[i + 16][]; 599 600 System.arraycopy(m_array, 0, newarray, 0, i); 601 602 m_array = newarray; 603 } 604 605 // Advance one chunk 606 chunk = m_array[++m_lastChunk]; 607 608 if (chunk == null) 609 { 610 611 // Hierarchical encapsulation 612 if (m_lastChunk == 1 << m_rebundleBits 613 && m_chunkBits < m_maxChunkBits) 614 { 615 616 // Should do all the work of both encapsulating 617 // existing data and establishing new sizes/offsets 618 m_innerFSB = new FastStringBuffer(this); 619 } 620 621 // Add a chunk. 622 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 623 } 624 625 available = m_chunkSize; 626 m_firstFree = 0; 627 } 628 } 629 630 // Adjust the insert point in the last chunk, when we've reached it. 631 m_firstFree += available; 632 } 633 634 /** 635 * Append part of the contents of a Character Array onto the 636 * FastStringBuffer, growing the storage if necessary. 637 * <p> 638 * NOTE THAT after calling append(), previously obtained 639 * references to m_array[] may no longer be valid. 640 * 641 * @param chars character array from which data is to be copied 642 * @param start offset in chars of first character to be copied, 643 * zero-based. 644 * @param length number of characters to be copied 645 */ 646 public final void append(char[] chars, int start, int length) 647 { 648 649 int strlen = length; 650 651 if (0 == strlen) 652 return; 653 654 int copyfrom = start; 655 char[] chunk = m_array[m_lastChunk]; 656 int available = m_chunkSize - m_firstFree; 657 658 // Repeat while data remains to be copied 659 while (strlen > 0) 660 { 661 662 // Copy what fits 663 if (available > strlen) 664 available = strlen; 665 666 System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree, 667 available); 668 669 strlen -= available; 670 copyfrom += available; 671 672 // If there's more left, allocate another chunk and continue 673 if (strlen > 0) 674 { 675 676 // Extend array? 677 int i = m_array.length; 678 679 if (m_lastChunk + 1 == i) 680 { 681 char[][] newarray = new char[i + 16][]; 682 683 System.arraycopy(m_array, 0, newarray, 0, i); 684 685 m_array = newarray; 686 } 687 688 // Advance one chunk 689 chunk = m_array[++m_lastChunk]; 690 691 if (chunk == null) 692 { 693 694 // Hierarchical encapsulation 695 if (m_lastChunk == 1 << m_rebundleBits 696 && m_chunkBits < m_maxChunkBits) 697 { 698 699 // Should do all the work of both encapsulating 700 // existing data and establishing new sizes/offsets 701 m_innerFSB = new FastStringBuffer(this); 702 } 703 704 // Add a chunk. 705 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 706 } 707 708 available = m_chunkSize; 709 m_firstFree = 0; 710 } 711 } 712 713 // Adjust the insert point in the last chunk, when we've reached it. 714 m_firstFree += available; 715 } 716 717 /** 718 * Append the contents of another FastStringBuffer onto 719 * this FastStringBuffer, growing the storage if necessary. 720 * <p> 721 * NOTE THAT after calling append(), previously obtained 722 * references to m_array[] may no longer be valid. 723 * 724 * @param value FastStringBuffer whose contents are 725 * to be appended. 726 */ 727 public final void append(FastStringBuffer value) 728 { 729 730 // Complicating factor here is that the two buffers may use 731 // different chunk sizes, and even if they're the same we're 732 // probably on a different alignment due to previously appended 733 // data. We have to work through the source in bite-sized chunks. 734 if (value == null) 735 return; 736 int strlen = value.length(); 737 738 if (0 == strlen) 739 return; 740 741 int copyfrom = 0; 742 char[] chunk = m_array[m_lastChunk]; 743 int available = m_chunkSize - m_firstFree; 744 745 // Repeat while data remains to be copied 746 while (strlen > 0) 747 { 748 749 // Copy what fits 750 if (available > strlen) 751 available = strlen; 752 753 int sourcechunk = (copyfrom + value.m_chunkSize - 1) 754 >>> value.m_chunkBits; 755 int sourcecolumn = copyfrom & value.m_chunkMask; 756 int runlength = value.m_chunkSize - sourcecolumn; 757 758 if (runlength > available) 759 runlength = available; 760 761 System.arraycopy(value.m_array[sourcechunk], sourcecolumn, 762 m_array[m_lastChunk], m_firstFree, runlength); 763 764 if (runlength != available) 765 System.arraycopy(value.m_array[sourcechunk + 1], 0, 766 m_array[m_lastChunk], m_firstFree + runlength, 767 available - runlength); 768 769 strlen -= available; 770 copyfrom += available; 771 772 // If there's more left, allocate another chunk and continue 773 if (strlen > 0) 774 { 775 776 // Extend array? 777 int i = m_array.length; 778 779 if (m_lastChunk + 1 == i) 780 { 781 char[][] newarray = new char[i + 16][]; 782 783 System.arraycopy(m_array, 0, newarray, 0, i); 784 785 m_array = newarray; 786 } 787 788 // Advance one chunk 789 chunk = m_array[++m_lastChunk]; 790 791 if (chunk == null) 792 { 793 794 // Hierarchical encapsulation 795 if (m_lastChunk == 1 << m_rebundleBits 796 && m_chunkBits < m_maxChunkBits) 797 { 798 799 // Should do all the work of both encapsulating 800 // existing data and establishing new sizes/offsets 801 m_innerFSB = new FastStringBuffer(this); 802 } 803 804 // Add a chunk. 805 chunk = m_array[m_lastChunk] = new char[m_chunkSize]; 806 } 807 808 available = m_chunkSize; 809 m_firstFree = 0; 810 } 811 } 812 813 // Adjust the insert point in the last chunk, when we've reached it. 814 m_firstFree += available; 815 } 816 817 /** 818 * @return true if the specified range of characters are all whitespace, 819 * as defined by XMLCharacterRecognizer. 820 * <p> 821 * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE. 822 * 823 * @param start Offset of first character in the range. 824 * @param length Number of characters to send. 825 */ 826 public boolean isWhitespace(int start, int length) 827 { 828 829 int sourcechunk = start >>> m_chunkBits; 830 int sourcecolumn = start & m_chunkMask; 831 int available = m_chunkSize - sourcecolumn; 832 boolean chunkOK; 833 834 while (length > 0) 835 { 836 int runlength = (length <= available) ? length : available; 837 838 if (sourcechunk == 0 && m_innerFSB != null) 839 chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength); 840 else 841 chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace( 842 m_array[sourcechunk], sourcecolumn, runlength); 843 844 if (!chunkOK) 845 return false; 846 847 length -= runlength; 848 849 ++sourcechunk; 850 851 sourcecolumn = 0; 852 available = m_chunkSize; 853 } 854 855 return true; 856 } 857 858 /** 859 * @param start Offset of first character in the range. 860 * @param length Number of characters to send. 861 * @return a new String object initialized from the specified range of 862 * characters. 863 */ 864 public String getString(int start, int length) 865 { 866 int startColumn = start & m_chunkMask; 867 int startChunk = start >>> m_chunkBits; 868 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 869 return getOneChunkString(startChunk, startColumn, length); 870 } 871 return getString(new StringBuffer(length), startChunk, startColumn, 872 length).toString(); 873 } 874 875 protected String getOneChunkString(int startChunk, int startColumn, 876 int length) { 877 return new String(m_array[startChunk], startColumn, length); 878 } 879 880 /** 881 * @param sb StringBuffer to be appended to 882 * @param start Offset of first character in the range. 883 * @param length Number of characters to send. 884 * @return sb with the requested text appended to it 885 */ 886 StringBuffer getString(StringBuffer sb, int start, int length) 887 { 888 return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length); 889 } 890 891 /** 892 * Internal support for toString() and getString(). 893 * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into 894 * and returns a StringBuffer supplied by the caller. This simplifies 895 * m_innerFSB support. 896 * <p> 897 * Note that this operation has been somewhat deoptimized by the shift to a 898 * chunked array, as there is no factory method to produce a String object 899 * directly from an array of arrays and hence a double copy is needed. 900 * By presetting length we hope to minimize the heap overhead of building 901 * the intermediate StringBuffer. 902 * <p> 903 * (It really is a pity that Java didn't design String as a final subclass 904 * of MutableString, rather than having StringBuffer be a separate hierarchy. 905 * We'd avoid a <strong>lot</strong> of double-buffering.) 906 * 907 * 908 * @param sb 909 * @param startChunk 910 * @param startColumn 911 * @param length 912 * 913 * @return the contents of the FastStringBuffer as a standard Java string. 914 */ 915 StringBuffer getString(StringBuffer sb, int startChunk, int startColumn, 916 int length) 917 { 918 919 int stop = (startChunk << m_chunkBits) + startColumn + length; 920 int stopChunk = stop >>> m_chunkBits; 921 int stopColumn = stop & m_chunkMask; 922 923 // Factored out 924 //StringBuffer sb=new StringBuffer(length); 925 for (int i = startChunk; i < stopChunk; ++i) 926 { 927 if (i == 0 && m_innerFSB != null) 928 m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn); 929 else 930 sb.append(m_array[i], startColumn, m_chunkSize - startColumn); 931 932 startColumn = 0; // after first chunk 933 } 934 935 if (stopChunk == 0 && m_innerFSB != null) 936 m_innerFSB.getString(sb, startColumn, stopColumn - startColumn); 937 else if (stopColumn > startColumn) 938 sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn); 939 940 return sb; 941 } 942 943 /** 944 * Get a single character from the string buffer. 945 * 946 * 947 * @param pos character position requested. 948 * @return A character from the requested position. 949 */ 950 public char charAt(int pos) 951 { 952 int startChunk = pos >>> m_chunkBits; 953 954 if (startChunk == 0 && m_innerFSB != null) 955 return m_innerFSB.charAt(pos & m_chunkMask); 956 else 957 return m_array[startChunk][pos & m_chunkMask]; 958 } 959 960 /** 961 * Sends the specified range of characters as one or more SAX characters() 962 * events. 963 * Note that the buffer reference passed to the ContentHandler may be 964 * invalidated if the FastStringBuffer is edited; it's the user's 965 * responsibility to manage access to the FastStringBuffer to prevent this 966 * problem from arising. 967 * <p> 968 * Note too that there is no promise that the output will be sent as a 969 * single call. As is always true in SAX, one logical string may be split 970 * across multiple blocks of memory and hence delivered as several 971 * successive events. 972 * 973 * @param ch SAX ContentHandler object to receive the event. 974 * @param start Offset of first character in the range. 975 * @param length Number of characters to send. 976 * @exception org.xml.sax.SAXException may be thrown by handler's 977 * characters() method. 978 */ 979 public void sendSAXcharacters( 980 org.xml.sax.ContentHandler ch, int start, int length) 981 throws org.xml.sax.SAXException 982 { 983 984 int startChunk = start >>> m_chunkBits; 985 int startColumn = start & m_chunkMask; 986 if (startColumn + length < m_chunkMask && m_innerFSB == null) { 987 ch.characters(m_array[startChunk], startColumn, length); 988 return; 989 } 990 991 int stop = start + length; 992 int stopChunk = stop >>> m_chunkBits; 993 int stopColumn = stop & m_chunkMask; 994 995 for (int i = startChunk; i < stopChunk; ++i) 996 { 997 if (i == 0 && m_innerFSB != null) 998 m_innerFSB.sendSAXcharacters(ch, startColumn, 999 m_chunkSize - startColumn); 1000 else 1001 ch.characters(m_array[i], startColumn, m_chunkSize - startColumn); 1002 1003 startColumn = 0; // after first chunk 1004 } 1005 1006 // Last, or only, chunk 1007 if (stopChunk == 0 && m_innerFSB != null) 1008 m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn); 1009 else if (stopColumn > startColumn) 1010 { 1011 ch.characters(m_array[stopChunk], startColumn, 1012 stopColumn - startColumn); 1013 } 1014 } 1015 1016 /** 1017 * Sends the specified range of characters as one or more SAX characters() 1018 * events, normalizing the characters according to XSLT rules. 1019 * 1020 * @param ch SAX ContentHandler object to receive the event. 1021 * @param start Offset of first character in the range. 1022 * @param length Number of characters to send. 1023 * @return normalization status to apply to next chunk (because we may 1024 * have been called recursively to process an inner FSB): 1025 * <dl> 1026 * <dt>0</dt> 1027 * <dd>if this output did not end in retained whitespace, and thus whitespace 1028 * at the start of the following chunk (if any) should be converted to a 1029 * single space. 1030 * <dt>SUPPRESS_LEADING_WS</dt> 1031 * <dd>if this output ended in retained whitespace, and thus whitespace 1032 * at the start of the following chunk (if any) should be completely 1033 * suppressed.</dd> 1034 * </dd> 1035 * </dl> 1036 * @exception org.xml.sax.SAXException may be thrown by handler's 1037 * characters() method. 1038 */ 1039 public int sendNormalizedSAXcharacters( 1040 org.xml.sax.ContentHandler ch, int start, int length) 1041 throws org.xml.sax.SAXException 1042 { 1043 // This call always starts at the beginning of the 1044 // string being written out, either because it was called directly or 1045 // because it was an m_innerFSB recursion. This is important since 1046 // it gives us a well-known initial state for this flag: 1047 int stateForNextChunk=SUPPRESS_LEADING_WS; 1048 1049 int stop = start + length; 1050 int startChunk = start >>> m_chunkBits; 1051 int startColumn = start & m_chunkMask; 1052 int stopChunk = stop >>> m_chunkBits; 1053 int stopColumn = stop & m_chunkMask; 1054 1055 for (int i = startChunk; i < stopChunk; ++i) 1056 { 1057 if (i == 0 && m_innerFSB != null) 1058 stateForNextChunk= 1059 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, 1060 m_chunkSize - startColumn); 1061 else 1062 stateForNextChunk= 1063 sendNormalizedSAXcharacters(m_array[i], startColumn, 1064 m_chunkSize - startColumn, 1065 ch,stateForNextChunk); 1066 1067 startColumn = 0; // after first chunk 1068 } 1069 1070 // Last, or only, chunk 1071 if (stopChunk == 0 && m_innerFSB != null) 1072 stateForNextChunk= // %REVIEW% Is this update really needed? 1073 m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn); 1074 else if (stopColumn > startColumn) 1075 { 1076 stateForNextChunk= // %REVIEW% Is this update really needed? 1077 sendNormalizedSAXcharacters(m_array[stopChunk], 1078 startColumn, stopColumn - startColumn, 1079 ch, stateForNextChunk | SUPPRESS_TRAILING_WS); 1080 } 1081 return stateForNextChunk; 1082 } 1083 1084 static final char[] SINGLE_SPACE = {' '}; 1085 1086 /** 1087 * Internal method to directly normalize and dispatch the character array. 1088 * This version is aware of the fact that it may be called several times 1089 * in succession if the data is made up of multiple "chunks", and thus 1090 * must actively manage the handling of leading and trailing whitespace. 1091 * 1092 * Note: The recursion is due to the possible recursion of inner FSBs. 1093 * 1094 * @param ch The characters from the XML document. 1095 * @param start The start position in the array. 1096 * @param length The number of characters to read from the array. 1097 * @param handler SAX ContentHandler object to receive the event. 1098 * @param edgeTreatmentFlags How leading/trailing spaces should be handled. 1099 * This is a bitfield contining two flags, bitwise-ORed together: 1100 * <dl> 1101 * <dt>SUPPRESS_LEADING_WS</dt> 1102 * <dd>When false, causes leading whitespace to be converted to a single 1103 * space; when true, causes it to be discarded entirely. 1104 * Should be set TRUE for the first chunk, and (in multi-chunk output) 1105 * whenever the previous chunk ended in retained whitespace.</dd> 1106 * <dt>SUPPRESS_TRAILING_WS</dt> 1107 * <dd>When false, causes trailing whitespace to be converted to a single 1108 * space; when true, causes it to be discarded entirely. 1109 * Should be set TRUE for the last or only chunk. 1110 * </dd> 1111 * </dl> 1112 * @return normalization status, as in the edgeTreatmentFlags parameter: 1113 * <dl> 1114 * <dt>0</dt> 1115 * <dd>if this output did not end in retained whitespace, and thus whitespace 1116 * at the start of the following chunk (if any) should be converted to a 1117 * single space. 1118 * <dt>SUPPRESS_LEADING_WS</dt> 1119 * <dd>if this output ended in retained whitespace, and thus whitespace 1120 * at the start of the following chunk (if any) should be completely 1121 * suppressed.</dd> 1122 * </dd> 1123 * </dl> 1124 * 1125 * 1126 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1127 * wrapping another exception. 1128 */ 1129 static int sendNormalizedSAXcharacters(char ch[], 1130 int start, int length, 1131 org.xml.sax.ContentHandler handler, 1132 int edgeTreatmentFlags) 1133 throws org.xml.sax.SAXException 1134 { 1135 boolean processingLeadingWhitespace = 1136 ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0); 1137 boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0); 1138 boolean suppressTrailingWhitespace = 1139 ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0); 1140 int currPos = start; 1141 int limit = start+length; 1142 1143 // Strip any leading spaces first, if required 1144 if (processingLeadingWhitespace) { 1145 for (; currPos < limit 1146 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1147 currPos++) { } 1148 1149 // If we've only encountered leading spaces, the 1150 // current state remains unchanged 1151 if (currPos == limit) { 1152 return edgeTreatmentFlags; 1153 } 1154 } 1155 1156 // If we get here, there are no more leading spaces to strip 1157 while (currPos < limit) { 1158 int startNonWhitespace = currPos; 1159 1160 // Grab a chunk of non-whitespace characters 1161 for (; currPos < limit 1162 && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1163 currPos++) { } 1164 1165 // Non-whitespace seen - emit them, along with a single 1166 // space for any preceding whitespace characters 1167 if (startNonWhitespace != currPos) { 1168 if (seenWhitespace) { 1169 handler.characters(SINGLE_SPACE, 0, 1); 1170 seenWhitespace = false; 1171 } 1172 handler.characters(ch, startNonWhitespace, 1173 currPos - startNonWhitespace); 1174 } 1175 1176 int startWhitespace = currPos; 1177 1178 // Consume any whitespace characters 1179 for (; currPos < limit 1180 && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]); 1181 currPos++) { } 1182 1183 if (startWhitespace != currPos) { 1184 seenWhitespace = true; 1185 } 1186 } 1187 1188 return (seenWhitespace ? CARRY_WS : 0) 1189 | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS); 1190 } 1191 1192 /** 1193 * Directly normalize and dispatch the character array. 1194 * 1195 * @param ch The characters from the XML document. 1196 * @param start The start position in the array. 1197 * @param length The number of characters to read from the array. 1198 * @param handler SAX ContentHandler object to receive the event. 1199 * @exception org.xml.sax.SAXException Any SAX exception, possibly 1200 * wrapping another exception. 1201 */ 1202 public static void sendNormalizedSAXcharacters(char ch[], 1203 int start, int length, 1204 org.xml.sax.ContentHandler handler) 1205 throws org.xml.sax.SAXException 1206 { 1207 sendNormalizedSAXcharacters(ch, start, length, 1208 handler, SUPPRESS_BOTH); 1209 } 1210 1211 /** 1212 * Sends the specified range of characters as sax Comment. 1213 * <p> 1214 * Note that, unlike sendSAXcharacters, this has to be done as a single 1215 * call to LexicalHandler#comment. 1216 * 1217 * @param ch SAX LexicalHandler object to receive the event. 1218 * @param start Offset of first character in the range. 1219 * @param length Number of characters to send. 1220 * @exception org.xml.sax.SAXException may be thrown by handler's 1221 * characters() method. 1222 */ 1223 public void sendSAXComment( 1224 org.xml.sax.ext.LexicalHandler ch, int start, int length) 1225 throws org.xml.sax.SAXException 1226 { 1227 1228 // %OPT% Do it this way for now... 1229 String comment = getString(start, length); 1230 ch.comment(comment.toCharArray(), 0, length); 1231 } 1232 1233 /** 1234 * Copies characters from this string into the destination character 1235 * array. 1236 * 1237 * @param srcBegin index of the first character in the string 1238 * to copy. 1239 * @param srcEnd index after the last character in the string 1240 * to copy. 1241 * @param dst the destination array. 1242 * @param dstBegin the start offset in the destination array. 1243 * @exception IndexOutOfBoundsException If any of the following 1244 * is true: 1245 * <ul><li><code>srcBegin</code> is negative. 1246 * <li><code>srcBegin</code> is greater than <code>srcEnd</code> 1247 * <li><code>srcEnd</code> is greater than the length of this 1248 * string 1249 * <li><code>dstBegin</code> is negative 1250 * <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than 1251 * <code>dst.length</code></ul> 1252 * @exception NullPointerException if <code>dst</code> is <code>null</code> 1253 */ 1254 private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin) 1255 { 1256 // %TBD% Joe needs to write this function. Make public when implemented. 1257 } 1258 1259 /** 1260 * Encapsulation c'tor. After this is called, the source FastStringBuffer 1261 * will be reset to use the new object as its m_innerFSB, and will have 1262 * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED 1263 * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits) 1264 * 1265 * NEEDSDOC @param source 1266 */ 1267 private FastStringBuffer(FastStringBuffer source) 1268 { 1269 1270 // Copy existing information into new encapsulation 1271 m_chunkBits = source.m_chunkBits; 1272 m_maxChunkBits = source.m_maxChunkBits; 1273 m_rebundleBits = source.m_rebundleBits; 1274 m_chunkSize = source.m_chunkSize; 1275 m_chunkMask = source.m_chunkMask; 1276 m_array = source.m_array; 1277 m_innerFSB = source.m_innerFSB; 1278 1279 // These have to be adjusted because we're calling just at the time 1280 // when we would be about to allocate another chunk 1281 m_lastChunk = source.m_lastChunk - 1; 1282 m_firstFree = source.m_chunkSize; 1283 1284 // Establish capsule as the Inner FSB, reset chunk sizes/addressing 1285 source.m_array = new char[16][]; 1286 source.m_innerFSB = this; 1287 1288 // Since we encapsulated just as we were about to append another 1289 // chunk, return ready to create the chunk after the innerFSB 1290 // -- 1, not 0. 1291 source.m_lastChunk = 1; 1292 source.m_firstFree = 0; 1293 source.m_chunkBits += m_rebundleBits; 1294 source.m_chunkSize = 1 << (source.m_chunkBits); 1295 source.m_chunkMask = source.m_chunkSize - 1; 1296 } 1297} 1298