1/*
2 * reserved comment block
3 * DO NOT REMOVE OR ALTER!
4 */
5/*
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements.  See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License.  You may obtain a copy of the License at
12 *
13 *      http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22package com.sun.org.apache.xml.internal.utils;
23
24/**
25 * Bare-bones, unsafe, fast string buffer. No thread-safety, no
26 * parameter range checking, exposed fields. Note that in typical
27 * applications, thread-safety of a StringBuffer is a somewhat
28 * dubious concept in any case.
29 * <p>
30 * Note that Stree and DTM used a single FastStringBuffer as a string pool,
31 * by recording start and length indices within this single buffer. This
32 * minimizes heap overhead, but of course requires more work when retrieving
33 * the data.
34 * <p>
35 * FastStringBuffer operates as a "chunked buffer". Doing so
36 * reduces the need to recopy existing information when an append
37 * exceeds the space available; we just allocate another chunk and
38 * flow across to it. (The array of chunks may need to grow,
39 * admittedly, but that's a much smaller object.) Some excess
40 * recopying may arise when we extract Strings which cross chunk
41 * boundaries; larger chunks make that less frequent.
42 * <p>
43 * The size values are parameterized, to allow tuning this code. In
44 * theory, Result Tree Fragments might want to be tuned differently
45 * from the main document's text.
46 * <p>
47 * %REVIEW% An experiment in self-tuning is
48 * included in the code (using nested FastStringBuffers to achieve
49 * variation in chunk sizes), but this implementation has proven to
50 * be problematic when data may be being copied from the FSB into itself.
51 * We should either re-architect that to make this safe (if possible)
52 * or remove that code and clean up for performance/maintainability reasons.
53 * <p>
54 */
55public class FastStringBuffer
56{
57  // If nonzero, forces the inial chunk size.
58  /**/static final int DEBUG_FORCE_INIT_BITS=0;
59
60        // %BUG% %REVIEW% *****PROBLEM SUSPECTED: If data from an FSB is being copied
61        // back into the same FSB (variable set from previous variable, for example)
62        // and blocksize changes in mid-copy... there's risk of severe malfunction in
63        // the read process, due to how the resizing code re-jiggers storage. Arggh.
64        // If we want to retain the variable-size-block feature, we need to reconsider
65        // that issue. For now, I have forced us into fixed-size mode.
66    static final boolean DEBUG_FORCE_FIXED_CHUNKSIZE=true;
67
68        /** Manifest constant: Suppress leading whitespace.
69         * This should be used when normalize-to-SAX is called for the first chunk of a
70         * multi-chunk output, or one following unsuppressed whitespace in a previous
71         * chunk.
72         * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
73         */
74        public static final int SUPPRESS_LEADING_WS=0x01;
75
76        /** Manifest constant: Suppress trailing whitespace.
77         * This should be used when normalize-to-SAX is called for the last chunk of a
78         * multi-chunk output; it may have to be or'ed with SUPPRESS_LEADING_WS.
79         */
80        public static final int SUPPRESS_TRAILING_WS=0x02;
81
82        /** Manifest constant: Suppress both leading and trailing whitespace.
83         * This should be used when normalize-to-SAX is called for a complete string.
84         * (I'm not wild about the name of this one. Ideas welcome.)
85         * @see #sendNormalizedSAXcharacters(org.xml.sax.ContentHandler,int,int)
86         */
87        public static final int SUPPRESS_BOTH
88                = SUPPRESS_LEADING_WS | SUPPRESS_TRAILING_WS;
89
90        /** Manifest constant: Carry trailing whitespace of one chunk as leading
91         * whitespace of the next chunk. Used internally; I don't see any reason
92         * to make it public right now.
93         */
94        private static final int CARRY_WS=0x04;
95
96        /**
97   * Field m_chunkBits sets our chunking strategy, by saying how many
98   * bits of index can be used within a single chunk before flowing over
99   * to the next chunk. For example, if m_chunkbits is set to 15, each
100   * chunk can contain up to 2^15 (32K) characters
101   */
102  int m_chunkBits = 15;
103
104  /**
105   * Field m_maxChunkBits affects our chunk-growth strategy, by saying what
106   * the largest permissible chunk size is in this particular FastStringBuffer
107   * hierarchy.
108   */
109  int m_maxChunkBits = 15;
110
111  /**
112   * Field m_rechunkBits affects our chunk-growth strategy, by saying how
113   * many chunks should be allocated at one size before we encapsulate them
114   * into the first chunk of the next size up. For example, if m_rechunkBits
115   * is set to 3, then after 8 chunks at a given size we will rebundle
116   * them as the first element of a FastStringBuffer using a chunk size
117   * 8 times larger (chunkBits shifted left three bits).
118   */
119  int m_rebundleBits = 2;
120
121  /**
122   * Field m_chunkSize establishes the maximum size of one chunk of the array
123   * as 2**chunkbits characters.
124   * (Which may also be the minimum size if we aren't tuning for storage)
125   */
126  int m_chunkSize;  // =1<<(m_chunkBits-1);
127
128  /**
129   * Field m_chunkMask is m_chunkSize-1 -- in other words, m_chunkBits
130   * worth of low-order '1' bits, useful for shift-and-mask addressing
131   * within the chunks.
132   */
133  int m_chunkMask;  // =m_chunkSize-1;
134
135  /**
136   * Field m_array holds the string buffer's text contents, using an
137   * array-of-arrays. Note that this array, and the arrays it contains, may be
138   * reallocated when necessary in order to allow the buffer to grow;
139   * references to them should be considered to be invalidated after any
140   * append. However, the only time these arrays are directly exposed
141   * is in the sendSAXcharacters call.
142   */
143  char[][] m_array;
144
145  /**
146   * Field m_lastChunk is an index into m_array[], pointing to the last
147   * chunk of the Chunked Array currently in use. Note that additional
148   * chunks may actually be allocated, eg if the FastStringBuffer had
149   * previously been truncated or if someone issued an ensureSpace request.
150   * <p>
151   * The insertion point for append operations is addressed by the combination
152   * of m_lastChunk and m_firstFree.
153   */
154  int m_lastChunk = 0;
155
156  /**
157   * Field m_firstFree is an index into m_array[m_lastChunk][], pointing to
158   * the first character in the Chunked Array which is not part of the
159   * FastStringBuffer's current content. Since m_array[][] is zero-based,
160   * the length of that content can be calculated as
161   * (m_lastChunk<<m_chunkBits) + m_firstFree
162   */
163  int m_firstFree = 0;
164
165  /**
166   * Field m_innerFSB, when non-null, is a FastStringBuffer whose total
167   * length equals m_chunkSize, and which replaces m_array[0]. This allows
168   * building a hierarchy of FastStringBuffers, where early appends use
169   * a smaller chunkSize (for less wasted memory overhead) but later
170   * ones use a larger chunkSize (for less heap activity overhead).
171   */
172  FastStringBuffer m_innerFSB = null;
173
174  /**
175   * Construct a FastStringBuffer, with allocation policy as per parameters.
176   * <p>
177   * For coding convenience, I've expressed both allocation sizes in terms of
178   * a number of bits. That's needed for the final size of a chunk,
179   * to permit fast and efficient shift-and-mask addressing. It's less critical
180   * for the inital size, and may be reconsidered.
181   * <p>
182   * An alternative would be to accept integer sizes and round to powers of two;
183   * that really doesn't seem to buy us much, if anything.
184   *
185   * @param initChunkBits Length in characters of the initial allocation
186   * of a chunk, expressed in log-base-2. (That is, 10 means allocate 1024
187   * characters.) Later chunks will use larger allocation units, to trade off
188   * allocation speed of large document against storage efficiency of small
189   * ones.
190   * @param maxChunkBits Number of character-offset bits that should be used for
191   * addressing within a chunk. Maximum length of a chunk is 2^chunkBits
192   * characters.
193   * @param rebundleBits Number of character-offset bits that addressing should
194   * advance before we attempt to take a step from initChunkBits to maxChunkBits
195   */
196  public FastStringBuffer(int initChunkBits, int maxChunkBits,
197                          int rebundleBits)
198  {
199    if(DEBUG_FORCE_INIT_BITS!=0) initChunkBits=DEBUG_FORCE_INIT_BITS;
200
201    // %REVIEW%
202    // Should this force to larger value, or smaller? Smaller less efficient, but if
203    // someone requested variable mode it's because they care about storage space.
204    // On the other hand, given the other changes I'm making, odds are that we should
205    // adopt the larger size. Dither, dither, dither... This is just stopgap workaround
206    // anyway; we need a permanant solution.
207    //
208    if(DEBUG_FORCE_FIXED_CHUNKSIZE) maxChunkBits=initChunkBits;
209    //if(DEBUG_FORCE_FIXED_CHUNKSIZE) initChunkBits=maxChunkBits;
210
211    m_array = new char[16][];
212
213    // Don't bite off more than we're prepared to swallow!
214    if (initChunkBits > maxChunkBits)
215      initChunkBits = maxChunkBits;
216
217    m_chunkBits = initChunkBits;
218    m_maxChunkBits = maxChunkBits;
219    m_rebundleBits = rebundleBits;
220    m_chunkSize = 1 << (initChunkBits);
221    m_chunkMask = m_chunkSize - 1;
222    m_array[0] = new char[m_chunkSize];
223  }
224
225  /**
226   * Construct a FastStringBuffer, using a default rebundleBits value.
227   *
228   * NEEDSDOC @param initChunkBits
229   * NEEDSDOC @param maxChunkBits
230   */
231  public FastStringBuffer(int initChunkBits, int maxChunkBits)
232  {
233    this(initChunkBits, maxChunkBits, 2);
234  }
235
236  /**
237   * Construct a FastStringBuffer, using default maxChunkBits and
238   * rebundleBits values.
239   * <p>
240   * ISSUE: Should this call assert initial size, or fixed size?
241   * Now configured as initial, with a default for fixed.
242   *
243   * NEEDSDOC @param initChunkBits
244   */
245  public FastStringBuffer(int initChunkBits)
246  {
247    this(initChunkBits, 15, 2);
248  }
249
250  /**
251   * Construct a FastStringBuffer, using a default allocation policy.
252   */
253  public FastStringBuffer()
254  {
255
256    // 10 bits is 1K. 15 bits is 32K. Remember that these are character
257    // counts, so actual memory allocation unit is doubled for UTF-16 chars.
258    //
259    // For reference: In the original FastStringBuffer, we simply
260    // overallocated by blocksize (default 1KB) on each buffer-growth.
261    this(10, 15, 2);
262  }
263
264  /**
265   * Get the length of the list. Synonym for length().
266   *
267   * @return the number of characters in the FastStringBuffer's content.
268   */
269  public final int size()
270  {
271    return (m_lastChunk << m_chunkBits) + m_firstFree;
272  }
273
274  /**
275   * Get the length of the list. Synonym for size().
276   *
277   * @return the number of characters in the FastStringBuffer's content.
278   */
279  public final int length()
280  {
281    return (m_lastChunk << m_chunkBits) + m_firstFree;
282  }
283
284  /**
285   * Discard the content of the FastStringBuffer, and most of the memory
286   * that was allocated by it, restoring the initial state. Note that this
287   * may eventually be different from setLength(0), which see.
288   */
289  public final void reset()
290  {
291
292    m_lastChunk = 0;
293    m_firstFree = 0;
294
295    // Recover the original chunk size
296    FastStringBuffer innermost = this;
297
298    while (innermost.m_innerFSB != null)
299    {
300      innermost = innermost.m_innerFSB;
301    }
302
303    m_chunkBits = innermost.m_chunkBits;
304    m_chunkSize = innermost.m_chunkSize;
305    m_chunkMask = innermost.m_chunkMask;
306
307    // Discard the hierarchy
308    m_innerFSB = null;
309    m_array = new char[16][0];
310    m_array[0] = new char[m_chunkSize];
311  }
312
313  /**
314   * Directly set how much of the FastStringBuffer's storage is to be
315   * considered part of its content. This is a fast but hazardous
316   * operation. It is not protected against negative values, or values
317   * greater than the amount of storage currently available... and even
318   * if additional storage does exist, its contents are unpredictable.
319   * The only safe use for our setLength() is to truncate the FastStringBuffer
320   * to a shorter string.
321   *
322   * @param l New length. If l<0 or l>=getLength(), this operation will
323   * not report an error but future operations will almost certainly fail.
324   */
325  public final void setLength(int l)
326  {
327    m_lastChunk = l >>> m_chunkBits;
328
329    if (m_lastChunk == 0 && m_innerFSB != null)
330    {
331      // Replace this FSB with the appropriate inner FSB, truncated
332      m_innerFSB.setLength(l, this);
333    }
334    else
335    {
336      m_firstFree = l & m_chunkMask;
337
338          // There's an edge case if l is an exact multiple of m_chunkBits, which risks leaving
339          // us pointing at the start of a chunk which has not yet been allocated. Rather than
340          // pay the cost of dealing with that in the append loops (more scattered and more
341          // inner-loop), we correct it here by moving to the safe side of that
342          // line -- as we would have left the indexes had we appended up to that point.
343      if(m_firstFree==0 && m_lastChunk>0)
344      {
345        --m_lastChunk;
346        m_firstFree=m_chunkSize;
347      }
348    }
349  }
350
351  /**
352   * Subroutine for the public setLength() method. Deals with the fact
353   * that truncation may require restoring one of the innerFSBs
354   *
355   * NEEDSDOC @param l
356   * NEEDSDOC @param rootFSB
357   */
358  private final void setLength(int l, FastStringBuffer rootFSB)
359  {
360
361    m_lastChunk = l >>> m_chunkBits;
362
363    if (m_lastChunk == 0 && m_innerFSB != null)
364    {
365      m_innerFSB.setLength(l, rootFSB);
366    }
367    else
368    {
369
370      // Undo encapsulation -- pop the innerFSB data back up to root.
371      // Inefficient, but attempts to keep the code simple.
372      rootFSB.m_chunkBits = m_chunkBits;
373      rootFSB.m_maxChunkBits = m_maxChunkBits;
374      rootFSB.m_rebundleBits = m_rebundleBits;
375      rootFSB.m_chunkSize = m_chunkSize;
376      rootFSB.m_chunkMask = m_chunkMask;
377      rootFSB.m_array = m_array;
378      rootFSB.m_innerFSB = m_innerFSB;
379      rootFSB.m_lastChunk = m_lastChunk;
380
381      // Finally, truncate this sucker.
382      rootFSB.m_firstFree = l & m_chunkMask;
383    }
384  }
385
386  /**
387   * Note that this operation has been somewhat deoptimized by the shift to a
388   * chunked array, as there is no factory method to produce a String object
389   * directly from an array of arrays and hence a double copy is needed.
390   * By using ensureCapacity we hope to minimize the heap overhead of building
391   * the intermediate StringBuffer.
392   * <p>
393   * (It really is a pity that Java didn't design String as a final subclass
394   * of MutableString, rather than having StringBuffer be a separate hierarchy.
395   * We'd avoid a <strong>lot</strong> of double-buffering.)
396   *
397   * @return the contents of the FastStringBuffer as a standard Java string.
398   */
399  public final String toString()
400  {
401
402    int length = (m_lastChunk << m_chunkBits) + m_firstFree;
403
404    return getString(new StringBuffer(length), 0, 0, length).toString();
405  }
406
407  /**
408   * Append a single character onto the FastStringBuffer, growing the
409   * storage if necessary.
410   * <p>
411   * NOTE THAT after calling append(), previously obtained
412   * references to m_array[][] may no longer be valid....
413   * though in fact they should be in this instance.
414   *
415   * @param value character to be appended.
416   */
417  public final void append(char value)
418  {
419
420    char[] chunk;
421
422    // We may have preallocated chunks. If so, all but last should
423    // be at full size.
424    boolean lastchunk = (m_lastChunk + 1 == m_array.length);
425
426    if (m_firstFree < m_chunkSize)  // Simplified test single-character-fits
427      chunk = m_array[m_lastChunk];
428    else
429    {
430
431      // Extend array?
432      int i = m_array.length;
433
434      if (m_lastChunk + 1 == i)
435      {
436        char[][] newarray = new char[i + 16][];
437
438        System.arraycopy(m_array, 0, newarray, 0, i);
439
440        m_array = newarray;
441      }
442
443      // Advance one chunk
444      chunk = m_array[++m_lastChunk];
445
446      if (chunk == null)
447      {
448
449        // Hierarchical encapsulation
450        if (m_lastChunk == 1 << m_rebundleBits
451                && m_chunkBits < m_maxChunkBits)
452        {
453
454          // Should do all the work of both encapsulating
455          // existing data and establishing new sizes/offsets
456          m_innerFSB = new FastStringBuffer(this);
457        }
458
459        // Add a chunk.
460        chunk = m_array[m_lastChunk] = new char[m_chunkSize];
461      }
462
463      m_firstFree = 0;
464    }
465
466    // Space exists in the chunk. Append the character.
467    chunk[m_firstFree++] = value;
468  }
469
470  /**
471   * Append the contents of a String onto the FastStringBuffer,
472   * growing the storage if necessary.
473   * <p>
474   * NOTE THAT after calling append(), previously obtained
475   * references to m_array[] may no longer be valid.
476   *
477   * @param value String whose contents are to be appended.
478   */
479  public final void append(String value)
480  {
481
482    if (value == null)
483      return;
484    int strlen = value.length();
485
486    if (0 == strlen)
487      return;
488
489    int copyfrom = 0;
490    char[] chunk = m_array[m_lastChunk];
491    int available = m_chunkSize - m_firstFree;
492
493    // Repeat while data remains to be copied
494    while (strlen > 0)
495    {
496
497      // Copy what fits
498      if (available > strlen)
499        available = strlen;
500
501      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
502                     m_firstFree);
503
504      strlen -= available;
505      copyfrom += available;
506
507      // If there's more left, allocate another chunk and continue
508      if (strlen > 0)
509      {
510
511        // Extend array?
512        int i = m_array.length;
513
514        if (m_lastChunk + 1 == i)
515        {
516          char[][] newarray = new char[i + 16][];
517
518          System.arraycopy(m_array, 0, newarray, 0, i);
519
520          m_array = newarray;
521        }
522
523        // Advance one chunk
524        chunk = m_array[++m_lastChunk];
525
526        if (chunk == null)
527        {
528
529          // Hierarchical encapsulation
530          if (m_lastChunk == 1 << m_rebundleBits
531                  && m_chunkBits < m_maxChunkBits)
532          {
533
534            // Should do all the work of both encapsulating
535            // existing data and establishing new sizes/offsets
536            m_innerFSB = new FastStringBuffer(this);
537          }
538
539          // Add a chunk.
540          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
541        }
542
543        available = m_chunkSize;
544        m_firstFree = 0;
545      }
546    }
547
548    // Adjust the insert point in the last chunk, when we've reached it.
549    m_firstFree += available;
550  }
551
552  /**
553   * Append the contents of a StringBuffer onto the FastStringBuffer,
554   * growing the storage if necessary.
555   * <p>
556   * NOTE THAT after calling append(), previously obtained
557   * references to m_array[] may no longer be valid.
558   *
559   * @param value StringBuffer whose contents are to be appended.
560   */
561  public final void append(StringBuffer value)
562  {
563
564    if (value == null)
565      return;
566    int strlen = value.length();
567
568    if (0 == strlen)
569      return;
570
571    int copyfrom = 0;
572    char[] chunk = m_array[m_lastChunk];
573    int available = m_chunkSize - m_firstFree;
574
575    // Repeat while data remains to be copied
576    while (strlen > 0)
577    {
578
579      // Copy what fits
580      if (available > strlen)
581        available = strlen;
582
583      value.getChars(copyfrom, copyfrom + available, m_array[m_lastChunk],
584                     m_firstFree);
585
586      strlen -= available;
587      copyfrom += available;
588
589      // If there's more left, allocate another chunk and continue
590      if (strlen > 0)
591      {
592
593        // Extend array?
594        int i = m_array.length;
595
596        if (m_lastChunk + 1 == i)
597        {
598          char[][] newarray = new char[i + 16][];
599
600          System.arraycopy(m_array, 0, newarray, 0, i);
601
602          m_array = newarray;
603        }
604
605        // Advance one chunk
606        chunk = m_array[++m_lastChunk];
607
608        if (chunk == null)
609        {
610
611          // Hierarchical encapsulation
612          if (m_lastChunk == 1 << m_rebundleBits
613                  && m_chunkBits < m_maxChunkBits)
614          {
615
616            // Should do all the work of both encapsulating
617            // existing data and establishing new sizes/offsets
618            m_innerFSB = new FastStringBuffer(this);
619          }
620
621          // Add a chunk.
622          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
623        }
624
625        available = m_chunkSize;
626        m_firstFree = 0;
627      }
628    }
629
630    // Adjust the insert point in the last chunk, when we've reached it.
631    m_firstFree += available;
632  }
633
634  /**
635   * Append part of the contents of a Character Array onto the
636   * FastStringBuffer,  growing the storage if necessary.
637   * <p>
638   * NOTE THAT after calling append(), previously obtained
639   * references to m_array[] may no longer be valid.
640   *
641   * @param chars character array from which data is to be copied
642   * @param start offset in chars of first character to be copied,
643   * zero-based.
644   * @param length number of characters to be copied
645   */
646  public final void append(char[] chars, int start, int length)
647  {
648
649    int strlen = length;
650
651    if (0 == strlen)
652      return;
653
654    int copyfrom = start;
655    char[] chunk = m_array[m_lastChunk];
656    int available = m_chunkSize - m_firstFree;
657
658    // Repeat while data remains to be copied
659    while (strlen > 0)
660    {
661
662      // Copy what fits
663      if (available > strlen)
664        available = strlen;
665
666      System.arraycopy(chars, copyfrom, m_array[m_lastChunk], m_firstFree,
667                       available);
668
669      strlen -= available;
670      copyfrom += available;
671
672      // If there's more left, allocate another chunk and continue
673      if (strlen > 0)
674      {
675
676        // Extend array?
677        int i = m_array.length;
678
679        if (m_lastChunk + 1 == i)
680        {
681          char[][] newarray = new char[i + 16][];
682
683          System.arraycopy(m_array, 0, newarray, 0, i);
684
685          m_array = newarray;
686        }
687
688        // Advance one chunk
689        chunk = m_array[++m_lastChunk];
690
691        if (chunk == null)
692        {
693
694          // Hierarchical encapsulation
695          if (m_lastChunk == 1 << m_rebundleBits
696                  && m_chunkBits < m_maxChunkBits)
697          {
698
699            // Should do all the work of both encapsulating
700            // existing data and establishing new sizes/offsets
701            m_innerFSB = new FastStringBuffer(this);
702          }
703
704          // Add a chunk.
705          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
706        }
707
708        available = m_chunkSize;
709        m_firstFree = 0;
710      }
711    }
712
713    // Adjust the insert point in the last chunk, when we've reached it.
714    m_firstFree += available;
715  }
716
717  /**
718   * Append the contents of another FastStringBuffer onto
719   * this FastStringBuffer, growing the storage if necessary.
720   * <p>
721   * NOTE THAT after calling append(), previously obtained
722   * references to m_array[] may no longer be valid.
723   *
724   * @param value FastStringBuffer whose contents are
725   * to be appended.
726   */
727  public final void append(FastStringBuffer value)
728  {
729
730    // Complicating factor here is that the two buffers may use
731    // different chunk sizes, and even if they're the same we're
732    // probably on a different alignment due to previously appended
733    // data. We have to work through the source in bite-sized chunks.
734    if (value == null)
735      return;
736    int strlen = value.length();
737
738    if (0 == strlen)
739      return;
740
741    int copyfrom = 0;
742    char[] chunk = m_array[m_lastChunk];
743    int available = m_chunkSize - m_firstFree;
744
745    // Repeat while data remains to be copied
746    while (strlen > 0)
747    {
748
749      // Copy what fits
750      if (available > strlen)
751        available = strlen;
752
753      int sourcechunk = (copyfrom + value.m_chunkSize - 1)
754                        >>> value.m_chunkBits;
755      int sourcecolumn = copyfrom & value.m_chunkMask;
756      int runlength = value.m_chunkSize - sourcecolumn;
757
758      if (runlength > available)
759        runlength = available;
760
761      System.arraycopy(value.m_array[sourcechunk], sourcecolumn,
762                       m_array[m_lastChunk], m_firstFree, runlength);
763
764      if (runlength != available)
765        System.arraycopy(value.m_array[sourcechunk + 1], 0,
766                         m_array[m_lastChunk], m_firstFree + runlength,
767                         available - runlength);
768
769      strlen -= available;
770      copyfrom += available;
771
772      // If there's more left, allocate another chunk and continue
773      if (strlen > 0)
774      {
775
776        // Extend array?
777        int i = m_array.length;
778
779        if (m_lastChunk + 1 == i)
780        {
781          char[][] newarray = new char[i + 16][];
782
783          System.arraycopy(m_array, 0, newarray, 0, i);
784
785          m_array = newarray;
786        }
787
788        // Advance one chunk
789        chunk = m_array[++m_lastChunk];
790
791        if (chunk == null)
792        {
793
794          // Hierarchical encapsulation
795          if (m_lastChunk == 1 << m_rebundleBits
796                  && m_chunkBits < m_maxChunkBits)
797          {
798
799            // Should do all the work of both encapsulating
800            // existing data and establishing new sizes/offsets
801            m_innerFSB = new FastStringBuffer(this);
802          }
803
804          // Add a chunk.
805          chunk = m_array[m_lastChunk] = new char[m_chunkSize];
806        }
807
808        available = m_chunkSize;
809        m_firstFree = 0;
810      }
811    }
812
813    // Adjust the insert point in the last chunk, when we've reached it.
814    m_firstFree += available;
815  }
816
817  /**
818   * @return true if the specified range of characters are all whitespace,
819   * as defined by XMLCharacterRecognizer.
820   * <p>
821   * CURRENTLY DOES NOT CHECK FOR OUT-OF-RANGE.
822   *
823   * @param start Offset of first character in the range.
824   * @param length Number of characters to send.
825   */
826  public boolean isWhitespace(int start, int length)
827  {
828
829    int sourcechunk = start >>> m_chunkBits;
830    int sourcecolumn = start & m_chunkMask;
831    int available = m_chunkSize - sourcecolumn;
832    boolean chunkOK;
833
834    while (length > 0)
835    {
836      int runlength = (length <= available) ? length : available;
837
838      if (sourcechunk == 0 && m_innerFSB != null)
839        chunkOK = m_innerFSB.isWhitespace(sourcecolumn, runlength);
840      else
841        chunkOK = com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer.isWhiteSpace(
842          m_array[sourcechunk], sourcecolumn, runlength);
843
844      if (!chunkOK)
845        return false;
846
847      length -= runlength;
848
849      ++sourcechunk;
850
851      sourcecolumn = 0;
852      available = m_chunkSize;
853    }
854
855    return true;
856  }
857
858  /**
859   * @param start Offset of first character in the range.
860   * @param length Number of characters to send.
861   * @return a new String object initialized from the specified range of
862   * characters.
863   */
864  public String getString(int start, int length)
865  {
866    int startColumn = start & m_chunkMask;
867    int startChunk = start >>> m_chunkBits;
868    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
869      return getOneChunkString(startChunk, startColumn, length);
870    }
871    return getString(new StringBuffer(length), startChunk, startColumn,
872                     length).toString();
873  }
874
875  protected String getOneChunkString(int startChunk, int startColumn,
876                                     int length) {
877    return new String(m_array[startChunk], startColumn, length);
878  }
879
880  /**
881   * @param sb StringBuffer to be appended to
882   * @param start Offset of first character in the range.
883   * @param length Number of characters to send.
884   * @return sb with the requested text appended to it
885   */
886  StringBuffer getString(StringBuffer sb, int start, int length)
887  {
888    return getString(sb, start >>> m_chunkBits, start & m_chunkMask, length);
889  }
890
891  /**
892   * Internal support for toString() and getString().
893   * PLEASE NOTE SIGNATURE CHANGE from earlier versions; it now appends into
894   * and returns a StringBuffer supplied by the caller. This simplifies
895   * m_innerFSB support.
896   * <p>
897   * Note that this operation has been somewhat deoptimized by the shift to a
898   * chunked array, as there is no factory method to produce a String object
899   * directly from an array of arrays and hence a double copy is needed.
900   * By presetting length we hope to minimize the heap overhead of building
901   * the intermediate StringBuffer.
902   * <p>
903   * (It really is a pity that Java didn't design String as a final subclass
904   * of MutableString, rather than having StringBuffer be a separate hierarchy.
905   * We'd avoid a <strong>lot</strong> of double-buffering.)
906   *
907   *
908   * @param sb
909   * @param startChunk
910   * @param startColumn
911   * @param length
912   *
913   * @return the contents of the FastStringBuffer as a standard Java string.
914   */
915  StringBuffer getString(StringBuffer sb, int startChunk, int startColumn,
916                         int length)
917  {
918
919    int stop = (startChunk << m_chunkBits) + startColumn + length;
920    int stopChunk = stop >>> m_chunkBits;
921    int stopColumn = stop & m_chunkMask;
922
923    // Factored out
924    //StringBuffer sb=new StringBuffer(length);
925    for (int i = startChunk; i < stopChunk; ++i)
926    {
927      if (i == 0 && m_innerFSB != null)
928        m_innerFSB.getString(sb, startColumn, m_chunkSize - startColumn);
929      else
930        sb.append(m_array[i], startColumn, m_chunkSize - startColumn);
931
932      startColumn = 0;  // after first chunk
933    }
934
935    if (stopChunk == 0 && m_innerFSB != null)
936      m_innerFSB.getString(sb, startColumn, stopColumn - startColumn);
937    else if (stopColumn > startColumn)
938      sb.append(m_array[stopChunk], startColumn, stopColumn - startColumn);
939
940    return sb;
941  }
942
943  /**
944   * Get a single character from the string buffer.
945   *
946   *
947   * @param pos character position requested.
948   * @return A character from the requested position.
949   */
950  public char charAt(int pos)
951  {
952    int startChunk = pos >>> m_chunkBits;
953
954    if (startChunk == 0 && m_innerFSB != null)
955      return m_innerFSB.charAt(pos & m_chunkMask);
956    else
957      return m_array[startChunk][pos & m_chunkMask];
958  }
959
960  /**
961   * Sends the specified range of characters as one or more SAX characters()
962   * events.
963   * Note that the buffer reference passed to the ContentHandler may be
964   * invalidated if the FastStringBuffer is edited; it's the user's
965   * responsibility to manage access to the FastStringBuffer to prevent this
966   * problem from arising.
967   * <p>
968   * Note too that there is no promise that the output will be sent as a
969   * single call. As is always true in SAX, one logical string may be split
970   * across multiple blocks of memory and hence delivered as several
971   * successive events.
972   *
973   * @param ch SAX ContentHandler object to receive the event.
974   * @param start Offset of first character in the range.
975   * @param length Number of characters to send.
976   * @exception org.xml.sax.SAXException may be thrown by handler's
977   * characters() method.
978   */
979  public void sendSAXcharacters(
980          org.xml.sax.ContentHandler ch, int start, int length)
981            throws org.xml.sax.SAXException
982  {
983
984    int startChunk = start >>> m_chunkBits;
985    int startColumn = start & m_chunkMask;
986    if (startColumn + length < m_chunkMask && m_innerFSB == null) {
987        ch.characters(m_array[startChunk], startColumn, length);
988        return;
989    }
990
991    int stop = start + length;
992    int stopChunk = stop >>> m_chunkBits;
993    int stopColumn = stop & m_chunkMask;
994
995    for (int i = startChunk; i < stopChunk; ++i)
996    {
997      if (i == 0 && m_innerFSB != null)
998        m_innerFSB.sendSAXcharacters(ch, startColumn,
999                                     m_chunkSize - startColumn);
1000      else
1001        ch.characters(m_array[i], startColumn, m_chunkSize - startColumn);
1002
1003      startColumn = 0;  // after first chunk
1004    }
1005
1006    // Last, or only, chunk
1007    if (stopChunk == 0 && m_innerFSB != null)
1008      m_innerFSB.sendSAXcharacters(ch, startColumn, stopColumn - startColumn);
1009    else if (stopColumn > startColumn)
1010    {
1011      ch.characters(m_array[stopChunk], startColumn,
1012                    stopColumn - startColumn);
1013    }
1014  }
1015
1016  /**
1017   * Sends the specified range of characters as one or more SAX characters()
1018   * events, normalizing the characters according to XSLT rules.
1019   *
1020   * @param ch SAX ContentHandler object to receive the event.
1021   * @param start Offset of first character in the range.
1022   * @param length Number of characters to send.
1023   * @return normalization status to apply to next chunk (because we may
1024   * have been called recursively to process an inner FSB):
1025   * <dl>
1026   * <dt>0</dt>
1027   * <dd>if this output did not end in retained whitespace, and thus whitespace
1028   * at the start of the following chunk (if any) should be converted to a
1029   * single space.
1030   * <dt>SUPPRESS_LEADING_WS</dt>
1031   * <dd>if this output ended in retained whitespace, and thus whitespace
1032   * at the start of the following chunk (if any) should be completely
1033   * suppressed.</dd>
1034   * </dd>
1035   * </dl>
1036   * @exception org.xml.sax.SAXException may be thrown by handler's
1037   * characters() method.
1038   */
1039  public int sendNormalizedSAXcharacters(
1040          org.xml.sax.ContentHandler ch, int start, int length)
1041            throws org.xml.sax.SAXException
1042  {
1043        // This call always starts at the beginning of the
1044    // string being written out, either because it was called directly or
1045    // because it was an m_innerFSB recursion. This is important since
1046        // it gives us a well-known initial state for this flag:
1047        int stateForNextChunk=SUPPRESS_LEADING_WS;
1048
1049    int stop = start + length;
1050    int startChunk = start >>> m_chunkBits;
1051    int startColumn = start & m_chunkMask;
1052    int stopChunk = stop >>> m_chunkBits;
1053    int stopColumn = stop & m_chunkMask;
1054
1055    for (int i = startChunk; i < stopChunk; ++i)
1056    {
1057      if (i == 0 && m_innerFSB != null)
1058                                stateForNextChunk=
1059        m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn,
1060                                     m_chunkSize - startColumn);
1061      else
1062                                stateForNextChunk=
1063        sendNormalizedSAXcharacters(m_array[i], startColumn,
1064                                    m_chunkSize - startColumn,
1065                                                                                                                                                ch,stateForNextChunk);
1066
1067      startColumn = 0;  // after first chunk
1068    }
1069
1070    // Last, or only, chunk
1071    if (stopChunk == 0 && m_innerFSB != null)
1072                        stateForNextChunk= // %REVIEW% Is this update really needed?
1073      m_innerFSB.sendNormalizedSAXcharacters(ch, startColumn, stopColumn - startColumn);
1074    else if (stopColumn > startColumn)
1075    {
1076                        stateForNextChunk= // %REVIEW% Is this update really needed?
1077      sendNormalizedSAXcharacters(m_array[stopChunk],
1078                                                                                                                                        startColumn, stopColumn - startColumn,
1079                                                                                                                                        ch, stateForNextChunk | SUPPRESS_TRAILING_WS);
1080    }
1081                return stateForNextChunk;
1082  }
1083
1084  static final char[] SINGLE_SPACE = {' '};
1085
1086  /**
1087   * Internal method to directly normalize and dispatch the character array.
1088   * This version is aware of the fact that it may be called several times
1089   * in succession if the data is made up of multiple "chunks", and thus
1090   * must actively manage the handling of leading and trailing whitespace.
1091   *
1092   * Note: The recursion is due to the possible recursion of inner FSBs.
1093   *
1094   * @param ch The characters from the XML document.
1095   * @param start The start position in the array.
1096   * @param length The number of characters to read from the array.
1097   * @param handler SAX ContentHandler object to receive the event.
1098   * @param edgeTreatmentFlags How leading/trailing spaces should be handled.
1099   * This is a bitfield contining two flags, bitwise-ORed together:
1100   * <dl>
1101   * <dt>SUPPRESS_LEADING_WS</dt>
1102   * <dd>When false, causes leading whitespace to be converted to a single
1103   * space; when true, causes it to be discarded entirely.
1104   * Should be set TRUE for the first chunk, and (in multi-chunk output)
1105   * whenever the previous chunk ended in retained whitespace.</dd>
1106   * <dt>SUPPRESS_TRAILING_WS</dt>
1107   * <dd>When false, causes trailing whitespace to be converted to a single
1108   * space; when true, causes it to be discarded entirely.
1109   * Should be set TRUE for the last or only chunk.
1110   * </dd>
1111   * </dl>
1112   * @return normalization status, as in the edgeTreatmentFlags parameter:
1113   * <dl>
1114   * <dt>0</dt>
1115   * <dd>if this output did not end in retained whitespace, and thus whitespace
1116   * at the start of the following chunk (if any) should be converted to a
1117   * single space.
1118   * <dt>SUPPRESS_LEADING_WS</dt>
1119   * <dd>if this output ended in retained whitespace, and thus whitespace
1120   * at the start of the following chunk (if any) should be completely
1121   * suppressed.</dd>
1122   * </dd>
1123   * </dl>
1124   *
1125   *
1126   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1127   *            wrapping another exception.
1128   */
1129  static int sendNormalizedSAXcharacters(char ch[],
1130             int start, int length,
1131             org.xml.sax.ContentHandler handler,
1132                                                 int edgeTreatmentFlags)
1133          throws org.xml.sax.SAXException
1134  {
1135     boolean processingLeadingWhitespace =
1136                       ((edgeTreatmentFlags & SUPPRESS_LEADING_WS) != 0);
1137     boolean seenWhitespace = ((edgeTreatmentFlags & CARRY_WS) != 0);
1138     boolean suppressTrailingWhitespace =
1139                       ((edgeTreatmentFlags & SUPPRESS_TRAILING_WS) != 0);
1140     int currPos = start;
1141     int limit = start+length;
1142
1143     // Strip any leading spaces first, if required
1144     if (processingLeadingWhitespace) {
1145         for (; currPos < limit
1146                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1147              currPos++) { }
1148
1149         // If we've only encountered leading spaces, the
1150         // current state remains unchanged
1151         if (currPos == limit) {
1152             return edgeTreatmentFlags;
1153         }
1154     }
1155
1156     // If we get here, there are no more leading spaces to strip
1157     while (currPos < limit) {
1158         int startNonWhitespace = currPos;
1159
1160         // Grab a chunk of non-whitespace characters
1161         for (; currPos < limit
1162                && !XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1163              currPos++) { }
1164
1165         // Non-whitespace seen - emit them, along with a single
1166         // space for any preceding whitespace characters
1167         if (startNonWhitespace != currPos) {
1168             if (seenWhitespace) {
1169                 handler.characters(SINGLE_SPACE, 0, 1);
1170                 seenWhitespace = false;
1171             }
1172             handler.characters(ch, startNonWhitespace,
1173                                currPos - startNonWhitespace);
1174         }
1175
1176         int startWhitespace = currPos;
1177
1178         // Consume any whitespace characters
1179         for (; currPos < limit
1180                && XMLCharacterRecognizer.isWhiteSpace(ch[currPos]);
1181              currPos++) { }
1182
1183         if (startWhitespace != currPos) {
1184             seenWhitespace = true;
1185         }
1186     }
1187
1188     return (seenWhitespace ? CARRY_WS : 0)
1189            | (edgeTreatmentFlags & SUPPRESS_TRAILING_WS);
1190  }
1191
1192  /**
1193   * Directly normalize and dispatch the character array.
1194   *
1195   * @param ch The characters from the XML document.
1196   * @param start The start position in the array.
1197   * @param length The number of characters to read from the array.
1198   * @param handler SAX ContentHandler object to receive the event.
1199   * @exception org.xml.sax.SAXException Any SAX exception, possibly
1200   *            wrapping another exception.
1201   */
1202  public static void sendNormalizedSAXcharacters(char ch[],
1203             int start, int length,
1204             org.xml.sax.ContentHandler handler)
1205          throws org.xml.sax.SAXException
1206  {
1207                sendNormalizedSAXcharacters(ch, start, length,
1208             handler, SUPPRESS_BOTH);
1209        }
1210
1211        /**
1212   * Sends the specified range of characters as sax Comment.
1213   * <p>
1214   * Note that, unlike sendSAXcharacters, this has to be done as a single
1215   * call to LexicalHandler#comment.
1216   *
1217   * @param ch SAX LexicalHandler object to receive the event.
1218   * @param start Offset of first character in the range.
1219   * @param length Number of characters to send.
1220   * @exception org.xml.sax.SAXException may be thrown by handler's
1221   * characters() method.
1222   */
1223  public void sendSAXComment(
1224          org.xml.sax.ext.LexicalHandler ch, int start, int length)
1225            throws org.xml.sax.SAXException
1226  {
1227
1228    // %OPT% Do it this way for now...
1229    String comment = getString(start, length);
1230    ch.comment(comment.toCharArray(), 0, length);
1231  }
1232
1233  /**
1234   * Copies characters from this string into the destination character
1235   * array.
1236   *
1237   * @param      srcBegin   index of the first character in the string
1238   *                        to copy.
1239   * @param      srcEnd     index after the last character in the string
1240   *                        to copy.
1241   * @param      dst        the destination array.
1242   * @param      dstBegin   the start offset in the destination array.
1243   * @exception IndexOutOfBoundsException If any of the following
1244   *            is true:
1245   *            <ul><li><code>srcBegin</code> is negative.
1246   *            <li><code>srcBegin</code> is greater than <code>srcEnd</code>
1247   *            <li><code>srcEnd</code> is greater than the length of this
1248   *                string
1249   *            <li><code>dstBegin</code> is negative
1250   *            <li><code>dstBegin+(srcEnd-srcBegin)</code> is larger than
1251   *                <code>dst.length</code></ul>
1252   * @exception NullPointerException if <code>dst</code> is <code>null</code>
1253   */
1254  private void getChars(int srcBegin, int srcEnd, char dst[], int dstBegin)
1255  {
1256    // %TBD% Joe needs to write this function.  Make public when implemented.
1257  }
1258
1259  /**
1260   * Encapsulation c'tor. After this is called, the source FastStringBuffer
1261   * will be reset to use the new object as its m_innerFSB, and will have
1262   * had its chunk size reset appropriately. IT SHOULD NEVER BE CALLED
1263   * EXCEPT WHEN source.length()==1<<(source.m_chunkBits+source.m_rebundleBits)
1264   *
1265   * NEEDSDOC @param source
1266   */
1267  private FastStringBuffer(FastStringBuffer source)
1268  {
1269
1270    // Copy existing information into new encapsulation
1271    m_chunkBits = source.m_chunkBits;
1272    m_maxChunkBits = source.m_maxChunkBits;
1273    m_rebundleBits = source.m_rebundleBits;
1274    m_chunkSize = source.m_chunkSize;
1275    m_chunkMask = source.m_chunkMask;
1276    m_array = source.m_array;
1277    m_innerFSB = source.m_innerFSB;
1278
1279    // These have to be adjusted because we're calling just at the time
1280    // when we would be about to allocate another chunk
1281    m_lastChunk = source.m_lastChunk - 1;
1282    m_firstFree = source.m_chunkSize;
1283
1284    // Establish capsule as the Inner FSB, reset chunk sizes/addressing
1285    source.m_array = new char[16][];
1286    source.m_innerFSB = this;
1287
1288    // Since we encapsulated just as we were about to append another
1289    // chunk, return ready to create the chunk after the innerFSB
1290    // -- 1, not 0.
1291    source.m_lastChunk = 1;
1292    source.m_firstFree = 0;
1293    source.m_chunkBits += m_rebundleBits;
1294    source.m_chunkSize = 1 << (source.m_chunkBits);
1295    source.m_chunkMask = source.m_chunkSize - 1;
1296  }
1297}
1298