1/*
2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3 */
4
5/*
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements.  See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License.  You may obtain a copy of the License at
12 *
13 *     http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22package com.sun.org.apache.xerces.internal.impl;
23
24import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType;
25import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader;
26import com.sun.org.apache.xerces.internal.impl.io.UCSReader;
27import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader;
28import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
29import com.sun.org.apache.xerces.internal.util.EncodingMap;
30import com.sun.org.apache.xerces.internal.util.SymbolTable;
31import com.sun.org.apache.xerces.internal.util.XMLChar;
32import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
33import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer;
34import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
35import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
36import com.sun.org.apache.xerces.internal.xni.*;
37import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
38import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
39import com.sun.xml.internal.stream.Entity;
40import com.sun.xml.internal.stream.Entity.ScannedEntity;
41import com.sun.xml.internal.stream.XMLBufferListener;
42import java.io.EOFException;
43import java.io.IOException;
44import java.io.InputStream;
45import java.io.InputStreamReader;
46import java.io.Reader;
47import java.util.ArrayList;
48import java.util.Locale;
49
50/**
51 * Implements the entity scanner methods.
52 *
53 * @author Neeraj Bajaj, Sun Microsystems
54 * @author Andy Clark, IBM
55 * @author Arnaud  Le Hors, IBM
56 * @author K.Venugopal Sun Microsystems
57 *
58 */
59public class XMLEntityScanner implements XMLLocator  {
60
    /** The entity currently being scanned; {@code null} when no entity is set. */
    protected Entity.ScannedEntity fCurrentEntity = null;
    /** Buffer size recorded for this scanner; starts at the entity manager's default. */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager from which the limit analyzer and security manager are read on reset. */
    protected XMLEntityManager fEntityManager;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Listeners which should know when load is being called. */
    private ArrayList<XMLBufferListener> listeners = new ArrayList<>();

    /**
     * Lookup table for characters below 127: an entry is {@code true} when the
     * character is accepted as a name character. Filled in by the static
     * initializer of this class.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];
79
    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    /** Debug flag for string skipping; not referenced in the visible part of this class. */
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * It is a shared instance whose {@code fillInStackTrace()} is overridden to
     * do nothing, so it carries no stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        // Skip stack-trace capture: simply return this instance unchanged.
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names and tokens. */
    protected SymbolTable fSymbolTable = null;
    /** Error reporter obtained from the property or component manager on reset. */
    protected XMLErrorReporter fErrorReporter = null;
    // Whitespace bookkeeping; whiteSpaceLen and whiteSpaceInfoNeeded are
    // re-initialised by resetCommon().
    // NOTE(review): presumably whiteSpaceLookup holds buffer positions of
    // whitespace characters -- confirm against the code that fills it.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;
102
    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    // Will be used only during internal subsets, for appending data.
    // NOTE(review): no declaration follows this comment; it looks like a
    // leftover from a field that was removed -- confirm and delete if so.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    /** Property manager; not assigned anywhere in the visible part of this class. */
    protected PropertyManager fPropertyManager = null ;

    /** Cached result of {@code fCurrentEntity.isExternal()}, refreshed by setCurrentEntity(). */
    boolean isExternal = false;
127    static {
128
129        for(int i=0x0041;i<=0x005A ; i++){
130            VALID_NAMES[i]=true;
131        }
132        for(int i=0x0061;i<=0x007A; i++){
133            VALID_NAMES[i]=true;
134        }
135        for(int i=0x0030;i<=0x0039; i++){
136            VALID_NAMES[i]=true;
137        }
138        VALID_NAMES[45]=true;
139        VALID_NAMES[46]=true;
140        VALID_NAMES[58]=true;
141        VALID_NAMES[95]=true;
142    }
143
    // Remembers that the XML version has been set explicitly (via
    // setXMLVersion), so that XMLStreamReader.getVersion() can find that out.
    protected boolean xmlVersionSetExplicitly = false;

    // Indicates that the current operation is detecting the XML version;
    // while set, scanChar() skips the entity limit check.
    boolean detectingVersion = false;
150
151    //
152    // Constructors
153    //
154
155    /** Default constructor. */
156    public XMLEntityScanner() {
157    } // <init>()
158
159
160    /**  private constructor, this class can only be instantiated within this class. Instance of this class should
161     *    be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity)
162     *    @see getEntityScanner()
163     *    @see getEntityScanner(ScannedEntity)
164     */
165    public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) {
166        fEntityManager = entityManager ;
167        reset(propertyManager);
168    } // <init>()
169
170
171    // set buffer size:
172    public final void setBufferSize(int size) {
173        // REVISIT: Buffer size passed to entity scanner
174        // was not being kept in synch with the actual size
175        // of the buffers in each scanned entity. If any
176        // of the buffers were actually resized, it was possible
177        // that the parser would throw an ArrayIndexOutOfBoundsException
178        // for documents which contained names which are longer than
179        // the current buffer size. Conceivably the buffer size passed
180        // to entity scanner could be used to determine a minimum size
181        // for resizing, if doubling its size is smaller than this
182        // minimum. -- mrglavas
183        fBufferSize = size;
184    }
185
186    /**
187     * Resets the components.
188     */
189    public void reset(PropertyManager propertyManager){
190        fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ;
191        fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ;
192        resetCommon();
193    }
194
195    /**
196     * Resets the component. The component can query the component manager
197     * about any features and properties that affect the operation of the
198     * component.
199     *
200     * @param componentManager The component manager.
201     *
202     * @throws SAXException Thrown by component on initialization error.
203     *                      For example, if a feature or property is
204     *                      required for the operation of the component, the
205     *                      component manager may throw a
206     *                      SAXNotRecognizedException or a
207     *                      SAXNotSupportedException.
208     */
209    public void reset(XMLComponentManager componentManager)
210    throws XMLConfigurationException {
211        // xerces features
212        fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
213
214        //xerces properties
215        fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
216        fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
217        resetCommon();
218    } // reset(XMLComponentManager)
219
220
221    public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager,
222            XMLErrorReporter reporter) {
223        fCurrentEntity = null;
224        fSymbolTable = symbolTable;
225        fEntityManager = entityManager;
226        fErrorReporter = reporter;
227        fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
228        fSecurityManager = fEntityManager.fSecurityManager;
229    }
230
231    private void resetCommon() {
232        fCurrentEntity = null;
233        whiteSpaceLen = 0;
234        whiteSpaceInfoNeeded = true;
235        listeners.clear();
236        fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
237        fSecurityManager = fEntityManager.fSecurityManager;
238    }
239
240    /**
241     * Returns the XML version of the current entity. This will normally be the
242     * value from the XML or text declaration or defaulted by the parser. Note that
243     * that this value may be different than the version of the processing rules
244     * applied to the current entity. For instance, an XML 1.1 document may refer to
245     * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire
246     * document. Also note that, for a given entity, this value can only be considered
247     * final once the XML or text declaration has been read or once it has been
248     * determined that there is no such declaration.
249     */
250    public final String getXMLVersion() {
251        if (fCurrentEntity != null) {
252            return fCurrentEntity.xmlVersion;
253        }
254        return null;
255    } // getXMLVersion():String
256
257    /**
258     * Sets the XML version. This method is used by the
259     * scanners to report the value of the version pseudo-attribute
260     * in an XML or text declaration.
261     *
262     * @param xmlVersion the XML version of the current entity
263     */
264    public final void setXMLVersion(String xmlVersion) {
265        xmlVersionSetExplicitly = true;
266        fCurrentEntity.xmlVersion = xmlVersion;
267    } // setXMLVersion(String)
268
269
270    /** set the instance of current scanned entity.
271     *   @param ScannedEntity
272     */
273
274    public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){
275        fCurrentEntity = scannedEntity ;
276        if(fCurrentEntity != null){
277            isExternal = fCurrentEntity.isExternal();
278            if(DEBUG_BUFFER)
279                System.out.println("Current Entity is "+scannedEntity.name);
280        }
281    }
282
283    public  Entity.ScannedEntity getCurrentEntity(){
284        return fCurrentEntity ;
285    }
286    //
287    // XMLEntityReader methods
288    //
289
290    /**
291     * Returns the base system identifier of the currently scanned
292     * entity, or null if none is available.
293     */
294    public final String getBaseSystemId() {
295        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
296    } // getBaseSystemId():String
297
298    /**
299     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String)
300     */
301    public void setBaseSystemId(String systemId) {
302        //no-op
303    }
304
305    ///////////// Locator methods start.
306    public final int getLineNumber(){
307        //if the entity is closed, we should return -1
308        //xxx at first place why such call should be there...
309        return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ;
310    }
311
312    /**
313     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int)
314     */
315    public void setLineNumber(int line) {
316        //no-op
317    }
318
319
320    public final int getColumnNumber(){
321        //if the entity is closed, we should return -1
322        //xxx at first place why such call should be there...
323        return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ;
324    }
325
326    /**
327     * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int)
328     */
329    public void setColumnNumber(int col) {
330        // no-op
331    }
332
333
334    public final int getCharacterOffset(){
335        return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ;
336    }
337
338    /** Returns the expanded system identifier.  */
339    public final String getExpandedSystemId() {
340        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null;
341    }
342
343    /**
344     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String)
345     */
346    public void setExpandedSystemId(String systemId) {
347        //no-op
348    }
349
350    /** Returns the literal system identifier.  */
351    public final String getLiteralSystemId() {
352        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null;
353    }
354
355    /**
356     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String)
357     */
358    public void setLiteralSystemId(String systemId) {
359        //no-op
360    }
361
362    /** Returns the public identifier.  */
363    public final String getPublicId() {
364        return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null;
365    }
366
367    /**
368     * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String)
369     */
370    public void setPublicId(String publicId) {
371        //no-op
372    }
373
374    ///////////////// Locator methods finished.
375
376    /** the version of the current entity being scanned */
377    public void setVersion(String version){
378        fCurrentEntity.version = version;
379    }
380
381    public String getVersion(){
382        if (fCurrentEntity != null)
383            return fCurrentEntity.version ;
384        return null;
385    }
386
387    /**
388     * Returns the encoding of the current entity.
389     * Note that, for a given entity, this value can only be
390     * considered final once the encoding declaration has been read (or once it
391     * has been determined that there is no such declaration) since, no encoding
392     * having been specified on the XMLInputSource, the parser
393     * will make an initial "guess" which could be in error.
394     */
395    public final String getEncoding() {
396        if (fCurrentEntity != null) {
397            return fCurrentEntity.encoding;
398        }
399        return null;
400    } // getEncoding():String
401
402    /**
403     * Sets the encoding of the scanner. This method is used by the
404     * scanners if the XMLDecl or TextDecl line contains an encoding
405     * pseudo-attribute.
406     * <p>
407     * <strong>Note:</strong> The underlying character reader on the
408     * current entity will be changed to accomodate the new encoding.
409     * However, the new encoding is ignored if the current reader was
410     * not constructed from an input stream (e.g. an external entity
411     * that is resolved directly to the appropriate java.io.Reader
412     * object).
413     *
414     * @param encoding The IANA encoding name of the new encoding.
415     *
416     * @throws IOException Thrown if the new encoding is not supported.
417     *
418     * @see com.sun.org.apache.xerces.internal.util.EncodingMap
419     */
420    public final void setEncoding(String encoding) throws IOException {
421
422        if (DEBUG_ENCODINGS) {
423            System.out.println("$$$ setEncoding: "+encoding);
424        }
425
426        if (fCurrentEntity.stream != null) {
427            // if the encoding is the same, don't change the reader and
428            // re-use the original reader used by the OneCharReader
429            // NOTE: Besides saving an object, this overcomes deficiencies
430            //       in the UTF-16 reader supplied with the standard Java
431            //       distribution (up to and including 1.3). The UTF-16
432            //       decoder buffers 8K blocks even when only asked to read
433            //       a single char! -Ac
434            if (fCurrentEntity.encoding == null ||
435                    !fCurrentEntity.encoding.equals(encoding)) {
436                // UTF-16 is a bit of a special case.  If the encoding is UTF-16,
437                // and we know the endian-ness, we shouldn't change readers.
438                // If it's ISO-10646-UCS-(2|4), then we'll have to deduce
439                // the endian-ness from the encoding we presently have.
440                if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) {
441                    String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
442                    if(ENCODING.equals("UTF-16")) return;
443                    if(ENCODING.equals("ISO-10646-UCS-4")) {
444                        if(fCurrentEntity.encoding.equals("UTF-16BE")) {
445                            fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE);
446                        } else {
447                            fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE);
448                        }
449                        return;
450                    }
451                    if(ENCODING.equals("ISO-10646-UCS-2")) {
452                        if(fCurrentEntity.encoding.equals("UTF-16BE")) {
453                            fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE);
454                        } else {
455                            fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE);
456                        }
457                        return;
458                    }
459                }
460                // wrap a new reader around the input stream, changing
461                // the encoding
462                if (DEBUG_ENCODINGS) {
463                    System.out.println("$$$ creating new reader from stream: "+
464                            fCurrentEntity.stream);
465                }
466                //fCurrentEntity.stream.reset();
467                fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null);
468                fCurrentEntity.encoding = encoding;
469
470            } else {
471                if (DEBUG_ENCODINGS)
472                    System.out.println("$$$ reusing old reader on stream");
473            }
474        }
475
476    } // setEncoding(String)
477
478    /** Returns true if the current entity being scanned is external. */
479    public final boolean isExternal() {
480        return fCurrentEntity.isExternal();
481    } // isExternal():boolean
482
483    public int getChar(int relative) throws IOException{
484        if(arrangeCapacity(relative + 1, false)){
485            return fCurrentEntity.ch[fCurrentEntity.position + relative];
486        }else{
487            return -1;
488        }
489    }//getChar()
490
491    /**
492     * Returns the next character on the input.
493     * <p>
494     * <strong>Note:</strong> The character is <em>not</em> consumed.
495     *
496     * @throws IOException  Thrown if i/o error occurs.
497     * @throws EOFException Thrown on end of file.
498     */
499    public int peekChar() throws IOException {
500        if (DEBUG_BUFFER) {
501            System.out.print("(peekChar: ");
502            print();
503            System.out.println();
504        }
505
506        // load more characters, if needed
507        if (fCurrentEntity.position == fCurrentEntity.count) {
508            load(0, true, true);
509        }
510
511        // peek at character
512        int c = fCurrentEntity.ch[fCurrentEntity.position];
513
514        // return peeked character
515        if (DEBUG_BUFFER) {
516            System.out.print(")peekChar: ");
517            print();
518            if (isExternal) {
519                System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'");
520            } else {
521                System.out.println(" -> '"+(char)c+"'");
522            }
523        }
524        if (isExternal) {
525            return c != '\r' ? c : '\n';
526        } else {
527            return c;
528        }
529
530    } // peekChar():int
531
    /**
     * Returns the next character on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed.
     * <p>
     * A line feed, or a carriage return while the external flag is set,
     * increments the line number and resets the column number. With the
     * external flag set, a carriage return (together with an immediately
     * following line feed, if any) is returned as a single line feed.
     *
     * @param nt The type of the name (element or attribute); forwarded to
     *           the entity limit check
     *
     * @return the character that was scanned
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    protected int scanChar(NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan character; remember where it started for the limit check below
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        if (c == '\n' || (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // The newline was the last buffered character: notify the
            // listeners, keep the newline at index 0 and load further input
            // (presumably placed after it -- see load()).
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false, false);
                offset = 0;
            }
            // Collapse "\r\n" (or a lone "\r") into a single '\n': consume the
            // following character only if it is a line feed.
            if (c == '\r' && isExternal) {
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        fCurrentEntity.columnNumber++;
        // The limit check is skipped while the XML version is being detected.
        if (!detectingVersion) {
            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
        }
        return c;

    } // scanChar():int
587
    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @return the scanned NMTOKEN as a symbol, or null if no name character
     *         was found at the current position
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    protected String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan nmtoken
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            // Equivalent of testing XMLChar.isName(...) on the current
            // character, with a table lookup as a fast path below 127.
            c = fCurrentEntity.ch[fCurrentEntity.position];
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // End of buffered data reached in the middle of the token: move
            // the part scanned so far to the start of the buffer (growing the
            // buffer if the token already fills it) and load more input.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // A true result from load() ends the scan.
                if (load(length, false, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String
665
    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is present.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @param nt The type of the name (element or attribute); forwarded to
     *           the entity limit check
     *
     * @return the scanned name as a symbol, or null if the current character
     *         is not a name start character
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    protected String scanName(NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan name
        int offset = fCurrentEntity.position;
        int length;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // The name start character was the last buffered character: keep
            // it at index 0 and load more input.
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                // A true result from load() means the name consists of this
                // single character; return it right away.
                if (load(1, false, false)) {
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                // Equivalent of testing XMLChar.isName(...) on the current
                // character, with a table lookup as a fast path below 127.
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // checkBeforeLoad() is defined elsewhere; a positive result is
                // used here as the length to pass to load(), after which the
                // name is taken to start at index 0.
                // NOTE(review): presumably it also advances the position --
                // nothing else in this loop does; confirm in checkBeforeLoad().
                if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
                    offset = 0;
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
        }
        length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            // Enforce the name-length limit and the entity limit before
            // interning the name.
            checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
            checkEntityLimit(nt, fCurrentEntity, offset, length);
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String
753
754    /**
755     * Scans a qualified name from the input, setting the fields of the
756     * QName structure appropriately.
757     * <p>
758     * <strong>Note:</strong> The qualified name characters are consumed.
759     * <p>
760     * <strong>Note:</strong> The strings used to set the values of the
761     * QName structure must be symbols. The SymbolTable can be used for
762     * this purpose.
763     *
764     * @param qname The qualified name structure to fill.
765     * @param nt The type of the name (element or attribute)
766     *
767     * @return Returns true if a qualified name appeared immediately on
768     *         the input and was scanned, false otherwise.
769     *
770     * @throws IOException  Thrown if i/o error occurs.
771     * @throws EOFException Thrown on end of file.
772     *
773     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
774     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
775     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
776     */
777    protected boolean scanQName(QName qname, NameType nt) throws IOException {
778        if (DEBUG_BUFFER) {
779            System.out.print("(scanQName, "+qname+": ");
780            print();
781            System.out.println();
782        }
783
784        // load more characters, if needed
785        if (fCurrentEntity.position == fCurrentEntity.count) {
786            load(0, true, true);
787        }
788
789        // scan qualified name
790        int offset = fCurrentEntity.position;
791
        // Check whether the specified character is a valid name start character
793        //as defined by production [5] in the XML 1.0 specification.
794        // Name ::= (Letter | '_' | ':') (NameChar)*
795
796        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
797            if (++fCurrentEntity.position == fCurrentEntity.count) {
798                invokeListeners(1);
799                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
800                offset = 0;
801
802                if (load(1, false, false)) {
803                    fCurrentEntity.columnNumber++;
804                    //adding into symbol table.
805                    //XXX We are trying to add single character in SymbolTable??????
806                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
807                    qname.setValues(null, name, name, null);
808                    if (DEBUG_BUFFER) {
809                        System.out.print(")scanQName, "+qname+": ");
810                        print();
811                        System.out.println(" -> true");
812                    }
813                    checkEntityLimit(nt, fCurrentEntity, 0, 1);
814                    return true;
815                }
816            }
817            int index = -1;
818            boolean vc = false;
819            int length;
820            while ( true){
821
822                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
823                char c = fCurrentEntity.ch[fCurrentEntity.position];
824                if(c < 127){
825                    vc = VALID_NAMES[c];
826                }else{
827                    vc = XMLChar.isName(c);
828                }
829                if(!vc)break;
830                if (c == ':') {
831                    if (index != -1) {
832                        break;
833                    }
834                    index = fCurrentEntity.position;
835                    //check prefix before further read
836                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
837                }
838                if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
839                    if (index != -1) {
840                        index = index - offset;
841                    }
842                    offset = 0;
843                    if (load(length, false, false)) {
844                        break;
845                    }
846                }
847            }
848            length = fCurrentEntity.position - offset;
849            fCurrentEntity.columnNumber += length;
850            if (length > 0) {
851                String prefix = null;
852                String localpart = null;
853                String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
854                        offset, length);
855
856                if (index != -1) {
857                    int prefixLength = index - offset;
858                    //check the result: prefix
859                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
860                    prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
861                            offset, prefixLength);
862                    int len = length - prefixLength - 1;
863                    //check the result: localpart
864                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
865                    localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
866                            index + 1, len);
867
868                } else {
869                    localpart = rawname;
870                    //check the result: localpart
871                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
872                }
873                qname.setValues(prefix, localpart, rawname, null);
874                if (DEBUG_BUFFER) {
875                    System.out.print(")scanQName, "+qname+": ");
876                    print();
877                    System.out.println(" -> true");
878                }
879                checkEntityLimit(nt, fCurrentEntity, offset, length);
880                return true;
881            }
882        }
883
884        // no qualified name found
885        if (DEBUG_BUFFER) {
886            System.out.print(")scanQName, "+qname+": ");
887            print();
888            System.out.println(" -> false");
889        }
890        return false;
891
892    } // scanQName(QName):boolean
893
894    /**
895     * Checks whether the end of the entity buffer has been reached. If yes,
896     * checks against the limit and buffer size before loading more characters.
897     *
898     * @param entity the current entity
899     * @param offset the offset from which the current read was started
900     * @param nameOffset the offset from which the current name starts
901     * @return the length of characters scanned before the end of the buffer,
902     * zero if there is more to be read in the buffer
903     */
904    protected int checkBeforeLoad(Entity.ScannedEntity entity, int offset,
905            int nameOffset) throws IOException {
906        int length = 0;
907        if (++entity.position == entity.count) {
908            length = entity.position - offset;
909            int nameLength = length;
910            if (nameOffset != -1) {
911                nameOffset = nameOffset - offset;
912                nameLength = length - nameOffset;
913            } else {
914                nameOffset = offset;
915            }
916            //check limit before loading more data
917            checkLimit(Limit.MAX_NAME_LIMIT, entity, nameOffset, nameLength);
918            invokeListeners(length);
919            if (length == entity.ch.length) {
920                // bad luck we have to resize our buffer
921                char[] tmp = new char[entity.fBufferSize * 2];
922                System.arraycopy(entity.ch, offset, tmp, 0, length);
923                entity.ch = tmp;
924                entity.fBufferSize *= 2;
925            }
926            else {
927                System.arraycopy(entity.ch, offset, entity.ch, 0, length);
928            }
929        }
930        return length;
931    }
932
933    /**
934     * If the current entity is an Entity reference, check the accumulated size
935     * against the limit.
936     *
937     * @param nt type of name (element, attribute or entity)
938     * @param entity The current entity
939     * @param offset The index of the first byte
940     * @param length The length of the entity scanned
941     */
942    protected void checkEntityLimit(NameType nt, ScannedEntity entity, int offset, int length) {
943        if (entity == null || !entity.isGE) {
944            return;
945        }
946
947        if (nt != NameType.REFERENCE) {
948            checkLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, entity, offset, length);
949        }
950        if (nt == NameType.ELEMENTSTART || nt == NameType.ATTRIBUTENAME) {
951            checkNodeCount(entity);
952        }
953    }
954
955    /**
956     * If the current entity is an Entity reference, counts the total nodes in
957     * the entity and checks the accumulated value against the limit.
958     *
959     * @param entity The current entity
960     */
961    protected void checkNodeCount(ScannedEntity entity) {
962        if (entity != null && entity.isGE) {
963            checkLimit(Limit.ENTITY_REPLACEMENT_LIMIT, entity, 0, 1);
964        }
965    }
966
967    /**
968     * Checks whether the value of the specified Limit exceeds its limit
969     *
970     * @param limit The Limit to be checked
971     * @param entity The current entity
972     * @param offset The index of the first byte
973     * @param length The length of the entity scanned
974     */
975    protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) {
976        fLimitAnalyzer.addValue(limit, entity.name, length);
977        if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) {
978            fSecurityManager.debugPrint(fLimitAnalyzer);
979            Object[] e = (limit == Limit.ENTITY_REPLACEMENT_LIMIT) ?
980                    new Object[]{fLimitAnalyzer.getValue(limit),
981                        fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)} :
982                    new Object[]{entity.name, fLimitAnalyzer.getValue(limit),
983                        fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)};
984            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(),
985                    e, XMLErrorReporter.SEVERITY_FATAL_ERROR);
986        }
987        if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) {
988            fSecurityManager.debugPrint(fLimitAnalyzer);
989            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "TotalEntitySizeLimit",
990                    new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT),
991                fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT),
992                fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)},
993                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
994        }
995    }
996
997    /**
998     * CHANGED:
999     * Scans a range of parsed character data and sets the supplied XMLString
1000     * to the scanned range of the entity buffer.
1001     * <p>
1002     * <strong>Note:</strong> The characters are consumed.
1003     * <p>
1004     * <strong>Note:</strong> This method does not guarantee to return
1005     * the longest run of parsed character data. This method may return
1006     * before markup due to reaching the end of the input buffer or any
1007     * other reason.
1008     * <p>
     * <strong>Note:</strong> Leading newlines are normalized in place in the
     * entity buffer: the scanned newline characters are overwritten with
     * <code>'\n'</code>. A <code>'\r'</code> is treated as a newline only when
     * <code>isExternal</code> is set.
1009     *
1010     * @param content The content structure to fill.
1011     *
1012     * @return Returns the next character on the input, if known. This
1013     *         value may be -1 but this does <em>not</em> designate
1014     *         end of file.
1015     *
1016     * @throws IOException  Thrown if i/o error occurs.
1017     * @throws EOFException Thrown on end of file.
1018     */
1019    protected int scanContent(XMLString content) throws IOException {
1020        if (DEBUG_BUFFER) {
1021            System.out.print("(scanContent: ");
1022            print();
1023            System.out.println();
1024        }
1025
1026        // load more characters, if needed
1027        if (fCurrentEntity.position == fCurrentEntity.count) {
1028            load(0, true, true);
1029        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one buffered character is left: notify the listeners,
            // move it to index 0, load more data behind it and restart at 0
1030            invokeListeners(1);
1031            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1032            load(1, false, false);
1033            fCurrentEntity.position = 0;
1034        }
1035
1036        // normalize newlines
1037        int offset = fCurrentEntity.position;
1038        int c = fCurrentEntity.ch[offset];
1039        int newlines = 0;
        // counted: set when the entity limit was already checked for the
        // newline run before a load() that returned true, so the check after
        // the content scan below is skipped
1040        boolean counted = false;
1041        if (c == '\n' || (c == '\r' && isExternal)) {
1042            if (DEBUG_BUFFER) {
1043                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1044                print();
1045                System.out.println();
1046            }
1047            do {
1048                c = fCurrentEntity.ch[fCurrentEntity.position++];
1049                if (c == '\r' && isExternal) {
1050                    newlines++;
1051                    fCurrentEntity.lineNumber++;
1052                    fCurrentEntity.columnNumber = 1;
1053                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // end of buffer inside the newline run: account for
                        // the run, restart the range at index 0 and load more
                        // data behind the first `newlines` slots
1054                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
1055                        offset = 0;
1056                        fCurrentEntity.position = newlines;
1057                        if (load(newlines, false, true)) {
1058                            counted = true;
1059                            break;
1060                        }
1061                    }
                    // "\r\n": consume the '\n' too and advance offset so the
                    // pair contributes a single character to the range
1062                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1063                        fCurrentEntity.position++;
1064                        offset++;
1065                    }
1066                    /*** NEWLINE NORMALIZATION ***/
                    // a '\r' not followed by '\n' increments newlines a second time
1067                    else {
1068                        newlines++;
1069                    }
1070                } else if (c == '\n') {
1071                    newlines++;
1072                    fCurrentEntity.lineNumber++;
1073                    fCurrentEntity.columnNumber = 1;
1074                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same end-of-buffer handling as in the '\r' branch
1075                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
1076                        offset = 0;
1077                        fCurrentEntity.position = newlines;
1078                        if (load(newlines, false, true)) {
1079                            counted = true;
1080                            break;
1081                        }
1082                    }
1083                } else {
                    // not a newline: un-read the character and stop
1084                    fCurrentEntity.position--;
1085                    break;
1086                }
1087            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the scanned newline range with '\n' in the buffer
1088            for (int i = offset; i < fCurrentEntity.position; i++) {
1089                fCurrentEntity.ch[i] = '\n';
1090            }
1091            int length = fCurrentEntity.position - offset;
            // only the last buffered character is left: return just the
            // newlines and report the next character as unknown (-1)
1092            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1093                checkEntityLimit(null, fCurrentEntity, offset, length);
1094                //CHANGED: the active call below replaces the XMLString value via
1095                //setValues; the append variant is kept commented out.
1096                content.setValues(fCurrentEntity.ch, offset, length);
1097                //content.append(fCurrentEntity.ch, offset, length);
1098                if (DEBUG_BUFFER) {
1099                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1100                    print();
1101                    System.out.println();
1102                }
1103                return -1;
1104            }
1105            if (DEBUG_BUFFER) {
1106                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1107                print();
1108                System.out.println();
1109            }
1110        }
1111
        // consume characters until one is not content (per XMLChar.isContent)
        // or the buffered data ends; the stopping character is not consumed
1112        while (fCurrentEntity.position < fCurrentEntity.count) {
1113            c = fCurrentEntity.ch[fCurrentEntity.position++];
1114            if (!XMLChar.isContent(c)) {
1115                fCurrentEntity.position--;
1116                break;
1117            }
1118        }
1119        int length = fCurrentEntity.position - offset;
1120        fCurrentEntity.columnNumber += length - newlines;
1121        if (!counted) {
1122            checkEntityLimit(null, fCurrentEntity, offset, length);
1123        }
1124
1125        //CHANGED: the active call below replaces the XMLString value via
1126        //setValues; the append variant is kept commented out.
1127        content.setValues(fCurrentEntity.ch, offset, length);
1128        //content.append(fCurrentEntity.ch, offset, length);
1129        // return next character
1130        if (fCurrentEntity.position != fCurrentEntity.count) {
1131            c = fCurrentEntity.ch[fCurrentEntity.position];
1132            // REVISIT: Does this need to be updated to fix the
1133            //          #x0D ^#x0A newline normalization problem? -Ac
1134            if (c == '\r' && isExternal) {
1135                c = '\n';
1136            }
1137        } else {
            // buffer exhausted: the next character is not known
1138            c = -1;
1139        }
1140        if (DEBUG_BUFFER) {
1141            System.out.print(")scanContent: ");
1142            print();
1143            System.out.println(" -> '"+(char)c+"'");
1144        }
1145        return c;
1146
1147    } // scanContent(XMLString):int
1148
1149    /**
1150     * Scans a range of attribute value data, setting the fields of the
1151     * XMLString structure, appropriately.
1152     * <p>
1153     * <strong>Note:</strong> The characters are consumed.
1154     * <p>
1155     * <strong>Note:</strong> This method does not guarantee to return
1156     * the longest run of attribute value data. This method may return
1157     * before the quote character due to reaching the end of the input
1158     * buffer or any other reason.
1159     * <p>
1160     * <strong>Note:</strong> The fields contained in the XMLString
1161     * structure are not guaranteed to remain valid upon subsequent calls
1162     * to the entity scanner. Therefore, the caller is responsible for
1163     * immediately using the returned character data or making a copy of
1164     * the character data.
     * <p>
     * <strong>Note:</strong> Leading newlines are normalized in place in the
     * entity buffer, and the buffer positions of those newlines are recorded
     * through <code>storeWhiteSpace</code>. Positions of tab characters are
     * recorded as well when <code>whiteSpaceInfoNeeded</code> is set.
1165     *
1166     * @param quote   The quote character that signifies the end of the
1167     *                attribute value data.
1168     * @param content The content structure to fill.
1169     * @param isNSURI a flag indicating whether the content is a Namespace URI;
     *                when set, the scanned length is also checked against
     *                <code>Limit.MAX_NAME_LIMIT</code>
1170     *
1171     * @return Returns the next character on the input, if known. This
1172     *         value may be -1 but this does <em>not</em> designate
1173     *         end of file.
1174     *
1175     * @throws IOException  Thrown if i/o error occurs.
1176     * @throws EOFException Thrown on end of file.
1177     */
1178    protected int scanLiteral(int quote, XMLString content, boolean isNSURI)
1179    throws IOException {
1180        if (DEBUG_BUFFER) {
1181            System.out.print("(scanLiteral, '"+(char)quote+"': ");
1182            print();
1183            System.out.println();
1184        }
1185        // load more characters, if needed
1186        if (fCurrentEntity.position == fCurrentEntity.count) {
1187            load(0, true, true);
1188        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one buffered character is left: notify the listeners,
            // move it to index 0, load more data behind it and restart at 0
1189            invokeListeners(1);
1190            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
1191            load(1, false, false);
1192            fCurrentEntity.position = 0;
1193        }
1194
1195        // normalize newlines
1196        int offset = fCurrentEntity.position;
1197        int c = fCurrentEntity.ch[offset];
1198        int newlines = 0;
        // start a fresh list of recorded whitespace positions for this call
1199        if(whiteSpaceInfoNeeded)
1200            whiteSpaceLen=0;
1201        if (c == '\n' || (c == '\r' && isExternal)) {
1202            if (DEBUG_BUFFER) {
1203                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1204                print();
1205                System.out.println();
1206            }
1207            do {
1208                c = fCurrentEntity.ch[fCurrentEntity.position++];
1209                if (c == '\r' && isExternal) {
1210                    newlines++;
1211                    fCurrentEntity.lineNumber++;
1212                    fCurrentEntity.columnNumber = 1;
1213                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // end of buffer inside the newline run: restart the
                        // range at index 0 and load more data behind the
                        // first `newlines` slots
1214                        offset = 0;
1215                        fCurrentEntity.position = newlines;
1216                        if (load(newlines, false, true)) {
1217                            break;
1218                        }
1219                    }
                    // "\r\n": consume the '\n' too and advance offset so the
                    // pair contributes a single character to the range
1220                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1221                        fCurrentEntity.position++;
1222                        offset++;
1223                    }
1224                    /*** NEWLINE NORMALIZATION ***/
                    // a '\r' not followed by '\n' increments newlines a second time
1225                    else {
1226                        newlines++;
1227                    }
1228                    /***/
1229                } else if (c == '\n') {
1230                    newlines++;
1231                    fCurrentEntity.lineNumber++;
1232                    fCurrentEntity.columnNumber = 1;
1233                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // same end-of-buffer handling as in the '\r' branch
1234                        offset = 0;
1235                        fCurrentEntity.position = newlines;
1236                        if (load(newlines, false, true)) {
1237                            break;
1238                        }
1239                    }
1240                    /*** NEWLINE NORMALIZATION ***
1241                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
1242                     * && external) {
1243                     * fCurrentEntity.position++;
1244                     * offset++;
1245                     * }
1246                     * /***/
1247                } else {
                    // not a newline: un-read the character and stop
1248                    fCurrentEntity.position--;
1249                    break;
1250                }
1251            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the scanned newline range with '\n' and record each
            // position.
            // NOTE(review): storeWhiteSpace is called here even when
            // whiteSpaceInfoNeeded is false -- confirm this is intended.
1252            int i=0;
1253            for ( i = offset; i < fCurrentEntity.position; i++) {
1254                fCurrentEntity.ch[i] = '\n';
1255                storeWhiteSpace(i);
1256            }
1257
1258            int length = fCurrentEntity.position - offset;
            // only the last buffered character is left: return just the
            // newlines and report the next character as unknown (-1)
1259            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1260                content.setValues(fCurrentEntity.ch, offset, length);
1261                if (DEBUG_BUFFER) {
1262                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1263                    print();
1264                    System.out.println();
1265                }
1266                return -1;
1267            }
1268            if (DEBUG_BUFFER) {
1269                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1270                print();
1271                System.out.println();
1272            }
1273        }
1274
1275        // scan literal value: stop (without consuming) at '%', at a character
        // that is not content, or at the quote character unless this entity
        // is a literal that is not external
1276        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
1277            c = fCurrentEntity.ch[fCurrentEntity.position];
1278            if ((c == quote &&
1279                    (!fCurrentEntity.literal || isExternal)) ||
1280                    c == '%' || !XMLChar.isContent(c)) {
1281                break;
1282            }
            // remember where tabs occur when whitespace info was requested
1283            if (whiteSpaceInfoNeeded && c == '\t') {
1284                storeWhiteSpace(fCurrentEntity.position);
1285            }
1286        }
1287        int length = fCurrentEntity.position - offset;
1288        fCurrentEntity.columnNumber += length - newlines;
1289
1290        checkEntityLimit(null, fCurrentEntity, offset, length);
        // namespace URIs are additionally subject to the name length limit
1291        if (isNSURI) {
1292            checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
1293        }
1294        content.setValues(fCurrentEntity.ch, offset, length);
1295
1296        // return next character
1297        if (fCurrentEntity.position != fCurrentEntity.count) {
1298            c = fCurrentEntity.ch[fCurrentEntity.position];
1299            // NOTE: We don't want to accidentally signal the
1300            //       end of the literal if we're expanding an
1301            //       entity appearing in the literal. -Ac
1302            if (c == quote && fCurrentEntity.literal) {
1303                c = -1;
1304            }
1305        } else {
            // buffer exhausted: the next character is not known
1306            c = -1;
1307        }
1308        if (DEBUG_BUFFER) {
1309            System.out.print(")scanLiteral, '"+(char)quote+"': ");
1310            print();
1311            System.out.println(" -> '"+(char)c+"'");
1312        }
1313        return c;
1314
1315    } // scanLiteral(int,XMLString,boolean):int
1316
1317    /**
1318     * Save whitespace information. Increase the whitespace buffer by 100
1319     * when needed.
1320     *
1321     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
1322     *
1323     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
1324     */
1325    private void storeWhiteSpace(int whiteSpacePos) {
1326        if (whiteSpaceLen >= whiteSpaceLookup.length) {
1327            int [] tmp = new int[whiteSpaceLookup.length + 100];
1328            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
1329            whiteSpaceLookup = tmp;
1330        }
1331
1332        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
1333    }
1334
1335    //CHANGED:
1336    /**
1337     * Scans a range of character data up to the specified delimiter,
1338     * appending the scanned characters to the supplied XMLStringBuffer.
1339     * <p>
1340     * <strong>Note:</strong> The characters are consumed.
1341     * <p>
1342     * <strong>Note:</strong> This assumes that the delimiter contains at
1343     * least one character.
1344     * <p>
1345     * <strong>Note:</strong> This method does not guarantee to return
1346     * the longest run of character data. This method may return before
1347     * the delimiter due to reaching the end of the input buffer or any
1348     * other reason.
1349     * <p>
     * <strong>Note:</strong> When the delimiter is found it is consumed but
     * not appended to the buffer. When <code>chunkLimit</code> is non-zero
     * the scan loop runs at most once per call.
     * <p>
1350     * @param delimiter The string that signifies the end of the character
1351     *                  data to be scanned.
1352     * @param buffer    The XMLStringBuffer to fill.
1353     * @param chunkLimit the size limit of the data to be scanned. Zero by default
1354     * indicating no limit.
1355     *
1356     * @return Returns true if there is more data to scan, false otherwise.
1357     *
1358     * @throws IOException  Thrown if i/o error occurs.
1359     * @throws EOFException Thrown on end of file.
1360     */
1361    protected boolean scanData(String delimiter, XMLStringBuffer buffer, int chunkLimit)
1362    throws IOException {
1363
        // done: set once the complete delimiter has been matched
1364        boolean done = false;
1365        int delimLen = delimiter.length();
1366        char charAt0 = delimiter.charAt(0);
1367        do {
1368            if (DEBUG_BUFFER) {
1369                System.out.print("(scanData: ");
1370                print();
1371                System.out.println();
1372            }
1373
1374            // load more characters, if needed
1375
1376            if (fCurrentEntity.position == fCurrentEntity.count) {
1377                load(0, true, false);
1378            }
1379
1380            boolean bNextEntity = false;
1381
            // While fewer than delimLen characters remain buffered, move the
            // remainder to the start of the buffer and load more data behind
            // it, until enough is available or load() returns true.
1382            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
1383                && (!bNextEntity))
1384            {
1385              System.arraycopy(fCurrentEntity.ch,
1386                               fCurrentEntity.position,
1387                               fCurrentEntity.ch,
1388                               0,
1389                               fCurrentEntity.count - fCurrentEntity.position);
1390
1391              bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
1392              fCurrentEntity.position = 0;
1393              fCurrentEntity.startPosition = 0;
1394            }
1395
1396            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
1397                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // append whatever is left, mark the buffer as fully consumed
                // and report that there is nothing more to scan
1398                int length = fCurrentEntity.count - fCurrentEntity.position;
1399                checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length);
1400                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                // NOTE(review): the column is advanced by count, not by the
                // number of characters appended (length) -- confirm intended.
1401                fCurrentEntity.columnNumber += fCurrentEntity.count;
1402                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
1403                fCurrentEntity.position = fCurrentEntity.count;
1404                fCurrentEntity.startPosition = fCurrentEntity.count;
1405                load(0, true, false);
1406                return false;
1407            }
1408
1409            // normalize newlines
1410            int offset = fCurrentEntity.position;
1411            int c = fCurrentEntity.ch[offset];
1412            int newlines = 0;
1413            if (c == '\n' || (c == '\r' && isExternal)) {
1414                if (DEBUG_BUFFER) {
1415                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
1416                    print();
1417                    System.out.println();
1418                }
1419                do {
1420                    c = fCurrentEntity.ch[fCurrentEntity.position++];
1421                    if (c == '\r' && isExternal) {
1422                        newlines++;
1423                        fCurrentEntity.lineNumber++;
1424                        fCurrentEntity.columnNumber = 1;
1425                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // end of buffer inside the newline run: restart
                            // the range at index 0 and load more data behind
                            // the first `newlines` slots
1426                            offset = 0;
1427                            fCurrentEntity.position = newlines;
1428                            if (load(newlines, false, true)) {
1429                                break;
1430                            }
1431                        }
                        // "\r\n": consume the '\n' too and advance offset so
                        // the pair contributes a single character to the range
1432                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1433                            fCurrentEntity.position++;
1434                            offset++;
1435                        }
1436                        /*** NEWLINE NORMALIZATION ***/
                        // a '\r' not followed by '\n' increments newlines a second time
1437                        else {
1438                            newlines++;
1439                        }
1440                    } else if (c == '\n') {
1441                        newlines++;
1442                        fCurrentEntity.lineNumber++;
1443                        fCurrentEntity.columnNumber = 1;
1444                        if (fCurrentEntity.position == fCurrentEntity.count) {
1445                            offset = 0;
1446                            fCurrentEntity.position = newlines;
                            // NOTE(review): count is reset here but not in
                            // the '\r' branch above -- confirm the asymmetry
                            // is intended.
1447                            fCurrentEntity.count = newlines;
1448                            if (load(newlines, false, true)) {
1449                                break;
1450                            }
1451                        }
1452                    } else {
                        // not a newline: un-read the character and stop
1453                        fCurrentEntity.position--;
1454                        break;
1455                    }
1456                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // overwrite the scanned newline range with '\n' in the buffer
1457                for (int i = offset; i < fCurrentEntity.position; i++) {
1458                    fCurrentEntity.ch[i] = '\n';
1459                }
1460                int length = fCurrentEntity.position - offset;
                // only the last buffered character is left: append just the
                // newlines and tell the caller there is more to scan
1461                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1462                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1463                    buffer.append(fCurrentEntity.ch, offset, length);
1464                    if (DEBUG_BUFFER) {
1465                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1466                        print();
1467                        System.out.println();
1468                    }
1469                    return true;
1470                }
1471                if (DEBUG_BUFFER) {
1472                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
1473                    print();
1474                    System.out.println();
1475                }
1476            }
1477
1478            // iterate over buffer looking for delimiter
1479            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
1480                c = fCurrentEntity.ch[fCurrentEntity.position++];
1481                if (c == charAt0) {
1482                    // looks like we just hit the delimiter
1483                    int delimOffset = fCurrentEntity.position - 1;
1484                    for (int i = 1; i < delimLen; i++) {
                        // buffer ends inside a possible delimiter: rewind to
                        // its first character and leave the scan loop
1485                        if (fCurrentEntity.position == fCurrentEntity.count) {
1486                            fCurrentEntity.position -= i;
1487                            break OUTER;
1488                        }
1489                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        // mismatch: rewind to just after the first delimiter
                        // character and keep scanning
1490                        if (delimiter.charAt(i) != c) {
1491                            fCurrentEntity.position -= i;
1492                            break;
1493                        }
1494                    }
                    // the position advanced over the whole delimiter: full match
1495                    if (fCurrentEntity.position == delimOffset + delimLen) {
1496                        done = true;
1497                        break;
1498                    }
1499                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // stop before a newline; it is not consumed here
1500                    fCurrentEntity.position--;
1501                    break;
1502                } else if (XMLChar.isInvalid(c)) {
                    // stop before an invalid character: append what was
                    // scanned so far and return without consuming it
1503                    fCurrentEntity.position--;
1504                    int length = fCurrentEntity.position - offset;
1505                    fCurrentEntity.columnNumber += length - newlines;
1506                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
1507                    buffer.append(fCurrentEntity.ch, offset, length);
1508                    return true;
1509                }
                // stop once the data already in the buffer plus the pending
                // range reaches the chunk limit
1510                if (chunkLimit > 0 &&
1511                        (buffer.length + fCurrentEntity.position - offset) >= chunkLimit) {
1512                    break;
1513                }
1514            }
1515            int length = fCurrentEntity.position - offset;
1516            fCurrentEntity.columnNumber += length - newlines;
1517            checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
            // the delimiter was consumed but is not part of the appended data
1518            if (done) {
1519                length -= delimLen;
1520            }
1521            buffer.append(fCurrentEntity.ch, offset, length);
1522
1523            // debug trace: prints whether the delimiter was found
1524            if (DEBUG_BUFFER) {
1525                System.out.print(")scanData: ");
1526                print();
1527                System.out.println(" -> " + done);
1528            }
1529            if (chunkLimit > 0 && buffer.length >= chunkLimit) {
1530                break;
1531            }
1532        } while (!done && chunkLimit == 0);
        // true while the delimiter has not been found yet
1533        return !done;
1534
1535    } // scanData(String, XMLStringBuffer, int):boolean
1536
1537    /**
1538     * Skips a character appearing immediately on the input.
1539     * <p>
1540     * <strong>Note:</strong> The character is consumed only if it matches
1541     * the specified character.
1542     *
1543     * @param c The character to skip.
1544     * @param nt The type of the name (element or attribute)
1545     *
1546     * @return Returns true if the character was skipped.
1547     *
1548     * @throws IOException  Thrown if i/o error occurs.
1549     * @throws EOFException Thrown on end of file.
1550     */
1551    protected boolean skipChar(int c, NameType nt) throws IOException {
1552        if (DEBUG_BUFFER) {
1553            System.out.print("(skipChar, '"+(char)c+"': ");
1554            print();
1555            System.out.println();
1556        }
1557
1558        // load more characters, if needed
1559        if (fCurrentEntity.position == fCurrentEntity.count) {
1560            load(0, true, true);
1561        }
1562
1563        // skip character
1564        int offset = fCurrentEntity.position;
1565        int cc = fCurrentEntity.ch[fCurrentEntity.position];
1566        if (cc == c) {
1567            fCurrentEntity.position++;
1568            if (c == '\n') {
1569                fCurrentEntity.lineNumber++;
1570                fCurrentEntity.columnNumber = 1;
1571            } else {
1572                fCurrentEntity.columnNumber++;
1573            }
1574            if (DEBUG_BUFFER) {
1575                System.out.print(")skipChar, '"+(char)c+"': ");
1576                print();
1577                System.out.println(" -> true");
1578            }
1579            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
1580            return true;
1581        } else if (c == '\n' && cc == '\r' && isExternal) {
1582            // handle newlines
1583            if (fCurrentEntity.position == fCurrentEntity.count) {
1584                invokeListeners(1);
1585                fCurrentEntity.ch[0] = (char)cc;
1586                load(1, false, false);
1587            }
1588            fCurrentEntity.position++;
1589            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
1590                fCurrentEntity.position++;
1591            }
1592            fCurrentEntity.lineNumber++;
1593            fCurrentEntity.columnNumber = 1;
1594            if (DEBUG_BUFFER) {
1595                System.out.print(")skipChar, '"+(char)c+"': ");
1596                print();
1597                System.out.println(" -> true");
1598            }
1599            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
1600            return true;
1601        }
1602
1603        // character was not skipped
1604        if (DEBUG_BUFFER) {
1605            System.out.print(")skipChar, '"+(char)c+"': ");
1606            print();
1607            System.out.println(" -> false");
1608        }
1609        return false;
1610
1611    } // skipChar(int):boolean
1612
1613    public boolean isSpace(char ch){
1614        return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
1615    }
1616    /**
1617     * Skips space characters appearing immediately on the input.
1618     * <p>
1619     * <strong>Note:</strong> The characters are consumed only if they are
1620     * space characters.
1621     *
1622     * @return Returns true if at least one space character was skipped.
1623     *
1624     * @throws IOException  Thrown if i/o error occurs.
1625     * @throws EOFException Thrown on end of file.
1626     *
1627     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
1628     */
1629    protected boolean skipSpaces() throws IOException {
1630        if (DEBUG_BUFFER) {
1631            System.out.print("(skipSpaces: ");
1632            print();
1633            System.out.println();
1634        }
1635        //boolean entityChanged = false;
1636        // load more characters, if needed
1637        if (fCurrentEntity.position == fCurrentEntity.count) {
1638            load(0, true, true);
1639        }
1640
1641        //we are doing this check only in skipSpace() because it is called by
1642        //fMiscDispatcher and we want the parser to exit gracefully when document
1643        //is well-formed.
1644        //it is possible that end of document is reached and
1645        //fCurrentEntity becomes null
1646        //nothing was read so entity changed  'false' should be returned.
1647        if(fCurrentEntity == null){
1648            return false ;
1649        }
1650
1651        // skip spaces
1652        int c = fCurrentEntity.ch[fCurrentEntity.position];
1653        int offset = fCurrentEntity.position - 1;
1654        if (XMLChar.isSpace(c)) {
1655            do {
1656                boolean entityChanged = false;
1657                // handle newlines
1658                if (c == '\n' || (isExternal && c == '\r')) {
1659                    fCurrentEntity.lineNumber++;
1660                    fCurrentEntity.columnNumber = 1;
1661                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
1662                        invokeListeners(1);
1663                        fCurrentEntity.ch[0] = (char)c;
1664                        entityChanged = load(1, true, false);
1665                        if (!entityChanged){
1666                            // the load change the position to be 1,
1667                            // need to restore it when entity not changed
1668                            fCurrentEntity.position = 0;
1669                        }else if(fCurrentEntity == null){
1670                            return true ;
1671                        }
1672                    }
1673                    if (c == '\r' && isExternal) {
1674                        // REVISIT: Does this need to be updated to fix the
1675                        //          #x0D ^#x0A newline normalization problem? -Ac
1676                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
1677                            fCurrentEntity.position--;
1678                        }
1679                    }
1680                } else {
1681                    fCurrentEntity.columnNumber++;
1682                }
1683
1684                //If this is a general entity, spaces within a start element should be counted
1685                checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
1686                offset = fCurrentEntity.position;
1687
1688                // load more characters, if needed
1689                if (!entityChanged){
1690                    fCurrentEntity.position++;
1691                }
1692
1693                if (fCurrentEntity.position == fCurrentEntity.count) {
1694                    load(0, true, true);
1695
1696                    //we are doing this check only in skipSpace() because it is called by
1697                    //fMiscDispatcher and we want the parser to exit gracefully when document
1698                    //is well-formed.
1699
1700                    //it is possible that end of document is reached and
1701                    //fCurrentEntity becomes null
1702                    //nothing was read so entity changed  'false' should be returned.
1703                    if(fCurrentEntity == null){
1704                        return true ;
1705                    }
1706
1707                }
1708            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
1709            if (DEBUG_BUFFER) {
1710                System.out.print(")skipSpaces: ");
1711                print();
1712                System.out.println(" -> true");
1713            }
1714            return true;
1715        }
1716
1717        // no spaces were found
1718        if (DEBUG_BUFFER) {
1719            System.out.print(")skipSpaces: ");
1720            print();
1721            System.out.println(" -> false");
1722        }
1723        return false;
1724
1725    } // skipSpaces():boolean
1726
1727
1728    /**
1729     * @param length This function checks that following number of characters are available.
1730     * to the underlying buffer.
1731     * @return This function returns true if capacity asked is available.
1732     */
1733    public boolean arrangeCapacity(int length) throws IOException{
1734        return arrangeCapacity(length, false);
1735    }
1736
1737    /**
1738     * @param length This function checks that following number of characters are available.
1739     * to the underlying buffer.
1740     * @param changeEntity a flag to indicate that the underlying function should change the entity
1741     * @return This function returns true if capacity asked is available.
1742     *
1743     */
1744    public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
1745        //check if the capacity is availble in the current buffer
1746        //count is no. of characters in the buffer   [x][m][l]
1747        //position is '0' based
1748        //System.out.println("fCurrent Entity " + fCurrentEntity);
1749        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1750            return true;
1751        }
1752        if(DEBUG_SKIP_STRING){
1753            System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1754            System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1755            System.out.println("length = " + length);
1756        }
1757        boolean entityChanged = false;
1758        //load more characters -- this function shouldn't change the entity
1759        while((fCurrentEntity.count - fCurrentEntity.position) < length){
1760            if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
1761                invokeListeners(0);
1762                System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
1763                fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
1764                fCurrentEntity.position = 0;
1765            }
1766
1767            if((fCurrentEntity.count - fCurrentEntity.position) < length){
1768                int pos = fCurrentEntity.position;
1769                invokeListeners(pos);
1770                entityChanged = load(fCurrentEntity.count, changeEntity, false);
1771                fCurrentEntity.position = pos;
1772                if(entityChanged)break;
1773            }
1774            if(DEBUG_SKIP_STRING){
1775                System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
1776                System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
1777                System.out.println("length = " + length);
1778            }
1779        }
1780        //load changes the position.. set it back to the point where we started.
1781
1782        //after loading check again.
1783        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
1784            return true;
1785        } else {
1786            return false;
1787        }
1788    }
1789
1790    /**
1791     * Skips the specified string appearing immediately on the input.
1792     * <p>
1793     * <strong>Note:</strong> The characters are consumed only if all
1794     * the characters are skipped.
1795     *
1796     * @param s The string to skip.
1797     *
1798     * @return Returns true if the string was skipped.
1799     *
1800     * @throws IOException  Thrown if i/o error occurs.
1801     * @throws EOFException Thrown on end of file.
1802     */
1803    protected boolean skipString(String s) throws IOException {
1804
1805        final int length = s.length();
1806
1807        //first make sure that required capacity is avaible
1808        if(arrangeCapacity(length, false)){
1809            final int beforeSkip = fCurrentEntity.position ;
1810            int afterSkip = fCurrentEntity.position + length - 1 ;
1811            if(DEBUG_SKIP_STRING){
1812                System.out.println("skipString,length = " + s + "," + length);
1813                System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip,  length));
1814            }
1815
1816            //s.charAt() indexes are 0 to 'Length -1' based.
1817            int i = length - 1 ;
1818            //check from reverse
1819            while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){
1820                if(afterSkip-- == beforeSkip){
1821                    fCurrentEntity.position = fCurrentEntity.position + length ;
1822                    fCurrentEntity.columnNumber += length;
1823                    if (!detectingVersion) {
1824                        checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
1825                    }
1826                    return true;
1827                }
1828            }
1829        }
1830
1831        return false;
1832    } // skipString(String):boolean
1833
1834    protected boolean skipString(char [] s) throws IOException {
1835
1836        final int length = s.length;
1837        //first make sure that required capacity is avaible
1838        if(arrangeCapacity(length, false)){
1839            int beforeSkip = fCurrentEntity.position;
1840
1841            if(DEBUG_SKIP_STRING){
1842                System.out.println("skipString,length = " + new String(s) + "," + length);
1843                System.out.println("skipString,length = " + new String(s) + "," + length);
1844            }
1845
1846            for(int i=0;i<length;i++){
1847                if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){
1848                   return false;
1849                }
1850            }
1851            fCurrentEntity.position = fCurrentEntity.position + length ;
1852            fCurrentEntity.columnNumber += length;
1853            if (!detectingVersion) {
1854                checkEntityLimit(null, fCurrentEntity, beforeSkip, length);
1855            }
1856            return true;
1857
1858        }
1859
1860        return false;
1861    }
1862
1863    //
1864    // Locator methods
1865    //
1866    //
1867    // Private methods
1868    //
1869
1870    /**
1871     * Loads a chunk of text.
1872     *
1873     * @param offset       The offset into the character buffer to
1874     *                     read the next batch of characters.
1875     * @param changeEntity True if the load should change entities
1876     *                     at the end of the entity, otherwise leave
1877     *                     the current entity in place and the entity
1878     *                     boundary will be signaled by the return
1879     *                     value.
1880     * @param notify       Determine whether to notify listeners of
1881     *                     the event
1882     *
1883     * @returns Returns true if the entity changed as a result of this
1884     *          load operation.
1885     */
1886    final boolean load(int offset, boolean changeEntity, boolean notify)
1887    throws IOException {
1888        if (DEBUG_BUFFER) {
1889            System.out.print("(load, "+offset+": ");
1890            print();
1891            System.out.println();
1892        }
1893        if (notify) {
1894            invokeListeners(offset);
1895        }
1896        //maintaing the count till last load
1897        fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ;
1898        // read characters
1899        int length = fCurrentEntity.ch.length - offset;
1900        if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) {
1901            length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE;
1902        }
1903        if (DEBUG_BUFFER) System.out.println("  length to try to read: "+length);
1904        int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length);
1905        if (DEBUG_BUFFER) System.out.println("  length actually read:  "+count);
1906
1907        // reset count and position
1908        boolean entityChanged = false;
1909        if (count != -1) {
1910            if (count != 0) {
1911                // record the last count
1912                fCurrentEntity.fLastCount = count;
1913                fCurrentEntity.count = count + offset;
1914                fCurrentEntity.position = offset;
1915            }
1916        }
1917        // end of this entity
1918        else {
1919            fCurrentEntity.count = offset;
1920            fCurrentEntity.position = offset;
1921            entityChanged = true;
1922
1923            if (changeEntity) {
1924                //notify the entity manager about the end of entity
1925                fEntityManager.endEntity();
1926                //return if the current entity becomes null
1927                if(fCurrentEntity == null){
1928                    throw END_OF_DOCUMENT_ENTITY;
1929                }
1930                // handle the trailing edges
1931                if (fCurrentEntity.position == fCurrentEntity.count) {
1932                    load(0, true, false);
1933                }
1934            }
1935
1936        }
1937        if (DEBUG_BUFFER) {
1938            System.out.print(")load, "+offset+": ");
1939            print();
1940            System.out.println();
1941        }
1942
1943        return entityChanged;
1944
1945    } // load(int, boolean):boolean
1946
1947    /**
1948     * Creates a reader capable of reading the given input stream in
1949     * the specified encoding.
1950     *
1951     * @param inputStream  The input stream.
1952     * @param encoding     The encoding name that the input stream is
1953     *                     encoded using. If the user has specified that
1954     *                     Java encoding names are allowed, then the
1955     *                     encoding name may be a Java encoding name;
1956     *                     otherwise, it is an ianaEncoding name.
1957     * @param isBigEndian   For encodings (like uCS-4), whose names cannot
1958     *                      specify a byte order, this tells whether the order is bigEndian.  null menas
1959     *                      unknown or not relevant.
1960     *
1961     * @return Returns a reader.
1962     */
1963    protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian)
1964    throws IOException {
1965
1966        // normalize encoding name
1967        if (encoding == null) {
1968            encoding = "UTF-8";
1969        }
1970
1971        // try to use an optimized reader
1972        String ENCODING = encoding.toUpperCase(Locale.ENGLISH);
1973        if (ENCODING.equals("UTF-8")) {
1974            if (DEBUG_ENCODINGS) {
1975                System.out.println("$$$ creating UTF8Reader");
1976            }
1977            return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() );
1978        }
1979        if (ENCODING.equals("US-ASCII")) {
1980            if (DEBUG_ENCODINGS) {
1981                System.out.println("$$$ creating ASCIIReader");
1982            }
1983            return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
1984        }
1985        if(ENCODING.equals("ISO-10646-UCS-4")) {
1986            if(isBigEndian != null) {
1987                boolean isBE = isBigEndian.booleanValue();
1988                if(isBE) {
1989                    return new UCSReader(inputStream, UCSReader.UCS4BE);
1990                } else {
1991                    return new UCSReader(inputStream, UCSReader.UCS4LE);
1992                }
1993            } else {
1994                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1995                        "EncodingByteOrderUnsupported",
1996                        new Object[] { encoding },
1997                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
1998            }
1999        }
2000        if(ENCODING.equals("ISO-10646-UCS-2")) {
2001            if(isBigEndian != null) { // sould never happen with this encoding...
2002                boolean isBE = isBigEndian.booleanValue();
2003                if(isBE) {
2004                    return new UCSReader(inputStream, UCSReader.UCS2BE);
2005                } else {
2006                    return new UCSReader(inputStream, UCSReader.UCS2LE);
2007                }
2008            } else {
2009                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2010                        "EncodingByteOrderUnsupported",
2011                        new Object[] { encoding },
2012                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
2013            }
2014        }
2015
2016        // check for valid name
2017        boolean validIANA = XMLChar.isValidIANAEncoding(encoding);
2018        boolean validJava = XMLChar.isValidJavaEncoding(encoding);
2019        if (!validIANA || (fAllowJavaEncodings && !validJava)) {
2020            fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2021                    "EncodingDeclInvalid",
2022                    new Object[] { encoding },
2023                    XMLErrorReporter.SEVERITY_FATAL_ERROR);
2024                    // NOTE: AndyH suggested that, on failure, we use ISO Latin 1
2025                    //       because every byte is a valid ISO Latin 1 character.
2026                    //       It may not translate correctly but if we failed on
2027                    //       the encoding anyway, then we're expecting the content
2028                    //       of the document to be bad. This will just prevent an
2029                    //       invalid UTF-8 sequence to be detected. This is only
2030                    //       important when continue-after-fatal-error is turned
2031                    //       on. -Ac
2032                    encoding = "ISO-8859-1";
2033        }
2034
2035        // try to use a Java reader
2036        String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING);
2037        if (javaEncoding == null) {
2038            if(fAllowJavaEncodings) {
2039                javaEncoding = encoding;
2040            } else {
2041                fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
2042                        "EncodingDeclInvalid",
2043                        new Object[] { encoding },
2044                        XMLErrorReporter.SEVERITY_FATAL_ERROR);
2045                        // see comment above.
2046                        javaEncoding = "ISO8859_1";
2047            }
2048        }
2049        else if (javaEncoding.equals("ASCII")) {
2050            if (DEBUG_ENCODINGS) {
2051                System.out.println("$$$ creating ASCIIReader");
2052            }
2053            return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale());
2054        }
2055
2056        if (DEBUG_ENCODINGS) {
2057            System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding);
2058            if (javaEncoding == encoding) {
2059                System.out.print(" (IANA encoding)");
2060            }
2061            System.out.println();
2062        }
2063        return new InputStreamReader(inputStream, javaEncoding);
2064
2065    } // createReader(InputStream,String, Boolean): Reader
2066
2067    /**
2068     * Returns the IANA encoding name that is auto-detected from
2069     * the bytes specified, with the endian-ness of that encoding where appropriate.
2070     *
2071     * @param b4    The first four bytes of the input.
2072     * @param count The number of bytes actually read.
2073     * @return a 2-element array:  the first element, an IANA-encoding string,
2074     *  the second element a Boolean which is true iff the document is big endian, false
2075     *  if it's little-endian, and null if the distinction isn't relevant.
2076     */
2077    protected Object[] getEncodingName(byte[] b4, int count) {
2078
2079        if (count < 2) {
2080            return new Object[]{"UTF-8", null};
2081        }
2082
2083        // UTF-16, with BOM
2084        int b0 = b4[0] & 0xFF;
2085        int b1 = b4[1] & 0xFF;
2086        if (b0 == 0xFE && b1 == 0xFF) {
2087            // UTF-16, big-endian
2088            return new Object [] {"UTF-16BE", new Boolean(true)};
2089        }
2090        if (b0 == 0xFF && b1 == 0xFE) {
2091            // UTF-16, little-endian
2092            return new Object [] {"UTF-16LE", new Boolean(false)};
2093        }
2094
2095        // default to UTF-8 if we don't have enough bytes to make a
2096        // good determination of the encoding
2097        if (count < 3) {
2098            return new Object [] {"UTF-8", null};
2099        }
2100
2101        // UTF-8 with a BOM
2102        int b2 = b4[2] & 0xFF;
2103        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
2104            return new Object [] {"UTF-8", null};
2105        }
2106
2107        // default to UTF-8 if we don't have enough bytes to make a
2108        // good determination of the encoding
2109        if (count < 4) {
2110            return new Object [] {"UTF-8", null};
2111        }
2112
2113        // other encodings
2114        int b3 = b4[3] & 0xFF;
2115        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) {
2116            // UCS-4, big endian (1234)
2117            return new Object [] {"ISO-10646-UCS-4", new Boolean(true)};
2118        }
2119        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) {
2120            // UCS-4, little endian (4321)
2121            return new Object [] {"ISO-10646-UCS-4", new Boolean(false)};
2122        }
2123        if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) {
2124            // UCS-4, unusual octet order (2143)
2125            // REVISIT: What should this be?
2126            return new Object [] {"ISO-10646-UCS-4", null};
2127        }
2128        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) {
2129            // UCS-4, unusual octect order (3412)
2130            // REVISIT: What should this be?
2131            return new Object [] {"ISO-10646-UCS-4", null};
2132        }
2133        if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) {
2134            // UTF-16, big-endian, no BOM
2135            // (or could turn out to be UCS-2...
2136            // REVISIT: What should this be?
2137            return new Object [] {"UTF-16BE", new Boolean(true)};
2138        }
2139        if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) {
2140            // UTF-16, little-endian, no BOM
2141            // (or could turn out to be UCS-2...
2142            return new Object [] {"UTF-16LE", new Boolean(false)};
2143        }
2144        if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) {
2145            // EBCDIC
2146            // a la xerces1, return CP037 instead of EBCDIC here
2147            return new Object [] {"CP037", null};
2148        }
2149
2150        // default encoding
2151        return new Object [] {"UTF-8", null};
2152
2153    } // getEncodingName(byte[],int):Object[]
2154
    /*
     * TODO: endEntity() still needs to be implemented here. Its documentation
     * is intentionally kept as a reminder:
     *
     *   Ends an entity.
     *
     *   @throws XNIException Thrown by entity handler to signal an error.
     */
    //
2162    /** Prints the contents of the buffer. */
2163    final void print() {
2164        if (DEBUG_BUFFER) {
2165            if (fCurrentEntity != null) {
2166                System.out.print('[');
2167                System.out.print(fCurrentEntity.count);
2168                System.out.print(' ');
2169                System.out.print(fCurrentEntity.position);
2170                if (fCurrentEntity.count > 0) {
2171                    System.out.print(" \"");
2172                    for (int i = 0; i < fCurrentEntity.count; i++) {
2173                        if (i == fCurrentEntity.position) {
2174                            System.out.print('^');
2175                        }
2176                        char c = fCurrentEntity.ch[i];
2177                        switch (c) {
2178                            case '\n': {
2179                                System.out.print("\\n");
2180                                break;
2181                            }
2182                            case '\r': {
2183                                System.out.print("\\r");
2184                                break;
2185                            }
2186                            case '\t': {
2187                                System.out.print("\\t");
2188                                break;
2189                            }
2190                            case '\\': {
2191                                System.out.print("\\\\");
2192                                break;
2193                            }
2194                            default: {
2195                                System.out.print(c);
2196                            }
2197                        }
2198                    }
2199                    if (fCurrentEntity.position == fCurrentEntity.count) {
2200                        System.out.print('^');
2201                    }
2202                    System.out.print('"');
2203                }
2204                System.out.print(']');
2205                System.out.print(" @ ");
2206                System.out.print(fCurrentEntity.lineNumber);
2207                System.out.print(',');
2208                System.out.print(fCurrentEntity.columnNumber);
2209            } else {
2210                System.out.print("*NO CURRENT ENTITY*");
2211            }
2212        }
2213    }
2214
2215    /**
2216     * Registers the listener object and provides callback.
2217     * @param listener listener to which call back should be provided when scanner buffer
2218     * is being changed.
2219     */
2220    public void registerListener(XMLBufferListener listener) {
2221        if (!listeners.contains(listener)) {
2222            listeners.add(listener);
2223        }
2224    }
2225
2226    /**
2227     *
2228     * @param loadPos Starting position from which new data is being loaded into scanner buffer.
2229     */
2230    public void invokeListeners(int loadPos){
2231        for (int i=0; i<listeners.size(); i++) {
2232            listeners.get(i).refresh(loadPos);
2233        }
2234    }
2235
2236    /**
2237     * Skips space characters appearing immediately on the input that would
2238     * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
2239     * normalization is performed. This is useful when scanning structures
2240     * such as the XMLDecl and TextDecl that can only contain US-ASCII
2241     * characters.
2242     * <p>
2243     * <strong>Note:</strong> The characters are consumed only if they would
2244     * match non-terminal S before end of line normalization is performed.
2245     *
2246     * @return Returns true if at least one space character was skipped.
2247     *
2248     * @throws IOException  Thrown if i/o error occurs.
2249     * @throws EOFException Thrown on end of file.
2250     *
2251     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
2252     */
2253    protected final boolean skipDeclSpaces() throws IOException {
2254        if (DEBUG_BUFFER) {
2255            System.out.print("(skipDeclSpaces: ");
2256            //XMLEntityManager.print(fCurrentEntity);
2257            System.out.println();
2258        }
2259
2260        // load more characters, if needed
2261        if (fCurrentEntity.position == fCurrentEntity.count) {
2262            load(0, true, false);
2263        }
2264
2265        // skip spaces
2266        int c = fCurrentEntity.ch[fCurrentEntity.position];
2267        if (XMLChar.isSpace(c)) {
2268            boolean external = fCurrentEntity.isExternal();
2269            do {
2270                boolean entityChanged = false;
2271                // handle newlines
2272                if (c == '\n' || (external && c == '\r')) {
2273                    fCurrentEntity.lineNumber++;
2274                    fCurrentEntity.columnNumber = 1;
2275                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
2276                        fCurrentEntity.ch[0] = (char)c;
2277                        entityChanged = load(1, true, false);
2278                        if (!entityChanged)
2279                            // the load change the position to be 1,
2280                            // need to restore it when entity not changed
2281                            fCurrentEntity.position = 0;
2282                    }
2283                    if (c == '\r' && external) {
2284                        // REVISIT: Does this need to be updated to fix the
2285                        //          #x0D ^#x0A newline normalization problem? -Ac
2286                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
2287                            fCurrentEntity.position--;
2288                        }
2289                    }
2290                    /*** NEWLINE NORMALIZATION ***
2291                     * else {
2292                     * if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
2293                     * && external) {
2294                     * fCurrentEntity.position++;
2295                     * }
2296                     * }
2297                     * /***/
2298                } else {
2299                    fCurrentEntity.columnNumber++;
2300                }
2301                // load more characters, if needed
2302                if (!entityChanged)
2303                    fCurrentEntity.position++;
2304                if (fCurrentEntity.position == fCurrentEntity.count) {
2305                    load(0, true, false);
2306                }
2307            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
2308            if (DEBUG_BUFFER) {
2309                System.out.print(")skipDeclSpaces: ");
2310                //  XMLEntityManager.print(fCurrentEntity);
2311                System.out.println(" -> true");
2312            }
2313            return true;
2314        }
2315
2316        // no spaces were found
2317        if (DEBUG_BUFFER) {
2318            System.out.print(")skipDeclSpaces: ");
2319            //XMLEntityManager.print(fCurrentEntity);
2320            System.out.println(" -> false");
2321        }
2322        return false;
2323
2324    } // skipDeclSpaces():boolean
2325
2326
2327} // class XMLEntityScanner
2328