1/*
2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved.
3 */
4
5/*
6 * Licensed to the Apache Software Foundation (ASF) under one or more
7 * contributor license agreements.  See the NOTICE file distributed with
8 * this work for additional information regarding copyright ownership.
9 * The ASF licenses this file to You under the Apache License, Version 2.0
10 * (the "License"); you may not use this file except in compliance with
11 * the License.  You may obtain a copy of the License at
12 *
13 *     http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 */
21
22package com.sun.org.apache.xerces.internal.impl;
23
24import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
25import com.sun.org.apache.xerces.internal.util.AugmentationsImpl;
26import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl;
27import com.sun.org.apache.xerces.internal.util.XMLChar;
28import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
29import com.sun.org.apache.xerces.internal.util.XMLSymbols;
30import com.sun.org.apache.xerces.internal.xni.QName;
31import com.sun.org.apache.xerces.internal.xni.XMLAttributes;
32import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler;
33import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier;
34import com.sun.org.apache.xerces.internal.xni.XMLString;
35import com.sun.org.apache.xerces.internal.xni.XNIException;
36import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent;
37import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager;
38import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException;
39import com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner;
40import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource;
41import com.sun.org.apache.xerces.internal.xni.Augmentations;
42import com.sun.org.apache.xerces.internal.utils.SecuritySupport;
43import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager;
44import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit;
45import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager;
46import com.sun.xml.internal.stream.XMLBufferListener;
47import com.sun.xml.internal.stream.XMLEntityStorage;
48import com.sun.xml.internal.stream.dtd.DTDGrammarUtil;
49import java.io.EOFException;
50import java.io.IOException;
51import javax.xml.XMLConstants;
52import javax.xml.stream.XMLInputFactory;
53import javax.xml.stream.XMLStreamConstants;
54import javax.xml.stream.events.XMLEvent;
55import jdk.xml.internal.JdkXmlUtils;
56
57/**
58 *
59 * This class is responsible for scanning the structure and content
60 * of document fragments.
61 *
 * This class has been modified to follow the new design, which is better
 * suited to building an efficient pull parser. Many improvements have been
 * made, and code has been added to support StAX functionality/features.
65 *
66 * @author Neeraj Bajaj SUN Microsystems
67 * @author K.Venugopal SUN Microsystems
68 * @author Glenn Marcy, IBM
69 * @author Andy Clark, IBM
70 * @author Arnaud  Le Hors, IBM
71 * @author Eric Ye, IBM
72 * @author Sunitha Reddy, SUN Microsystems
73 *
74 */
75public class XMLDocumentFragmentScannerImpl
76        extends XMLScanner
77        implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener {
78
79    //
80    // Constants
81    //
82
83    protected int fElementAttributeLimit, fXMLNameLimit;
84
85    /** External subset resolver. **/
86    protected ExternalSubsetResolver fExternalSubsetResolver;
87
88    // scanner states
89
90    //XXX this should be divided into more states.
91    /** Scanner state: start of markup. */
92    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;
93
94    /** Scanner state: content. */
95    protected static final int SCANNER_STATE_CONTENT = 22;
96
97    /** Scanner state: processing instruction. */
98    protected static final int SCANNER_STATE_PI = 23;
99
100    /** Scanner state: DOCTYPE. */
101    protected static final int SCANNER_STATE_DOCTYPE = 24;
102
103    /** Scanner state: XML Declaration */
104    protected static final int SCANNER_STATE_XML_DECL = 25;
105
106    /** Scanner state: root element. */
107    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;
108
109    /** Scanner state: comment. */
110    protected static final int SCANNER_STATE_COMMENT = 27;
111
112    /** Scanner state: reference. */
113    protected static final int SCANNER_STATE_REFERENCE = 28;
114
115    // <book type="hard"> reading attribute name 'type'
116    protected static final int SCANNER_STATE_ATTRIBUTE = 29;
117
118    // <book type="hard"> //reading attribute value.
119    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;
120
121    /** Scanner state: trailing misc. USED BY DOCUMENT_SCANNER_IMPL*/
122    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;
123
124    /** Scanner state: end of input. */
125    protected static final int SCANNER_STATE_END_OF_INPUT = 33;
126
127    /** Scanner state: terminated. */
128    protected static final int SCANNER_STATE_TERMINATED = 34;
129
130    /** Scanner state: CDATA section. */
131    protected static final int SCANNER_STATE_CDATA = 35;
132
133    /** Scanner state: Text declaration. */
134    protected static final int SCANNER_STATE_TEXT_DECL = 36;
135
    /** Scanner state: character data. */
137    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;
138
139    //<book type="hard">foo</book>
140    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;
141
142    //<book type="hard">foo</book> reading </book>
143    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;
144
145    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;
146    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;
147
148    // feature identifiers
149
150
    /** Feature identifier: notify built-in references. */
152    protected static final String NOTIFY_BUILTIN_REFS =
153            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;
154
155    /** Property identifier: entity resolver. */
156    protected static final String ENTITY_RESOLVER =
157            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;
158
159    /** Feature identifier: standard uri conformant */
160    protected static final String STANDARD_URI_CONFORMANT =
161            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;
162
163    /** Property identifier: Security property manager. */
164    private static final String XML_SECURITY_PROPERTY_MANAGER =
165            Constants.XML_SECURITY_PROPERTY_MANAGER;
166
167    /** access external dtd: file protocol
168     *  For DOM/SAX, the secure feature is set to true by default
169     */
170    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;
171
172    // recognized features and properties
173
174    /** Recognized features. */
175    private static final String[] RECOGNIZED_FEATURES = {
176                NAMESPACES,
177                VALIDATION,
178                NOTIFY_BUILTIN_REFS,
179                NOTIFY_CHAR_REFS,
180                Constants.STAX_REPORT_CDATA_EVENT,
181                XMLConstants.USE_CATALOG
182    };
183
184    /** Feature defaults. */
185    private static final Boolean[] FEATURE_DEFAULTS = {
186                Boolean.TRUE,
187                null,
188                Boolean.FALSE,
189                Boolean.FALSE,
190                Boolean.TRUE,
191                JdkXmlUtils.USE_CATALOG_DEFAULT
192    };
193
194    /** Recognized properties. */
195    private static final String[] RECOGNIZED_PROPERTIES = {
196                SYMBOL_TABLE,
197                ERROR_REPORTER,
198                ENTITY_MANAGER,
199                XML_SECURITY_PROPERTY_MANAGER,
200                JdkXmlUtils.CATALOG_DEFER,
201                JdkXmlUtils.CATALOG_FILES,
202                JdkXmlUtils.CATALOG_PREFER,
203                JdkXmlUtils.CATALOG_RESOLVE,
204                JdkXmlUtils.CDATA_CHUNK_SIZE
205    };
206
207    /** Property defaults. */
208    private static final Object[] PROPERTY_DEFAULTS = {
209                null,
210                null,
211                null,
212                null,
213                null,
214                null,
215                null,
216                null,
217                JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT
218    };
219
220
221    private static final char [] CDATA = {'[','C','D','A','T','A','['};
222    static final char [] XMLDECL = {'<','?','x','m','l'};
223    // private static final char [] endTag = {'<','/'};
224    // debugging
225
226    /** Debug scanner state. */
227    private static final boolean DEBUG_SCANNER_STATE = false;
228
229    /** Debug driver. */
230    private static final boolean DEBUG_DISPATCHER = false;
231
232    /** Debug content driver scanning. */
233    protected static final boolean DEBUG_START_END_ELEMENT = false;
234
235    /** Debug driver next */
236    protected static final boolean DEBUG = false;
237
238    //
239    // Data
240    //
241
242    // protected data
243
244    /** Document handler. */
245    protected XMLDocumentHandler fDocumentHandler;
246    protected int fScannerLastState ;
247
248    /** Entity Storage */
249    protected XMLEntityStorage fEntityStore;
250
251    /** Entity stack. */
252    protected int[] fEntityStack = new int[4];
253
254    /** Markup depth. */
255    protected int fMarkupDepth;
256
257    //is the element empty
258    protected boolean fEmptyElement ;
259
    //track if we are reading attributes, this is useful while
    //there is a callback
262    protected boolean fReadingAttributes = false;
263
264    /** Scanner state. */
265    protected int fScannerState;
266
267    /** SubScanner state: inside scanContent method. */
268    protected boolean fInScanContent = false;
269    protected boolean fLastSectionWasCData = false;
270    protected boolean fCDataStart = false;
271    protected boolean fInCData = false;
272    protected boolean fCDataEnd = false;
273    protected boolean fLastSectionWasEntityReference = false;
274    protected boolean fLastSectionWasCharacterData = false;
275
276    /** has external dtd */
277    protected boolean fHasExternalDTD;
278
279    /** Standalone. */
280    protected boolean fStandaloneSet;
281    protected boolean fStandalone;
282    protected String fVersion;
283
284    // element information
285
286    /** Current element. */
287    protected QName fCurrentElement;
288
289    /** Element stack. */
290    protected ElementStack fElementStack = new ElementStack();
291    protected ElementStack2 fElementStack2 = new ElementStack2();
292
293    // other info
294
295    /** Document system identifier.
296     * REVISIT:  So what's this used for?  - NG
297     * protected String fDocumentSystemId;
298     ******/
299
300    protected String fPITarget ;
301
302    //xxx do we need to create an extra XMLString object... look for using fTempString for collecting all the data values
303    protected XMLString fPIData  = new XMLString();
304
305    // features
306
307
308    /** Notify built-in references. */
309    protected boolean fNotifyBuiltInRefs = false;
310
311    //STAX related properties
312    //defaultValues.
313    protected boolean fSupportDTD = true;
314    protected boolean fReplaceEntityReferences = true;
315    protected boolean fSupportExternalEntities = false;
316    protected boolean fReportCdataEvent = false ;
317    protected boolean fIsCoalesce = false ;
318    protected String fDeclaredEncoding =  null;
319    /** Xerces Feature: Disallow doctype declaration. */
320    protected boolean fDisallowDoctype = false;
321
322    /**
323     * CDATA chunk size limit
324     */
325    private int fChunkSize;
326
327    /**
328     * comma-delimited list of protocols that are allowed for the purpose
329     * of accessing external dtd or entity references
330     */
331    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;
332
333    /**
334     * standard uri conformant (strict uri).
335     * http://apache.org/xml/features/standard-uri-conformant
336     */
337    protected boolean fStrictURI;
338
339    // drivers
340
341    /** Active driver. */
342    protected Driver fDriver;
343
344    /** Content driver. */
345    protected Driver fContentDriver = createContentDriver();
346
347    // temporary variables
348
349    /** Element QName. */
350    protected QName fElementQName = new QName();
351
352    /** Attribute QName. */
353    protected QName fAttributeQName = new QName();
354
355    /**
356     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
357     * implements Iterator interface so we can directly give Attributes in the form of
358     * iterator.
359     */
360    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();
361
362
363    /** String. */
364    protected XMLString fTempString = new XMLString();
365
366    /** String. */
367    protected XMLString fTempString2 = new XMLString();
368
369    /** Array of 3 strings. */
370    private final String[] fStrings = new String[3];
371
372    /** Making the buffer accessible to derived class -- String buffer. */
373    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();
374
375    /** Making the buffer accessible to derived class -- String buffer. */
376    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();
377
378    /** stores character data. */
379    /** Making the buffer accessible to derived class -- stores PI data */
380    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();
381
382    /** Single character array. */
383    private final char[] fSingleChar = new char[1];
384    private String fCurrentEntityName = null;
385
386    // New members
387    protected boolean fScanToEnd = false;
388
389    protected DTDGrammarUtil dtdGrammarUtil= null;
390
391    protected boolean fAddDefaultAttr = false;
392
393    protected boolean foundBuiltInRefs = false;
394
395    /** Built-in reference character event */
396    protected boolean builtInRefCharacterHandled = false;
397
398    //skip element algorithm
399    static final short MAX_DEPTH_LIMIT = 5 ;
400    static final short ELEMENT_ARRAY_LENGTH = 200 ;
401    static final short MAX_POINTER_AT_A_DEPTH = 4 ;
402    static final boolean DEBUG_SKIP_ALGORITHM = false;
    //create an element array of length equal to ELEMENT_ARRAY_LENGTH
404    String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ;
405    //pointer location where last element was skipped
406    short fLastPointerLocation = 0 ;
407    short fElementPointer = 0 ;
408    //2D array to store pointer info
409    short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ;
410    protected String fElementRawname ;
411    protected boolean fShouldSkip = false;
412    protected boolean fAdd = false ;
413    protected boolean fSkip = false;
414
415    /** Reusable Augmentations. */
416    private Augmentations fTempAugmentations = null;
417    //
418    // Constructors
419    //
420
421    /** Default constructor. */
422    public XMLDocumentFragmentScannerImpl() {
423    } // <init>()
424
425    //
426    // XMLDocumentScanner methods
427    //
428
429    /**
430     * Sets the input source.
431     *
432     * @param inputSource The input source.
433     *
434     * @throws IOException Thrown on i/o error.
435     */
436    public void setInputSource(XMLInputSource inputSource) throws IOException {
437        fEntityManager.setEntityHandler(this);
438        fEntityManager.startEntity(false, "$fragment$", inputSource, false, true);
439        // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId());
440    } // setInputSource(XMLInputSource)
441
442    /**
443     * Scans a document.
444     *
445     * @param complete True if the scanner should scan the document
446     *                 completely, pushing all events to the registered
447     *                 document handler. A value of false indicates that
448     *                 that the scanner should only scan the next portion
449     *                 of the document and return. A scanner instance is
450     *                 permitted to completely scan a document if it does
451     *                 not support this "pull" scanning model.
452     *
453     * @return True if there is more to scan, false otherwise.
454     */
455    public boolean scanDocument(boolean complete)
456    throws IOException, XNIException {
457
458        // keep dispatching "events"
459        fEntityManager.setEntityHandler(this);
460        //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler );
461
462        int event = next();
463        do {
464            switch (event) {
465                case XMLStreamConstants.START_DOCUMENT :
466                    //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get
467                    break;
468                case XMLStreamConstants.START_ELEMENT :
469                    //System.out.println(" in scann element");
470                    //fDocumentHandler.startElement(getElementQName(),fAttributes,null);
471                    break;
472                case XMLStreamConstants.CHARACTERS :
473                    fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
474                    fDocumentHandler.characters(getCharacterData(),null);
475                    break;
476                case XMLStreamConstants.SPACE:
477                    //check if getCharacterData() is the right function to retrieve ignorableWhitespace information.
478                    //System.out.println("in the space");
479                    //fDocumentHandler.ignorableWhitespace(getCharacterData(), null);
480                    break;
481                case XMLStreamConstants.ENTITY_REFERENCE :
482                    fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
483                    //entity reference callback are given in startEntity
484                    break;
485                case XMLStreamConstants.PROCESSING_INSTRUCTION :
486                    fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
487                    fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null);
488                    break;
489                case XMLStreamConstants.COMMENT :
490                    fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
491                    fDocumentHandler.comment(getCharacterData(),null);
492                    break;
493                case XMLStreamConstants.DTD :
494                    //all DTD related callbacks are handled in DTDScanner.
495                    //1. Stax doesn't define DTD states as it does for XML Document.
496                    //therefore we don't need to take care of anything here. So Just break;
497                    break;
498                case XMLStreamConstants.CDATA:
499                   fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity);
500                    if (fCDataStart) {
501                        fDocumentHandler.startCDATA(null);
502                        fCDataStart = false;
503                        fInCData = true;
504                    }
505
506                    fDocumentHandler.characters(getCharacterData(),null);
507                    if (fCDataEnd) {
508                        fDocumentHandler.endCDATA(null);
509                        fCDataEnd = false;
510                    }
511                    break;
512                case XMLStreamConstants.NOTATION_DECLARATION :
513                    break;
514                case XMLStreamConstants.ENTITY_DECLARATION :
515                    break;
516                case XMLStreamConstants.NAMESPACE :
517                    break;
518                case XMLStreamConstants.ATTRIBUTE :
519                    break;
520                case XMLStreamConstants.END_ELEMENT :
521                    //do not give callback here.
522                    //this callback is given in scanEndElement function.
523                    //fDocumentHandler.endElement(getElementQName(),null);
524                    break;
525                default :
526                    // Errors should have already been handled by the Scanner
527                    return false;
528
529            }
530            //System.out.println("here in before calling next");
531            event = next();
532            //System.out.println("here in after calling next");
533        } while (event!=XMLStreamConstants.END_DOCUMENT && complete);
534
535        if(event == XMLStreamConstants.END_DOCUMENT) {
536            fDocumentHandler.endDocument(null);
537            return false;
538        }
539
540        return true;
541
542    } // scanDocument(boolean):boolean
543
544
545
546    public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){
547        if(fScannerLastState == XMLEvent.END_ELEMENT){
548            fElementQName.setValues(fElementStack.getLastPoppedElement());
549        }
550        return fElementQName ;
551    }
552
553    /** return the next state on the input
554     * @return int
555     */
556
557    public int next() throws IOException, XNIException {
558        return fDriver.next();
559    }
560
561    //
562    // XMLComponent methods
563    //
564
565    /**
566     * Resets the component. The component can query the component manager
567     * about any features and properties that affect the operation of the
568     * component.
569     *
570     * @param componentManager The component manager.
571     *
572     * @throws SAXException Thrown by component on initialization error.
573     *                      For example, if a feature or property is
574     *                      required for the operation of the component, the
575     *                      component manager may throw a
576     *                      SAXNotRecognizedException or a
577     *                      SAXNotSupportedException.
578     */
579
580    public void reset(XMLComponentManager componentManager)
581    throws XMLConfigurationException {
582
583        super.reset(componentManager);
584
585        // other settings
586        // fDocumentSystemId = null;
587
588        // sax features
589        //fAttributes.setNamespaces(fNamespaces);
590
591        // xerces features
592        fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true);
593        fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null);
594        fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false);
595
596        Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null);
597        fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ?
598                (ExternalSubsetResolver) resolver : null;
599
600        //attribute
601        fReadingAttributes = false;
602        //xxx: external entities are supported in Xerces
603        // it would be good to define feature for this case
604        fSupportExternalEntities = true;
605        fReplaceEntityReferences = true;
606        fIsCoalesce = false;
607
608        // setup Driver
609        setScannerState(SCANNER_STATE_CONTENT);
610        setDriver(fContentDriver);
611
612        // JAXP 1.5 features and properties
613        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
614                componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
615        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
616
617        fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);
618        fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE),
619                JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT);
620
621        resetCommon();
622        //fEntityManager.test();
623    } // reset(XMLComponentManager)
624
625
626    public void reset(PropertyManager propertyManager){
627
628        super.reset(propertyManager);
629
630        // other settings
631        // fDocumentSystemId = null;
632        fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE));
633        fNotifyBuiltInRefs = false ;
634
635        //fElementStack2.clear();
636        //fReplaceEntityReferences = true;
637        //fSupportExternalEntities = true;
638        Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES);
639        fReplaceEntityReferences = bo;
640        bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES);
641        fSupportExternalEntities = bo;
642        Boolean cdata = (Boolean)propertyManager.getProperty(
643                Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ;
644        if(cdata != null)
645            fReportCdataEvent = cdata ;
646        Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ;
647        if(coalesce != null)
648            fIsCoalesce = coalesce;
649        fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ;
650        //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true,
651        //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application
652        fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences;
653        // setup Driver
654        //we dont need to do this -- nb.
655        //setScannerState(SCANNER_STATE_CONTENT);
656        //setDriver(fContentDriver);
657        //fEntityManager.test();
658
659         // JAXP 1.5 features and properties
660        XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)
661                propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
662        fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
663
664        fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER);
665        fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE),
666                JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT);
667        resetCommon();
668    } // reset(XMLComponentManager)
669
670    void resetCommon() {
671        // initialize vars
672        fMarkupDepth = 0;
673        fCurrentElement = null;
674        fElementStack.clear();
675        fHasExternalDTD = false;
676        fStandaloneSet = false;
677        fStandalone = false;
678        fInScanContent = false;
679        //skipping algorithm
680        fShouldSkip = false;
681        fAdd = false;
682        fSkip = false;
683
684        fEntityStore = fEntityManager.getEntityStore();
685        dtdGrammarUtil = null;
686
687        if (fSecurityManager != null) {
688            fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT);
689            fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT);
690        } else {
691            fElementAttributeLimit = 0;
692            fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue();
693        }
694        fLimitAnalyzer = fEntityManager.fLimitAnalyzer;
695    }
696
697    /**
698     * Returns a list of feature identifiers that are recognized by
699     * this component. This method may return null if no features
700     * are recognized by this component.
701     */
702    public String[] getRecognizedFeatures() {
703        return RECOGNIZED_FEATURES.clone();
704    } // getRecognizedFeatures():String[]
705
706    /**
707     * Sets the state of a feature. This method is called by the component
708     * manager any time after reset when a feature changes state.
709     * <p>
710     * <strong>Note:</strong> Components should silently ignore features
711     * that do not affect the operation of the component.
712     *
713     * @param featureId The feature identifier.
714     * @param state     The state of the feature.
715     *
716     * @throws SAXNotRecognizedException The component should not throw
717     *                                   this exception.
718     * @throws SAXNotSupportedException The component should not throw
719     *                                  this exception.
720     */
721    public void setFeature(String featureId, boolean state)
722    throws XMLConfigurationException {
723
724        super.setFeature(featureId, state);
725
726        // Xerces properties
727        if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) {
728            String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length());
729            if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) {
730                fNotifyBuiltInRefs = state;
731            }
732        }
733
734    } // setFeature(String,boolean)
735
736    /**
737     * Returns a list of property identifiers that are recognized by
738     * this component. This method may return null if no properties
739     * are recognized by this component.
740     */
741    public String[] getRecognizedProperties() {
742        return RECOGNIZED_PROPERTIES.clone();
743    } // getRecognizedProperties():String[]
744
745    /**
746     * Sets the value of a property. This method is called by the component
747     * manager any time after reset when a property changes value.
748     * <p>
749     * <strong>Note:</strong> Components should silently ignore properties
750     * that do not affect the operation of the component.
751     *
752     * @param propertyId The property identifier.
753     * @param value      The value of the property.
754     *
755     * @throws SAXNotRecognizedException The component should not throw
756     *                                   this exception.
757     * @throws SAXNotSupportedException The component should not throw
758     *                                  this exception.
759     */
760    public void setProperty(String propertyId, Object value)
761    throws XMLConfigurationException {
762
763        super.setProperty(propertyId, value);
764
765        // Xerces properties
766        if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
767            final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length();
768            if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() &&
769                    propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) {
770                fEntityManager = (XMLEntityManager)value;
771                return;
772            }
773            if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() &&
774                    propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) {
775                fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ?
776                    (ExternalSubsetResolver) value : null;
777                return;
778            }
779        }
780
781
782                // Xerces properties
783        if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) {
784            String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length());
785            if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) {
786                fEntityManager = (XMLEntityManager)value;
787            }
788            return;
789        }
790
791        //JAXP 1.5 properties
792        if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER))
793        {
794            XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value;
795            fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);
796        }
797
798    } // setProperty(String,Object)
799
800    /**
801     * Returns the default state for a feature, or null if this
802     * component does not want to report a default value for this
803     * feature.
804     *
805     * @param featureId The feature identifier.
806     *
807     * @since Xerces 2.2.0
808     */
809    public Boolean getFeatureDefault(String featureId) {
810        for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) {
811            if (RECOGNIZED_FEATURES[i].equals(featureId)) {
812                return FEATURE_DEFAULTS[i];
813            }
814        }
815        return null;
816    } // getFeatureDefault(String):Boolean
817
818    /**
819     * Returns the default state for a property, or null if this
820     * component does not want to report a default value for this
821     * property.
822     *
823     * @param propertyId The property identifier.
824     *
825     * @since Xerces 2.2.0
826     */
827    public Object getPropertyDefault(String propertyId) {
828        for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) {
829            if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) {
830                return PROPERTY_DEFAULTS[i];
831            }
832        }
833        return null;
834    } // getPropertyDefault(String):Object
835
836    //
837    // XMLDocumentSource methods
838    //
839
840    /**
841     * setDocumentHandler
842     *
843     * @param documentHandler
844     */
845    public void setDocumentHandler(XMLDocumentHandler documentHandler) {
846        fDocumentHandler = documentHandler;
847        //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this);
848    } // setDocumentHandler(XMLDocumentHandler)
849
850
851    /** Returns the document handler */
852    public XMLDocumentHandler getDocumentHandler(){
853        return fDocumentHandler;
854    }
855
856    //
857    // XMLEntityHandler methods
858    //
859
860    /**
861     * This method notifies of the start of an entity. The DTD has the
862     * pseudo-name of "[dtd]" parameter entity names start with '%'; and
863     * general entities are just specified by their name.
864     *
865     * @param name     The name of the entity.
866     * @param identifier The resource identifier.
867     * @param encoding The auto-detected IANA encoding name of the entity
868     *                 stream. This value will be null in those situations
869     *                 where the entity encoding is not auto-detected (e.g.
870     *                 internal entities or a document entity that is
871     *                 parsed from a java.io.Reader).
872     * @param augs     Additional information that may include infoset augmentations
873     *
874     * @throws XNIException Thrown by handler to signal an error.
875     */
876    public void startEntity(String name,
877            XMLResourceIdentifier identifier,
878            String encoding, Augmentations augs) throws XNIException {
879
880        // keep track of this entity before fEntityDepth is increased
881        if (fEntityDepth == fEntityStack.length) {
882            int[] entityarray = new int[fEntityStack.length * 2];
883            System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length);
884            fEntityStack = entityarray;
885        }
886        fEntityStack[fEntityDepth] = fMarkupDepth;
887
888        super.startEntity(name, identifier, encoding, augs);
889
890        // WFC:  entity declared in external subset in standalone doc
891        if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) {
892            reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE",
893                    new Object[]{name});
894        }
895
896        /** we are not calling the handlers yet.. */
897        // call handler
898        if (fDocumentHandler != null && !fScanningAttribute) {
899            if (!name.equals("[xml]")) {
900                fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs);
901            }
902        }
903
904    } // startEntity(String,XMLResourceIdentifier,String)
905
906    /**
907     * This method notifies the end of an entity. The DTD has the pseudo-name
908     * of "[dtd]" parameter entity names start with '%'; and general entities
909     * are just specified by their name.
910     *
911     * @param name The name of the entity.
912     * @param augs Additional information that may include infoset augmentations
913     *
914     * @throws XNIException Thrown by handler to signal an error.
915     */
916    public void endEntity(String name, Augmentations augs) throws IOException, XNIException {
917
918        /**
919         * // flush possible pending output buffer - see scanContent
920         * if (fInScanContent && fStringBuffer.length != 0
921         * && fDocumentHandler != null) {
922         * fDocumentHandler.characters(fStringBuffer, null);
923         * fStringBuffer.length = 0; // make sure we know it's been flushed
924         * }
925         */
926        super.endEntity(name, augs);
927
928        // make sure markup is properly balanced
929        if (fMarkupDepth != fEntityStack[fEntityDepth]) {
930            reportFatalError("MarkupEntityMismatch", null);
931        }
932
933        /**/
934        // call handler
935        if (fDocumentHandler != null && !fScanningAttribute) {
936            if (!name.equals("[xml]")) {
937                fDocumentHandler.endGeneralEntity(name, augs);
938            }
939        }
940
941
942    } // endEntity(String)
943
944    //
945    // Protected methods
946    //
947
948    // Driver factory methods
949
950    /** Creates a content Driver. */
951    protected Driver createContentDriver() {
952        return new FragmentContentDriver();
953    } // createContentDriver():Driver
954
955    // scanning methods
956
957    /**
958     * Scans an XML or text declaration.
959     * <p>
960     * <pre>
961     * [23] XMLDecl ::= '&lt;?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
962     * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
963     * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' |  "'" EncName "'" )
964     * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
965     * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'")
966     *                 | ('"' ('yes' | 'no') '"'))
967     *
968     * [77] TextDecl ::= '&lt;?xml' VersionInfo? EncodingDecl S? '?>'
969     * </pre>
970     *
971     * @param scanningTextDecl True if a text declaration is to
972     *                         be scanned instead of an XML
973     *                         declaration.
974     */
975    protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl)
976    throws IOException, XNIException {
977
978        // scan decl
979        super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings);
980        fMarkupDepth--;
981
982        // pseudo-attribute values
983        String version = fStrings[0];
984        String encoding = fStrings[1];
985        String standalone = fStrings[2];
986        fDeclaredEncoding = encoding;
987        // set standalone
988        fStandaloneSet = standalone != null;
989        fStandalone = fStandaloneSet && standalone.equals("yes");
990        ///xxx see where its used.. this is not used anywhere.
991        //it may be useful for entity to store this information
992        //but this information is only related with Document Entity.
993        fEntityManager.setStandalone(fStandalone);
994
995
996        // call handler
997        if (fDocumentHandler != null) {
998            if (scanningTextDecl) {
999                fDocumentHandler.textDecl(version, encoding, null);
1000            } else {
1001                fDocumentHandler.xmlDecl(version, encoding, standalone, null);
1002            }
1003        }
1004
1005        if(version != null){
1006            fEntityScanner.setVersion(version);
1007            fEntityScanner.setXMLVersion(version);
1008        }
1009        // set encoding on reader, only if encoding was not specified by the application explicitly
1010        if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) {
1011             fEntityScanner.setEncoding(encoding);
1012        }
1013
1014    } // scanXMLDeclOrTextDecl(boolean)
1015
1016    public String getPITarget(){
1017        return fPITarget ;
1018    }
1019
1020    public XMLStringBuffer getPIData(){
1021        return fContentBuffer ;
1022    }
1023
1024    //XXX: why not this function behave as per the state of the parser?
1025    public XMLString getCharacterData(){
1026        if(fUsebuffer){
1027            return fContentBuffer ;
1028        }else{
1029            return fTempString;
1030        }
1031
1032    }
1033
1034
1035    /**
1036     * Scans a processing data. This is needed to handle the situation
1037     * where a document starts with a processing instruction whose
1038     * target name <em>starts with</em> "xml". (e.g. xmlfoo)
1039     *
1040     * @param target The PI target
1041     * @param data The XMLStringBuffer to fill in with the data
1042     */
1043    protected void scanPIData(String target, XMLStringBuffer data)
1044    throws IOException, XNIException {
1045
1046        super.scanPIData(target, data);
1047
1048        //set the PI target and values
1049        fPITarget = target ;
1050
1051        fMarkupDepth--;
1052
1053    } // scanPIData(String)
1054
1055    /**
1056     * Scans a comment.
1057     * <p>
1058     * <pre>
1059     * [15] Comment ::= '&lt!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
1060     * </pre>
1061     * <p>
1062     * <strong>Note:</strong> Called after scanning past '&lt;!--'
1063     */
1064    protected void scanComment() throws IOException, XNIException {
1065        fContentBuffer.clear();
1066        scanComment(fContentBuffer);
1067        //getTextCharacters can also be called for reading comments
1068        fUsebuffer = true;
1069        fMarkupDepth--;
1070
1071    } // scanComment()
1072
1073    //xxx value returned by this function may not remain valid if another event is scanned.
1074    public String getComment(){
1075        return fContentBuffer.toString();
1076    }
1077
1078    void addElement(String rawname){
1079        if(fElementPointer < ELEMENT_ARRAY_LENGTH){
1080            //storing element raw name in a linear list of array
1081            fElementArray[fElementPointer] = rawname ;
1082            //storing elemnetPointer for particular element depth
1083
1084            if(DEBUG_SKIP_ALGORITHM){
1085                StringBuffer sb = new StringBuffer() ;
1086                sb.append(" Storing element information ") ;
1087                sb.append(" fElementPointer = " + fElementPointer) ;
1088                sb.append(" fElementRawname = " + fElementQName.rawname) ;
1089                sb.append(" fElementStack.fDepth = " + fElementStack.fDepth);
1090                System.out.println(sb.toString()) ;
1091            }
1092
1093            //store pointer information only when element depth is less MAX_DEPTH_LIMIT
1094            if(fElementStack.fDepth < MAX_DEPTH_LIMIT){
1095                short column = storePointerForADepth(fElementPointer);
1096                if(column > 0){
1097                    short pointer = getElementPointer((short)fElementStack.fDepth, (short)(column - 1) );
1098                    //identity comparison shouldn't take much time and we can rely on this
1099                    //since its guaranteed to have same object id for same string.
1100                    if(rawname == fElementArray[pointer]){
1101                        fShouldSkip = true ;
1102                        fLastPointerLocation = pointer ;
1103                        //reset the things and return.
1104                        resetPointer((short)fElementStack.fDepth , column) ;
1105                        fElementArray[fElementPointer] = null ;
1106                        return ;
1107                    }else{
1108                        fShouldSkip = false ;
1109                    }
1110                }
1111            }
1112            fElementPointer++ ;
1113        }
1114    }
1115
1116
1117    void resetPointer(short depth, short column){
1118        fPointerInfo[depth] [column] = (short)0;
1119    }
1120
1121    //returns column information at which pointer was stored.
1122    short storePointerForADepth(short elementPointer){
1123        short depth = (short) fElementStack.fDepth ;
1124
1125        //Stores element pointer locations at particular depth , only 4 pointer locations
1126        //are stored at particular depth for now.
1127        for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){
1128
1129            if(canStore(depth, i)){
1130                fPointerInfo[depth][i] = elementPointer ;
1131                if(DEBUG_SKIP_ALGORITHM){
1132                    StringBuffer sb = new StringBuffer() ;
1133                    sb.append(" Pointer information ") ;
1134                    sb.append(" fElementPointer = " + fElementPointer) ;
1135                    sb.append(" fElementStack.fDepth = " + fElementStack.fDepth);
1136                    sb.append(" column = " + i ) ;
1137                    System.out.println(sb.toString()) ;
1138                }
1139                return i;
1140            }
1141            //else
1142            //pointer was not stored because we reached the limit
1143        }
1144        return -1 ;
1145    }
1146
1147    boolean canStore(short depth, short column){
1148        //colum = 0 , means first element at particular depth
1149        //column = 1, means second element at particular depth
1150        //        calle should make sure that it doesn't call for value outside allowed co-ordinates
1151        return fPointerInfo[depth][column] == 0 ? true : false ;
1152    }
1153
1154
1155    short getElementPointer(short depth, short column){
1156        //colum = 0 , means first element at particular depth
1157        //column = 1, means second element at particular depth
1158        //        calle should make sure that it doesn't call for value outside allowed co-ordinates
1159        return fPointerInfo[depth][column] ;
1160    }
1161
1162    //this function assumes that string passed is not null and skips
1163    //the following string from the buffer this makes sure
1164    boolean skipFromTheBuffer(String rawname) throws IOException{
1165        if(fEntityScanner.skipString(rawname)){
1166            char c = (char)fEntityScanner.peekChar() ;
1167            //If the start element was completely skipped we should encounter either ' '(space),
1168            //or '/' (in case of empty element)  or '>'
1169            if( c == ' ' || c == '/' || c == '>'){
1170                fElementRawname = rawname ;
1171                return true ;
1172            } else{
1173                return false;
1174            }
1175        } else
1176            return false ;
1177    }
1178
1179    boolean skipQElement(String rawname) throws IOException{
1180
1181        final int c = fEntityScanner.getChar(rawname.length());
1182        //if this character is still valid element name -- this means string can't match
1183        if(XMLChar.isName(c)){
1184            return false;
1185        }else{
1186            return fEntityScanner.skipString(rawname);
1187        }
1188    }
1189
1190    protected boolean skipElement() throws IOException {
1191
1192        if(!fShouldSkip) return false ;
1193
1194        if(fLastPointerLocation != 0){
1195            //Look at the next element stored in the array list.. we might just get a match.
1196            String rawname = fElementArray[fLastPointerLocation + 1] ;
1197            if(rawname != null && skipFromTheBuffer(rawname)){
1198                fLastPointerLocation++ ;
1199                if(DEBUG_SKIP_ALGORITHM){
1200                    System.out.println("Element " + fElementRawname +
1201                            " was SKIPPED at pointer location = " + fLastPointerLocation);
1202                }
1203                return true ;
1204            } else{
1205                //reset it back to zero... we haven't got the correct subset yet.
1206                fLastPointerLocation = 0 ;
1207
1208            }
1209        }
1210        //xxx: we can put some logic here as from what column it should start looking
1211        //for now we always start at 0
1212        //fallback to tolerant algorithm, it would look for differnt element stored at different
1213        //depth and get us the pointer location.
1214        return fShouldSkip && skipElement((short)0);
1215
1216    }
1217
1218    //start of the column at which it should try searching
1219    boolean skipElement(short column) throws IOException {
1220        short depth = (short)fElementStack.fDepth ;
1221
1222        if(depth > MAX_DEPTH_LIMIT){
1223            return fShouldSkip = false ;
1224        }
1225        for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){
1226            short pointer = getElementPointer(depth , i ) ;
1227
1228            if(pointer == 0){
1229                return fShouldSkip = false ;
1230            }
1231
1232            if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){
1233                if(DEBUG_SKIP_ALGORITHM){
1234                    System.out.println();
1235                    System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " +
1236                            fElementStack.fDepth + " column = " + column );
1237                    System.out.println();
1238                }
1239                fLastPointerLocation = pointer ;
1240                return fShouldSkip = true ;
1241            }
1242        }
1243        return fShouldSkip = false ;
1244    }
1245
1246    /**
1247     * Scans a start element. This method will handle the binding of
1248     * namespace information and notifying the handler of the start
1249     * of the element.
1250     * <p>
1251     * <pre>
1252     * [44] EmptyElemTag ::= '&lt;' Name (S Attribute)* S? '/>'
1253     * [40] STag ::= '&lt;' Name (S Attribute)* S? '>'
1254     * </pre>
1255     * <p>
1256     * <strong>Note:</strong> This method assumes that the leading
1257     * '&lt;' character has been consumed.
1258     * <p>
1259     * <strong>Note:</strong> This method uses the fElementQName and
1260     * fAttributes variables. The contents of these variables will be
1261     * destroyed. The caller should copy important information out of
1262     * these variables before calling this method.
1263     * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT
1264     *
1265     * @return True if element is empty. (i.e. It matches
1266     *          production [44].
1267     */
1268    // fElementQName will have the details of element just read..
1269    // fAttributes will have the details of all the attributes.
1270    protected boolean scanStartElement()
1271    throws IOException, XNIException {
1272
1273        if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()");
1274        //when skipping is true and no more elements should be added
1275        if(fSkip && !fAdd){
1276            //get the stored element -- if everything goes right this should match the
1277            //token in the buffer
1278
1279            QName name = fElementStack.getNext();
1280
1281            if(DEBUG_SKIP_ALGORITHM){
1282                System.out.println("Trying to skip String = " + name.rawname);
1283            }
1284
1285            //Be conservative -- if skipping fails -- stop.
1286            fSkip = fEntityScanner.skipString(name.rawname);
1287
1288            if(fSkip){
1289                if(DEBUG_SKIP_ALGORITHM){
1290                    System.out.println("Element SUCESSFULLY skipped = " + name.rawname);
1291                }
1292                fElementStack.push();
1293                fElementQName = name;
1294            }else{
1295                //if skipping fails reposition the stack or fallback to normal way of processing
1296                fElementStack.reposition();
1297                if(DEBUG_SKIP_ALGORITHM){
1298                    System.out.println("Element was NOT skipped, REPOSITIONING stack" );
1299                }
1300            }
1301        }
1302
1303        //we are still at the stage of adding elements
1304        //the elements were not matched or
1305        //fSkip is not set to true
1306        if(!fSkip || fAdd){
1307            //get the next element from the stack
1308            fElementQName = fElementStack.nextElement();
1309            // name
1310            if (fNamespaces) {
1311                fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART);
1312            } else {
1313                String name = fEntityScanner.scanName(NameType.ELEMENTSTART);
1314                fElementQName.setValues(null, name, name, null);
1315            }
1316
1317            if(DEBUG)System.out.println("Element scanned in start element is " + fElementQName.toString());
1318            if(DEBUG_SKIP_ALGORITHM){
1319                if(fAdd){
1320                    System.out.println("Elements are being ADDED -- elemet added is = " +
1321                            fElementQName.rawname + " at count = " + fElementStack.fCount);
1322                }
1323            }
1324
1325        }
1326
1327        //when the elements are being added , we need to check if we are set for skipping the elements
1328        if(fAdd){
1329            //this sets the value of fAdd variable
1330            fElementStack.matchElement(fElementQName);
1331        }
1332
1333
1334        //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName
1335        fCurrentElement = fElementQName;
1336
1337        String rawname = fElementQName.rawname;
1338
1339        fEmptyElement = false;
1340
1341        fAttributes.removeAllAttributes();
1342
1343        checkDepth(rawname);
1344        if(!seekCloseOfStartTag()){
1345            fReadingAttributes = true;
1346            fAttributeCacheUsedCount =0;
1347            fStringBufferIndex =0;
1348            fAddDefaultAttr = true;
1349            do {
1350                scanAttribute(fAttributes);
1351                if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) &&
1352                        fAttributes.getLength() > fElementAttributeLimit){
1353                    fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
1354                                                 "ElementAttributeLimit",
1355                                                 new Object[]{rawname, fElementAttributeLimit },
1356                                                 XMLErrorReporter.SEVERITY_FATAL_ERROR );
1357                }
1358
1359            } while (!seekCloseOfStartTag());
1360            fReadingAttributes=false;
1361        }
1362
1363        if (fEmptyElement) {
1364            //decrease the markup depth..
1365            fMarkupDepth--;
1366
1367            // check that this element was opened in the same entity
1368            if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1369                reportFatalError("ElementEntityMismatch",
1370                        new Object[]{fCurrentElement.rawname});
1371            }
1372            // call handler
1373            if (fDocumentHandler != null) {
1374                fDocumentHandler.emptyElement(fElementQName, fAttributes, null);
1375            }
1376
1377            //We should not be popping out the context here in endELement becaause the namespace context is still
1378            //valid when parser is at the endElement state.
1379            //if (fNamespaces) {
1380            //  fNamespaceContext.popContext();
1381            //}
1382
1383            //pop the element off the stack..
1384            fElementStack.popElement();
1385
1386        } else {
1387
1388            if(dtdGrammarUtil != null)
1389                dtdGrammarUtil.startElement(fElementQName, fAttributes);
1390            if(fDocumentHandler != null){
1391                //complete element and attributes are traversed in this function so we can send a callback
1392                //here.
1393                //<strong>we shouldn't be sending callback in scanDocument()</strong>
1394                fDocumentHandler.startElement(fElementQName, fAttributes, null);
1395            }
1396        }
1397
1398
1399        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +
1400                "<<< scanStartElement(): "+fEmptyElement);
1401        return fEmptyElement;
1402
1403    } // scanStartElement():boolean
1404
1405    /**
1406     * Looks for the close of start tag, i.e. if it finds '>' or '/>'
1407     * Characters are consumed.
1408     */
1409    protected boolean seekCloseOfStartTag() throws IOException, XNIException {
1410        // spaces
1411        boolean sawSpace = fEntityScanner.skipSpaces();
1412
1413        // end tag?
1414        final int c = fEntityScanner.peekChar();
1415        if (c == '>') {
1416            fEntityScanner.scanChar(null);
1417            return true;
1418        } else if (c == '/') {
1419            fEntityScanner.scanChar(null);
1420            if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
1421                reportFatalError("ElementUnterminated",
1422                        new Object[]{fElementQName.rawname});
1423            }
1424            fEmptyElement = true;
1425            return true;
1426        } else if (!isValidNameStartChar(c) || !sawSpace) {
1427            // Second chance. Check if this character is a high
1428            // surrogate of a valid name start character.
1429            if (!isValidNameStartHighSurrogate(c) || !sawSpace) {
1430                reportFatalError("ElementUnterminated",
1431                        new Object[]{fElementQName.rawname});
1432            }
1433        }
1434
1435        return false;
1436    }
1437
1438    public boolean hasAttributes(){
1439        return fAttributes.getLength() > 0;
1440    }
1441
1442    /** return the attribute iterator implementation */
1443    public XMLAttributesIteratorImpl getAttributeIterator(){
1444        if(dtdGrammarUtil != null && fAddDefaultAttr){
1445            dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes);
1446            fAddDefaultAttr = false;
1447        }
1448        return fAttributes;
1449    }
1450
1451    /** return if standalone is set */
1452    public boolean standaloneSet(){
1453        return fStandaloneSet;
1454    }
1455    /** return if the doucment is standalone */
1456    public boolean isStandAlone(){
1457        return fStandalone ;
1458    }
1459    /**
1460     * Scans an attribute name value pair.
1461     * <p>
1462     * <pre>
1463     * [41] Attribute ::= Name Eq AttValue
1464     * </pre>
1465     * <p>
1466     * <strong>Note:</strong> This method assumes that the next
1467     * character on the stream is the first character of the attribute
1468     * name.
1469     * <p>
1470     * <strong>Note:</strong> This method uses the fAttributeQName and
1471     * fQName variables. The contents of these variables will be
1472     * destroyed.
1473     *
1474     * @param attributes The attributes list for the scanned attribute.
1475     */
1476
1477    protected void scanAttribute(XMLAttributes attributes)
1478    throws IOException, XNIException {
1479        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()");
1480
1481        // name
1482        if (fNamespaces) {
1483            fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME);
1484        } else {
1485            String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME);
1486            fAttributeQName.setValues(null, name, name, null);
1487        }
1488
1489        // equals
1490        fEntityScanner.skipSpaces();
1491        if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) {
1492            reportFatalError("EqRequiredInAttribute",
1493                new Object[] {fCurrentElement.rawname, fAttributeQName.rawname});
1494        }
1495        fEntityScanner.skipSpaces();
1496
1497        int attIndex = 0 ;
1498        //REVISIT: one more case needs to be included: external PE and standalone is no
1499        boolean isVC =  fHasExternalDTD && !fStandalone;
1500        //fTempString would store attribute value
1501        ///fTempString2 would store attribute non-normalized value
1502
1503        //this function doesn't use 'attIndex'. We are adding the attribute later
1504        //after we have figured out that current attribute is not namespace declaration
1505        //since scanAttributeValue doesn't use attIndex parameter therefore we
1506        //can safely add the attribute later..
1507        XMLString tmpStr = getString();
1508
1509        scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes,
1510                attIndex, isVC, fCurrentElement.rawname, false);
1511
1512        // content
1513        int oldLen = attributes.getLength();
1514        //if the attribute name already exists.. new value is replaced with old value
1515        attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null);
1516
1517        // WFC: Unique Att Spec
1518        //attributes count will be same if the current attribute  name already exists for this element name.
1519        //this means there are two duplicate attributes.
1520        if (oldLen == attributes.getLength()) {
1521            reportFatalError("AttributeNotUnique",
1522                    new Object[]{fCurrentElement.rawname,
1523                            fAttributeQName.rawname});
1524        }
1525
1526        //tmpString contains attribute value
1527        //we are passing null as the attribute value
1528        attributes.setValue(attIndex, null, tmpStr);
1529
1530        ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM
1531        //attributes.setNonNormalizedValue(oldLen, fTempString2.toString());
1532        attributes.setSpecified(attIndex, true);
1533
1534        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()");
1535
1536    } // scanAttribute(XMLAttributes)
1537
1538    /**
1539     * Scans element content.
1540     *
1541     * @return Returns the next character on the stream.
1542     */
1543    //CHANGED:
1544    //EARLIER: scanContent()
1545    //NOW: scanContent(XMLStringBuffer)
1546    //It makes things easy if this functions takes XMLStringBuffer as parameter..
1547    //this function appends the data to the buffer.
1548    protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {
1549        //set the fTempString length to 0 before passing it on to scanContent
1550        //scanContent sets the correct co-ordinates as per the content read
1551        fTempString.length = 0;
1552        int c = fEntityScanner.scanContent(fTempString);
1553        content.append(fTempString);
1554        fTempString.length = 0;
1555        if (c == '\r') {
1556            // happens when there is the character reference &#13;
1557            //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1558            fEntityScanner.scanChar(null);
1559            content.append((char)c);
1560            c = -1;
1561        } else if (c == ']') {
1562            //fStringBuffer.clear();
1563            //xxx: We know the next chracter.. we should just skip it and add ']' directlry
1564            content.append((char)fEntityScanner.scanChar(null));
1565            // remember where we are in case we get an endEntity before we
1566            // could flush the buffer out - this happens when we're parsing an
1567            // entity which ends with a ]
1568            fInScanContent = true;
1569            //
1570            // We work on a single character basis to handle cases such as:
1571            // ']]]>' which we might otherwise miss.
1572            //
1573            if (fEntityScanner.skipChar(']', null)) {
1574                content.append(']');
1575                while (fEntityScanner.skipChar(']', null)) {
1576                    content.append(']');
1577                }
1578                if (fEntityScanner.skipChar('>', null)) {
1579                    reportFatalError("CDEndInContent", null);
1580                }
1581            }
1582            fInScanContent = false;
1583            c = -1;
1584        }
1585        if (fDocumentHandler != null && content.length > 0) {
1586            //fDocumentHandler.characters(content, null);
1587        }
1588        return c;
1589
1590    } // scanContent():int
1591
1592
1593    /**
1594     * Scans a CDATA section.
1595     * <p>
1596     * <strong>Note:</strong> This method uses the fTempString and
1597     * fStringBuffer variables.
1598     *
1599     * @param complete True if the CDATA section is to be scanned
1600     *                 completely.
1601     *
1602     * @return True if CDATA is completely scanned.
1603     */
1604    //CHANGED:
1605    protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete)
1606    throws IOException, XNIException {
1607
1608        // call handler
1609        if (fDocumentHandler != null) {
1610            //fDocumentHandler.startCDATA(null);
1611        }
1612
1613        while (true) {
1614            //scanData will fill the contentBuffer
1615            if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) {
1616                fInCData = false;
1617                fCDataEnd = true;
1618                fMarkupDepth--;
1619                break ;
1620            } else {
1621                int c = fEntityScanner.peekChar();
1622                if (c != -1 && isInvalidLiteral(c)) {
1623                    if (XMLChar.isHighSurrogate(c)) {
1624                        //contentBuffer.clear();
1625                        //scan surrogates if any....
1626                        scanSurrogates(contentBuffer);
1627                    } else {
1628                        reportFatalError("InvalidCharInCDSect",
1629                                new Object[]{Integer.toString(c,16)});
1630                                fEntityScanner.scanChar(null);
1631                    }
1632                } else {
1633                    //CData partially returned due to the size limit
1634                    break;
1635                }
1636                //by this time we have also read surrogate contents if any...
1637                if (fDocumentHandler != null) {
1638                    //fDocumentHandler.characters(contentBuffer, null);
1639                }
1640            }
1641        }
1642
1643        return true;
1644
1645    } // scanCDATASection(XMLStringBuffer, boolean):boolean
1646
1647    /**
1648     * Scans an end element.
1649     * <p>
1650     * <pre>
1651     * [42] ETag ::= '&lt;/' Name S? '>'
1652     * </pre>
1653     * <p>
1654     * <strong>Note:</strong> This method uses the fElementQName variable.
1655     * The contents of this variable will be destroyed. The caller should
1656     * copy the needed information out of this variable before calling
1657     * this method.
1658     *
1659     * @return The element depth.
1660     */
1661    protected int scanEndElement() throws IOException, XNIException {
1662        if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()");
1663
1664        // pop context
1665        QName endElementName = fElementStack.popElement();
1666
1667        String rawname = endElementName.rawname;
1668        if(DEBUG)System.out.println("endElementName = " + endElementName.toString());
1669        // Take advantage of the fact that next string _should_ be "fElementQName.rawName",
1670        //In scanners most of the time is consumed on checks done for XML characters, we can
1671        // optimize on it and avoid the checks done for endElement,
1672        //we will also avoid symbol table lookup - neeraj.bajaj@sun.com
1673
1674        // this should work both for namespace processing true or false...
1675
1676        //REVISIT: if the string is not the same as expected.. we need to do better error handling..
1677        //We can skip this for now... In any case if the string doesn't match -- document is not well formed.
1678
1679        if (!fEntityScanner.skipString(endElementName.rawname)) {
1680             reportFatalError("ETagRequired", new Object[]{rawname});
1681        }
1682
1683        // end
1684        fEntityScanner.skipSpaces();
1685        if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) {
1686            reportFatalError("ETagUnterminated",
1687                    new Object[]{rawname});
1688        }
1689        fMarkupDepth--;
1690
1691        //we have increased the depth for two markup "<" characters
1692        fMarkupDepth--;
1693
1694        // check that this element was opened in the same entity
1695        if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) {
1696            reportFatalError("ElementEntityMismatch",
1697                    new Object[]{rawname});
1698        }
1699
1700        //We should not be popping out the context here in endELement becaause the namespace context is still
1701        //valid when parser is at the endElement state.
1702
1703        //if (fNamespaces) {
1704        //  fNamespaceContext.popContext();
1705        //}
1706
1707        // call handler
1708        if (fDocumentHandler != null ) {
1709            //end element is scanned in this function so we can send a callback
1710            //here.
1711            //<strong>we shouldn't be sending callback in scanDocument()</strong>
1712
1713            fDocumentHandler.endElement(endElementName, null);
1714        }
1715        if(dtdGrammarUtil != null)
1716            dtdGrammarUtil.endElement(endElementName);
1717
1718        return fMarkupDepth;
1719
1720    } // scanEndElement():int
1721
1722    /**
1723     * Scans a character reference.
1724     * <p>
1725     * <pre>
1726     * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
1727     * </pre>
1728     */
1729    protected void scanCharReference()
1730    throws IOException, XNIException {
1731
1732        fStringBuffer2.clear();
1733        int ch = scanCharReferenceValue(fStringBuffer2, null);
1734        fMarkupDepth--;
1735        if (ch != -1) {
1736            // call handler
1737
1738            if (fDocumentHandler != null) {
1739                if (fNotifyCharRefs) {
1740                    fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null);
1741                }
1742                Augmentations augs = null;
1743                if (fValidation && ch <= 0x20) {
1744                    if (fTempAugmentations != null) {
1745                        fTempAugmentations.removeAllItems();
1746                    }
1747                    else {
1748                        fTempAugmentations = new AugmentationsImpl();
1749                    }
1750                    augs = fTempAugmentations;
1751                    augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE);
1752                }
1753                //xxx: How do we deal with this - how to return charReferenceValues
1754                //now this is being commented because this is taken care in scanDocument()
1755                //fDocumentHandler.characters(fStringBuffer2, null);
1756                if (fNotifyCharRefs) {
1757                    fDocumentHandler.endGeneralEntity(fCharRefLiteral, null);
1758                }
1759            }
1760        }
1761
1762    } // scanCharReference()
1763
1764
1765    /**
1766     * Scans an entity reference.
1767     *
1768     * @return returns true if the new entity is started. If it was built-in entity
1769     *         'false' is returned.
1770     * @throws IOException  Thrown if i/o error occurs.
1771     * @throws XNIException Thrown if handler throws exception upon
1772     *                      notification.
1773     */
1774    protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException {
1775        String name = fEntityScanner.scanName(NameType.REFERENCE);
1776        if (name == null) {
1777            reportFatalError("NameRequiredInReference", null);
1778            return;
1779        }
1780        if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) {
1781            reportFatalError("SemicolonRequiredInReference", new Object []{name});
1782        }
1783        if (fEntityStore.isUnparsedEntity(name)) {
1784            reportFatalError("ReferenceToUnparsedEntity", new Object[]{name});
1785        }
1786        fMarkupDepth--;
1787        fCurrentEntityName = name;
1788
1789        // handle built-in entities
1790        if (name == fAmpSymbol) {
1791            handleCharacter('&', fAmpSymbol, content);
1792            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1793            return ;
1794        } else if (name == fLtSymbol) {
1795            handleCharacter('<', fLtSymbol, content);
1796            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1797            return ;
1798        } else if (name == fGtSymbol) {
1799            handleCharacter('>', fGtSymbol, content);
1800            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1801            return ;
1802        } else if (name == fQuotSymbol) {
1803            handleCharacter('"', fQuotSymbol, content);
1804            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1805            return ;
1806        } else if (name == fAposSymbol) {
1807            handleCharacter('\'', fAposSymbol, content);
1808            fScannerState = SCANNER_STATE_BUILT_IN_REFS;
1809            return ;
1810        }
1811
1812        //1. if the entity is external and support to external entities is not required
1813        // 2. or entities should not be replaced
1814        //3. or if it is built in entity reference.
1815        boolean isEE = fEntityStore.isExternalEntity(name);
1816        if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){
1817            fScannerState = SCANNER_STATE_REFERENCE;
1818            return ;
1819        }
1820        // start general entity
1821        if (!fEntityStore.isDeclaredEntity(name)) {
1822            //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception
1823            if (!fSupportDTD && fReplaceEntityReferences) {
1824                reportFatalError("EntityNotDeclared", new Object[]{name});
1825                return;
1826            }
1827            //REVISIT: one more case needs to be included: external PE and standalone is no
1828            if ( fHasExternalDTD && !fStandalone) {
1829                if (fValidation)
1830                    fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared",
1831                            new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR);
1832            } else
1833                reportFatalError("EntityNotDeclared", new Object[]{name});
1834        }
1835        //we are starting the entity even if the entity was not declared
1836        //if that was the case it its taken care in XMLEntityManager.startEntity()
1837        //we immediately call the endEntity. Application gets to know if there was
1838        //any entity that was not declared.
1839        fEntityManager.startEntity(true, name, false);
1840        //set the scaner state to content.. parser will automatically revive itself at any point of time.
1841        //setScannerState(SCANNER_STATE_CONTENT);
1842        //return true ;
1843    } // scanEntityReference()
1844
1845    // utility methods
1846
1847    /**
1848     * Check if the depth exceeds the maxElementDepth limit
1849     * @param elementName name of the current element
1850     */
1851    void checkDepth(String elementName) {
1852        fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth);
1853        if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) {
1854            fSecurityManager.debugPrint(fLimitAnalyzer);
1855            reportFatalError("MaxElementDepthLimit", new Object[]{elementName,
1856                fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT),
1857                fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT),
1858                "maxElementDepth"});
1859        }
1860    }
1861
1862    /**
1863     * Calls document handler with a single character resulting from
1864     * built-in entity resolution.
1865     *
1866     * @param c
1867     * @param entity built-in name
1868     * @param XMLStringBuffer append the character to buffer
1869     *
1870     * we really dont need to call this function -- this function is only required when
1871     * we integrate with rest of Xerces2. SO maintaining the current behavior and still
1872     * calling this function to hanlde built-in entity reference.
1873     *
1874     */
1875    private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException {
1876        foundBuiltInRefs = true;
1877        checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1);
1878        content.append(c);
1879        if (fDocumentHandler != null) {
1880            fSingleChar[0] = c;
1881            if (fNotifyBuiltInRefs) {
1882                fDocumentHandler.startGeneralEntity(entity, null, null, null);
1883            }
1884            fTempString.setValues(fSingleChar, 0, 1);
1885            if(!fIsCoalesce){
1886                fDocumentHandler.characters(fTempString, null);
1887                builtInRefCharacterHandled = true;
1888            }
1889
1890            if (fNotifyBuiltInRefs) {
1891                fDocumentHandler.endGeneralEntity(entity, null);
1892            }
1893        }
1894    } // handleCharacter(char)
1895
1896    // helper methods
1897
1898    /**
1899     * Sets the scanner state.
1900     *
1901     * @param state The new scanner state.
1902     */
1903    protected final void setScannerState(int state) {
1904
1905        fScannerState = state;
1906        if (DEBUG_SCANNER_STATE) {
1907            System.out.print("### setScannerState: ");
1908            //System.out.print(fScannerState);
1909            System.out.print(getScannerStateName(state));
1910            System.out.println();
1911        }
1912
1913    } // setScannerState(int)
1914
1915
1916    /**
1917     * Sets the Driver.
1918     *
1919     * @param Driver The new Driver.
1920     */
1921    protected final void setDriver(Driver driver) {
1922        fDriver = driver;
1923        if (DEBUG_DISPATCHER) {
1924            System.out.print("%%% setDriver: ");
1925            System.out.print(getDriverName(driver));
1926            System.out.println();
1927        }
1928    }
1929
1930    //
1931    // Private methods
1932    //
1933
1934    /** Returns the scanner state name. */
1935    protected String getScannerStateName(int state) {
1936
1937        switch (state) {
1938            case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE";
1939            case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT";
1940            case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP";
1941            case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT";
1942            case SCANNER_STATE_PI: return "SCANNER_STATE_PI";
1943            case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT";
1944            case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE";
1945            case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT";
1946            case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED";
1947            case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA";
1948            case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL";
1949            case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE";
1950            case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE";
1951            case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG";
1952            case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG";
1953            case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ;
1954        }
1955
1956        return "??? ("+state+')';
1957
1958    } // getScannerStateName(int):String
1959    public String getEntityName(){
1960        //return the cached name
1961        return fCurrentEntityName;
1962    }
1963
1964    /** Returns the driver name. */
1965    public String getDriverName(Driver driver) {
1966
1967        if (DEBUG_DISPATCHER) {
1968            if (driver != null) {
1969                String name = driver.getClass().getName();
1970                int index = name.lastIndexOf('.');
1971                if (index != -1) {
1972                    name = name.substring(index + 1);
1973                    index = name.lastIndexOf('$');
1974                    if (index != -1) {
1975                        name = name.substring(index + 1);
1976                    }
1977                }
1978                return name;
1979            }
1980        }
1981        return "null";
1982
1983    } // getDriverName():String
1984
1985    /**
1986     * Check the protocol used in the systemId against allowed protocols
1987     *
1988     * @param systemId the Id of the URI
1989     * @param allowedProtocols a list of allowed protocols separated by comma
1990     * @return the name of the protocol if rejected, null otherwise
1991     */
1992    String checkAccess(String systemId, String allowedProtocols) throws IOException {
1993        String baseSystemId = fEntityScanner.getBaseSystemId();
1994        String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI);
1995        return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL);
1996    }
1997
1998    //
1999    // Classes
2000    //
2001
2002    /**
2003     * @author Neeraj Bajaj, Sun Microsystems.
2004     */
2005    protected static final class Element {
2006
2007        //
2008        // Data
2009        //
2010
2011        /** Symbol. */
2012        public QName qname;
2013
2014        //raw name stored as characters
2015        public char[] fRawname;
2016
2017        /** The next Element entry. */
2018        public Element next;
2019
2020        //
2021        // Constructors
2022        //
2023
2024        /**
2025         * Constructs a new Element from the given QName and next Element
2026         * reference.
2027         */
2028        public Element(QName qname, Element next) {
2029            this.qname.setValues(qname);
2030            this.fRawname = qname.rawname.toCharArray();
2031            this.next = next;
2032        }
2033
2034    } // class Element
2035
2036    /**
2037     * Element stack.
2038     *
2039     * @author Neeraj Bajaj, Sun Microsystems.
2040     */
2041    protected class ElementStack2 {
2042
2043        //
2044        // Data
2045        //
2046
2047        /** The stack data. */
2048        protected QName [] fQName = new QName[20];
2049
2050        //Element depth
2051        protected int fDepth;
2052        //total number of elements
2053        protected int fCount;
2054        //current position
2055        protected int fPosition;
2056        //Mark refers to the position
2057        protected int fMark;
2058
2059        protected int fLastDepth ;
2060
2061        //
2062        // Constructors
2063        //
2064
2065        /** Default constructor. */
2066        public ElementStack2() {
2067            for (int i = 0; i < fQName.length; i++) {
2068                fQName[i] = new QName();
2069            }
2070            fMark = fPosition = 1;
2071        } // <init>()
2072
2073        public void resize(){
2074            /**
2075             * int length = fElements.length;
2076             * Element [] temp = new Element[length * 2];
2077             * System.arraycopy(fElements, 0, temp, 0, length);
2078             * fElements = temp;
2079             */
2080            //resize QNames
2081            int oldLength = fQName.length;
2082            QName [] tmp = new QName[oldLength * 2];
2083            System.arraycopy(fQName, 0, tmp, 0, oldLength);
2084            fQName = tmp;
2085
2086            for (int i = oldLength; i < fQName.length; i++) {
2087                fQName[i] = new QName();
2088            }
2089
2090        }
2091
2092
2093        //
2094        // Public methods
2095        //
2096
2097        /** Check if the element scanned during the start element
2098         *matches the stored element.
2099         *
2100         *@return true if the match suceeds.
2101         */
2102        public boolean matchElement(QName element) {
2103            //last depth is the depth when last elemnt was pushed
2104            //if last depth is greater than current depth
2105            if(DEBUG_SKIP_ALGORITHM){
2106                System.out.println("fLastDepth = " + fLastDepth);
2107                System.out.println("fDepth = " + fDepth);
2108            }
2109            boolean match = false;
2110            if(fLastDepth > fDepth && fDepth <= 2){
2111                if(DEBUG_SKIP_ALGORITHM){
2112                    System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname);
2113                }
2114                if(element.rawname == fQName[fDepth].rawname){
2115                    fAdd = false;
2116                    //mark this position
2117                    //decrease the depth by 1 as arrays are 0 based
2118                    fMark = fDepth - 1;
2119                    //we found the match and from next element skipping will start, add 1
2120                    fPosition = fMark + 1 ;
2121                    match = true;
2122                    //Once we get match decrease the count -- this was increased by nextElement()
2123                    --fCount;
2124                    if(DEBUG_SKIP_ALGORITHM){
2125                        System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED");
2126                        System.out.println("fMark = " + fMark);
2127                        System.out.println("fPosition = " + fPosition);
2128                        System.out.println("fDepth = " + fDepth);
2129                        System.out.println("fCount = " + fCount);
2130                    }
2131                }else{
2132                    fAdd = true;
2133                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
2134                }
2135            }
2136            //store the last depth
2137            fLastDepth = fDepth++;
2138            return match;
2139        } // pushElement(QName):QName
2140
2141        /**
2142         * This function doesn't increase depth. The function in this function is
2143         *broken down into two functions for efficiency. <@see>matchElement</see>.
2144         * This function just returns the pointer to the object and its values are set.
2145         *
2146         *@return QName reference to the next element in the list
2147         */
2148        public QName nextElement() {
2149
2150            //if number of elements becomes equal to the length of array -- stop the skipping
2151            if (fCount == fQName.length) {
2152                fShouldSkip = false;
2153                fAdd = false;
2154                if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip);
2155                //xxx: this is not correct, we are returning the last element
2156                //this wont make any difference since flag has been set to 'false'
2157                return fQName[--fCount];
2158            }
2159            if(DEBUG_SKIP_ALGORITHM){
2160                System.out.println("fCount = " + fCount);
2161            }
2162            return fQName[fCount++];
2163
2164        }
2165
2166        /** Note that this function is considerably different than nextElement()
2167         * This function just returns the previously stored elements
2168         */
2169        public QName getNext(){
2170            //when position reaches number of elements in the list..
2171            //set the position back to mark,  making it a circular linked list.
2172            if(fPosition == fCount){
2173                fPosition = fMark;
2174            }
2175            return fQName[fPosition++];
2176        }
2177
2178        /** returns the current depth
2179         */
2180        public int popElement(){
2181            return fDepth--;
2182        }
2183
2184
2185        /** Clears the stack without throwing away existing QName objects. */
2186        public void clear() {
2187            fLastDepth = 0;
2188            fDepth = 0;
2189            fCount = 0 ;
2190            fPosition = fMark = 1;
2191        } // clear()
2192
2193    } // class ElementStack
2194
2195    /**
2196     * Element stack. This stack operates without synchronization, error
2197     * checking, and it re-uses objects instead of throwing popped items
2198     * away.
2199     *
2200     * @author Andy Clark, IBM
2201     */
2202    protected class ElementStack {
2203
2204        //
2205        // Data
2206        //
2207
2208        /** The stack data. */
2209        protected QName[] fElements;
2210        protected int []  fInt = new int[20];
2211
2212
2213        //Element depth
2214        protected int fDepth;
2215        //total number of elements
2216        protected int fCount;
2217        //current position
2218        protected int fPosition;
2219        //Mark refers to the position
2220        protected int fMark;
2221
2222        protected int fLastDepth ;
2223
2224        //
2225        // Constructors
2226        //
2227
2228        /** Default constructor. */
2229        public ElementStack() {
2230            fElements = new QName[20];
2231            for (int i = 0; i < fElements.length; i++) {
2232                fElements[i] = new QName();
2233            }
2234        } // <init>()
2235
2236        //
2237        // Public methods
2238        //
2239
2240        /**
2241         * Pushes an element on the stack.
2242         * <p>
2243         * <strong>Note:</strong> The QName values are copied into the
2244         * stack. In other words, the caller does <em>not</em> orphan
2245         * the element to the stack. Also, the QName object returned
2246         * is <em>not</em> orphaned to the caller. It should be
2247         * considered read-only.
2248         *
2249         * @param element The element to push onto the stack.
2250         *
2251         * @return Returns the actual QName object that stores the
2252         */
2253        //XXX: THIS FUNCTION IS NOT USED
2254        public QName pushElement(QName element) {
2255            if (fDepth == fElements.length) {
2256                QName[] array = new QName[fElements.length * 2];
2257                System.arraycopy(fElements, 0, array, 0, fDepth);
2258                fElements = array;
2259                for (int i = fDepth; i < fElements.length; i++) {
2260                    fElements[i] = new QName();
2261                }
2262            }
2263            fElements[fDepth].setValues(element);
2264            return fElements[fDepth++];
2265        } // pushElement(QName):QName
2266
2267
2268        /** Note that this function is considerably different than nextElement()
2269         * This function just returns the previously stored elements
2270         */
2271        public QName getNext(){
2272            //when position reaches number of elements in the list..
2273            //set the position back to mark,  making it a circular linked list.
2274            if(fPosition == fCount){
2275                fPosition = fMark;
2276            }
2277            //store the position of last opened tag at particular depth
2278            //fInt[++fDepth] = fPosition;
2279            if(DEBUG_SKIP_ALGORITHM){
2280                System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname);
2281            }
2282            //return fElements[fPosition++];
2283            return fElements[fPosition];
2284        }
2285
2286        /** This function should be called only when element was skipped sucessfully.
2287         * 1. Increase the depth - because element was sucessfully skipped.
2288         *2. Store the position of the element token in array  "last opened tag" at depth.
2289         *3. increase the position counter so as to point to the next element in the array
2290         */
2291        public void push(){
2292
2293            fInt[++fDepth] = fPosition++;
2294        }
2295
2296        /** Check if the element scanned during the start element
2297         *matches the stored element.
2298         *
2299         *@return true if the match suceeds.
2300         */
2301        public boolean matchElement(QName element) {
2302            //last depth is the depth when last elemnt was pushed
2303            //if last depth is greater than current depth
2304            //if(DEBUG_SKIP_ALGORITHM){
2305            //   System.out.println("Check if the element " + element.rawname + " matches");
2306            //  System.out.println("fLastDepth = " + fLastDepth);
2307            // System.out.println("fDepth = " + fDepth);
2308            //}
2309            boolean match = false;
2310            if(fLastDepth > fDepth && fDepth <= 3){
2311                if(DEBUG_SKIP_ALGORITHM){
2312                    System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----");
2313                    System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname);
2314                }
2315                if(element.rawname == fElements[fDepth - 1].rawname){
2316                    fAdd = false;
2317                    //mark this position
2318                    //decrease the depth by 1 as arrays are 0 based
2319                    fMark = fDepth - 1;
2320                    //we found the match
2321                    fPosition = fMark;
2322                    match = true;
2323                    //Once we get match decrease the count -- this was increased by nextElement()
2324                    --fCount;
2325                    if(DEBUG_SKIP_ALGORITHM){
2326                        System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false");
2327                        System.out.println("fMark = " + fMark);
2328                        System.out.println("fPosition = " + fPosition);
2329                        System.out.println("fDepth = " + fDepth);
2330                        System.out.println("fCount = " + fCount);
2331                        System.out.println("---------MATCH SUCEEDED-----------------");
2332                        System.out.println("");
2333                    }
2334                }else{
2335                    fAdd = true;
2336                    if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd);
2337                }
2338            }
2339            //store the position for the current depth
2340            //when we are adding the elements, when skipping
2341            //starts even then this should be tracked ie. when
2342            //calling getNext()
2343            if(match){
2344                //from next element skipping will start, add 1
2345                fInt[fDepth] = fPosition++;
2346            } else{
2347                if(DEBUG_SKIP_ALGORITHM){
2348                    System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1));
2349                }
2350                //sicne fInt[fDepth] contains pointer to the element array which are 0 based.
2351                fInt[fDepth] = fCount - 1;
2352            }
2353
2354            //if number of elements becomes equal to the length of array -- stop the skipping
2355            //xxx: should we do "fCount == fInt.length"
2356            if (fCount == fElements.length) {
2357                fSkip = false;
2358                fAdd = false;
2359                //reposition the stack -- it seems to be too complex document and there is no symmerty in structure
2360                reposition();
2361                if(DEBUG_SKIP_ALGORITHM){
2362                    System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED");
2363                    System.out.println("REPOSITIONING THE STACK");
2364                    System.out.println("-----------SKIPPING STOPPED----------");
2365                    System.out.println("");
2366                }
2367                return false;
2368            }
2369            if(DEBUG_SKIP_ALGORITHM){
2370                if(match){
2371                    System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth);
2372                }else{
2373                    System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth);
2374                }
2375            }
2376            //store the last depth
2377            fLastDepth = fDepth;
2378            return match;
2379        } // matchElement(QName):QName
2380
2381
2382        /**
2383         * Returns the next element on the stack.
2384         *
         * @return Returns the actual QName object. The caller should
         * use this object to store the details of the next element encountered.
2387         */
2388        public QName nextElement() {
2389            if(fSkip){
2390                fDepth++;
2391                //boundary checks are done in matchElement()
2392                return fElements[fCount++];
2393            } else if (fDepth == fElements.length) {
2394                QName[] array = new QName[fElements.length * 2];
2395                System.arraycopy(fElements, 0, array, 0, fDepth);
2396                fElements = array;
2397                for (int i = fDepth; i < fElements.length; i++) {
2398                    fElements[i] = new QName();
2399                }
2400            }
2401
2402            return fElements[fDepth++];
2403
2404        } // pushElement(QName):QName
2405
2406
2407        /**
         * Pops an element off of the stack and returns the same QName
         * object that was pushed.
2410         * <p>
2411         * <strong>Note:</strong> The object returned is <em>not</em>
2412         * orphaned to the caller. Therefore, the caller should consider
2413         * the object to be read-only.
2414         */
2415        public QName popElement() {
2416            //return the same object that was pushed -- this would avoid
2417            //setting the values for every end element.
2418            //STRONG: this object is read only -- this object reference shouldn't be stored.
2419            if(fSkip || fAdd ){
2420                if(DEBUG_SKIP_ALGORITHM){
2421                    System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname);
2422                    System.out.println("");
2423                }
2424                return fElements[fInt[fDepth--]];
2425            } else{
2426                if(DEBUG_SKIP_ALGORITHM){
2427                    System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname );
2428                }
2429                return fElements[--fDepth] ;
2430            }
2431            //element.setValues(fElements[--fDepth]);
2432        } // popElement(QName)
2433
2434        /** Reposition the stack. fInt [] contains all the opened tags at particular depth.
2435         * Transfer all the opened tags starting from depth '2' to the current depth and reposition them
2436         *as per the depth.
2437         */
2438        public void reposition(){
2439            for( int i = 2 ; i <= fDepth ; i++){
2440                fElements[i-1] = fElements[fInt[i]];
2441            }
2442            if(DEBUG_SKIP_ALGORITHM){
2443                for( int i = 0 ; i < fDepth ; i++){
2444                    System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname);
2445                }
2446            }
2447        }
2448
2449        /** Clears the stack without throwing away existing QName objects. */
2450        public void clear() {
2451            fDepth = 0;
2452            fLastDepth = 0;
2453            fCount = 0 ;
2454            fPosition = fMark = 1;
2455
2456        } // clear()
2457
2458        /**
         * This function is a result of the optimization done for endElement --
         * we don't need to set the values for every end element encountered.
         * For well-formedness checks we can use the same QName object that was pushed.
         * The values will be set only if the application needs to know about the endElement
2463         * -- neeraj.bajaj@sun.com
2464         */
2465
2466        public QName getLastPoppedElement(){
2467            return fElements[fDepth];
2468        }
2469    } // class ElementStack
2470
2471    /**
2472     * Drives the parser to the next state/event on the input. Parser is guaranteed
2473     * to stop at the next state/event.
2474     *
     * Internally, an XML document is divided into several states. Each state represents
     * a section of the XML document. When this function returns normally, it has read
     * a section of the XML document and returns the state corresponding to the section
     * of the document which has been read. As an optimization, a particular driver
     * can read ahead of the section of document (state returned) just read and
     * can maintain a different internal state.
2481     *
2482     *
2483     * @author Neeraj Bajaj, Sun Microsystems
2484     */
2485    protected interface Driver {
2486
2487
2488        /**
2489         * Drives the parser to the next state/event on the input. Parser is guaranteed
2490         * to stop at the next state/event.
2491         *
2492         * Internally XML document is divided into several states. Each state represents
2493         * a sections of XML document. When this functions returns normally, it has read
2494         * the section of XML document and returns the state corresponding to section of
2495         * document which has been read. For optimizations, a particular driver
2496         * can read ahead of the section of document (state returned) just read and
2497         * can maintain a different internal state.
2498         *
2499         * @return state representing the section of document just read.
2500         *
2501         * @throws IOException  Thrown on i/o error.
2502         * @throws XNIException Thrown on parse error.
2503         */
2504
2505        public int next() throws IOException, XNIException;
2506
2507    } // interface Driver
2508
2509    /**
2510     * Driver to handle content scanning. This driver is capable of reading
2511     * the fragment of XML document. When it has finished reading fragment
2512     * of XML documents, it can pass the job of reading to another driver.
2513     *
2514     * This class has been modified as per the new design which is more suited to
2515     * efficiently build pull parser. Lot of performance improvements have been done and
2516     * the code has been added to support stax functionality/features.
2517     *
2518     * @author Neeraj Bajaj, Sun Microsystems
2519     *
2520     *
2521     * @author Andy Clark, IBM
2522     * @author Eric Ye, IBM
2523     */
2524    protected class FragmentContentDriver
2525            implements Driver {
2526
2527        //
2528        // Driver methods
2529        //
2530
2531        /**
2532         *  decides the appropriate state of the parser
2533         */
2534        private void startOfMarkup() throws IOException {
2535            fMarkupDepth++;
2536            final int ch = fEntityScanner.peekChar();
2537            if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) {
2538                setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2539            } else {
2540                switch(ch){
2541                    case '?' :{
2542                        setScannerState(SCANNER_STATE_PI);
2543                        fEntityScanner.skipChar(ch, null);
2544                        break;
2545                    }
2546                    case '!' :{
2547                        fEntityScanner.skipChar(ch, null);
2548                        if (fEntityScanner.skipChar('-', null)) {
2549                            if (!fEntityScanner.skipChar('-', NameType.COMMENT)) {
2550                                reportFatalError("InvalidCommentStart",
2551                                        null);
2552                            }
2553                            setScannerState(SCANNER_STATE_COMMENT);
2554                        } else if (fEntityScanner.skipString(CDATA)) {
2555                            fCDataStart = true;
2556                            setScannerState(SCANNER_STATE_CDATA );
2557                        } else if (!scanForDoctypeHook()) {
2558                            reportFatalError("MarkupNotRecognizedInContent",
2559                                    null);
2560                        }
2561                        break;
2562                    }
2563                    case '/' :{
2564                        setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2565                        fEntityScanner.skipChar(ch, NameType.ELEMENTEND);
2566                        break;
2567                    }
2568                    default :{
2569                        reportFatalError("MarkupNotRecognizedInContent", null);
2570                    }
2571                }
2572            }
2573
2574        }//startOfMarkup
2575
2576        private void startOfContent() throws IOException {
2577            if (fEntityScanner.skipChar('<', null)) {
2578                setScannerState(SCANNER_STATE_START_OF_MARKUP);
2579            } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) {
2580                setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE
2581            } else {
2582                //element content is there..
2583                setScannerState(SCANNER_STATE_CHARACTER_DATA);
2584            }
2585        }//startOfContent
2586
2587
2588        /**
2589         *
         * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser.
         * At any point in time, when in doubt over the current state of the parser, the state should be
         * set to SCANNER_STATE_CONTENT. The parser will automatically revive itself and will set its
         * state to one of the sub states.
         * Sub states are defined in the parser on the basis of different XML components, like
         * SCANNER_STATE_ENTITY_REFERENCE, SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.
         * These sub states help the parser to have fine control over the parsing. They are the
         * different mileposts; the parser stops at each sub state (milepost). Based on this state it is
         * decided whether the parser needs to stop at the next milepost.
2599         *
2600         */
2601        public void decideSubState() throws IOException {
2602            while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){
2603
2604                switch (fScannerState) {
2605
2606                    case SCANNER_STATE_CONTENT: {
2607                        startOfContent() ;
2608                        break;
2609                    }
2610
2611                    case SCANNER_STATE_START_OF_MARKUP: {
2612                        startOfMarkup() ;
2613                        break;
2614                    }
2615                }
2616            }
2617        }//decideSubState
2618
2619        /**
         * Drives the parser to the next state/event on the input. The parser is guaranteed
         * to stop at the next state/event. Internally, an XML document
         * is divided into several states. Each state represents a section of the XML
         * document. When this function returns normally, it has read a section
         * of the XML document and returns the state corresponding to the section of
         * the document which has been read. As an optimization, a particular driver
         * can read ahead of the section of document (state returned) just read and
         * can maintain a different internal state.
2628         *
2629         * State returned corresponds to Stax states.
2630         *
2631         * @return state representing the section of document just read.
2632         *
2633         * @throws IOException  Thrown on i/o error.
2634         * @throws XNIException Thrown on parse error.
2635         */
2636
2637        public int next() throws IOException, XNIException {
2638            while (true) {
2639            try {
2640
2641                //decide the actual sub state of the scanner.For more information refer to the javadoc of
2642                //decideSubState.
2643
2644                if (fScannerState == SCANNER_STATE_CONTENT) {
2645                    final int ch = fEntityScanner.peekChar();
2646                    if (ch == '<') {
2647                        fEntityScanner.scanChar(null);
2648                        setScannerState(SCANNER_STATE_START_OF_MARKUP);
2649                    } else if (ch == '&') {
2650                        fEntityScanner.scanChar(NameType.REFERENCE);
2651                        setScannerState(SCANNER_STATE_REFERENCE) ;
2652                    } else {
2653                        //element content is there..
2654                        setScannerState(SCANNER_STATE_CHARACTER_DATA);
2655                    }
2656                }
2657
2658                if (fScannerState == SCANNER_STATE_START_OF_MARKUP) {
2659                    startOfMarkup();
2660                }
2661
2662                //decideSubState() ;
2663
2664                //do some special handling if isCoalesce is set to true.
2665                if (fIsCoalesce) {
2666                    fUsebuffer = true ;
2667                    //if the last section was character data
2668                    if (fLastSectionWasCharacterData) {
2669
2670                        //if we dont encounter any CDATA or ENTITY REFERENCE and
2671                        //current state is also not SCANNER_STATE_CHARACTER_DATA
2672                        //return the last scanned charactrer data.
2673                        if ((fScannerState != SCANNER_STATE_CDATA)
2674                                && (fScannerState != SCANNER_STATE_REFERENCE)
2675                                && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) {
2676                            fLastSectionWasCharacterData = false;
2677                            return XMLEvent.CHARACTERS;
2678                        }
2679                    }//if last section was CDATA or ENTITY REFERENCE
2680                    //xxx: there might be another entity reference or CDATA after this
2681                    //<foo>blah blah &amp;&lt;<![CDATA[[aa]]>blah blah</foo>
2682                    else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) {
2683                        //and current state is not SCANNER_STATE_CHARACTER_DATA
2684                        //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE
2685                        //this means there is nothing more to be coalesced.
2686                        //return the CHARACTERS event.
2687                        if ((fScannerState != SCANNER_STATE_CDATA)
2688                                && (fScannerState != SCANNER_STATE_REFERENCE)
2689                                && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){
2690
2691                            fLastSectionWasCData = false;
2692                            fLastSectionWasEntityReference = false;
2693                            return XMLEvent.CHARACTERS;
2694                        }
2695                    }
2696                }
2697
2698                switch(fScannerState){
2699
2700                    case XMLEvent.START_DOCUMENT :
2701                        return XMLEvent.START_DOCUMENT;
2702
2703                    case SCANNER_STATE_START_ELEMENT_TAG :{
2704
2705                        //returns true if the element is empty
2706                        fEmptyElement = scanStartElement() ;
2707                        //if the element is empty the next event is "end element"
2708                        if(fEmptyElement){
2709                            setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2710                        }else{
2711                            //set the next possible state
2712                            setScannerState(SCANNER_STATE_CONTENT);
2713                        }
2714                        return XMLEvent.START_ELEMENT ;
2715                    }
2716
2717                    case SCANNER_STATE_CHARACTER_DATA: {
2718
2719                        //if last section was either entity reference or cdata or
2720                        //character data we should be using buffer
2721                        fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData
2722                                || fLastSectionWasCharacterData ;
2723
2724                        //When coalesce is set to true and last state was REFERENCE or
2725                        //CDATA or CHARACTER_DATA, buffer should not be cleared.
2726                        if( fIsCoalesce && (fLastSectionWasEntityReference ||
2727                                fLastSectionWasCData || fLastSectionWasCharacterData) ){
2728                            fLastSectionWasEntityReference = false;
2729                            fLastSectionWasCData = false;
2730                            fLastSectionWasCharacterData = true ;
2731                            fUsebuffer = true;
2732                        }else{
2733                            //clear the buffer
2734                            fContentBuffer.clear();
2735                        }
2736
2737                        //set the fTempString length to 0 before passing it on to scanContent
2738                        //scanContent sets the correct co-ordinates as per the content read
2739                        fTempString.length = 0;
2740                        int c = fEntityScanner.scanContent(fTempString);
2741
2742                        if(fEntityScanner.skipChar('<', null)){
2743                            //check if we have reached end of element
2744                            if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){
2745                                //increase the mark up depth
2746                                fMarkupDepth++;
2747                                fLastSectionWasCharacterData = false;
2748                                setScannerState(SCANNER_STATE_END_ELEMENT_TAG);
2749                                //check if its start of new element
2750                            }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){
2751                                fMarkupDepth++;
2752                                fLastSectionWasCharacterData = false;
2753                                setScannerState(SCANNER_STATE_START_ELEMENT_TAG);
2754                            }else{
2755                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
2756                                //there can be cdata ahead if coalesce is true we should call again
2757                                if(fIsCoalesce){
2758                                    fLastSectionWasCharacterData = true;
2759                                    bufferContent();
2760                                    continue;
2761                                }
2762                            }
2763                            //in case last section was either entity reference or
2764                            //cdata or character data -- we should be using buffer
2765                            if(fUsebuffer){
2766                                bufferContent();
2767                            }
2768
2769                            if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){
2770                                if(DEBUG)System.out.println("Return SPACE EVENT");
2771                                return XMLEvent.SPACE;
2772                            }else
2773                                return XMLEvent.CHARACTERS;
2774
2775                        } else{
2776                            bufferContent();
2777                        }
2778                        if (c == '\r') {
2779                            if(DEBUG){
2780                                System.out.println("'\r' character found");
2781                            }
2782                            // happens when there is the character reference &#13;
2783                            //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2784                            fEntityScanner.scanChar(null);
2785                            fUsebuffer = true;
2786                            fContentBuffer.append((char)c);
2787                            c = -1 ;
2788                        } else if (c == ']') {
2789                            //fStringBuffer.clear();
2790                            //xxx: We know the next chracter.. we should just skip it and add ']' directlry
2791                            fUsebuffer = true;
2792                            fContentBuffer.append((char)fEntityScanner.scanChar(null));
2793                            // remember where we are in case we get an endEntity before we
2794                            // could flush the buffer out - this happens when we're parsing an
2795                            // entity which ends with a ]
2796                            fInScanContent = true;
2797
2798                            // We work on a single character basis to handle cases such as:
2799                            // ']]]>' which we might otherwise miss.
2800                            //
2801                            if (fEntityScanner.skipChar(']', null)) {
2802                                fContentBuffer.append(']');
2803                                while (fEntityScanner.skipChar(']', null)) {
2804                                    fContentBuffer.append(']');
2805                                }
2806                                if (fEntityScanner.skipChar('>', null)) {
2807                                    reportFatalError("CDEndInContent", null);
2808                                }
2809                            }
2810                            c = -1 ;
2811                            fInScanContent = false;
2812                        }
2813
2814                        do{
2815                            //xxx: we should be using only one buffer..
2816                            // we need not to grow the buffer only when isCoalesce() is not true;
2817
2818                            if (c == '<') {
2819                                fEntityScanner.scanChar(null);
2820                                setScannerState(SCANNER_STATE_START_OF_MARKUP);
2821                                break;
2822                            }//xxx what should be the behavior if entity reference is present in the content ?
2823                            else if (c == '&') {
2824                                fEntityScanner.scanChar(NameType.REFERENCE);
2825                                setScannerState(SCANNER_STATE_REFERENCE);
2826                                break;
2827                            }///xxx since this part is also characters, it should be merged...
2828                            else if (c != -1 && isInvalidLiteral(c)) {
2829                                if (XMLChar.isHighSurrogate(c)) {
2830                                    // special case: surrogates
2831                                    scanSurrogates(fContentBuffer) ;
2832                                    setScannerState(SCANNER_STATE_CONTENT);
2833                                } else {
2834                                    reportFatalError("InvalidCharInContent",
2835                                            new Object[] {
2836                                        Integer.toString(c, 16)});
2837                                        fEntityScanner.scanChar(null);
2838                                }
2839                                break;
2840                            }
2841                            //xxx: scanContent also gives character callback.
2842                            c = scanContent(fContentBuffer) ;
2843                            //we should not be iterating again if fIsCoalesce is not set to true
2844
2845                            if(!fIsCoalesce){
2846                                setScannerState(SCANNER_STATE_CONTENT);
2847                                break;
2848                            }
2849
2850                        }while(true);
2851
2852                        //if (fDocumentHandler != null) {
2853                        //  fDocumentHandler.characters(fContentBuffer, null);
2854                        //}
2855                        if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END");
2856                        //if fIsCoalesce is true there might be more data so call fDriver.next()
2857                        if(fIsCoalesce){
2858                            fLastSectionWasCharacterData = true ;
2859                            continue;
2860                        }else{
2861                            if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){
2862                                if(DEBUG)System.out.println("Return SPACE EVENT");
2863                                return XMLEvent.SPACE;
2864                            } else
2865                                return XMLEvent.CHARACTERS ;
2866                        }
2867                    }
2868
2869                    case SCANNER_STATE_END_ELEMENT_TAG :{
2870                        if(fEmptyElement){
2871                            //set it back to false.
2872                            fEmptyElement = false;
2873                            setScannerState(SCANNER_STATE_CONTENT);
2874                            //check the case when there is comment after single element document
2875                            //<foo/> and some comment after this
2876                            return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ?
2877                                    XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ;
2878
2879                        } else if(scanEndElement() == 0) {
2880                            //It is last element of the document
2881                            if (elementDepthIsZeroHook()) {
2882                                //if element depth is zero , it indicates the end of the document
2883                                //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function
2884                                //xxx understand this point once again..
2885                                return XMLEvent.END_ELEMENT ;
2886                            }
2887
2888                        }
2889                        setScannerState(SCANNER_STATE_CONTENT);
2890                        return XMLEvent.END_ELEMENT ;
2891                    }
2892
2893                    case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT:
2894                        scanComment();
2895                        setScannerState(SCANNER_STATE_CONTENT);
2896                        return XMLEvent.COMMENT;
2897                        //break;
2898                    }
2899                    case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: {
2900                        //clear the buffer first
2901                        fContentBuffer.clear() ;
2902                        //xxx: which buffer should be passed. Ideally we shouldn't have
2903                        //more than two buffers --
2904                        //xxx: where should we add the switch for buffering.
2905                        scanPI(fContentBuffer);
2906                        setScannerState(SCANNER_STATE_CONTENT);
2907                        return XMLEvent.PROCESSING_INSTRUCTION;
2908                        //break;
2909                    }
2910                    case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: {
2911                        //xxx: What if CDATA is the first event
2912                        //<foo><![CDATA[hello<><>]]>append</foo>
2913
2914                        //we should not clear the buffer only when the last state was
2915                        //either SCANNER_STATE_REFERENCE or
2916                        //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
2917                        if(fIsCoalesce && ( fLastSectionWasEntityReference ||
2918                                fLastSectionWasCData || fLastSectionWasCharacterData)){
2919                            fLastSectionWasCData = true ;
2920                            fLastSectionWasEntityReference = false;
2921                            fLastSectionWasCharacterData = false;
2922                        }//if we dont need to coalesce clear the buffer
2923                        else{
2924                            fContentBuffer.clear();
2925                        }
2926                        fUsebuffer = true;
2927                        //CDATA section is read up to the chunk size limit
2928                        scanCDATASection(fContentBuffer , true);
2929                        setScannerState(SCANNER_STATE_CONTENT);
2930                        //1. if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true
2931                        //and just call fDispatche.next(). Since we have set the scanner state to
2932                        //SCANNER_STATE_CONTENT (super state) parser will automatically recover and
2933                        //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event
2934                        //2. Check if application has set for reporting CDATA event
2935                        //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent
2936                        //return the cdata event as characters.
2937                        if (fIsCoalesce) {
2938                            fLastSectionWasCData = true ;
2939                            //there might be more data to coalesce.
2940                            continue;
2941                        } else if(fReportCdataEvent) {
2942                            if (!fCDataEnd) {
2943                                setScannerState(SCANNER_STATE_CDATA);
2944                            }
2945                            return XMLEvent.CDATA;
2946                        } else {
2947                            return XMLEvent.CHARACTERS;
2948                        }
2949                    }
2950
2951                    case SCANNER_STATE_REFERENCE :{
2952                        fMarkupDepth++;
2953                        foundBuiltInRefs = false;
2954
2955                        //we should not clear the buffer only when the last state was
2956                        //either CDATA or
2957                        //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE
2958                        if(fIsCoalesce && ( fLastSectionWasEntityReference ||
2959                                fLastSectionWasCData || fLastSectionWasCharacterData)){
2960                            //fLastSectionWasEntityReference or fLastSectionWasCData are only
2961                            //used when fIsCoalesce is set to true.
2962                            fLastSectionWasEntityReference = true ;
2963                            fLastSectionWasCData = false;
2964                            fLastSectionWasCharacterData = false;
2965                        }//if we dont need to coalesce clear the buffer
2966                        else{
2967                            fContentBuffer.clear();
2968                        }
2969                        fUsebuffer = true ;
2970                        //take care of character reference
2971                        if (fEntityScanner.skipChar('#', NameType.REFERENCE)) {
2972                            scanCharReferenceValue(fContentBuffer, null);
2973                            fMarkupDepth--;
2974                            if(!fIsCoalesce){
2975                                setScannerState(SCANNER_STATE_CONTENT);
2976                                return XMLEvent.CHARACTERS;
2977                            }
2978                        } else {
2979                            // this function also starts new entity
2980                            scanEntityReference(fContentBuffer);
2981                            //if there was built-in entity reference & coalesce is not true
2982                            //return CHARACTERS
2983                            if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){
2984                                setScannerState(SCANNER_STATE_CONTENT);
2985                                if (builtInRefCharacterHandled) {
2986                                    builtInRefCharacterHandled = false;
2987                                    return XMLEvent.ENTITY_REFERENCE;
2988                                } else {
2989                                    return XMLEvent.CHARACTERS;
2990                                }
2991                            }
2992
2993                            //if there was a text declaration, call next() it will be taken care.
2994                            if(fScannerState == SCANNER_STATE_TEXT_DECL){
2995                                fLastSectionWasEntityReference = true ;
2996                                continue;
2997                            }
2998
2999                            if(fScannerState == SCANNER_STATE_REFERENCE){
3000                                setScannerState(SCANNER_STATE_CONTENT);
3001                                if (fReplaceEntityReferences &&
3002                                        fEntityStore.isDeclaredEntity(fCurrentEntityName)) {
3003                                    // Skip the entity reference, we don't care
3004                                    continue;
3005                                }
3006                                return XMLEvent.ENTITY_REFERENCE;
3007                            }
3008                        }
3009                        //Wether it was character reference, entity reference or built-in entity
3010                        //set the next possible state to SCANNER_STATE_CONTENT
3011                        setScannerState(SCANNER_STATE_CONTENT);
3012                        fLastSectionWasEntityReference = true ;
3013                        continue;
3014                    }
3015
3016                    case SCANNER_STATE_TEXT_DECL: {
3017                        // scan text decl
3018                        if (fEntityScanner.skipString("<?xml")) {
3019                            fMarkupDepth++;
3020                            // NOTE: special case where entity starts with a PI
3021                            //       whose name starts with "xml" (e.g. "xmlfoo")
3022                            if (isValidNameChar(fEntityScanner.peekChar())) {
3023                                fStringBuffer.clear();
3024                                fStringBuffer.append("xml");
3025
3026                                if (fNamespaces) {
3027                                    while (isValidNCName(fEntityScanner.peekChar())) {
3028                                        fStringBuffer.append((char)fEntityScanner.scanChar(null));
3029                                    }
3030                                } else {
3031                                    while (isValidNameChar(fEntityScanner.peekChar())) {
3032                                        fStringBuffer.append((char)fEntityScanner.scanChar(null));
3033                                    }
3034                                }
3035                                String target = fSymbolTable.addSymbol(fStringBuffer.ch,
3036                                        fStringBuffer.offset, fStringBuffer.length);
3037                                fContentBuffer.clear();
3038                                scanPIData(target, fContentBuffer);
3039                            }
3040
3041                            // standard text declaration
3042                            else {
3043                                //xxx: this function gives callback
3044                                scanXMLDeclOrTextDecl(true);
3045                            }
3046                        }
3047                        // now that we've straightened out the readers, we can read in chunks:
3048                        fEntityManager.fCurrentEntity.mayReadChunks = true;
3049                        setScannerState(SCANNER_STATE_CONTENT);
3050                        //xxx: we don't return any state, so how do we get to know about TEXT declarations.
3051                        //it seems we have to careful when to allow function issue a callback
3052                        //and when to allow adapter issue a callback.
3053                        continue;
3054                    }
3055
3056
3057                    case SCANNER_STATE_ROOT_ELEMENT: {
3058                        if (scanRootElementHook()) {
3059                            fEmptyElement = true;
3060                            //rest would be taken care by fTrailingMiscDriver set by scanRootElementHook
3061                            return XMLEvent.START_ELEMENT;
3062                        }
3063                        setScannerState(SCANNER_STATE_CONTENT);
3064                        return XMLEvent.START_ELEMENT ;
3065                    }
3066                    case SCANNER_STATE_CHAR_REFERENCE : {
3067                        fContentBuffer.clear();
3068                        scanCharReferenceValue(fContentBuffer, null);
3069                        fMarkupDepth--;
3070                        setScannerState(SCANNER_STATE_CONTENT);
3071                        return XMLEvent.CHARACTERS;
3072                    }
3073                    default:
3074                        throw new XNIException("Scanner State " + fScannerState + " not Recognized ");
3075
3076                }//switch
3077            }
3078            // premature end of file
3079            catch (EOFException e) {
3080                endOfFileHook(e);
3081                return -1;
3082            }
3083            } //while loop
3084        }//next
3085
3086        //
3087        // Protected methods
3088        //
3089
3090        // hooks
3091
3092        // NOTE: These hook methods are added so that the full document
3093        //       scanner can share the majority of code with this class.
3094
3095        /**
3096         * Scan for DOCTYPE hook. This method is a hook for subclasses
3097         * to add code to handle scanning for a the "DOCTYPE" string
3098         * after the string "<!" has been scanned.
3099         *
3100         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
3101         *          was not scanned.
3102         */
3103        protected boolean scanForDoctypeHook()
3104        throws IOException, XNIException {
3105            return false;
3106        } // scanForDoctypeHook():boolean
3107
3108        /**
3109         * Element depth iz zero. This methos is a hook for subclasses
3110         * to add code to handle when the element depth hits zero. When
3111         * scanning a document fragment, an element depth of zero is
3112         * normal. However, when scanning a full XML document, the
3113         * scanner must handle the trailing miscellanous section of
3114         * the document after the end of the document's root element.
3115         *
3116         * @return True if the caller should stop and return true which
3117         *          allows the scanner to switch to a new scanning
3118         *          driver. A return value of false indicates that
3119         *          the content driver should continue as normal.
3120         */
3121        protected boolean elementDepthIsZeroHook()
3122        throws IOException, XNIException {
3123            return false;
3124        } // elementDepthIsZeroHook():boolean
3125
3126        /**
3127         * Scan for root element hook. This method is a hook for
3128         * subclasses to add code that handles scanning for the root
3129         * element. When scanning a document fragment, there is no
3130         * "root" element. However, when scanning a full XML document,
3131         * the scanner must handle the root element specially.
3132         *
3133         * @return True if the caller should stop and return true which
3134         *          allows the scanner to switch to a new scanning
3135         *          driver. A return value of false indicates that
3136         *          the content driver should continue as normal.
3137         */
3138        protected boolean scanRootElementHook()
3139        throws IOException, XNIException {
3140            return false;
3141        } // scanRootElementHook():boolean
3142
3143        /**
3144         * End of file hook. This method is a hook for subclasses to
3145         * add code that handles the end of file. The end of file in
3146         * a document fragment is OK if the markup depth is zero.
3147         * However, when scanning a full XML document, an end of file
3148         * is always premature.
3149         */
3150        protected void endOfFileHook(EOFException e)
3151        throws IOException, XNIException {
3152
3153            // NOTE: An end of file is only only an error if we were
3154            //       in the middle of scanning some markup. -Ac
3155            if (fMarkupDepth != 0) {
3156                reportFatalError("PrematureEOF", null);
3157            }
3158
3159        } // endOfFileHook()
3160
3161    } // class FragmentContentDriver
3162
3163    static void pr(String str) {
3164        System.out.println(str) ;
3165    }
3166
3167    protected boolean fUsebuffer ;
3168
3169    /** this function gets an XMLString (which is used to store the attribute value) from the special pool
3170     *  maintained for attributes.
3171     *  fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool.
3172     *  if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same
3173     *  XMLString.
3174     *
3175     * @return XMLString XMLString used to store an attribute value.
3176     */
3177
3178    protected XMLString getString(){
3179        if(fAttributeCacheUsedCount < initialCacheCount ||
3180                fAttributeCacheUsedCount < attributeValueCache.size()){
3181            return attributeValueCache.get(fAttributeCacheUsedCount++);
3182        } else{
3183            XMLString str = new XMLString();
3184            fAttributeCacheUsedCount++;
3185            attributeValueCache.add(str);
3186            return str;
3187        }
3188    }
3189
3190    /**
3191     * Implements XMLBufferListener interface.
3192     */
3193
3194    public void refresh(){
3195        refresh(0);
3196    }
3197
3198    /**
3199     * receives callbacks from {@link XMLEntityReader } when buffer
3200     * is being changed.
3201     * @param refreshPosition
3202     */
3203    public void refresh(int refreshPosition){
3204        //If you are reading attributes and you got a callback
3205        //cache available attributes.
3206        if(fReadingAttributes){
3207            fAttributes.refresh();
3208        }
3209        if(fScannerState == SCANNER_STATE_CHARACTER_DATA){
3210            bufferContent();
3211        }
3212    }
3213
3214    /**
3215     * Since 'TempString' shares the buffer (a char array) with the CurrentEntity,
3216     * when the cursor position reaches the end, that is, before the buffer is
3217     * being loaded with new data, the content in the TempString needs to be
3218     * copied into the ContentBuffer.
3219     */
3220    private void bufferContent() {
3221        fContentBuffer.append(fTempString);
3222        //clear the XMLString so that data can't be added again.
3223        fTempString.length = 0;
3224        fUsebuffer = true;
3225    }
3226} // class XMLDocumentFragmentScannerImpl
3227