1/* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5/* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22package com.sun.org.apache.xerces.internal.impl; 23 24import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 25import com.sun.org.apache.xerces.internal.util.AugmentationsImpl; 26import com.sun.org.apache.xerces.internal.util.XMLAttributesIteratorImpl; 27import com.sun.org.apache.xerces.internal.util.XMLChar; 28import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 29import com.sun.org.apache.xerces.internal.util.XMLSymbols; 30import com.sun.org.apache.xerces.internal.xni.QName; 31import com.sun.org.apache.xerces.internal.xni.XMLAttributes; 32import com.sun.org.apache.xerces.internal.xni.XMLDocumentHandler; 33import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 34import com.sun.org.apache.xerces.internal.xni.XMLString; 35import com.sun.org.apache.xerces.internal.xni.XNIException; 36import com.sun.org.apache.xerces.internal.xni.parser.XMLComponent; 37import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 38import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 39import 
com.sun.org.apache.xerces.internal.xni.parser.XMLDocumentScanner; 40import com.sun.org.apache.xerces.internal.xni.parser.XMLInputSource; 41import com.sun.org.apache.xerces.internal.xni.Augmentations; 42import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 43import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 44import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 45import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 46import com.sun.xml.internal.stream.XMLBufferListener; 47import com.sun.xml.internal.stream.XMLEntityStorage; 48import com.sun.xml.internal.stream.dtd.DTDGrammarUtil; 49import java.io.EOFException; 50import java.io.IOException; 51import javax.xml.XMLConstants; 52import javax.xml.stream.XMLInputFactory; 53import javax.xml.stream.XMLStreamConstants; 54import javax.xml.stream.events.XMLEvent; 55import jdk.xml.internal.JdkXmlUtils; 56 57/** 58 * 59 * This class is responsible for scanning the structure and content 60 * of document fragments. 61 * 62 * This class has been modified as per the new design which is more suited to 63 * efficiently build pull parser. Lot of improvements have been done and 64 * the code has been added to support stax functionality/features. 65 * 66 * @author Neeraj Bajaj SUN Microsystems 67 * @author K.Venugopal SUN Microsystems 68 * @author Glenn Marcy, IBM 69 * @author Andy Clark, IBM 70 * @author Arnaud Le Hors, IBM 71 * @author Eric Ye, IBM 72 * @author Sunitha Reddy, SUN Microsystems 73 * 74 */ 75public class XMLDocumentFragmentScannerImpl 76 extends XMLScanner 77 implements XMLDocumentScanner, XMLComponent, XMLEntityHandler, XMLBufferListener { 78 79 // 80 // Constants 81 // 82 83 protected int fElementAttributeLimit, fXMLNameLimit; 84 85 /** External subset resolver. **/ 86 protected ExternalSubsetResolver fExternalSubsetResolver; 87 88 // scanner states 89 90 //XXX this should be divided into more states. 91 /** Scanner state: start of markup. 
*/
    protected static final int SCANNER_STATE_START_OF_MARKUP = 21;

    /** Scanner state: content. */
    protected static final int SCANNER_STATE_CONTENT = 22;

    /** Scanner state: processing instruction. */
    protected static final int SCANNER_STATE_PI = 23;

    /** Scanner state: DOCTYPE. */
    protected static final int SCANNER_STATE_DOCTYPE = 24;

    /** Scanner state: XML declaration. */
    protected static final int SCANNER_STATE_XML_DECL = 25;

    /** Scanner state: root element. */
    protected static final int SCANNER_STATE_ROOT_ELEMENT = 26;

    /** Scanner state: comment. */
    protected static final int SCANNER_STATE_COMMENT = 27;

    /** Scanner state: reference. */
    protected static final int SCANNER_STATE_REFERENCE = 28;

    /** Scanner state: reading an attribute name, e.g. 'type' in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE = 29;

    /** Scanner state: reading an attribute value, e.g. "hard" in &lt;book type="hard"&gt;. */
    protected static final int SCANNER_STATE_ATTRIBUTE_VALUE = 30;

    // Scanner state: trailing misc. Not declared in this class; the original
    // note says it is used by the document scanner implementation.
    //protected static final int SCANNER_STATE_TRAILING_MISC = 32;

    /** Scanner state: end of input. */
    protected static final int SCANNER_STATE_END_OF_INPUT = 33;

    /** Scanner state: terminated. */
    protected static final int SCANNER_STATE_TERMINATED = 34;

    /** Scanner state: CDATA section. */
    protected static final int SCANNER_STATE_CDATA = 35;

    /** Scanner state: text declaration. */
    protected static final int SCANNER_STATE_TEXT_DECL = 36;

    /** Scanner state: character data. */
    protected static final int SCANNER_STATE_CHARACTER_DATA = 37;

    /** Scanner state: start tag, e.g. &lt;book type="hard"&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
    protected static final int SCANNER_STATE_START_ELEMENT_TAG = 38;

    /** Scanner state: end tag, e.g. &lt;/book&gt; in &lt;book type="hard"&gt;foo&lt;/book&gt;. */
    protected static final int SCANNER_STATE_END_ELEMENT_TAG = 39;

    /** Scanner state: character reference. */
    protected static final int SCANNER_STATE_CHAR_REFERENCE = 40;

    /** Scanner state: built-in entity references. */
    protected static final int SCANNER_STATE_BUILT_IN_REFS = 41;

    // feature identifiers

    /** Feature identifier: notify built-in references. */
    protected static final String NOTIFY_BUILTIN_REFS =
            Constants.XERCES_FEATURE_PREFIX + Constants.NOTIFY_BUILTIN_REFS_FEATURE;

    /** Property identifier: entity resolver. */
    protected static final String ENTITY_RESOLVER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY;

    /** Feature identifier: standard URI conformant. */
    protected static final String STANDARD_URI_CONFORMANT =
            Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE;

    /** Property identifier: security property manager. */
    private static final String XML_SECURITY_PROPERTY_MANAGER =
            Constants.XML_SECURITY_PROPERTY_MANAGER;

    /**
     * Default value for the external-DTD access restriction; used as the
     * initial value of {@code fAccessExternalDTD}.
     * Original note: "access external dtd: file protocol. For DOM/SAX, the
     * secure feature is set to true by default".
     */
    final static String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT;

    // recognized features and properties

    /**
     * Recognized features. Index-aligned with {@link #FEATURE_DEFAULTS}:
     * getFeatureDefault looks an identifier up here and returns the entry
     * at the same index there.
     */
    private static final String[] RECOGNIZED_FEATURES = {
        NAMESPACES,
        VALIDATION,
        NOTIFY_BUILTIN_REFS,
        NOTIFY_CHAR_REFS,
        Constants.STAX_REPORT_CDATA_EVENT,
        XMLConstants.USE_CATALOG
    };

    /** Feature defaults, one per entry of {@link #RECOGNIZED_FEATURES} (null = no default reported). */
    private static final Boolean[] FEATURE_DEFAULTS = {
        Boolean.TRUE,
        null,
        Boolean.FALSE,
        Boolean.FALSE,
        Boolean.TRUE,
        JdkXmlUtils.USE_CATALOG_DEFAULT
    };

    /**
     * Recognized properties. Index-aligned with {@link #PROPERTY_DEFAULTS}:
     * getPropertyDefault looks an identifier up here and returns the entry
     * at the same index there.
     */
    private static final String[] RECOGNIZED_PROPERTIES = {
        SYMBOL_TABLE,
        ERROR_REPORTER,
        ENTITY_MANAGER,
        XML_SECURITY_PROPERTY_MANAGER,
        JdkXmlUtils.CATALOG_DEFER,
        JdkXmlUtils.CATALOG_FILES,
        JdkXmlUtils.CATALOG_PREFER,
        JdkXmlUtils.CATALOG_RESOLVE,
        JdkXmlUtils.CDATA_CHUNK_SIZE
    };

    /** Property defaults, one per entry of {@link #RECOGNIZED_PROPERTIES} (null = no default reported). */
    private static final Object[] PROPERTY_DEFAULTS = {
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        null,
        JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT
    };


    /** The characters "[CDATA[". */
    private static final char [] CDATA = {'[','C','D','A','T','A','['};
    /** The characters "&lt;?xml". */
    static final char [] XMLDECL = {'<','?','x','m','l'};
    // private static final char [] endTag = {'<','/'};

    // debugging

    /** Debug scanner state. */
    private static final boolean DEBUG_SCANNER_STATE = false;

    /** Debug driver. */
    private static final boolean DEBUG_DISPATCHER = false;

    /** Debug content driver scanning. */
    protected static final boolean DEBUG_START_END_ELEMENT = false;

    /** Debug driver next. */
    protected static final boolean DEBUG = false;

    //
    // Data
    //

    // protected data

    /** Document handler that receives the scanner's callbacks. */
    protected XMLDocumentHandler fDocumentHandler;

    /** Last scanner state; getElementQName() compares it with XMLEvent.END_ELEMENT. */
    protected int fScannerLastState ;

    /** Entity storage, refreshed from the entity manager in resetCommon(). */
    protected XMLEntityStorage fEntityStore;

    /**
     * Entity stack: startEntity() stores the current markup depth at index
     * fEntityDepth (doubling the array when it is full) and endEntity()
     * compares against it to detect unbalanced markup.
     */
    protected int[] fEntityStack = new int[4];

    /** Markup depth. */
    protected int fMarkupDepth;

    /** Whether the element is empty. */
    protected boolean fEmptyElement ;

    /**
     * Tracks whether attributes are currently being read; the original note
     * says this is useful while there is a callback.
     */
    protected boolean fReadingAttributes = false;

    /** Scanner state. */
    protected int fScannerState;

    /** SubScanner state: inside scanContent method. */
    protected boolean fInScanContent = false;
    protected boolean fLastSectionWasCData = false;
    /** When set, scanDocument() emits startCDATA before the next CDATA characters. */
    protected boolean fCDataStart = false;
    /** Set by scanDocument() once startCDATA has been reported. */
    protected boolean fInCData = false;
    /** When set, scanDocument() emits endCDATA after the current CDATA characters. */
    protected boolean fCDataEnd = false;
    protected boolean fLastSectionWasEntityReference = false;
    protected boolean fLastSectionWasCharacterData = false;

    /** Has external DTD. */
    protected boolean fHasExternalDTD;

    /** True when the scanned declaration carried a standalone pseudo-attribute. */
    protected boolean fStandaloneSet;
    /** True when the standalone pseudo-attribute was "yes". */
    protected boolean fStandalone;
    protected String fVersion;

    // element information

    /** Current element. */
    protected QName fCurrentElement;

    /** Element stack. */
    protected ElementStack fElementStack = new ElementStack();
    protected ElementStack2 fElementStack2 = new ElementStack2();

    // other info

    // Document system identifier (field currently disabled).
    // REVISIT: So what's this used for? - NG
    // protected String fDocumentSystemId;

    /** Processing-instruction target returned by getPITarget(). */
    protected String fPITarget ;

    // xxx do we need to create an extra XMLString object... look for using
    // fTempString for collecting all the data values.
    // Note: getPIData() returns fContentBuffer, not this field.
    protected XMLString fPIData = new XMLString();

    // features


    /** Notify built-in references. */
    protected boolean fNotifyBuiltInRefs = false;

    // StAX related properties and their default values; both reset()
    // overloads reassign several of them.
    protected boolean fSupportDTD = true;
    protected boolean fReplaceEntityReferences = true;
    protected boolean fSupportExternalEntities = false;
    protected boolean fReportCdataEvent = false ;
    protected boolean fIsCoalesce = false ;
    /** Encoding pseudo-attribute of the last scanned XML/text declaration. */
    protected String fDeclaredEncoding = null;
    /** Xerces Feature: Disallow doctype declaration. */
    protected boolean fDisallowDoctype = false;

    /**
     * CDATA chunk size limit, read from the JdkXmlUtils.CDATA_CHUNK_SIZE
     * property on reset.
     */
    private int fChunkSize;

    /**
     * Comma-delimited list of protocols that are allowed for the purpose
     * of accessing external DTDs or entity references.
     */
    protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT;

    /**
     * Standard URI conformant (strict URI).
     * http://apache.org/xml/features/standard-uri-conformant
     */
    protected boolean fStrictURI;

    // drivers

    /** Active driver; next() delegates to it. */
    protected Driver fDriver;

    /** Content driver, obtained from the createContentDriver() factory method. */
    protected Driver fContentDriver = createContentDriver();

    // temporary variables

    /** Element QName. */
    protected QName fElementQName = new QName();

    /** Attribute QName. */
    protected QName fAttributeQName = new QName();

    /**
     * CHANGED: Using XMLAttributesIteratorImpl instead of XMLAttributesImpl. This class
     * implements the Iterator interface so attributes can be handed out directly in the
     * form of an iterator.
     */
    protected XMLAttributesIteratorImpl fAttributes = new XMLAttributesIteratorImpl();


    /** Temporary string. */
    protected XMLString fTempString = new XMLString();

    /** Second temporary string. */
    protected XMLString fTempString2 = new XMLString();

    /**
     * Array of 3 strings; scanXMLDeclOrTextDecl() reads version, encoding
     * and standalone from indices 0, 1 and 2.
     */
    private final String[] fStrings = new String[3];

    /** String buffer, accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer = new XMLStringBuffer();

    /** Second string buffer, accessible to derived classes. */
    protected XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

    /**
     * Content buffer, accessible to derived classes: returned by getPIData()
     * and, when fUsebuffer is set, by getCharacterData().
     */
    protected XMLStringBuffer fContentBuffer = new XMLStringBuffer();

    /** Single character array.
*/ 383 private final char[] fSingleChar = new char[1]; 384 private String fCurrentEntityName = null; 385 386 // New members 387 protected boolean fScanToEnd = false; 388 389 protected DTDGrammarUtil dtdGrammarUtil= null; 390 391 protected boolean fAddDefaultAttr = false; 392 393 protected boolean foundBuiltInRefs = false; 394 395 /** Built-in reference character event */ 396 protected boolean builtInRefCharacterHandled = false; 397 398 //skip element algorithm 399 static final short MAX_DEPTH_LIMIT = 5 ; 400 static final short ELEMENT_ARRAY_LENGTH = 200 ; 401 static final short MAX_POINTER_AT_A_DEPTH = 4 ; 402 static final boolean DEBUG_SKIP_ALGORITHM = false; 403 //create a elemnet array of length equal to ELEMENT_ARRAY_LENGTH 404 String [] fElementArray = new String[ELEMENT_ARRAY_LENGTH] ; 405 //pointer location where last element was skipped 406 short fLastPointerLocation = 0 ; 407 short fElementPointer = 0 ; 408 //2D array to store pointer info 409 short [] [] fPointerInfo = new short[MAX_DEPTH_LIMIT] [MAX_POINTER_AT_A_DEPTH] ; 410 protected String fElementRawname ; 411 protected boolean fShouldSkip = false; 412 protected boolean fAdd = false ; 413 protected boolean fSkip = false; 414 415 /** Reusable Augmentations. */ 416 private Augmentations fTempAugmentations = null; 417 // 418 // Constructors 419 // 420 421 /** Default constructor. */ 422 public XMLDocumentFragmentScannerImpl() { 423 } // <init>() 424 425 // 426 // XMLDocumentScanner methods 427 // 428 429 /** 430 * Sets the input source. 431 * 432 * @param inputSource The input source. 433 * 434 * @throws IOException Thrown on i/o error. 435 */ 436 public void setInputSource(XMLInputSource inputSource) throws IOException { 437 fEntityManager.setEntityHandler(this); 438 fEntityManager.startEntity(false, "$fragment$", inputSource, false, true); 439 // fDocumentSystemId = fEntityManager.expandSystemId(inputSource.getSystemId()); 440 } // setInputSource(XMLInputSource) 441 442 /** 443 * Scans a document. 
444 * 445 * @param complete True if the scanner should scan the document 446 * completely, pushing all events to the registered 447 * document handler. A value of false indicates that 448 * that the scanner should only scan the next portion 449 * of the document and return. A scanner instance is 450 * permitted to completely scan a document if it does 451 * not support this "pull" scanning model. 452 * 453 * @return True if there is more to scan, false otherwise. 454 */ 455 public boolean scanDocument(boolean complete) 456 throws IOException, XNIException { 457 458 // keep dispatching "events" 459 fEntityManager.setEntityHandler(this); 460 //System.out.println(" get Document Handler in NSDocumentHandler " + fDocumentHandler ); 461 462 int event = next(); 463 do { 464 switch (event) { 465 case XMLStreamConstants.START_DOCUMENT : 466 //fDocumentHandler.startDocument(fEntityManager.getEntityScanner(),fEntityManager.getEntityScanner().getVersion(),fNamespaceContext,null);// not able to get 467 break; 468 case XMLStreamConstants.START_ELEMENT : 469 //System.out.println(" in scann element"); 470 //fDocumentHandler.startElement(getElementQName(),fAttributes,null); 471 break; 472 case XMLStreamConstants.CHARACTERS : 473 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 474 fDocumentHandler.characters(getCharacterData(),null); 475 break; 476 case XMLStreamConstants.SPACE: 477 //check if getCharacterData() is the right function to retrieve ignorableWhitespace information. 
478 //System.out.println("in the space"); 479 //fDocumentHandler.ignorableWhitespace(getCharacterData(), null); 480 break; 481 case XMLStreamConstants.ENTITY_REFERENCE : 482 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 483 //entity reference callback are given in startEntity 484 break; 485 case XMLStreamConstants.PROCESSING_INSTRUCTION : 486 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 487 fDocumentHandler.processingInstruction(getPITarget(),getPIData(),null); 488 break; 489 case XMLStreamConstants.COMMENT : 490 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 491 fDocumentHandler.comment(getCharacterData(),null); 492 break; 493 case XMLStreamConstants.DTD : 494 //all DTD related callbacks are handled in DTDScanner. 495 //1. Stax doesn't define DTD states as it does for XML Document. 496 //therefore we don't need to take care of anything here. So Just break; 497 break; 498 case XMLStreamConstants.CDATA: 499 fEntityScanner.checkNodeCount(fEntityScanner.fCurrentEntity); 500 if (fCDataStart) { 501 fDocumentHandler.startCDATA(null); 502 fCDataStart = false; 503 fInCData = true; 504 } 505 506 fDocumentHandler.characters(getCharacterData(),null); 507 if (fCDataEnd) { 508 fDocumentHandler.endCDATA(null); 509 fCDataEnd = false; 510 } 511 break; 512 case XMLStreamConstants.NOTATION_DECLARATION : 513 break; 514 case XMLStreamConstants.ENTITY_DECLARATION : 515 break; 516 case XMLStreamConstants.NAMESPACE : 517 break; 518 case XMLStreamConstants.ATTRIBUTE : 519 break; 520 case XMLStreamConstants.END_ELEMENT : 521 //do not give callback here. 522 //this callback is given in scanEndElement function. 
523 //fDocumentHandler.endElement(getElementQName(),null); 524 break; 525 default : 526 // Errors should have already been handled by the Scanner 527 return false; 528 529 } 530 //System.out.println("here in before calling next"); 531 event = next(); 532 //System.out.println("here in after calling next"); 533 } while (event!=XMLStreamConstants.END_DOCUMENT && complete); 534 535 if(event == XMLStreamConstants.END_DOCUMENT) { 536 fDocumentHandler.endDocument(null); 537 return false; 538 } 539 540 return true; 541 542 } // scanDocument(boolean):boolean 543 544 545 546 public com.sun.org.apache.xerces.internal.xni.QName getElementQName(){ 547 if(fScannerLastState == XMLEvent.END_ELEMENT){ 548 fElementQName.setValues(fElementStack.getLastPoppedElement()); 549 } 550 return fElementQName ; 551 } 552 553 /** return the next state on the input 554 * @return int 555 */ 556 557 public int next() throws IOException, XNIException { 558 return fDriver.next(); 559 } 560 561 // 562 // XMLComponent methods 563 // 564 565 /** 566 * Resets the component. The component can query the component manager 567 * about any features and properties that affect the operation of the 568 * component. 569 * 570 * @param componentManager The component manager. 571 * 572 * @throws SAXException Thrown by component on initialization error. 573 * For example, if a feature or property is 574 * required for the operation of the component, the 575 * component manager may throw a 576 * SAXNotRecognizedException or a 577 * SAXNotSupportedException. 
578 */ 579 580 public void reset(XMLComponentManager componentManager) 581 throws XMLConfigurationException { 582 583 super.reset(componentManager); 584 585 // other settings 586 // fDocumentSystemId = null; 587 588 // sax features 589 //fAttributes.setNamespaces(fNamespaces); 590 591 // xerces features 592 fReportCdataEvent = componentManager.getFeature(Constants.STAX_REPORT_CDATA_EVENT, true); 593 fSecurityManager = (XMLSecurityManager)componentManager.getProperty(Constants.SECURITY_MANAGER, null); 594 fNotifyBuiltInRefs = componentManager.getFeature(NOTIFY_BUILTIN_REFS, false); 595 596 Object resolver = componentManager.getProperty(ENTITY_RESOLVER, null); 597 fExternalSubsetResolver = (resolver instanceof ExternalSubsetResolver) ? 598 (ExternalSubsetResolver) resolver : null; 599 600 //attribute 601 fReadingAttributes = false; 602 //xxx: external entities are supported in Xerces 603 // it would be good to define feature for this case 604 fSupportExternalEntities = true; 605 fReplaceEntityReferences = true; 606 fIsCoalesce = false; 607 608 // setup Driver 609 setScannerState(SCANNER_STATE_CONTENT); 610 setDriver(fContentDriver); 611 612 // JAXP 1.5 features and properties 613 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 614 componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null); 615 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 616 617 fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false); 618 fChunkSize = JdkXmlUtils.getValue(componentManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 619 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 620 621 resetCommon(); 622 //fEntityManager.test(); 623 } // reset(XMLComponentManager) 624 625 626 public void reset(PropertyManager propertyManager){ 627 628 super.reset(propertyManager); 629 630 // other settings 631 // fDocumentSystemId = null; 632 fNamespaces = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_NAMESPACE_AWARE)); 633 
fNotifyBuiltInRefs = false ; 634 635 //fElementStack2.clear(); 636 //fReplaceEntityReferences = true; 637 //fSupportExternalEntities = true; 638 Boolean bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES); 639 fReplaceEntityReferences = bo; 640 bo = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES); 641 fSupportExternalEntities = bo; 642 Boolean cdata = (Boolean)propertyManager.getProperty( 643 Constants.ZEPHYR_PROPERTY_PREFIX + Constants.STAX_REPORT_CDATA_EVENT) ; 644 if(cdata != null) 645 fReportCdataEvent = cdata ; 646 Boolean coalesce = (Boolean)propertyManager.getProperty(XMLInputFactory.IS_COALESCING) ; 647 if(coalesce != null) 648 fIsCoalesce = coalesce; 649 fReportCdataEvent = fIsCoalesce ? false : (fReportCdataEvent && true) ; 650 //if fIsCoalesce is set to true, set the value of fReplaceEntityReferences to true, 651 //if fIsCoalesce is set to false, take the value of fReplaceEntityReferences as set by application 652 fReplaceEntityReferences = fIsCoalesce ? true : fReplaceEntityReferences; 653 // setup Driver 654 //we dont need to do this -- nb. 
655 //setScannerState(SCANNER_STATE_CONTENT); 656 //setDriver(fContentDriver); 657 //fEntityManager.test(); 658 659 // JAXP 1.5 features and properties 660 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) 661 propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER); 662 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 663 664 fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(Constants.SECURITY_MANAGER); 665 fChunkSize = JdkXmlUtils.getValue(propertyManager.getProperty(JdkXmlUtils.CDATA_CHUNK_SIZE), 666 JdkXmlUtils.CDATA_CHUNK_SIZE_DEFAULT); 667 resetCommon(); 668 } // reset(XMLComponentManager) 669 670 void resetCommon() { 671 // initialize vars 672 fMarkupDepth = 0; 673 fCurrentElement = null; 674 fElementStack.clear(); 675 fHasExternalDTD = false; 676 fStandaloneSet = false; 677 fStandalone = false; 678 fInScanContent = false; 679 //skipping algorithm 680 fShouldSkip = false; 681 fAdd = false; 682 fSkip = false; 683 684 fEntityStore = fEntityManager.getEntityStore(); 685 dtdGrammarUtil = null; 686 687 if (fSecurityManager != null) { 688 fElementAttributeLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.ELEMENT_ATTRIBUTE_LIMIT); 689 fXMLNameLimit = fSecurityManager.getLimit(XMLSecurityManager.Limit.MAX_NAME_LIMIT); 690 } else { 691 fElementAttributeLimit = 0; 692 fXMLNameLimit = XMLSecurityManager.Limit.MAX_NAME_LIMIT.defaultValue(); 693 } 694 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 695 } 696 697 /** 698 * Returns a list of feature identifiers that are recognized by 699 * this component. This method may return null if no features 700 * are recognized by this component. 701 */ 702 public String[] getRecognizedFeatures() { 703 return RECOGNIZED_FEATURES.clone(); 704 } // getRecognizedFeatures():String[] 705 706 /** 707 * Sets the state of a feature. This method is called by the component 708 * manager any time after reset when a feature changes state. 
709 * <p> 710 * <strong>Note:</strong> Components should silently ignore features 711 * that do not affect the operation of the component. 712 * 713 * @param featureId The feature identifier. 714 * @param state The state of the feature. 715 * 716 * @throws SAXNotRecognizedException The component should not throw 717 * this exception. 718 * @throws SAXNotSupportedException The component should not throw 719 * this exception. 720 */ 721 public void setFeature(String featureId, boolean state) 722 throws XMLConfigurationException { 723 724 super.setFeature(featureId, state); 725 726 // Xerces properties 727 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 728 String feature = featureId.substring(Constants.XERCES_FEATURE_PREFIX.length()); 729 if (feature.equals(Constants.NOTIFY_BUILTIN_REFS_FEATURE)) { 730 fNotifyBuiltInRefs = state; 731 } 732 } 733 734 } // setFeature(String,boolean) 735 736 /** 737 * Returns a list of property identifiers that are recognized by 738 * this component. This method may return null if no properties 739 * are recognized by this component. 740 */ 741 public String[] getRecognizedProperties() { 742 return RECOGNIZED_PROPERTIES.clone(); 743 } // getRecognizedProperties():String[] 744 745 /** 746 * Sets the value of a property. This method is called by the component 747 * manager any time after reset when a property changes value. 748 * <p> 749 * <strong>Note:</strong> Components should silently ignore properties 750 * that do not affect the operation of the component. 751 * 752 * @param propertyId The property identifier. 753 * @param value The value of the property. 754 * 755 * @throws SAXNotRecognizedException The component should not throw 756 * this exception. 757 * @throws SAXNotSupportedException The component should not throw 758 * this exception. 
759 */ 760 public void setProperty(String propertyId, Object value) 761 throws XMLConfigurationException { 762 763 super.setProperty(propertyId, value); 764 765 // Xerces properties 766 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 767 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 768 if (suffixLength == Constants.ENTITY_MANAGER_PROPERTY.length() && 769 propertyId.endsWith(Constants.ENTITY_MANAGER_PROPERTY)) { 770 fEntityManager = (XMLEntityManager)value; 771 return; 772 } 773 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 774 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 775 fExternalSubsetResolver = (value instanceof ExternalSubsetResolver) ? 776 (ExternalSubsetResolver) value : null; 777 return; 778 } 779 } 780 781 782 // Xerces properties 783 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 784 String property = propertyId.substring(Constants.XERCES_PROPERTY_PREFIX.length()); 785 if (property.equals(Constants.ENTITY_MANAGER_PROPERTY)) { 786 fEntityManager = (XMLEntityManager)value; 787 } 788 return; 789 } 790 791 //JAXP 1.5 properties 792 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 793 { 794 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 795 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 796 } 797 798 } // setProperty(String,Object) 799 800 /** 801 * Returns the default state for a feature, or null if this 802 * component does not want to report a default value for this 803 * feature. 804 * 805 * @param featureId The feature identifier. 
806 * 807 * @since Xerces 2.2.0 808 */ 809 public Boolean getFeatureDefault(String featureId) { 810 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 811 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 812 return FEATURE_DEFAULTS[i]; 813 } 814 } 815 return null; 816 } // getFeatureDefault(String):Boolean 817 818 /** 819 * Returns the default state for a property, or null if this 820 * component does not want to report a default value for this 821 * property. 822 * 823 * @param propertyId The property identifier. 824 * 825 * @since Xerces 2.2.0 826 */ 827 public Object getPropertyDefault(String propertyId) { 828 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 829 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 830 return PROPERTY_DEFAULTS[i]; 831 } 832 } 833 return null; 834 } // getPropertyDefault(String):Object 835 836 // 837 // XMLDocumentSource methods 838 // 839 840 /** 841 * setDocumentHandler 842 * 843 * @param documentHandler 844 */ 845 public void setDocumentHandler(XMLDocumentHandler documentHandler) { 846 fDocumentHandler = documentHandler; 847 //System.out.println(" In Set DOCUMENT HANDLER" + fDocumentHandler + " scanner =" + this); 848 } // setDocumentHandler(XMLDocumentHandler) 849 850 851 /** Returns the document handler */ 852 public XMLDocumentHandler getDocumentHandler(){ 853 return fDocumentHandler; 854 } 855 856 // 857 // XMLEntityHandler methods 858 // 859 860 /** 861 * This method notifies of the start of an entity. The DTD has the 862 * pseudo-name of "[dtd]" parameter entity names start with '%'; and 863 * general entities are just specified by their name. 864 * 865 * @param name The name of the entity. 866 * @param identifier The resource identifier. 867 * @param encoding The auto-detected IANA encoding name of the entity 868 * stream. This value will be null in those situations 869 * where the entity encoding is not auto-detected (e.g. 870 * internal entities or a document entity that is 871 * parsed from a java.io.Reader). 
872 * @param augs Additional information that may include infoset augmentations 873 * 874 * @throws XNIException Thrown by handler to signal an error. 875 */ 876 public void startEntity(String name, 877 XMLResourceIdentifier identifier, 878 String encoding, Augmentations augs) throws XNIException { 879 880 // keep track of this entity before fEntityDepth is increased 881 if (fEntityDepth == fEntityStack.length) { 882 int[] entityarray = new int[fEntityStack.length * 2]; 883 System.arraycopy(fEntityStack, 0, entityarray, 0, fEntityStack.length); 884 fEntityStack = entityarray; 885 } 886 fEntityStack[fEntityDepth] = fMarkupDepth; 887 888 super.startEntity(name, identifier, encoding, augs); 889 890 // WFC: entity declared in external subset in standalone doc 891 if(fStandalone && fEntityStore.isEntityDeclInExternalSubset(name)) { 892 reportFatalError("MSG_REFERENCE_TO_EXTERNALLY_DECLARED_ENTITY_WHEN_STANDALONE", 893 new Object[]{name}); 894 } 895 896 /** we are not calling the handlers yet.. */ 897 // call handler 898 if (fDocumentHandler != null && !fScanningAttribute) { 899 if (!name.equals("[xml]")) { 900 fDocumentHandler.startGeneralEntity(name, identifier, encoding, augs); 901 } 902 } 903 904 } // startEntity(String,XMLResourceIdentifier,String) 905 906 /** 907 * This method notifies the end of an entity. The DTD has the pseudo-name 908 * of "[dtd]" parameter entity names start with '%'; and general entities 909 * are just specified by their name. 910 * 911 * @param name The name of the entity. 912 * @param augs Additional information that may include infoset augmentations 913 * 914 * @throws XNIException Thrown by handler to signal an error. 
915 */ 916 public void endEntity(String name, Augmentations augs) throws IOException, XNIException { 917 918 /** 919 * // flush possible pending output buffer - see scanContent 920 * if (fInScanContent && fStringBuffer.length != 0 921 * && fDocumentHandler != null) { 922 * fDocumentHandler.characters(fStringBuffer, null); 923 * fStringBuffer.length = 0; // make sure we know it's been flushed 924 * } 925 */ 926 super.endEntity(name, augs); 927 928 // make sure markup is properly balanced 929 if (fMarkupDepth != fEntityStack[fEntityDepth]) { 930 reportFatalError("MarkupEntityMismatch", null); 931 } 932 933 /**/ 934 // call handler 935 if (fDocumentHandler != null && !fScanningAttribute) { 936 if (!name.equals("[xml]")) { 937 fDocumentHandler.endGeneralEntity(name, augs); 938 } 939 } 940 941 942 } // endEntity(String) 943 944 // 945 // Protected methods 946 // 947 948 // Driver factory methods 949 950 /** Creates a content Driver. */ 951 protected Driver createContentDriver() { 952 return new FragmentContentDriver(); 953 } // createContentDriver():Driver 954 955 // scanning methods 956 957 /** 958 * Scans an XML or text declaration. 959 * <p> 960 * <pre> 961 * [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' 962 * [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ") 963 * [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) 964 * [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* 965 * [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") 966 * | ('"' ('yes' | 'no') '"')) 967 * 968 * [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>' 969 * </pre> 970 * 971 * @param scanningTextDecl True if a text declaration is to 972 * be scanned instead of an XML 973 * declaration. 
974 */ 975 protected void scanXMLDeclOrTextDecl(boolean scanningTextDecl) 976 throws IOException, XNIException { 977 978 // scan decl 979 super.scanXMLDeclOrTextDecl(scanningTextDecl, fStrings); 980 fMarkupDepth--; 981 982 // pseudo-attribute values 983 String version = fStrings[0]; 984 String encoding = fStrings[1]; 985 String standalone = fStrings[2]; 986 fDeclaredEncoding = encoding; 987 // set standalone 988 fStandaloneSet = standalone != null; 989 fStandalone = fStandaloneSet && standalone.equals("yes"); 990 ///xxx see where its used.. this is not used anywhere. 991 //it may be useful for entity to store this information 992 //but this information is only related with Document Entity. 993 fEntityManager.setStandalone(fStandalone); 994 995 996 // call handler 997 if (fDocumentHandler != null) { 998 if (scanningTextDecl) { 999 fDocumentHandler.textDecl(version, encoding, null); 1000 } else { 1001 fDocumentHandler.xmlDecl(version, encoding, standalone, null); 1002 } 1003 } 1004 1005 if(version != null){ 1006 fEntityScanner.setVersion(version); 1007 fEntityScanner.setXMLVersion(version); 1008 } 1009 // set encoding on reader, only if encoding was not specified by the application explicitly 1010 if (encoding != null && !fEntityScanner.getCurrentEntity().isEncodingExternallySpecified()) { 1011 fEntityScanner.setEncoding(encoding); 1012 } 1013 1014 } // scanXMLDeclOrTextDecl(boolean) 1015 1016 public String getPITarget(){ 1017 return fPITarget ; 1018 } 1019 1020 public XMLStringBuffer getPIData(){ 1021 return fContentBuffer ; 1022 } 1023 1024 //XXX: why not this function behave as per the state of the parser? 1025 public XMLString getCharacterData(){ 1026 if(fUsebuffer){ 1027 return fContentBuffer ; 1028 }else{ 1029 return fTempString; 1030 } 1031 1032 } 1033 1034 1035 /** 1036 * Scans a processing data. This is needed to handle the situation 1037 * where a document starts with a processing instruction whose 1038 * target name <em>starts with</em> "xml". (e.g. 
xmlfoo) 1039 * 1040 * @param target The PI target 1041 * @param data The XMLStringBuffer to fill in with the data 1042 */ 1043 protected void scanPIData(String target, XMLStringBuffer data) 1044 throws IOException, XNIException { 1045 1046 super.scanPIData(target, data); 1047 1048 //set the PI target and values 1049 fPITarget = target ; 1050 1051 fMarkupDepth--; 1052 1053 } // scanPIData(String) 1054 1055 /** 1056 * Scans a comment. 1057 * <p> 1058 * <pre> 1059 * [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' 1060 * </pre> 1061 * <p> 1062 * <strong>Note:</strong> Called after scanning past '<!--' 1063 */ 1064 protected void scanComment() throws IOException, XNIException { 1065 fContentBuffer.clear(); 1066 scanComment(fContentBuffer); 1067 //getTextCharacters can also be called for reading comments 1068 fUsebuffer = true; 1069 fMarkupDepth--; 1070 1071 } // scanComment() 1072 1073 //xxx value returned by this function may not remain valid if another event is scanned. 1074 public String getComment(){ 1075 return fContentBuffer.toString(); 1076 } 1077 1078 void addElement(String rawname){ 1079 if(fElementPointer < ELEMENT_ARRAY_LENGTH){ 1080 //storing element raw name in a linear list of array 1081 fElementArray[fElementPointer] = rawname ; 1082 //storing elemnetPointer for particular element depth 1083 1084 if(DEBUG_SKIP_ALGORITHM){ 1085 StringBuffer sb = new StringBuffer() ; 1086 sb.append(" Storing element information ") ; 1087 sb.append(" fElementPointer = " + fElementPointer) ; 1088 sb.append(" fElementRawname = " + fElementQName.rawname) ; 1089 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1090 System.out.println(sb.toString()) ; 1091 } 1092 1093 //store pointer information only when element depth is less MAX_DEPTH_LIMIT 1094 if(fElementStack.fDepth < MAX_DEPTH_LIMIT){ 1095 short column = storePointerForADepth(fElementPointer); 1096 if(column > 0){ 1097 short pointer = getElementPointer((short)fElementStack.fDepth, 
(short)(column - 1) ); 1098 //identity comparison shouldn't take much time and we can rely on this 1099 //since its guaranteed to have same object id for same string. 1100 if(rawname == fElementArray[pointer]){ 1101 fShouldSkip = true ; 1102 fLastPointerLocation = pointer ; 1103 //reset the things and return. 1104 resetPointer((short)fElementStack.fDepth , column) ; 1105 fElementArray[fElementPointer] = null ; 1106 return ; 1107 }else{ 1108 fShouldSkip = false ; 1109 } 1110 } 1111 } 1112 fElementPointer++ ; 1113 } 1114 } 1115 1116 1117 void resetPointer(short depth, short column){ 1118 fPointerInfo[depth] [column] = (short)0; 1119 } 1120 1121 //returns column information at which pointer was stored. 1122 short storePointerForADepth(short elementPointer){ 1123 short depth = (short) fElementStack.fDepth ; 1124 1125 //Stores element pointer locations at particular depth , only 4 pointer locations 1126 //are stored at particular depth for now. 1127 for(short i = 0 ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1128 1129 if(canStore(depth, i)){ 1130 fPointerInfo[depth][i] = elementPointer ; 1131 if(DEBUG_SKIP_ALGORITHM){ 1132 StringBuffer sb = new StringBuffer() ; 1133 sb.append(" Pointer information ") ; 1134 sb.append(" fElementPointer = " + fElementPointer) ; 1135 sb.append(" fElementStack.fDepth = " + fElementStack.fDepth); 1136 sb.append(" column = " + i ) ; 1137 System.out.println(sb.toString()) ; 1138 } 1139 return i; 1140 } 1141 //else 1142 //pointer was not stored because we reached the limit 1143 } 1144 return -1 ; 1145 } 1146 1147 boolean canStore(short depth, short column){ 1148 //colum = 0 , means first element at particular depth 1149 //column = 1, means second element at particular depth 1150 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1151 return fPointerInfo[depth][column] == 0 ? 
true : false ; 1152 } 1153 1154 1155 short getElementPointer(short depth, short column){ 1156 //colum = 0 , means first element at particular depth 1157 //column = 1, means second element at particular depth 1158 // calle should make sure that it doesn't call for value outside allowed co-ordinates 1159 return fPointerInfo[depth][column] ; 1160 } 1161 1162 //this function assumes that string passed is not null and skips 1163 //the following string from the buffer this makes sure 1164 boolean skipFromTheBuffer(String rawname) throws IOException{ 1165 if(fEntityScanner.skipString(rawname)){ 1166 char c = (char)fEntityScanner.peekChar() ; 1167 //If the start element was completely skipped we should encounter either ' '(space), 1168 //or '/' (in case of empty element) or '>' 1169 if( c == ' ' || c == '/' || c == '>'){ 1170 fElementRawname = rawname ; 1171 return true ; 1172 } else{ 1173 return false; 1174 } 1175 } else 1176 return false ; 1177 } 1178 1179 boolean skipQElement(String rawname) throws IOException{ 1180 1181 final int c = fEntityScanner.getChar(rawname.length()); 1182 //if this character is still valid element name -- this means string can't match 1183 if(XMLChar.isName(c)){ 1184 return false; 1185 }else{ 1186 return fEntityScanner.skipString(rawname); 1187 } 1188 } 1189 1190 protected boolean skipElement() throws IOException { 1191 1192 if(!fShouldSkip) return false ; 1193 1194 if(fLastPointerLocation != 0){ 1195 //Look at the next element stored in the array list.. we might just get a match. 1196 String rawname = fElementArray[fLastPointerLocation + 1] ; 1197 if(rawname != null && skipFromTheBuffer(rawname)){ 1198 fLastPointerLocation++ ; 1199 if(DEBUG_SKIP_ALGORITHM){ 1200 System.out.println("Element " + fElementRawname + 1201 " was SKIPPED at pointer location = " + fLastPointerLocation); 1202 } 1203 return true ; 1204 } else{ 1205 //reset it back to zero... we haven't got the correct subset yet. 
1206 fLastPointerLocation = 0 ; 1207 1208 } 1209 } 1210 //xxx: we can put some logic here as from what column it should start looking 1211 //for now we always start at 0 1212 //fallback to tolerant algorithm, it would look for differnt element stored at different 1213 //depth and get us the pointer location. 1214 return fShouldSkip && skipElement((short)0); 1215 1216 } 1217 1218 //start of the column at which it should try searching 1219 boolean skipElement(short column) throws IOException { 1220 short depth = (short)fElementStack.fDepth ; 1221 1222 if(depth > MAX_DEPTH_LIMIT){ 1223 return fShouldSkip = false ; 1224 } 1225 for(short i = column ; i < MAX_POINTER_AT_A_DEPTH ; i++){ 1226 short pointer = getElementPointer(depth , i ) ; 1227 1228 if(pointer == 0){ 1229 return fShouldSkip = false ; 1230 } 1231 1232 if(fElementArray[pointer] != null && skipFromTheBuffer(fElementArray[pointer])){ 1233 if(DEBUG_SKIP_ALGORITHM){ 1234 System.out.println(); 1235 System.out.println("Element " + fElementRawname + " was SKIPPED at depth = " + 1236 fElementStack.fDepth + " column = " + column ); 1237 System.out.println(); 1238 } 1239 fLastPointerLocation = pointer ; 1240 return fShouldSkip = true ; 1241 } 1242 } 1243 return fShouldSkip = false ; 1244 } 1245 1246 /** 1247 * Scans a start element. This method will handle the binding of 1248 * namespace information and notifying the handler of the start 1249 * of the element. 1250 * <p> 1251 * <pre> 1252 * [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>' 1253 * [40] STag ::= '<' Name (S Attribute)* S? '>' 1254 * </pre> 1255 * <p> 1256 * <strong>Note:</strong> This method assumes that the leading 1257 * '<' character has been consumed. 1258 * <p> 1259 * <strong>Note:</strong> This method uses the fElementQName and 1260 * fAttributes variables. The contents of these variables will be 1261 * destroyed. The caller should copy important information out of 1262 * these variables before calling this method. 
1263 * NB: Content in fAttributes is valid only till the state of the parser is XMLEvent.START_ELEMENT 1264 * 1265 * @return True if element is empty. (i.e. It matches 1266 * production [44]. 1267 */ 1268 // fElementQName will have the details of element just read.. 1269 // fAttributes will have the details of all the attributes. 1270 protected boolean scanStartElement() 1271 throws IOException, XNIException { 1272 1273 if (DEBUG_START_END_ELEMENT) System.out.println( this.getClass().toString() + ">>> scanStartElement()"); 1274 //when skipping is true and no more elements should be added 1275 if(fSkip && !fAdd){ 1276 //get the stored element -- if everything goes right this should match the 1277 //token in the buffer 1278 1279 QName name = fElementStack.getNext(); 1280 1281 if(DEBUG_SKIP_ALGORITHM){ 1282 System.out.println("Trying to skip String = " + name.rawname); 1283 } 1284 1285 //Be conservative -- if skipping fails -- stop. 1286 fSkip = fEntityScanner.skipString(name.rawname); 1287 1288 if(fSkip){ 1289 if(DEBUG_SKIP_ALGORITHM){ 1290 System.out.println("Element SUCESSFULLY skipped = " + name.rawname); 1291 } 1292 fElementStack.push(); 1293 fElementQName = name; 1294 }else{ 1295 //if skipping fails reposition the stack or fallback to normal way of processing 1296 fElementStack.reposition(); 1297 if(DEBUG_SKIP_ALGORITHM){ 1298 System.out.println("Element was NOT skipped, REPOSITIONING stack" ); 1299 } 1300 } 1301 } 1302 1303 //we are still at the stage of adding elements 1304 //the elements were not matched or 1305 //fSkip is not set to true 1306 if(!fSkip || fAdd){ 1307 //get the next element from the stack 1308 fElementQName = fElementStack.nextElement(); 1309 // name 1310 if (fNamespaces) { 1311 fEntityScanner.scanQName(fElementQName, NameType.ELEMENTSTART); 1312 } else { 1313 String name = fEntityScanner.scanName(NameType.ELEMENTSTART); 1314 fElementQName.setValues(null, name, name, null); 1315 } 1316 1317 if(DEBUG)System.out.println("Element scanned in 
start element is " + fElementQName.toString()); 1318 if(DEBUG_SKIP_ALGORITHM){ 1319 if(fAdd){ 1320 System.out.println("Elements are being ADDED -- elemet added is = " + 1321 fElementQName.rawname + " at count = " + fElementStack.fCount); 1322 } 1323 } 1324 1325 } 1326 1327 //when the elements are being added , we need to check if we are set for skipping the elements 1328 if(fAdd){ 1329 //this sets the value of fAdd variable 1330 fElementStack.matchElement(fElementQName); 1331 } 1332 1333 1334 //xxx: We dont need another pointer, fCurrentElement, we can use fElementQName 1335 fCurrentElement = fElementQName; 1336 1337 String rawname = fElementQName.rawname; 1338 1339 fEmptyElement = false; 1340 1341 fAttributes.removeAllAttributes(); 1342 1343 checkDepth(rawname); 1344 if(!seekCloseOfStartTag()){ 1345 fReadingAttributes = true; 1346 fAttributeCacheUsedCount =0; 1347 fStringBufferIndex =0; 1348 fAddDefaultAttr = true; 1349 do { 1350 scanAttribute(fAttributes); 1351 if (fSecurityManager != null && !fSecurityManager.isNoLimit(fElementAttributeLimit) && 1352 fAttributes.getLength() > fElementAttributeLimit){ 1353 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1354 "ElementAttributeLimit", 1355 new Object[]{rawname, fElementAttributeLimit }, 1356 XMLErrorReporter.SEVERITY_FATAL_ERROR ); 1357 } 1358 1359 } while (!seekCloseOfStartTag()); 1360 fReadingAttributes=false; 1361 } 1362 1363 if (fEmptyElement) { 1364 //decrease the markup depth.. 
1365 fMarkupDepth--; 1366 1367 // check that this element was opened in the same entity 1368 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1369 reportFatalError("ElementEntityMismatch", 1370 new Object[]{fCurrentElement.rawname}); 1371 } 1372 // call handler 1373 if (fDocumentHandler != null) { 1374 fDocumentHandler.emptyElement(fElementQName, fAttributes, null); 1375 } 1376 1377 //We should not be popping out the context here in endELement becaause the namespace context is still 1378 //valid when parser is at the endElement state. 1379 //if (fNamespaces) { 1380 // fNamespaceContext.popContext(); 1381 //} 1382 1383 //pop the element off the stack.. 1384 fElementStack.popElement(); 1385 1386 } else { 1387 1388 if(dtdGrammarUtil != null) 1389 dtdGrammarUtil.startElement(fElementQName, fAttributes); 1390 if(fDocumentHandler != null){ 1391 //complete element and attributes are traversed in this function so we can send a callback 1392 //here. 1393 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1394 fDocumentHandler.startElement(fElementQName, fAttributes, null); 1395 } 1396 } 1397 1398 1399 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() + 1400 "<<< scanStartElement(): "+fEmptyElement); 1401 return fEmptyElement; 1402 1403 } // scanStartElement():boolean 1404 1405 /** 1406 * Looks for the close of start tag, i.e. if it finds '>' or '/>' 1407 * Characters are consumed. 1408 */ 1409 protected boolean seekCloseOfStartTag() throws IOException, XNIException { 1410 // spaces 1411 boolean sawSpace = fEntityScanner.skipSpaces(); 1412 1413 // end tag? 
1414 final int c = fEntityScanner.peekChar(); 1415 if (c == '>') { 1416 fEntityScanner.scanChar(null); 1417 return true; 1418 } else if (c == '/') { 1419 fEntityScanner.scanChar(null); 1420 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1421 reportFatalError("ElementUnterminated", 1422 new Object[]{fElementQName.rawname}); 1423 } 1424 fEmptyElement = true; 1425 return true; 1426 } else if (!isValidNameStartChar(c) || !sawSpace) { 1427 // Second chance. Check if this character is a high 1428 // surrogate of a valid name start character. 1429 if (!isValidNameStartHighSurrogate(c) || !sawSpace) { 1430 reportFatalError("ElementUnterminated", 1431 new Object[]{fElementQName.rawname}); 1432 } 1433 } 1434 1435 return false; 1436 } 1437 1438 public boolean hasAttributes(){ 1439 return fAttributes.getLength() > 0; 1440 } 1441 1442 /** return the attribute iterator implementation */ 1443 public XMLAttributesIteratorImpl getAttributeIterator(){ 1444 if(dtdGrammarUtil != null && fAddDefaultAttr){ 1445 dtdGrammarUtil.addDTDDefaultAttrs(fElementQName,fAttributes); 1446 fAddDefaultAttr = false; 1447 } 1448 return fAttributes; 1449 } 1450 1451 /** return if standalone is set */ 1452 public boolean standaloneSet(){ 1453 return fStandaloneSet; 1454 } 1455 /** return if the doucment is standalone */ 1456 public boolean isStandAlone(){ 1457 return fStandalone ; 1458 } 1459 /** 1460 * Scans an attribute name value pair. 1461 * <p> 1462 * <pre> 1463 * [41] Attribute ::= Name Eq AttValue 1464 * </pre> 1465 * <p> 1466 * <strong>Note:</strong> This method assumes that the next 1467 * character on the stream is the first character of the attribute 1468 * name. 1469 * <p> 1470 * <strong>Note:</strong> This method uses the fAttributeQName and 1471 * fQName variables. The contents of these variables will be 1472 * destroyed. 1473 * 1474 * @param attributes The attributes list for the scanned attribute. 
1475 */ 1476 1477 protected void scanAttribute(XMLAttributes attributes) 1478 throws IOException, XNIException { 1479 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanAttribute()"); 1480 1481 // name 1482 if (fNamespaces) { 1483 fEntityScanner.scanQName(fAttributeQName, NameType.ATTRIBUTENAME); 1484 } else { 1485 String name = fEntityScanner.scanName(NameType.ATTRIBUTENAME); 1486 fAttributeQName.setValues(null, name, name, null); 1487 } 1488 1489 // equals 1490 fEntityScanner.skipSpaces(); 1491 if (!fEntityScanner.skipChar('=', NameType.ATTRIBUTE)) { 1492 reportFatalError("EqRequiredInAttribute", 1493 new Object[] {fCurrentElement.rawname, fAttributeQName.rawname}); 1494 } 1495 fEntityScanner.skipSpaces(); 1496 1497 int attIndex = 0 ; 1498 //REVISIT: one more case needs to be included: external PE and standalone is no 1499 boolean isVC = fHasExternalDTD && !fStandalone; 1500 //fTempString would store attribute value 1501 ///fTempString2 would store attribute non-normalized value 1502 1503 //this function doesn't use 'attIndex'. We are adding the attribute later 1504 //after we have figured out that current attribute is not namespace declaration 1505 //since scanAttributeValue doesn't use attIndex parameter therefore we 1506 //can safely add the attribute later.. 1507 XMLString tmpStr = getString(); 1508 1509 scanAttributeValue(tmpStr, fTempString2, fAttributeQName.rawname, attributes, 1510 attIndex, isVC, fCurrentElement.rawname, false); 1511 1512 // content 1513 int oldLen = attributes.getLength(); 1514 //if the attribute name already exists.. new value is replaced with old value 1515 attIndex = attributes.addAttribute(fAttributeQName, XMLSymbols.fCDATASymbol, null); 1516 1517 // WFC: Unique Att Spec 1518 //attributes count will be same if the current attribute name already exists for this element name. 1519 //this means there are two duplicate attributes. 
1520 if (oldLen == attributes.getLength()) { 1521 reportFatalError("AttributeNotUnique", 1522 new Object[]{fCurrentElement.rawname, 1523 fAttributeQName.rawname}); 1524 } 1525 1526 //tmpString contains attribute value 1527 //we are passing null as the attribute value 1528 attributes.setValue(attIndex, null, tmpStr); 1529 1530 ///xxx: nonNormalizedValue is not being set as it is not required by SAX & DOM 1531 //attributes.setNonNormalizedValue(oldLen, fTempString2.toString()); 1532 attributes.setSpecified(attIndex, true); 1533 1534 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +"<<< scanAttribute()"); 1535 1536 } // scanAttribute(XMLAttributes) 1537 1538 /** 1539 * Scans element content. 1540 * 1541 * @return Returns the next character on the stream. 1542 */ 1543 //CHANGED: 1544 //EARLIER: scanContent() 1545 //NOW: scanContent(XMLStringBuffer) 1546 //It makes things easy if this functions takes XMLStringBuffer as parameter.. 1547 //this function appends the data to the buffer. 1548 protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { 1549 //set the fTempString length to 0 before passing it on to scanContent 1550 //scanContent sets the correct co-ordinates as per the content read 1551 fTempString.length = 0; 1552 int c = fEntityScanner.scanContent(fTempString); 1553 content.append(fTempString); 1554 fTempString.length = 0; 1555 if (c == '\r') { 1556 // happens when there is the character reference 1557 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 1558 fEntityScanner.scanChar(null); 1559 content.append((char)c); 1560 c = -1; 1561 } else if (c == ']') { 1562 //fStringBuffer.clear(); 1563 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 1564 content.append((char)fEntityScanner.scanChar(null)); 1565 // remember where we are in case we get an endEntity before we 1566 // could flush the buffer out - this happens when we're parsing an 1567 // entity which ends with a ] 1568 fInScanContent = true; 1569 // 1570 // We work on a single character basis to handle cases such as: 1571 // ']]]>' which we might otherwise miss. 1572 // 1573 if (fEntityScanner.skipChar(']', null)) { 1574 content.append(']'); 1575 while (fEntityScanner.skipChar(']', null)) { 1576 content.append(']'); 1577 } 1578 if (fEntityScanner.skipChar('>', null)) { 1579 reportFatalError("CDEndInContent", null); 1580 } 1581 } 1582 fInScanContent = false; 1583 c = -1; 1584 } 1585 if (fDocumentHandler != null && content.length > 0) { 1586 //fDocumentHandler.characters(content, null); 1587 } 1588 return c; 1589 1590 } // scanContent():int 1591 1592 1593 /** 1594 * Scans a CDATA section. 1595 * <p> 1596 * <strong>Note:</strong> This method uses the fTempString and 1597 * fStringBuffer variables. 1598 * 1599 * @param complete True if the CDATA section is to be scanned 1600 * completely. 1601 * 1602 * @return True if CDATA is completely scanned. 1603 */ 1604 //CHANGED: 1605 protected boolean scanCDATASection(XMLStringBuffer contentBuffer, boolean complete) 1606 throws IOException, XNIException { 1607 1608 // call handler 1609 if (fDocumentHandler != null) { 1610 //fDocumentHandler.startCDATA(null); 1611 } 1612 1613 while (true) { 1614 //scanData will fill the contentBuffer 1615 if (!fEntityScanner.scanData("]]>", contentBuffer, fChunkSize)) { 1616 fInCData = false; 1617 fCDataEnd = true; 1618 fMarkupDepth--; 1619 break ; 1620 } else { 1621 int c = fEntityScanner.peekChar(); 1622 if (c != -1 && isInvalidLiteral(c)) { 1623 if (XMLChar.isHighSurrogate(c)) { 1624 //contentBuffer.clear(); 1625 //scan surrogates if any.... 
1626 scanSurrogates(contentBuffer); 1627 } else { 1628 reportFatalError("InvalidCharInCDSect", 1629 new Object[]{Integer.toString(c,16)}); 1630 fEntityScanner.scanChar(null); 1631 } 1632 } else { 1633 //CData partially returned due to the size limit 1634 break; 1635 } 1636 //by this time we have also read surrogate contents if any... 1637 if (fDocumentHandler != null) { 1638 //fDocumentHandler.characters(contentBuffer, null); 1639 } 1640 } 1641 } 1642 1643 return true; 1644 1645 } // scanCDATASection(XMLStringBuffer, boolean):boolean 1646 1647 /** 1648 * Scans an end element. 1649 * <p> 1650 * <pre> 1651 * [42] ETag ::= '</' Name S? '>' 1652 * </pre> 1653 * <p> 1654 * <strong>Note:</strong> This method uses the fElementQName variable. 1655 * The contents of this variable will be destroyed. The caller should 1656 * copy the needed information out of this variable before calling 1657 * this method. 1658 * 1659 * @return The element depth. 1660 */ 1661 protected int scanEndElement() throws IOException, XNIException { 1662 if (DEBUG_START_END_ELEMENT) System.out.println(this.getClass().toString() +">>> scanEndElement()"); 1663 1664 // pop context 1665 QName endElementName = fElementStack.popElement(); 1666 1667 String rawname = endElementName.rawname; 1668 if(DEBUG)System.out.println("endElementName = " + endElementName.toString()); 1669 // Take advantage of the fact that next string _should_ be "fElementQName.rawName", 1670 //In scanners most of the time is consumed on checks done for XML characters, we can 1671 // optimize on it and avoid the checks done for endElement, 1672 //we will also avoid symbol table lookup - neeraj.bajaj@sun.com 1673 1674 // this should work both for namespace processing true or false... 1675 1676 //REVISIT: if the string is not the same as expected.. we need to do better error handling.. 1677 //We can skip this for now... In any case if the string doesn't match -- document is not well formed. 
1678 1679 if (!fEntityScanner.skipString(endElementName.rawname)) { 1680 reportFatalError("ETagRequired", new Object[]{rawname}); 1681 } 1682 1683 // end 1684 fEntityScanner.skipSpaces(); 1685 if (!fEntityScanner.skipChar('>', NameType.ELEMENTEND)) { 1686 reportFatalError("ETagUnterminated", 1687 new Object[]{rawname}); 1688 } 1689 fMarkupDepth--; 1690 1691 //we have increased the depth for two markup "<" characters 1692 fMarkupDepth--; 1693 1694 // check that this element was opened in the same entity 1695 if (fMarkupDepth < fEntityStack[fEntityDepth - 1]) { 1696 reportFatalError("ElementEntityMismatch", 1697 new Object[]{rawname}); 1698 } 1699 1700 //We should not be popping out the context here in endELement becaause the namespace context is still 1701 //valid when parser is at the endElement state. 1702 1703 //if (fNamespaces) { 1704 // fNamespaceContext.popContext(); 1705 //} 1706 1707 // call handler 1708 if (fDocumentHandler != null ) { 1709 //end element is scanned in this function so we can send a callback 1710 //here. 1711 //<strong>we shouldn't be sending callback in scanDocument()</strong> 1712 1713 fDocumentHandler.endElement(endElementName, null); 1714 } 1715 if(dtdGrammarUtil != null) 1716 dtdGrammarUtil.endElement(endElementName); 1717 1718 return fMarkupDepth; 1719 1720 } // scanEndElement():int 1721 1722 /** 1723 * Scans a character reference. 
1724 * <p> 1725 * <pre> 1726 * [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' 1727 * </pre> 1728 */ 1729 protected void scanCharReference() 1730 throws IOException, XNIException { 1731 1732 fStringBuffer2.clear(); 1733 int ch = scanCharReferenceValue(fStringBuffer2, null); 1734 fMarkupDepth--; 1735 if (ch != -1) { 1736 // call handler 1737 1738 if (fDocumentHandler != null) { 1739 if (fNotifyCharRefs) { 1740 fDocumentHandler.startGeneralEntity(fCharRefLiteral, null, null, null); 1741 } 1742 Augmentations augs = null; 1743 if (fValidation && ch <= 0x20) { 1744 if (fTempAugmentations != null) { 1745 fTempAugmentations.removeAllItems(); 1746 } 1747 else { 1748 fTempAugmentations = new AugmentationsImpl(); 1749 } 1750 augs = fTempAugmentations; 1751 augs.putItem(Constants.CHAR_REF_PROBABLE_WS, Boolean.TRUE); 1752 } 1753 //xxx: How do we deal with this - how to return charReferenceValues 1754 //now this is being commented because this is taken care in scanDocument() 1755 //fDocumentHandler.characters(fStringBuffer2, null); 1756 if (fNotifyCharRefs) { 1757 fDocumentHandler.endGeneralEntity(fCharRefLiteral, null); 1758 } 1759 } 1760 } 1761 1762 } // scanCharReference() 1763 1764 1765 /** 1766 * Scans an entity reference. 1767 * 1768 * @return returns true if the new entity is started. If it was built-in entity 1769 * 'false' is returned. 1770 * @throws IOException Thrown if i/o error occurs. 1771 * @throws XNIException Thrown if handler throws exception upon 1772 * notification. 
1773 */ 1774 protected void scanEntityReference(XMLStringBuffer content) throws IOException, XNIException { 1775 String name = fEntityScanner.scanName(NameType.REFERENCE); 1776 if (name == null) { 1777 reportFatalError("NameRequiredInReference", null); 1778 return; 1779 } 1780 if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { 1781 reportFatalError("SemicolonRequiredInReference", new Object []{name}); 1782 } 1783 if (fEntityStore.isUnparsedEntity(name)) { 1784 reportFatalError("ReferenceToUnparsedEntity", new Object[]{name}); 1785 } 1786 fMarkupDepth--; 1787 fCurrentEntityName = name; 1788 1789 // handle built-in entities 1790 if (name == fAmpSymbol) { 1791 handleCharacter('&', fAmpSymbol, content); 1792 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1793 return ; 1794 } else if (name == fLtSymbol) { 1795 handleCharacter('<', fLtSymbol, content); 1796 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1797 return ; 1798 } else if (name == fGtSymbol) { 1799 handleCharacter('>', fGtSymbol, content); 1800 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1801 return ; 1802 } else if (name == fQuotSymbol) { 1803 handleCharacter('"', fQuotSymbol, content); 1804 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1805 return ; 1806 } else if (name == fAposSymbol) { 1807 handleCharacter('\'', fAposSymbol, content); 1808 fScannerState = SCANNER_STATE_BUILT_IN_REFS; 1809 return ; 1810 } 1811 1812 //1. if the entity is external and support to external entities is not required 1813 // 2. or entities should not be replaced 1814 //3. or if it is built in entity reference. 
1815 boolean isEE = fEntityStore.isExternalEntity(name); 1816 if((isEE && !fSupportExternalEntities) || (!isEE && !fReplaceEntityReferences) || foundBuiltInRefs){ 1817 fScannerState = SCANNER_STATE_REFERENCE; 1818 return ; 1819 } 1820 // start general entity 1821 if (!fEntityStore.isDeclaredEntity(name)) { 1822 //SUPPORT_DTD=false && ReplaceEntityReferences should throw exception 1823 if (!fSupportDTD && fReplaceEntityReferences) { 1824 reportFatalError("EntityNotDeclared", new Object[]{name}); 1825 return; 1826 } 1827 //REVISIT: one more case needs to be included: external PE and standalone is no 1828 if ( fHasExternalDTD && !fStandalone) { 1829 if (fValidation) 1830 fErrorReporter.reportError(fEntityScanner, XMLMessageFormatter.XML_DOMAIN,"EntityNotDeclared", 1831 new Object[]{name}, XMLErrorReporter.SEVERITY_ERROR); 1832 } else 1833 reportFatalError("EntityNotDeclared", new Object[]{name}); 1834 } 1835 //we are starting the entity even if the entity was not declared 1836 //if that was the case it its taken care in XMLEntityManager.startEntity() 1837 //we immediately call the endEntity. Application gets to know if there was 1838 //any entity that was not declared. 1839 fEntityManager.startEntity(true, name, false); 1840 //set the scaner state to content.. parser will automatically revive itself at any point of time. 
1841 //setScannerState(SCANNER_STATE_CONTENT); 1842 //return true ; 1843 } // scanEntityReference() 1844 1845 // utility methods 1846 1847 /** 1848 * Check if the depth exceeds the maxElementDepth limit 1849 * @param elementName name of the current element 1850 */ 1851 void checkDepth(String elementName) { 1852 fLimitAnalyzer.addValue(Limit.MAX_ELEMENT_DEPTH_LIMIT, elementName, fElementStack.fDepth); 1853 if (fSecurityManager.isOverLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT,fLimitAnalyzer)) { 1854 fSecurityManager.debugPrint(fLimitAnalyzer); 1855 reportFatalError("MaxElementDepthLimit", new Object[]{elementName, 1856 fLimitAnalyzer.getTotalValue(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1857 fSecurityManager.getLimit(Limit.MAX_ELEMENT_DEPTH_LIMIT), 1858 "maxElementDepth"}); 1859 } 1860 } 1861 1862 /** 1863 * Calls document handler with a single character resulting from 1864 * built-in entity resolution. 1865 * 1866 * @param c 1867 * @param entity built-in name 1868 * @param XMLStringBuffer append the character to buffer 1869 * 1870 * we really dont need to call this function -- this function is only required when 1871 * we integrate with rest of Xerces2. SO maintaining the current behavior and still 1872 * calling this function to hanlde built-in entity reference. 
1873 * 1874 */ 1875 private void handleCharacter(char c, String entity, XMLStringBuffer content) throws XNIException { 1876 foundBuiltInRefs = true; 1877 checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); 1878 content.append(c); 1879 if (fDocumentHandler != null) { 1880 fSingleChar[0] = c; 1881 if (fNotifyBuiltInRefs) { 1882 fDocumentHandler.startGeneralEntity(entity, null, null, null); 1883 } 1884 fTempString.setValues(fSingleChar, 0, 1); 1885 if(!fIsCoalesce){ 1886 fDocumentHandler.characters(fTempString, null); 1887 builtInRefCharacterHandled = true; 1888 } 1889 1890 if (fNotifyBuiltInRefs) { 1891 fDocumentHandler.endGeneralEntity(entity, null); 1892 } 1893 } 1894 } // handleCharacter(char) 1895 1896 // helper methods 1897 1898 /** 1899 * Sets the scanner state. 1900 * 1901 * @param state The new scanner state. 1902 */ 1903 protected final void setScannerState(int state) { 1904 1905 fScannerState = state; 1906 if (DEBUG_SCANNER_STATE) { 1907 System.out.print("### setScannerState: "); 1908 //System.out.print(fScannerState); 1909 System.out.print(getScannerStateName(state)); 1910 System.out.println(); 1911 } 1912 1913 } // setScannerState(int) 1914 1915 1916 /** 1917 * Sets the Driver. 1918 * 1919 * @param Driver The new Driver. 1920 */ 1921 protected final void setDriver(Driver driver) { 1922 fDriver = driver; 1923 if (DEBUG_DISPATCHER) { 1924 System.out.print("%%% setDriver: "); 1925 System.out.print(getDriverName(driver)); 1926 System.out.println(); 1927 } 1928 } 1929 1930 // 1931 // Private methods 1932 // 1933 1934 /** Returns the scanner state name. 
*/ 1935 protected String getScannerStateName(int state) { 1936 1937 switch (state) { 1938 case SCANNER_STATE_DOCTYPE: return "SCANNER_STATE_DOCTYPE"; 1939 case SCANNER_STATE_ROOT_ELEMENT: return "SCANNER_STATE_ROOT_ELEMENT"; 1940 case SCANNER_STATE_START_OF_MARKUP: return "SCANNER_STATE_START_OF_MARKUP"; 1941 case SCANNER_STATE_COMMENT: return "SCANNER_STATE_COMMENT"; 1942 case SCANNER_STATE_PI: return "SCANNER_STATE_PI"; 1943 case SCANNER_STATE_CONTENT: return "SCANNER_STATE_CONTENT"; 1944 case SCANNER_STATE_REFERENCE: return "SCANNER_STATE_REFERENCE"; 1945 case SCANNER_STATE_END_OF_INPUT: return "SCANNER_STATE_END_OF_INPUT"; 1946 case SCANNER_STATE_TERMINATED: return "SCANNER_STATE_TERMINATED"; 1947 case SCANNER_STATE_CDATA: return "SCANNER_STATE_CDATA"; 1948 case SCANNER_STATE_TEXT_DECL: return "SCANNER_STATE_TEXT_DECL"; 1949 case SCANNER_STATE_ATTRIBUTE: return "SCANNER_STATE_ATTRIBUTE"; 1950 case SCANNER_STATE_ATTRIBUTE_VALUE: return "SCANNER_STATE_ATTRIBUTE_VALUE"; 1951 case SCANNER_STATE_START_ELEMENT_TAG: return "SCANNER_STATE_START_ELEMENT_TAG"; 1952 case SCANNER_STATE_END_ELEMENT_TAG: return "SCANNER_STATE_END_ELEMENT_TAG"; 1953 case SCANNER_STATE_CHARACTER_DATA: return "SCANNER_STATE_CHARACTER_DATA" ; 1954 } 1955 1956 return "??? ("+state+')'; 1957 1958 } // getScannerStateName(int):String 1959 public String getEntityName(){ 1960 //return the cached name 1961 return fCurrentEntityName; 1962 } 1963 1964 /** Returns the driver name. 
*/ 1965 public String getDriverName(Driver driver) { 1966 1967 if (DEBUG_DISPATCHER) { 1968 if (driver != null) { 1969 String name = driver.getClass().getName(); 1970 int index = name.lastIndexOf('.'); 1971 if (index != -1) { 1972 name = name.substring(index + 1); 1973 index = name.lastIndexOf('$'); 1974 if (index != -1) { 1975 name = name.substring(index + 1); 1976 } 1977 } 1978 return name; 1979 } 1980 } 1981 return "null"; 1982 1983 } // getDriverName():String 1984 1985 /** 1986 * Check the protocol used in the systemId against allowed protocols 1987 * 1988 * @param systemId the Id of the URI 1989 * @param allowedProtocols a list of allowed protocols separated by comma 1990 * @return the name of the protocol if rejected, null otherwise 1991 */ 1992 String checkAccess(String systemId, String allowedProtocols) throws IOException { 1993 String baseSystemId = fEntityScanner.getBaseSystemId(); 1994 String expandedSystemId = XMLEntityManager.expandSystemId(systemId, baseSystemId, fStrictURI); 1995 return SecuritySupport.checkAccess(expandedSystemId, allowedProtocols, Constants.ACCESS_EXTERNAL_ALL); 1996 } 1997 1998 // 1999 // Classes 2000 // 2001 2002 /** 2003 * @author Neeraj Bajaj, Sun Microsystems. 2004 */ 2005 protected static final class Element { 2006 2007 // 2008 // Data 2009 // 2010 2011 /** Symbol. */ 2012 public QName qname; 2013 2014 //raw name stored as characters 2015 public char[] fRawname; 2016 2017 /** The next Element entry. */ 2018 public Element next; 2019 2020 // 2021 // Constructors 2022 // 2023 2024 /** 2025 * Constructs a new Element from the given QName and next Element 2026 * reference. 2027 */ 2028 public Element(QName qname, Element next) { 2029 this.qname.setValues(qname); 2030 this.fRawname = qname.rawname.toCharArray(); 2031 this.next = next; 2032 } 2033 2034 } // class Element 2035 2036 /** 2037 * Element stack. 2038 * 2039 * @author Neeraj Bajaj, Sun Microsystems. 
2040 */ 2041 protected class ElementStack2 { 2042 2043 // 2044 // Data 2045 // 2046 2047 /** The stack data. */ 2048 protected QName [] fQName = new QName[20]; 2049 2050 //Element depth 2051 protected int fDepth; 2052 //total number of elements 2053 protected int fCount; 2054 //current position 2055 protected int fPosition; 2056 //Mark refers to the position 2057 protected int fMark; 2058 2059 protected int fLastDepth ; 2060 2061 // 2062 // Constructors 2063 // 2064 2065 /** Default constructor. */ 2066 public ElementStack2() { 2067 for (int i = 0; i < fQName.length; i++) { 2068 fQName[i] = new QName(); 2069 } 2070 fMark = fPosition = 1; 2071 } // <init>() 2072 2073 public void resize(){ 2074 /** 2075 * int length = fElements.length; 2076 * Element [] temp = new Element[length * 2]; 2077 * System.arraycopy(fElements, 0, temp, 0, length); 2078 * fElements = temp; 2079 */ 2080 //resize QNames 2081 int oldLength = fQName.length; 2082 QName [] tmp = new QName[oldLength * 2]; 2083 System.arraycopy(fQName, 0, tmp, 0, oldLength); 2084 fQName = tmp; 2085 2086 for (int i = oldLength; i < fQName.length; i++) { 2087 fQName[i] = new QName(); 2088 } 2089 2090 } 2091 2092 2093 // 2094 // Public methods 2095 // 2096 2097 /** Check if the element scanned during the start element 2098 *matches the stored element. 2099 * 2100 *@return true if the match suceeds. 
2101 */ 2102 public boolean matchElement(QName element) { 2103 //last depth is the depth when last elemnt was pushed 2104 //if last depth is greater than current depth 2105 if(DEBUG_SKIP_ALGORITHM){ 2106 System.out.println("fLastDepth = " + fLastDepth); 2107 System.out.println("fDepth = " + fDepth); 2108 } 2109 boolean match = false; 2110 if(fLastDepth > fDepth && fDepth <= 2){ 2111 if(DEBUG_SKIP_ALGORITHM){ 2112 System.out.println("Checking if the elements match " + element.rawname + " , " + fQName[fDepth].rawname); 2113 } 2114 if(element.rawname == fQName[fDepth].rawname){ 2115 fAdd = false; 2116 //mark this position 2117 //decrease the depth by 1 as arrays are 0 based 2118 fMark = fDepth - 1; 2119 //we found the match and from next element skipping will start, add 1 2120 fPosition = fMark + 1 ; 2121 match = true; 2122 //Once we get match decrease the count -- this was increased by nextElement() 2123 --fCount; 2124 if(DEBUG_SKIP_ALGORITHM){ 2125 System.out.println("fAdd FALSE -- NOW ELEMENT SHOULD NOT BE ADDED"); 2126 System.out.println("fMark = " + fMark); 2127 System.out.println("fPosition = " + fPosition); 2128 System.out.println("fDepth = " + fDepth); 2129 System.out.println("fCount = " + fCount); 2130 } 2131 }else{ 2132 fAdd = true; 2133 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2134 } 2135 } 2136 //store the last depth 2137 fLastDepth = fDepth++; 2138 return match; 2139 } // pushElement(QName):QName 2140 2141 /** 2142 * This function doesn't increase depth. The function in this function is 2143 *broken down into two functions for efficiency. <@see>matchElement</see>. 2144 * This function just returns the pointer to the object and its values are set. 
2145 * 2146 *@return QName reference to the next element in the list 2147 */ 2148 public QName nextElement() { 2149 2150 //if number of elements becomes equal to the length of array -- stop the skipping 2151 if (fCount == fQName.length) { 2152 fShouldSkip = false; 2153 fAdd = false; 2154 if(DEBUG_SKIP_ALGORITHM)System.out.println("SKIPPING STOPPED, fShouldSkip = " + fShouldSkip); 2155 //xxx: this is not correct, we are returning the last element 2156 //this wont make any difference since flag has been set to 'false' 2157 return fQName[--fCount]; 2158 } 2159 if(DEBUG_SKIP_ALGORITHM){ 2160 System.out.println("fCount = " + fCount); 2161 } 2162 return fQName[fCount++]; 2163 2164 } 2165 2166 /** Note that this function is considerably different than nextElement() 2167 * This function just returns the previously stored elements 2168 */ 2169 public QName getNext(){ 2170 //when position reaches number of elements in the list.. 2171 //set the position back to mark, making it a circular linked list. 2172 if(fPosition == fCount){ 2173 fPosition = fMark; 2174 } 2175 return fQName[fPosition++]; 2176 } 2177 2178 /** returns the current depth 2179 */ 2180 public int popElement(){ 2181 return fDepth--; 2182 } 2183 2184 2185 /** Clears the stack without throwing away existing QName objects. */ 2186 public void clear() { 2187 fLastDepth = 0; 2188 fDepth = 0; 2189 fCount = 0 ; 2190 fPosition = fMark = 1; 2191 } // clear() 2192 2193 } // class ElementStack 2194 2195 /** 2196 * Element stack. This stack operates without synchronization, error 2197 * checking, and it re-uses objects instead of throwing popped items 2198 * away. 2199 * 2200 * @author Andy Clark, IBM 2201 */ 2202 protected class ElementStack { 2203 2204 // 2205 // Data 2206 // 2207 2208 /** The stack data. 
*/ 2209 protected QName[] fElements; 2210 protected int [] fInt = new int[20]; 2211 2212 2213 //Element depth 2214 protected int fDepth; 2215 //total number of elements 2216 protected int fCount; 2217 //current position 2218 protected int fPosition; 2219 //Mark refers to the position 2220 protected int fMark; 2221 2222 protected int fLastDepth ; 2223 2224 // 2225 // Constructors 2226 // 2227 2228 /** Default constructor. */ 2229 public ElementStack() { 2230 fElements = new QName[20]; 2231 for (int i = 0; i < fElements.length; i++) { 2232 fElements[i] = new QName(); 2233 } 2234 } // <init>() 2235 2236 // 2237 // Public methods 2238 // 2239 2240 /** 2241 * Pushes an element on the stack. 2242 * <p> 2243 * <strong>Note:</strong> The QName values are copied into the 2244 * stack. In other words, the caller does <em>not</em> orphan 2245 * the element to the stack. Also, the QName object returned 2246 * is <em>not</em> orphaned to the caller. It should be 2247 * considered read-only. 2248 * 2249 * @param element The element to push onto the stack. 2250 * 2251 * @return Returns the actual QName object that stores the 2252 */ 2253 //XXX: THIS FUNCTION IS NOT USED 2254 public QName pushElement(QName element) { 2255 if (fDepth == fElements.length) { 2256 QName[] array = new QName[fElements.length * 2]; 2257 System.arraycopy(fElements, 0, array, 0, fDepth); 2258 fElements = array; 2259 for (int i = fDepth; i < fElements.length; i++) { 2260 fElements[i] = new QName(); 2261 } 2262 } 2263 fElements[fDepth].setValues(element); 2264 return fElements[fDepth++]; 2265 } // pushElement(QName):QName 2266 2267 2268 /** Note that this function is considerably different than nextElement() 2269 * This function just returns the previously stored elements 2270 */ 2271 public QName getNext(){ 2272 //when position reaches number of elements in the list.. 2273 //set the position back to mark, making it a circular linked list. 
2274 if(fPosition == fCount){ 2275 fPosition = fMark; 2276 } 2277 //store the position of last opened tag at particular depth 2278 //fInt[++fDepth] = fPosition; 2279 if(DEBUG_SKIP_ALGORITHM){ 2280 System.out.println("Element at fPosition = " + fPosition + " is " + fElements[fPosition].rawname); 2281 } 2282 //return fElements[fPosition++]; 2283 return fElements[fPosition]; 2284 } 2285 2286 /** This function should be called only when element was skipped sucessfully. 2287 * 1. Increase the depth - because element was sucessfully skipped. 2288 *2. Store the position of the element token in array "last opened tag" at depth. 2289 *3. increase the position counter so as to point to the next element in the array 2290 */ 2291 public void push(){ 2292 2293 fInt[++fDepth] = fPosition++; 2294 } 2295 2296 /** Check if the element scanned during the start element 2297 *matches the stored element. 2298 * 2299 *@return true if the match suceeds. 2300 */ 2301 public boolean matchElement(QName element) { 2302 //last depth is the depth when last elemnt was pushed 2303 //if last depth is greater than current depth 2304 //if(DEBUG_SKIP_ALGORITHM){ 2305 // System.out.println("Check if the element " + element.rawname + " matches"); 2306 // System.out.println("fLastDepth = " + fLastDepth); 2307 // System.out.println("fDepth = " + fDepth); 2308 //} 2309 boolean match = false; 2310 if(fLastDepth > fDepth && fDepth <= 3){ 2311 if(DEBUG_SKIP_ALGORITHM){ 2312 System.out.println("----------ENTERED THE LOOP WHERE WE CHECK FOR MATCHING OF ELMENT-----"); 2313 System.out.println("Depth = " + fDepth + " Checking if INCOMING element " + element.rawname + " match STORED ELEMENT " + fElements[fDepth - 1].rawname); 2314 } 2315 if(element.rawname == fElements[fDepth - 1].rawname){ 2316 fAdd = false; 2317 //mark this position 2318 //decrease the depth by 1 as arrays are 0 based 2319 fMark = fDepth - 1; 2320 //we found the match 2321 fPosition = fMark; 2322 match = true; 2323 //Once we get match decrease 
the count -- this was increased by nextElement() 2324 --fCount; 2325 if(DEBUG_SKIP_ALGORITHM){ 2326 System.out.println("NOW ELEMENT SHOULD NOT BE ADDED, fAdd is set to false"); 2327 System.out.println("fMark = " + fMark); 2328 System.out.println("fPosition = " + fPosition); 2329 System.out.println("fDepth = " + fDepth); 2330 System.out.println("fCount = " + fCount); 2331 System.out.println("---------MATCH SUCEEDED-----------------"); 2332 System.out.println(""); 2333 } 2334 }else{ 2335 fAdd = true; 2336 if(DEBUG_SKIP_ALGORITHM)System.out.println("fAdd is " + fAdd); 2337 } 2338 } 2339 //store the position for the current depth 2340 //when we are adding the elements, when skipping 2341 //starts even then this should be tracked ie. when 2342 //calling getNext() 2343 if(match){ 2344 //from next element skipping will start, add 1 2345 fInt[fDepth] = fPosition++; 2346 } else{ 2347 if(DEBUG_SKIP_ALGORITHM){ 2348 System.out.println("At depth = " + fDepth + "array position is = " + (fCount - 1)); 2349 } 2350 //sicne fInt[fDepth] contains pointer to the element array which are 0 based. 
2351 fInt[fDepth] = fCount - 1; 2352 } 2353 2354 //if number of elements becomes equal to the length of array -- stop the skipping 2355 //xxx: should we do "fCount == fInt.length" 2356 if (fCount == fElements.length) { 2357 fSkip = false; 2358 fAdd = false; 2359 //reposition the stack -- it seems to be too complex document and there is no symmerty in structure 2360 reposition(); 2361 if(DEBUG_SKIP_ALGORITHM){ 2362 System.out.println("ALL THE ELMENTS IN ARRAY HAVE BEEN FILLED"); 2363 System.out.println("REPOSITIONING THE STACK"); 2364 System.out.println("-----------SKIPPING STOPPED----------"); 2365 System.out.println(""); 2366 } 2367 return false; 2368 } 2369 if(DEBUG_SKIP_ALGORITHM){ 2370 if(match){ 2371 System.out.println("Storing fPosition = " + fInt[fDepth] + " at fDepth = " + fDepth); 2372 }else{ 2373 System.out.println("Storing fCount = " + fInt[fDepth] + " at fDepth = " + fDepth); 2374 } 2375 } 2376 //store the last depth 2377 fLastDepth = fDepth; 2378 return match; 2379 } // matchElement(QName):QName 2380 2381 2382 /** 2383 * Returns the next element on the stack. 2384 * 2385 * @return Returns the actual QName object. Callee should 2386 * use this object to store the details of next element encountered. 2387 */ 2388 public QName nextElement() { 2389 if(fSkip){ 2390 fDepth++; 2391 //boundary checks are done in matchElement() 2392 return fElements[fCount++]; 2393 } else if (fDepth == fElements.length) { 2394 QName[] array = new QName[fElements.length * 2]; 2395 System.arraycopy(fElements, 0, array, 0, fDepth); 2396 fElements = array; 2397 for (int i = fDepth; i < fElements.length; i++) { 2398 fElements[i] = new QName(); 2399 } 2400 } 2401 2402 return fElements[fDepth++]; 2403 2404 } // pushElement(QName):QName 2405 2406 2407 /** 2408 * Pops an element off of the stack by setting the values of 2409 * the specified QName. 2410 * <p> 2411 * <strong>Note:</strong> The object returned is <em>not</em> 2412 * orphaned to the caller. 
Therefore, the caller should consider 2413 * the object to be read-only. 2414 */ 2415 public QName popElement() { 2416 //return the same object that was pushed -- this would avoid 2417 //setting the values for every end element. 2418 //STRONG: this object is read only -- this object reference shouldn't be stored. 2419 if(fSkip || fAdd ){ 2420 if(DEBUG_SKIP_ALGORITHM){ 2421 System.out.println("POPPING Element, at position " + fInt[fDepth] + " element at that count is = " + fElements[fInt[fDepth]].rawname); 2422 System.out.println(""); 2423 } 2424 return fElements[fInt[fDepth--]]; 2425 } else{ 2426 if(DEBUG_SKIP_ALGORITHM){ 2427 System.out.println("Retrieveing element at depth = " + fDepth + " is " + fElements[fDepth].rawname ); 2428 } 2429 return fElements[--fDepth] ; 2430 } 2431 //element.setValues(fElements[--fDepth]); 2432 } // popElement(QName) 2433 2434 /** Reposition the stack. fInt [] contains all the opened tags at particular depth. 2435 * Transfer all the opened tags starting from depth '2' to the current depth and reposition them 2436 *as per the depth. 2437 */ 2438 public void reposition(){ 2439 for( int i = 2 ; i <= fDepth ; i++){ 2440 fElements[i-1] = fElements[fInt[i]]; 2441 } 2442 if(DEBUG_SKIP_ALGORITHM){ 2443 for( int i = 0 ; i < fDepth ; i++){ 2444 System.out.println("fElements[" + i + "]" + " = " + fElements[i].rawname); 2445 } 2446 } 2447 } 2448 2449 /** Clears the stack without throwing away existing QName objects. */ 2450 public void clear() { 2451 fDepth = 0; 2452 fLastDepth = 0; 2453 fCount = 0 ; 2454 fPosition = fMark = 1; 2455 2456 } // clear() 2457 2458 /** 2459 * This function is as a result of optimization done for endElement -- 2460 * we dont need to set the value for every end element encouterd. 2461 * For Well formedness checks we can have the same QName object that was pushed. 
2462 * the values will be set only if application need to know about the endElement 2463 * -- neeraj.bajaj@sun.com 2464 */ 2465 2466 public QName getLastPoppedElement(){ 2467 return fElements[fDepth]; 2468 } 2469 } // class ElementStack 2470 2471 /** 2472 * Drives the parser to the next state/event on the input. Parser is guaranteed 2473 * to stop at the next state/event. 2474 * 2475 * Internally XML document is divided into several states. Each state represents 2476 * a sections of XML document. When this functions returns normally, it has read 2477 * the section of XML document and returns the state corresponding to section of 2478 * document which has been read. For optimizations, a particular driver 2479 * can read ahead of the section of document (state returned) just read and 2480 * can maintain a different internal state. 2481 * 2482 * 2483 * @author Neeraj Bajaj, Sun Microsystems 2484 */ 2485 protected interface Driver { 2486 2487 2488 /** 2489 * Drives the parser to the next state/event on the input. Parser is guaranteed 2490 * to stop at the next state/event. 2491 * 2492 * Internally XML document is divided into several states. Each state represents 2493 * a sections of XML document. When this functions returns normally, it has read 2494 * the section of XML document and returns the state corresponding to section of 2495 * document which has been read. For optimizations, a particular driver 2496 * can read ahead of the section of document (state returned) just read and 2497 * can maintain a different internal state. 2498 * 2499 * @return state representing the section of document just read. 2500 * 2501 * @throws IOException Thrown on i/o error. 2502 * @throws XNIException Thrown on parse error. 2503 */ 2504 2505 public int next() throws IOException, XNIException; 2506 2507 } // interface Driver 2508 2509 /** 2510 * Driver to handle content scanning. This driver is capable of reading 2511 * the fragment of XML document. 
When it has finished reading fragment 2512 * of XML documents, it can pass the job of reading to another driver. 2513 * 2514 * This class has been modified as per the new design which is more suited to 2515 * efficiently build pull parser. Lot of performance improvements have been done and 2516 * the code has been added to support stax functionality/features. 2517 * 2518 * @author Neeraj Bajaj, Sun Microsystems 2519 * 2520 * 2521 * @author Andy Clark, IBM 2522 * @author Eric Ye, IBM 2523 */ 2524 protected class FragmentContentDriver 2525 implements Driver { 2526 2527 // 2528 // Driver methods 2529 // 2530 2531 /** 2532 * decides the appropriate state of the parser 2533 */ 2534 private void startOfMarkup() throws IOException { 2535 fMarkupDepth++; 2536 final int ch = fEntityScanner.peekChar(); 2537 if (isValidNameStartChar(ch) || isValidNameStartHighSurrogate(ch)) { 2538 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2539 } else { 2540 switch(ch){ 2541 case '?' :{ 2542 setScannerState(SCANNER_STATE_PI); 2543 fEntityScanner.skipChar(ch, null); 2544 break; 2545 } 2546 case '!' 
:{ 2547 fEntityScanner.skipChar(ch, null); 2548 if (fEntityScanner.skipChar('-', null)) { 2549 if (!fEntityScanner.skipChar('-', NameType.COMMENT)) { 2550 reportFatalError("InvalidCommentStart", 2551 null); 2552 } 2553 setScannerState(SCANNER_STATE_COMMENT); 2554 } else if (fEntityScanner.skipString(CDATA)) { 2555 fCDataStart = true; 2556 setScannerState(SCANNER_STATE_CDATA ); 2557 } else if (!scanForDoctypeHook()) { 2558 reportFatalError("MarkupNotRecognizedInContent", 2559 null); 2560 } 2561 break; 2562 } 2563 case '/' :{ 2564 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2565 fEntityScanner.skipChar(ch, NameType.ELEMENTEND); 2566 break; 2567 } 2568 default :{ 2569 reportFatalError("MarkupNotRecognizedInContent", null); 2570 } 2571 } 2572 } 2573 2574 }//startOfMarkup 2575 2576 private void startOfContent() throws IOException { 2577 if (fEntityScanner.skipChar('<', null)) { 2578 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2579 } else if (fEntityScanner.skipChar('&', NameType.REFERENCE)) { 2580 setScannerState(SCANNER_STATE_REFERENCE) ; //XMLEvent.ENTITY_REFERENCE ); //SCANNER_STATE_REFERENCE 2581 } else { 2582 //element content is there.. 2583 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2584 } 2585 }//startOfContent 2586 2587 2588 /** 2589 * 2590 * SCANNER_STATE_CONTENT and SCANNER_STATE_START_OF_MARKUP are two super states of the parser. 2591 * At any point of time when in doubt over the current state of the parser, the state should be 2592 * set to SCANNER_STATE_CONTENT. Parser will automatically revive itself and will set state of 2593 * the parser to one of its sub state. 2594 * sub states are defined in the parser on the basis of different XML component like 2595 * SCANNER_STATE_ENTITY_REFERENCE , SCANNER_STATE_START_ELEMENT, SCANNER_STATE_CDATA etc.. 2596 * These sub states help the parser to have fine control over the parsing. These are the 2597 * different milepost, parser stops at each sub state (milepost). 
Based on this state it is 2598 * decided if paresr needs to stop at next milepost ?? 2599 * 2600 */ 2601 public void decideSubState() throws IOException { 2602 while( fScannerState == SCANNER_STATE_CONTENT || fScannerState == SCANNER_STATE_START_OF_MARKUP){ 2603 2604 switch (fScannerState) { 2605 2606 case SCANNER_STATE_CONTENT: { 2607 startOfContent() ; 2608 break; 2609 } 2610 2611 case SCANNER_STATE_START_OF_MARKUP: { 2612 startOfMarkup() ; 2613 break; 2614 } 2615 } 2616 } 2617 }//decideSubState 2618 2619 /** 2620 * Drives the parser to the next state/event on the input. Parser is guaranteed 2621 * to stop at the next state/event. Internally XML document 2622 * is divided into several states. Each state represents a sections of XML 2623 * document. When this functions returns normally, it has read the section 2624 * of XML document and returns the state corresponding to section of 2625 * document which has been read. For optimizations, a particular driver 2626 * can read ahead of the section of document (state returned) just read and 2627 * can maintain a different internal state. 2628 * 2629 * State returned corresponds to Stax states. 2630 * 2631 * @return state representing the section of document just read. 2632 * 2633 * @throws IOException Thrown on i/o error. 2634 * @throws XNIException Thrown on parse error. 2635 */ 2636 2637 public int next() throws IOException, XNIException { 2638 while (true) { 2639 try { 2640 2641 //decide the actual sub state of the scanner.For more information refer to the javadoc of 2642 //decideSubState. 2643 2644 if (fScannerState == SCANNER_STATE_CONTENT) { 2645 final int ch = fEntityScanner.peekChar(); 2646 if (ch == '<') { 2647 fEntityScanner.scanChar(null); 2648 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2649 } else if (ch == '&') { 2650 fEntityScanner.scanChar(NameType.REFERENCE); 2651 setScannerState(SCANNER_STATE_REFERENCE) ; 2652 } else { 2653 //element content is there.. 
2654 setScannerState(SCANNER_STATE_CHARACTER_DATA); 2655 } 2656 } 2657 2658 if (fScannerState == SCANNER_STATE_START_OF_MARKUP) { 2659 startOfMarkup(); 2660 } 2661 2662 //decideSubState() ; 2663 2664 //do some special handling if isCoalesce is set to true. 2665 if (fIsCoalesce) { 2666 fUsebuffer = true ; 2667 //if the last section was character data 2668 if (fLastSectionWasCharacterData) { 2669 2670 //if we dont encounter any CDATA or ENTITY REFERENCE and 2671 //current state is also not SCANNER_STATE_CHARACTER_DATA 2672 //return the last scanned charactrer data. 2673 if ((fScannerState != SCANNER_STATE_CDATA) 2674 && (fScannerState != SCANNER_STATE_REFERENCE) 2675 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)) { 2676 fLastSectionWasCharacterData = false; 2677 return XMLEvent.CHARACTERS; 2678 } 2679 }//if last section was CDATA or ENTITY REFERENCE 2680 //xxx: there might be another entity reference or CDATA after this 2681 //<foo>blah blah &<<![CDATA[[aa]]>blah blah</foo> 2682 else if ((fLastSectionWasCData || fLastSectionWasEntityReference)) { 2683 //and current state is not SCANNER_STATE_CHARACTER_DATA 2684 //or SCANNER_STATE_CDATA or SCANNER_STATE_REFERENCE 2685 //this means there is nothing more to be coalesced. 2686 //return the CHARACTERS event. 
2687 if ((fScannerState != SCANNER_STATE_CDATA) 2688 && (fScannerState != SCANNER_STATE_REFERENCE) 2689 && (fScannerState != SCANNER_STATE_CHARACTER_DATA)){ 2690 2691 fLastSectionWasCData = false; 2692 fLastSectionWasEntityReference = false; 2693 return XMLEvent.CHARACTERS; 2694 } 2695 } 2696 } 2697 2698 switch(fScannerState){ 2699 2700 case XMLEvent.START_DOCUMENT : 2701 return XMLEvent.START_DOCUMENT; 2702 2703 case SCANNER_STATE_START_ELEMENT_TAG :{ 2704 2705 //returns true if the element is empty 2706 fEmptyElement = scanStartElement() ; 2707 //if the element is empty the next event is "end element" 2708 if(fEmptyElement){ 2709 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2710 }else{ 2711 //set the next possible state 2712 setScannerState(SCANNER_STATE_CONTENT); 2713 } 2714 return XMLEvent.START_ELEMENT ; 2715 } 2716 2717 case SCANNER_STATE_CHARACTER_DATA: { 2718 2719 //if last section was either entity reference or cdata or 2720 //character data we should be using buffer 2721 fUsebuffer = fLastSectionWasEntityReference || fLastSectionWasCData 2722 || fLastSectionWasCharacterData ; 2723 2724 //When coalesce is set to true and last state was REFERENCE or 2725 //CDATA or CHARACTER_DATA, buffer should not be cleared. 
2726 if( fIsCoalesce && (fLastSectionWasEntityReference || 2727 fLastSectionWasCData || fLastSectionWasCharacterData) ){ 2728 fLastSectionWasEntityReference = false; 2729 fLastSectionWasCData = false; 2730 fLastSectionWasCharacterData = true ; 2731 fUsebuffer = true; 2732 }else{ 2733 //clear the buffer 2734 fContentBuffer.clear(); 2735 } 2736 2737 //set the fTempString length to 0 before passing it on to scanContent 2738 //scanContent sets the correct co-ordinates as per the content read 2739 fTempString.length = 0; 2740 int c = fEntityScanner.scanContent(fTempString); 2741 2742 if(fEntityScanner.skipChar('<', null)){ 2743 //check if we have reached end of element 2744 if(fEntityScanner.skipChar('/', NameType.ELEMENTEND)){ 2745 //increase the mark up depth 2746 fMarkupDepth++; 2747 fLastSectionWasCharacterData = false; 2748 setScannerState(SCANNER_STATE_END_ELEMENT_TAG); 2749 //check if its start of new element 2750 }else if(XMLChar.isNameStart(fEntityScanner.peekChar())){ 2751 fMarkupDepth++; 2752 fLastSectionWasCharacterData = false; 2753 setScannerState(SCANNER_STATE_START_ELEMENT_TAG); 2754 }else{ 2755 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2756 //there can be cdata ahead if coalesce is true we should call again 2757 if(fIsCoalesce){ 2758 fLastSectionWasCharacterData = true; 2759 bufferContent(); 2760 continue; 2761 } 2762 } 2763 //in case last section was either entity reference or 2764 //cdata or character data -- we should be using buffer 2765 if(fUsebuffer){ 2766 bufferContent(); 2767 } 2768 2769 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2770 if(DEBUG)System.out.println("Return SPACE EVENT"); 2771 return XMLEvent.SPACE; 2772 }else 2773 return XMLEvent.CHARACTERS; 2774 2775 } else{ 2776 bufferContent(); 2777 } 2778 if (c == '\r') { 2779 if(DEBUG){ 2780 System.out.println("'\r' character found"); 2781 } 2782 // happens when there is the character reference 2783 //xxx: We know the next chracter.. 
we should just skip it and add ']' directlry 2784 fEntityScanner.scanChar(null); 2785 fUsebuffer = true; 2786 fContentBuffer.append((char)c); 2787 c = -1 ; 2788 } else if (c == ']') { 2789 //fStringBuffer.clear(); 2790 //xxx: We know the next chracter.. we should just skip it and add ']' directlry 2791 fUsebuffer = true; 2792 fContentBuffer.append((char)fEntityScanner.scanChar(null)); 2793 // remember where we are in case we get an endEntity before we 2794 // could flush the buffer out - this happens when we're parsing an 2795 // entity which ends with a ] 2796 fInScanContent = true; 2797 2798 // We work on a single character basis to handle cases such as: 2799 // ']]]>' which we might otherwise miss. 2800 // 2801 if (fEntityScanner.skipChar(']', null)) { 2802 fContentBuffer.append(']'); 2803 while (fEntityScanner.skipChar(']', null)) { 2804 fContentBuffer.append(']'); 2805 } 2806 if (fEntityScanner.skipChar('>', null)) { 2807 reportFatalError("CDEndInContent", null); 2808 } 2809 } 2810 c = -1 ; 2811 fInScanContent = false; 2812 } 2813 2814 do{ 2815 //xxx: we should be using only one buffer.. 2816 // we need not to grow the buffer only when isCoalesce() is not true; 2817 2818 if (c == '<') { 2819 fEntityScanner.scanChar(null); 2820 setScannerState(SCANNER_STATE_START_OF_MARKUP); 2821 break; 2822 }//xxx what should be the behavior if entity reference is present in the content ? 2823 else if (c == '&') { 2824 fEntityScanner.scanChar(NameType.REFERENCE); 2825 setScannerState(SCANNER_STATE_REFERENCE); 2826 break; 2827 }///xxx since this part is also characters, it should be merged... 
2828 else if (c != -1 && isInvalidLiteral(c)) { 2829 if (XMLChar.isHighSurrogate(c)) { 2830 // special case: surrogates 2831 scanSurrogates(fContentBuffer) ; 2832 setScannerState(SCANNER_STATE_CONTENT); 2833 } else { 2834 reportFatalError("InvalidCharInContent", 2835 new Object[] { 2836 Integer.toString(c, 16)}); 2837 fEntityScanner.scanChar(null); 2838 } 2839 break; 2840 } 2841 //xxx: scanContent also gives character callback. 2842 c = scanContent(fContentBuffer) ; 2843 //we should not be iterating again if fIsCoalesce is not set to true 2844 2845 if(!fIsCoalesce){ 2846 setScannerState(SCANNER_STATE_CONTENT); 2847 break; 2848 } 2849 2850 }while(true); 2851 2852 //if (fDocumentHandler != null) { 2853 // fDocumentHandler.characters(fContentBuffer, null); 2854 //} 2855 if(DEBUG)System.out.println("USING THE BUFFER, STRING START=" + fContentBuffer.toString() +"=END"); 2856 //if fIsCoalesce is true there might be more data so call fDriver.next() 2857 if(fIsCoalesce){ 2858 fLastSectionWasCharacterData = true ; 2859 continue; 2860 }else{ 2861 if(dtdGrammarUtil!= null && dtdGrammarUtil.isIgnorableWhiteSpace(fContentBuffer)){ 2862 if(DEBUG)System.out.println("Return SPACE EVENT"); 2863 return XMLEvent.SPACE; 2864 } else 2865 return XMLEvent.CHARACTERS ; 2866 } 2867 } 2868 2869 case SCANNER_STATE_END_ELEMENT_TAG :{ 2870 if(fEmptyElement){ 2871 //set it back to false. 2872 fEmptyElement = false; 2873 setScannerState(SCANNER_STATE_CONTENT); 2874 //check the case when there is comment after single element document 2875 //<foo/> and some comment after this 2876 return (fMarkupDepth == 0 && elementDepthIsZeroHook() ) ? 
2877 XMLEvent.END_ELEMENT : XMLEvent.END_ELEMENT ; 2878 2879 } else if(scanEndElement() == 0) { 2880 //It is last element of the document 2881 if (elementDepthIsZeroHook()) { 2882 //if element depth is zero , it indicates the end of the document 2883 //the state shouldn't be set, because it is set by elementDepthIsZeroHook() function 2884 //xxx understand this point once again.. 2885 return XMLEvent.END_ELEMENT ; 2886 } 2887 2888 } 2889 setScannerState(SCANNER_STATE_CONTENT); 2890 return XMLEvent.END_ELEMENT ; 2891 } 2892 2893 case SCANNER_STATE_COMMENT: { //SCANNER_STATE_COMMENT: 2894 scanComment(); 2895 setScannerState(SCANNER_STATE_CONTENT); 2896 return XMLEvent.COMMENT; 2897 //break; 2898 } 2899 case SCANNER_STATE_PI:{ //SCANNER_STATE_PI: { 2900 //clear the buffer first 2901 fContentBuffer.clear() ; 2902 //xxx: which buffer should be passed. Ideally we shouldn't have 2903 //more than two buffers -- 2904 //xxx: where should we add the switch for buffering. 2905 scanPI(fContentBuffer); 2906 setScannerState(SCANNER_STATE_CONTENT); 2907 return XMLEvent.PROCESSING_INSTRUCTION; 2908 //break; 2909 } 2910 case SCANNER_STATE_CDATA :{ //SCANNER_STATE_CDATA: { 2911 //xxx: What if CDATA is the first event 2912 //<foo><![CDATA[hello<><>]]>append</foo> 2913 2914 //we should not clear the buffer only when the last state was 2915 //either SCANNER_STATE_REFERENCE or 2916 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2917 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2918 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2919 fLastSectionWasCData = true ; 2920 fLastSectionWasEntityReference = false; 2921 fLastSectionWasCharacterData = false; 2922 }//if we dont need to coalesce clear the buffer 2923 else{ 2924 fContentBuffer.clear(); 2925 } 2926 fUsebuffer = true; 2927 //CDATA section is read up to the chunk size limit 2928 scanCDATASection(fContentBuffer , true); 2929 setScannerState(SCANNER_STATE_CONTENT); 2930 //1. 
if fIsCoalesce is set to true we set the variable fLastSectionWasCData to true 2931 //and just call fDispatche.next(). Since we have set the scanner state to 2932 //SCANNER_STATE_CONTENT (super state) parser will automatically recover and 2933 //behave appropriately. When isCoalesce is set to true we dont need to reportCDATA event 2934 //2. Check if application has set for reporting CDATA event 2935 //3. if the application has neither set the fIsCoalesce to true nor fReportCdataEvent 2936 //return the cdata event as characters. 2937 if (fIsCoalesce) { 2938 fLastSectionWasCData = true ; 2939 //there might be more data to coalesce. 2940 continue; 2941 } else if(fReportCdataEvent) { 2942 if (!fCDataEnd) { 2943 setScannerState(SCANNER_STATE_CDATA); 2944 } 2945 return XMLEvent.CDATA; 2946 } else { 2947 return XMLEvent.CHARACTERS; 2948 } 2949 } 2950 2951 case SCANNER_STATE_REFERENCE :{ 2952 fMarkupDepth++; 2953 foundBuiltInRefs = false; 2954 2955 //we should not clear the buffer only when the last state was 2956 //either CDATA or 2957 //SCANNER_STATE_CHARACTER_DATA or SCANNER_STATE_REFERENCE 2958 if(fIsCoalesce && ( fLastSectionWasEntityReference || 2959 fLastSectionWasCData || fLastSectionWasCharacterData)){ 2960 //fLastSectionWasEntityReference or fLastSectionWasCData are only 2961 //used when fIsCoalesce is set to true. 
2962 fLastSectionWasEntityReference = true ; 2963 fLastSectionWasCData = false; 2964 fLastSectionWasCharacterData = false; 2965 }//if we dont need to coalesce clear the buffer 2966 else{ 2967 fContentBuffer.clear(); 2968 } 2969 fUsebuffer = true ; 2970 //take care of character reference 2971 if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { 2972 scanCharReferenceValue(fContentBuffer, null); 2973 fMarkupDepth--; 2974 if(!fIsCoalesce){ 2975 setScannerState(SCANNER_STATE_CONTENT); 2976 return XMLEvent.CHARACTERS; 2977 } 2978 } else { 2979 // this function also starts new entity 2980 scanEntityReference(fContentBuffer); 2981 //if there was built-in entity reference & coalesce is not true 2982 //return CHARACTERS 2983 if(fScannerState == SCANNER_STATE_BUILT_IN_REFS && !fIsCoalesce){ 2984 setScannerState(SCANNER_STATE_CONTENT); 2985 if (builtInRefCharacterHandled) { 2986 builtInRefCharacterHandled = false; 2987 return XMLEvent.ENTITY_REFERENCE; 2988 } else { 2989 return XMLEvent.CHARACTERS; 2990 } 2991 } 2992 2993 //if there was a text declaration, call next() it will be taken care. 
2994 if(fScannerState == SCANNER_STATE_TEXT_DECL){ 2995 fLastSectionWasEntityReference = true ; 2996 continue; 2997 } 2998 2999 if(fScannerState == SCANNER_STATE_REFERENCE){ 3000 setScannerState(SCANNER_STATE_CONTENT); 3001 if (fReplaceEntityReferences && 3002 fEntityStore.isDeclaredEntity(fCurrentEntityName)) { 3003 // Skip the entity reference, we don't care 3004 continue; 3005 } 3006 return XMLEvent.ENTITY_REFERENCE; 3007 } 3008 } 3009 //Wether it was character reference, entity reference or built-in entity 3010 //set the next possible state to SCANNER_STATE_CONTENT 3011 setScannerState(SCANNER_STATE_CONTENT); 3012 fLastSectionWasEntityReference = true ; 3013 continue; 3014 } 3015 3016 case SCANNER_STATE_TEXT_DECL: { 3017 // scan text decl 3018 if (fEntityScanner.skipString("<?xml")) { 3019 fMarkupDepth++; 3020 // NOTE: special case where entity starts with a PI 3021 // whose name starts with "xml" (e.g. "xmlfoo") 3022 if (isValidNameChar(fEntityScanner.peekChar())) { 3023 fStringBuffer.clear(); 3024 fStringBuffer.append("xml"); 3025 3026 if (fNamespaces) { 3027 while (isValidNCName(fEntityScanner.peekChar())) { 3028 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3029 } 3030 } else { 3031 while (isValidNameChar(fEntityScanner.peekChar())) { 3032 fStringBuffer.append((char)fEntityScanner.scanChar(null)); 3033 } 3034 } 3035 String target = fSymbolTable.addSymbol(fStringBuffer.ch, 3036 fStringBuffer.offset, fStringBuffer.length); 3037 fContentBuffer.clear(); 3038 scanPIData(target, fContentBuffer); 3039 } 3040 3041 // standard text declaration 3042 else { 3043 //xxx: this function gives callback 3044 scanXMLDeclOrTextDecl(true); 3045 } 3046 } 3047 // now that we've straightened out the readers, we can read in chunks: 3048 fEntityManager.fCurrentEntity.mayReadChunks = true; 3049 setScannerState(SCANNER_STATE_CONTENT); 3050 //xxx: we don't return any state, so how do we get to know about TEXT declarations. 
//it seems we have to be careful about when a function is allowed to issue a callback
                            //and when the adapter is allowed to issue a callback.
                            continue;
                        }


                        // Both branches below report START_ELEMENT. When the hook returns
                        // true, fEmptyElement is set and the scanner state is not changed here.
                        case SCANNER_STATE_ROOT_ELEMENT: {
                            if (scanRootElementHook()) {
                                fEmptyElement = true;
                                //rest would be taken care of by fTrailingMiscDriver set by scanRootElementHook
                                return XMLEvent.START_ELEMENT;
                            }
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.START_ELEMENT ;
                        }
                        // Scan the character reference into a freshly cleared fContentBuffer,
                        // drop one level of markup depth and report the result as CHARACTERS.
                        case SCANNER_STATE_CHAR_REFERENCE : {
                            fContentBuffer.clear();
                            scanCharReferenceValue(fContentBuffer, null);
                            fMarkupDepth--;
                            setScannerState(SCANNER_STATE_CONTENT);
                            return XMLEvent.CHARACTERS;
                        }
                        // No case above matched the current value of fScannerState.
                        default:
                            throw new XNIException("Scanner State " + fScannerState + " not Recognized ");

                    }//switch
                }
                // premature end of file: endOfFileHook() gets the chance to react
                // to the exception, then -1 is returned as the event value.
                catch (EOFException e) {
                    endOfFileHook(e);
                    return -1;
                }
            } //while loop
        }//next

        //
        // Protected methods
        //

        // hooks

        // NOTE: These hook methods are added so that the full document
        //       scanner can share the majority of code with this class.

        /**
         * Scan for DOCTYPE hook. This method is a hook for subclasses
         * to add code to handle scanning for the "DOCTYPE" string
         * after the string "&lt;!" has been scanned.
         * <p>
         * This implementation scans nothing and always returns false.
         *
         * @return True if the "DOCTYPE" was scanned; false if "DOCTYPE"
         *          was not scanned.
         */
        protected boolean scanForDoctypeHook()
        throws IOException, XNIException {
            return false;
        } // scanForDoctypeHook():boolean

        /**
         * Element depth is zero. This method is a hook for subclasses
         * to add code to handle when the element depth hits zero. When
         * scanning a document fragment, an element depth of zero is
         * normal.
However, when scanning a full XML document, the 3113 * scanner must handle the trailing miscellanous section of 3114 * the document after the end of the document's root element. 3115 * 3116 * @return True if the caller should stop and return true which 3117 * allows the scanner to switch to a new scanning 3118 * driver. A return value of false indicates that 3119 * the content driver should continue as normal. 3120 */ 3121 protected boolean elementDepthIsZeroHook() 3122 throws IOException, XNIException { 3123 return false; 3124 } // elementDepthIsZeroHook():boolean 3125 3126 /** 3127 * Scan for root element hook. This method is a hook for 3128 * subclasses to add code that handles scanning for the root 3129 * element. When scanning a document fragment, there is no 3130 * "root" element. However, when scanning a full XML document, 3131 * the scanner must handle the root element specially. 3132 * 3133 * @return True if the caller should stop and return true which 3134 * allows the scanner to switch to a new scanning 3135 * driver. A return value of false indicates that 3136 * the content driver should continue as normal. 3137 */ 3138 protected boolean scanRootElementHook() 3139 throws IOException, XNIException { 3140 return false; 3141 } // scanRootElementHook():boolean 3142 3143 /** 3144 * End of file hook. This method is a hook for subclasses to 3145 * add code that handles the end of file. The end of file in 3146 * a document fragment is OK if the markup depth is zero. 3147 * However, when scanning a full XML document, an end of file 3148 * is always premature. 3149 */ 3150 protected void endOfFileHook(EOFException e) 3151 throws IOException, XNIException { 3152 3153 // NOTE: An end of file is only only an error if we were 3154 // in the middle of scanning some markup. 
-Ac 3155 if (fMarkupDepth != 0) { 3156 reportFatalError("PrematureEOF", null); 3157 } 3158 3159 } // endOfFileHook() 3160 3161 } // class FragmentContentDriver 3162 3163 static void pr(String str) { 3164 System.out.println(str) ; 3165 } 3166 3167 protected boolean fUsebuffer ; 3168 3169 /** this function gets an XMLString (which is used to store the attribute value) from the special pool 3170 * maintained for attributes. 3171 * fAttributeCacheUsedCount tracks the number of attributes that has been consumed from the pool. 3172 * if all the attributes has been consumed, it adds a new XMLString inthe pool and returns the same 3173 * XMLString. 3174 * 3175 * @return XMLString XMLString used to store an attribute value. 3176 */ 3177 3178 protected XMLString getString(){ 3179 if(fAttributeCacheUsedCount < initialCacheCount || 3180 fAttributeCacheUsedCount < attributeValueCache.size()){ 3181 return attributeValueCache.get(fAttributeCacheUsedCount++); 3182 } else{ 3183 XMLString str = new XMLString(); 3184 fAttributeCacheUsedCount++; 3185 attributeValueCache.add(str); 3186 return str; 3187 } 3188 } 3189 3190 /** 3191 * Implements XMLBufferListener interface. 3192 */ 3193 3194 public void refresh(){ 3195 refresh(0); 3196 } 3197 3198 /** 3199 * receives callbacks from {@link XMLEntityReader } when buffer 3200 * is being changed. 3201 * @param refreshPosition 3202 */ 3203 public void refresh(int refreshPosition){ 3204 //If you are reading attributes and you got a callback 3205 //cache available attributes. 3206 if(fReadingAttributes){ 3207 fAttributes.refresh(); 3208 } 3209 if(fScannerState == SCANNER_STATE_CHARACTER_DATA){ 3210 bufferContent(); 3211 } 3212 } 3213 3214 /** 3215 * Since 'TempString' shares the buffer (a char array) with the CurrentEntity, 3216 * when the cursor position reaches the end, that is, before the buffer is 3217 * being loaded with new data, the content in the TempString needs to be 3218 * copied into the ContentBuffer. 
* <p>
     * After the copy, the length of fTempString is reset to zero and
     * fUsebuffer is set to true.
     */
    private void bufferContent() {
        // Append the characters currently described by fTempString to the content buffer.
        fContentBuffer.append(fTempString);
        //clear the XMLString so that data can't be added again.
        fTempString.length = 0;
        // NOTE(review): presumably signals that the content must now be read from
        // fContentBuffer rather than fTempString -- confirm against the readers of fUsebuffer.
        fUsebuffer = true;
    }
} // class XMLDocumentFragmentScannerImpl