XMLEntityManager.java revision 649:507d4f7efba6
1/* 2 * Copyright (c) 2009, 2014, Oracle and/or its affiliates. All rights reserved. 3 */ 4/* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21package com.sun.org.apache.xerces.internal.impl ; 22 23import com.sun.org.apache.xerces.internal.impl.Constants; 24import com.sun.org.apache.xerces.internal.impl.XMLEntityHandler; 25import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29import com.sun.org.apache.xerces.internal.impl.validation.ValidationManager; 30import com.sun.org.apache.xerces.internal.util.*; 31import com.sun.org.apache.xerces.internal.util.URI; 32import com.sun.org.apache.xerces.internal.utils.SecuritySupport; 33import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35import com.sun.org.apache.xerces.internal.utils.XMLSecurityPropertyManager; 36import com.sun.org.apache.xerces.internal.xni.Augmentations; 37import com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier; 38import 
com.sun.org.apache.xerces.internal.xni.XNIException; 39import com.sun.org.apache.xerces.internal.xni.parser.*; 40import com.sun.xml.internal.stream.Entity; 41import com.sun.xml.internal.stream.StaxEntityResolverWrapper; 42import com.sun.xml.internal.stream.StaxXMLInputSource; 43import com.sun.xml.internal.stream.XMLEntityStorage; 44import java.io.*; 45import java.lang.reflect.Method; 46import java.net.HttpURLConnection; 47import java.net.URISyntaxException; 48import java.net.URL; 49import java.net.URLConnection; 50import java.util.Hashtable; 51import java.util.Iterator; 52import java.util.Locale; 53import java.util.Map; 54import java.util.Stack; 55import java.util.StringTokenizer; 56import javax.xml.stream.XMLInputFactory; 57 58 59/** 60 * Will keep track of current entity. 61 * 62 * The entity manager handles the registration of general and parameter 63 * entities; resolves entities; and starts entities. The entity manager 64 * is a central component in a standard parser configuration and this 65 * class works directly with the entity scanner to manage the underlying 66 * xni. 
67 * <p> 68 * This component requires the following features and properties from the 69 * component manager that uses it: 70 * <ul> 71 * <li>http://xml.org/sax/features/validation</li> 72 * <li>http://xml.org/sax/features/external-general-entities</li> 73 * <li>http://xml.org/sax/features/external-parameter-entities</li> 74 * <li>http://apache.org/xml/features/allow-java-encodings</li> 75 * <li>http://apache.org/xml/properties/internal/symbol-table</li> 76 * <li>http://apache.org/xml/properties/internal/error-reporter</li> 77 * <li>http://apache.org/xml/properties/internal/entity-resolver</li> 78 * </ul> 79 * 80 * 81 * @author Andy Clark, IBM 82 * @author Arnaud Le Hors, IBM 83 * @author K.Venugopal SUN Microsystems 84 * @author Neeraj Bajaj SUN Microsystems 85 * @author Sunitha Reddy SUN Microsystems 86 */ 87public class XMLEntityManager implements XMLComponent, XMLEntityResolver { 88 89 // 90 // Constants 91 // 92 93 /** Default buffer size (2048). */ 94 public static final int DEFAULT_BUFFER_SIZE = 8192; 95 96 /** Default buffer size before we've finished with the XMLDecl: */ 97 public static final int DEFAULT_XMLDECL_BUFFER_SIZE = 64; 98 99 /** Default internal entity buffer size (1024). */ 100 public static final int DEFAULT_INTERNAL_BUFFER_SIZE = 1024; 101 102 // feature identifiers 103 104 /** Feature identifier: validation. */ 105 protected static final String VALIDATION = 106 Constants.SAX_FEATURE_PREFIX + Constants.VALIDATION_FEATURE; 107 108 /** 109 * standard uri conformant (strict uri). 110 * http://apache.org/xml/features/standard-uri-conformant 111 */ 112 protected boolean fStrictURI; 113 114 115 /** Feature identifier: external general entities. */ 116 protected static final String EXTERNAL_GENERAL_ENTITIES = 117 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_GENERAL_ENTITIES_FEATURE; 118 119 /** Feature identifier: external parameter entities. 
*/ 120 protected static final String EXTERNAL_PARAMETER_ENTITIES = 121 Constants.SAX_FEATURE_PREFIX + Constants.EXTERNAL_PARAMETER_ENTITIES_FEATURE; 122 123 /** Feature identifier: allow Java encodings. */ 124 protected static final String ALLOW_JAVA_ENCODINGS = 125 Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE; 126 127 /** Feature identifier: warn on duplicate EntityDef */ 128 protected static final String WARN_ON_DUPLICATE_ENTITYDEF = 129 Constants.XERCES_FEATURE_PREFIX +Constants.WARN_ON_DUPLICATE_ENTITYDEF_FEATURE; 130 131 /** Feature identifier: load external DTD. */ 132 protected static final String LOAD_EXTERNAL_DTD = 133 Constants.XERCES_FEATURE_PREFIX + Constants.LOAD_EXTERNAL_DTD_FEATURE; 134 135 // property identifiers 136 137 /** Property identifier: symbol table. */ 138 protected static final String SYMBOL_TABLE = 139 Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY; 140 141 /** Property identifier: error reporter. */ 142 protected static final String ERROR_REPORTER = 143 Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY; 144 145 /** Feature identifier: standard uri conformant */ 146 protected static final String STANDARD_URI_CONFORMANT = 147 Constants.XERCES_FEATURE_PREFIX +Constants.STANDARD_URI_CONFORMANT_FEATURE; 148 149 /** Property identifier: entity resolver. */ 150 protected static final String ENTITY_RESOLVER = 151 Constants.XERCES_PROPERTY_PREFIX + Constants.ENTITY_RESOLVER_PROPERTY; 152 153 protected static final String STAX_ENTITY_RESOLVER = 154 Constants.XERCES_PROPERTY_PREFIX + Constants.STAX_ENTITY_RESOLVER_PROPERTY; 155 156 // property identifier: ValidationManager 157 protected static final String VALIDATION_MANAGER = 158 Constants.XERCES_PROPERTY_PREFIX + Constants.VALIDATION_MANAGER_PROPERTY; 159 160 /** property identifier: buffer size. 
*/ 161 protected static final String BUFFER_SIZE = 162 Constants.XERCES_PROPERTY_PREFIX + Constants.BUFFER_SIZE_PROPERTY; 163 164 /** property identifier: security manager. */ 165 protected static final String SECURITY_MANAGER = 166 Constants.XERCES_PROPERTY_PREFIX + Constants.SECURITY_MANAGER_PROPERTY; 167 168 protected static final String PARSER_SETTINGS = 169 Constants.XERCES_FEATURE_PREFIX + Constants.PARSER_SETTINGS; 170 171 /** Property identifier: Security property manager. */ 172 private static final String XML_SECURITY_PROPERTY_MANAGER = 173 Constants.XML_SECURITY_PROPERTY_MANAGER; 174 175 /** access external dtd: file protocol */ 176 static final String EXTERNAL_ACCESS_DEFAULT = Constants.EXTERNAL_ACCESS_DEFAULT; 177 178 // recognized features and properties 179 180 /** Recognized features. */ 181 private static final String[] RECOGNIZED_FEATURES = { 182 VALIDATION, 183 EXTERNAL_GENERAL_ENTITIES, 184 EXTERNAL_PARAMETER_ENTITIES, 185 ALLOW_JAVA_ENCODINGS, 186 WARN_ON_DUPLICATE_ENTITYDEF, 187 STANDARD_URI_CONFORMANT 188 }; 189 190 /** Feature defaults. */ 191 private static final Boolean[] FEATURE_DEFAULTS = { 192 null, 193 Boolean.TRUE, 194 Boolean.TRUE, 195 Boolean.TRUE, 196 Boolean.FALSE, 197 Boolean.FALSE 198 }; 199 200 /** Recognized properties. */ 201 private static final String[] RECOGNIZED_PROPERTIES = { 202 SYMBOL_TABLE, 203 ERROR_REPORTER, 204 ENTITY_RESOLVER, 205 VALIDATION_MANAGER, 206 BUFFER_SIZE, 207 SECURITY_MANAGER, 208 XML_SECURITY_PROPERTY_MANAGER 209 }; 210 211 /** Property defaults. */ 212 private static final Object[] PROPERTY_DEFAULTS = { 213 null, 214 null, 215 null, 216 null, 217 new Integer(DEFAULT_BUFFER_SIZE), 218 null, 219 null 220 }; 221 222 private static final String XMLEntity = "[xml]".intern(); 223 private static final String DTDEntity = "[dtd]".intern(); 224 225 // debugging 226 227 /** 228 * Debug printing of buffer. 
This debugging flag works best when you 229 * resize the DEFAULT_BUFFER_SIZE down to something reasonable like 230 * 64 characters. 231 */ 232 private static final boolean DEBUG_BUFFER = false; 233 234 /** warn on duplicate Entity declaration. 235 * http://apache.org/xml/features/warn-on-duplicate-entitydef 236 */ 237 protected boolean fWarnDuplicateEntityDef; 238 239 /** Debug some basic entities. */ 240 private static final boolean DEBUG_ENTITIES = false; 241 242 /** Debug switching readers for encodings. */ 243 private static final boolean DEBUG_ENCODINGS = false; 244 245 // should be diplayed trace resolving messages 246 private static final boolean DEBUG_RESOLVER = false ; 247 248 // 249 // Data 250 // 251 252 // features 253 254 /** 255 * Validation. This feature identifier is: 256 * http://xml.org/sax/features/validation 257 */ 258 protected boolean fValidation; 259 260 /** 261 * External general entities. This feature identifier is: 262 * http://xml.org/sax/features/external-general-entities 263 */ 264 protected boolean fExternalGeneralEntities; 265 266 /** 267 * External parameter entities. This feature identifier is: 268 * http://xml.org/sax/features/external-parameter-entities 269 */ 270 protected boolean fExternalParameterEntities; 271 272 /** 273 * Allow Java encoding names. This feature identifier is: 274 * http://apache.org/xml/features/allow-java-encodings 275 */ 276 protected boolean fAllowJavaEncodings = true ; 277 278 /** Load external DTD. */ 279 protected boolean fLoadExternalDTD = true; 280 281 // properties 282 283 /** 284 * Symbol table. This property identifier is: 285 * http://apache.org/xml/properties/internal/symbol-table 286 */ 287 protected SymbolTable fSymbolTable; 288 289 /** 290 * Error reporter. This property identifier is: 291 * http://apache.org/xml/properties/internal/error-reporter 292 */ 293 protected XMLErrorReporter fErrorReporter; 294 295 /** 296 * Entity resolver. 
This property identifier is: 297 * http://apache.org/xml/properties/internal/entity-resolver 298 */ 299 protected XMLEntityResolver fEntityResolver; 300 301 /** Stax Entity Resolver. This property identifier is XMLInputFactory.ENTITY_RESOLVER */ 302 303 protected StaxEntityResolverWrapper fStaxEntityResolver; 304 305 /** Property Manager. This is used from Stax */ 306 protected PropertyManager fPropertyManager ; 307 308 /** StAX properties */ 309 boolean fSupportDTD = true; 310 boolean fReplaceEntityReferences = true; 311 boolean fSupportExternalEntities = true; 312 313 /** used to restrict external access */ 314 protected String fAccessExternalDTD = EXTERNAL_ACCESS_DEFAULT; 315 316 // settings 317 318 /** 319 * Validation manager. This property identifier is: 320 * http://apache.org/xml/properties/internal/validation-manager 321 */ 322 protected ValidationManager fValidationManager; 323 324 // settings 325 326 /** 327 * Buffer size. We get this value from a property. The default size 328 * is used if the input buffer size property is not specified. 329 * REVISIT: do we need a property for internal entity buffer size? 330 */ 331 protected int fBufferSize = DEFAULT_BUFFER_SIZE; 332 333 /** Security Manager */ 334 protected XMLSecurityManager fSecurityManager = null; 335 336 protected XMLLimitAnalyzer fLimitAnalyzer = null; 337 338 protected int entityExpansionIndex; 339 340 /** 341 * True if the document entity is standalone. This should really 342 * only be set by the document source (e.g. XMLDocumentScanner). 343 */ 344 protected boolean fStandalone; 345 346 // are the entities being parsed in the external subset? 347 // NOTE: this *is not* the same as whether they're external entities! 348 protected boolean fInExternalSubset = false; 349 350 351 // handlers 352 /** Entity handler. */ 353 protected XMLEntityHandler fEntityHandler; 354 355 /** Current entity scanner */ 356 protected XMLEntityScanner fEntityScanner ; 357 358 /** XML 1.0 entity scanner. 
*/ 359 protected XMLEntityScanner fXML10EntityScanner; 360 361 /** XML 1.1 entity scanner. */ 362 protected XMLEntityScanner fXML11EntityScanner; 363 364 /** count of entities expanded: */ 365 protected int fEntityExpansionCount = 0; 366 367 // entities 368 369 /** Entities. */ 370 protected Hashtable fEntities = new Hashtable(); 371 372 /** Entity stack. */ 373 protected Stack fEntityStack = new Stack(); 374 375 /** Current entity. */ 376 protected Entity.ScannedEntity fCurrentEntity = null; 377 378 /** identify if the InputSource is created by a resolver */ 379 boolean fISCreatedByResolver = false; 380 381 // shared context 382 383 protected XMLEntityStorage fEntityStorage ; 384 385 protected final Object [] defaultEncoding = new Object[]{"UTF-8", null}; 386 387 388 // temp vars 389 390 /** Resource identifer. */ 391 private final XMLResourceIdentifierImpl fResourceIdentifier = new XMLResourceIdentifierImpl(); 392 393 /** Augmentations for entities. */ 394 private final Augmentations fEntityAugs = new AugmentationsImpl(); 395 396 /** Pool of character buffers. */ 397 private CharacterBufferPool fBufferPool = new CharacterBufferPool(fBufferSize, DEFAULT_INTERNAL_BUFFER_SIZE); 398 399 // 400 // Constructors 401 // 402 403 /** 404 * If this constructor is used to create the object, reset() should be invoked on this object 405 */ 406 public XMLEntityManager() { 407 fEntityStorage = new XMLEntityStorage(this) ; 408 setScannerVersion(Constants.XML_VERSION_1_0); 409 } // <init>() 410 411 /** Default constructor. */ 412 public XMLEntityManager(PropertyManager propertyManager) { 413 fPropertyManager = propertyManager ; 414 //pass a reference to current entity being scanned 415 //fEntityStorage = new XMLEntityStorage(fCurrentEntity) ; 416 fEntityStorage = new XMLEntityStorage(this) ; 417 fEntityScanner = new XMLEntityScanner(propertyManager, this) ; 418 reset(propertyManager); 419 } // <init>() 420 421 /** 422 * Adds an internal entity declaration. 
423 * <p> 424 * <strong>Note:</strong> This method ignores subsequent entity 425 * declarations. 426 * <p> 427 * <strong>Note:</strong> The name should be a unique symbol. The 428 * SymbolTable can be used for this purpose. 429 * 430 * @param name The name of the entity. 431 * @param text The text of the entity. 432 * 433 * @see SymbolTable 434 */ 435 public void addInternalEntity(String name, String text) { 436 if (!fEntities.containsKey(name)) { 437 Entity entity = new Entity.InternalEntity(name, text, fInExternalSubset); 438 fEntities.put(name, entity); 439 } else{ 440 if(fWarnDuplicateEntityDef){ 441 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 442 "MSG_DUPLICATE_ENTITY_DEFINITION", 443 new Object[]{ name }, 444 XMLErrorReporter.SEVERITY_WARNING ); 445 } 446 } 447 448 } // addInternalEntity(String,String) 449 450 /** 451 * Adds an external entity declaration. 452 * <p> 453 * <strong>Note:</strong> This method ignores subsequent entity 454 * declarations. 455 * <p> 456 * <strong>Note:</strong> The name should be a unique symbol. The 457 * SymbolTable can be used for this purpose. 458 * 459 * @param name The name of the entity. 460 * @param publicId The public identifier of the entity. 461 * @param literalSystemId The system identifier of the entity. 462 * @param baseSystemId The base system identifier of the entity. 463 * This is the system identifier of the entity 464 * where <em>the entity being added</em> and 465 * is used to expand the system identifier when 466 * the system identifier is a relative URI. 467 * When null the system identifier of the first 468 * external entity on the stack is used instead. 
469 * 470 * @see SymbolTable 471 */ 472 public void addExternalEntity(String name, 473 String publicId, String literalSystemId, 474 String baseSystemId) throws IOException { 475 if (!fEntities.containsKey(name)) { 476 if (baseSystemId == null) { 477 // search for the first external entity on the stack 478 int size = fEntityStack.size(); 479 if (size == 0 && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 480 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 481 } 482 for (int i = size - 1; i >= 0 ; i--) { 483 Entity.ScannedEntity externalEntity = 484 (Entity.ScannedEntity)fEntityStack.elementAt(i); 485 if (externalEntity.entityLocation != null && externalEntity.entityLocation.getExpandedSystemId() != null) { 486 baseSystemId = externalEntity.entityLocation.getExpandedSystemId(); 487 break; 488 } 489 } 490 } 491 Entity entity = new Entity.ExternalEntity(name, 492 new XMLEntityDescriptionImpl(name, publicId, literalSystemId, baseSystemId, 493 expandSystemId(literalSystemId, baseSystemId, false)), null, fInExternalSubset); 494 fEntities.put(name, entity); 495 } else{ 496 if(fWarnDuplicateEntityDef){ 497 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 498 "MSG_DUPLICATE_ENTITY_DEFINITION", 499 new Object[]{ name }, 500 XMLErrorReporter.SEVERITY_WARNING ); 501 } 502 } 503 504 } // addExternalEntity(String,String,String,String) 505 506 507 /** 508 * Adds an unparsed entity declaration. 509 * <p> 510 * <strong>Note:</strong> This method ignores subsequent entity 511 * declarations. 512 * <p> 513 * <strong>Note:</strong> The name should be a unique symbol. The 514 * SymbolTable can be used for this purpose. 515 * 516 * @param name The name of the entity. 517 * @param publicId The public identifier of the entity. 518 * @param systemId The system identifier of the entity. 519 * @param notation The name of the notation. 
520 * 521 * @see SymbolTable 522 */ 523 public void addUnparsedEntity(String name, 524 String publicId, String systemId, 525 String baseSystemId, String notation) { 526 if (!fEntities.containsKey(name)) { 527 Entity.ExternalEntity entity = new Entity.ExternalEntity(name, 528 new XMLEntityDescriptionImpl(name, publicId, systemId, baseSystemId, null), 529 notation, fInExternalSubset); 530 fEntities.put(name, entity); 531 } else{ 532 if(fWarnDuplicateEntityDef){ 533 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 534 "MSG_DUPLICATE_ENTITY_DEFINITION", 535 new Object[]{ name }, 536 XMLErrorReporter.SEVERITY_WARNING ); 537 } 538 } 539 } // addUnparsedEntity(String,String,String,String) 540 541 542 /** get the entity storage object from entity manager */ 543 public XMLEntityStorage getEntityStore(){ 544 return fEntityStorage ; 545 } 546 547 /** return the entity responsible for reading the entity */ 548 public XMLEntityScanner getEntityScanner(){ 549 if(fEntityScanner == null) { 550 // default to 1.0 551 if(fXML10EntityScanner == null) { 552 fXML10EntityScanner = new XMLEntityScanner(); 553 } 554 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 555 fEntityScanner = fXML10EntityScanner; 556 } 557 return fEntityScanner; 558 559 } 560 561 public void setScannerVersion(short version) { 562 563 if(version == Constants.XML_VERSION_1_0) { 564 if(fXML10EntityScanner == null) { 565 fXML10EntityScanner = new XMLEntityScanner(); 566 } 567 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 568 fEntityScanner = fXML10EntityScanner; 569 fEntityScanner.setCurrentEntity(fCurrentEntity); 570 } else { 571 if(fXML11EntityScanner == null) { 572 fXML11EntityScanner = new XML11EntityScanner(); 573 } 574 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 575 fEntityScanner = fXML11EntityScanner; 576 fEntityScanner.setCurrentEntity(fCurrentEntity); 577 } 578 579 } 580 581 /** 582 * This method uses the passed-in XMLInputSource to make 583 * 
fCurrentEntity usable for reading. 584 * @param name name of the entity (XML is it's the document entity) 585 * @param xmlInputSource the input source, with sufficient information 586 * to begin scanning characters. 587 * @param literal True if this entity is started within a 588 * literal value. 589 * @param isExternal whether this entity should be treated as an internal or external entity. 590 * @throws IOException if anything can't be read 591 * XNIException If any parser-specific goes wrong. 592 * @return the encoding of the new entity or null if a character stream was employed 593 */ 594 public String setupCurrentEntity(String name, XMLInputSource xmlInputSource, 595 boolean literal, boolean isExternal) 596 throws IOException, XNIException { 597 // get information 598 599 final String publicId = xmlInputSource.getPublicId(); 600 String literalSystemId = xmlInputSource.getSystemId(); 601 String baseSystemId = xmlInputSource.getBaseSystemId(); 602 String encoding = xmlInputSource.getEncoding(); 603 final boolean encodingExternallySpecified = (encoding != null); 604 Boolean isBigEndian = null; 605 606 // create reader 607 InputStream stream = null; 608 Reader reader = xmlInputSource.getCharacterStream(); 609 610 // First chance checking strict URI 611 String expandedSystemId = expandSystemId(literalSystemId, baseSystemId, fStrictURI); 612 if (baseSystemId == null) { 613 baseSystemId = expandedSystemId; 614 } 615 if (reader == null) { 616 stream = xmlInputSource.getByteStream(); 617 if (stream == null) { 618 URL location = new URL(expandedSystemId); 619 URLConnection connect = location.openConnection(); 620 if (!(connect instanceof HttpURLConnection)) { 621 stream = connect.getInputStream(); 622 } 623 else { 624 boolean followRedirects = true; 625 626 // setup URLConnection if we have an HTTPInputSource 627 if (xmlInputSource instanceof HTTPInputSource) { 628 final HttpURLConnection urlConnection = (HttpURLConnection) connect; 629 final HTTPInputSource 
httpInputSource = (HTTPInputSource) xmlInputSource; 630 631 // set request properties 632 Iterator propIter = httpInputSource.getHTTPRequestProperties(); 633 while (propIter.hasNext()) { 634 Map.Entry entry = (Map.Entry) propIter.next(); 635 urlConnection.setRequestProperty((String) entry.getKey(), (String) entry.getValue()); 636 } 637 638 // set preference for redirection 639 followRedirects = httpInputSource.getFollowHTTPRedirects(); 640 if (!followRedirects) { 641 setInstanceFollowRedirects(urlConnection, followRedirects); 642 } 643 } 644 645 stream = connect.getInputStream(); 646 647 // REVISIT: If the URLConnection has external encoding 648 // information, we should be reading it here. It's located 649 // in the charset parameter of Content-Type. -- mrglavas 650 651 if (followRedirects) { 652 String redirect = connect.getURL().toString(); 653 // E43: Check if the URL was redirected, and then 654 // update literal and expanded system IDs if needed. 655 if (!redirect.equals(expandedSystemId)) { 656 literalSystemId = redirect; 657 expandedSystemId = redirect; 658 } 659 } 660 } 661 } 662 663 // wrap this stream in RewindableInputStream 664 stream = new RewindableInputStream(stream); 665 666 // perform auto-detect of encoding if necessary 667 if (encoding == null) { 668 // read first four bytes and determine encoding 669 final byte[] b4 = new byte[4]; 670 int count = 0; 671 for (; count<4; count++ ) { 672 b4[count] = (byte)stream.read(); 673 } 674 if (count == 4) { 675 Object [] encodingDesc = getEncodingName(b4, count); 676 encoding = (String)(encodingDesc[0]); 677 isBigEndian = (Boolean)(encodingDesc[1]); 678 679 stream.reset(); 680 // Special case UTF-8 files with BOM created by Microsoft 681 // tools. It's more efficient to consume the BOM than make 682 // the reader perform extra checks. 
-Ac 683 if (count > 2 && encoding.equals("UTF-8")) { 684 int b0 = b4[0] & 0xFF; 685 int b1 = b4[1] & 0xFF; 686 int b2 = b4[2] & 0xFF; 687 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 688 // ignore first three bytes... 689 stream.skip(3); 690 } 691 } 692 reader = createReader(stream, encoding, isBigEndian); 693 } else { 694 reader = createReader(stream, encoding, isBigEndian); 695 } 696 } 697 698 // use specified encoding 699 else { 700 encoding = encoding.toUpperCase(Locale.ENGLISH); 701 702 // If encoding is UTF-8, consume BOM if one is present. 703 if (encoding.equals("UTF-8")) { 704 final int[] b3 = new int[3]; 705 int count = 0; 706 for (; count < 3; ++count) { 707 b3[count] = stream.read(); 708 if (b3[count] == -1) 709 break; 710 } 711 if (count == 3) { 712 if (b3[0] != 0xEF || b3[1] != 0xBB || b3[2] != 0xBF) { 713 // First three bytes are not BOM, so reset. 714 stream.reset(); 715 } 716 } else { 717 stream.reset(); 718 } 719 } 720 // If encoding is UTF-16, we still need to read the first four bytes 721 // in order to discover the byte order. 
722 else if (encoding.equals("UTF-16")) { 723 final int[] b4 = new int[4]; 724 int count = 0; 725 for (; count < 4; ++count) { 726 b4[count] = stream.read(); 727 if (b4[count] == -1) 728 break; 729 } 730 stream.reset(); 731 732 String utf16Encoding = "UTF-16"; 733 if (count >= 2) { 734 final int b0 = b4[0]; 735 final int b1 = b4[1]; 736 if (b0 == 0xFE && b1 == 0xFF) { 737 // UTF-16, big-endian 738 utf16Encoding = "UTF-16BE"; 739 isBigEndian = Boolean.TRUE; 740 } 741 else if (b0 == 0xFF && b1 == 0xFE) { 742 // UTF-16, little-endian 743 utf16Encoding = "UTF-16LE"; 744 isBigEndian = Boolean.FALSE; 745 } 746 else if (count == 4) { 747 final int b2 = b4[2]; 748 final int b3 = b4[3]; 749 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 750 // UTF-16, big-endian, no BOM 751 utf16Encoding = "UTF-16BE"; 752 isBigEndian = Boolean.TRUE; 753 } 754 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 755 // UTF-16, little-endian, no BOM 756 utf16Encoding = "UTF-16LE"; 757 isBigEndian = Boolean.FALSE; 758 } 759 } 760 } 761 reader = createReader(stream, utf16Encoding, isBigEndian); 762 } 763 // If encoding is UCS-4, we still need to read the first four bytes 764 // in order to discover the byte order. 765 else if (encoding.equals("ISO-10646-UCS-4")) { 766 final int[] b4 = new int[4]; 767 int count = 0; 768 for (; count < 4; ++count) { 769 b4[count] = stream.read(); 770 if (b4[count] == -1) 771 break; 772 } 773 stream.reset(); 774 775 // Ignore unusual octet order for now. 776 if (count == 4) { 777 // UCS-4, big endian (1234) 778 if (b4[0] == 0x00 && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x3C) { 779 isBigEndian = Boolean.TRUE; 780 } 781 // UCS-4, little endian (1234) 782 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x00 && b4[3] == 0x00) { 783 isBigEndian = Boolean.FALSE; 784 } 785 } 786 } 787 // If encoding is UCS-2, we still need to read the first four bytes 788 // in order to discover the byte order. 
789 else if (encoding.equals("ISO-10646-UCS-2")) { 790 final int[] b4 = new int[4]; 791 int count = 0; 792 for (; count < 4; ++count) { 793 b4[count] = stream.read(); 794 if (b4[count] == -1) 795 break; 796 } 797 stream.reset(); 798 799 if (count == 4) { 800 // UCS-2, big endian 801 if (b4[0] == 0x00 && b4[1] == 0x3C && b4[2] == 0x00 && b4[3] == 0x3F) { 802 isBigEndian = Boolean.TRUE; 803 } 804 // UCS-2, little endian 805 else if (b4[0] == 0x3C && b4[1] == 0x00 && b4[2] == 0x3F && b4[3] == 0x00) { 806 isBigEndian = Boolean.FALSE; 807 } 808 } 809 } 810 811 reader = createReader(stream, encoding, isBigEndian); 812 } 813 814 // read one character at a time so we don't jump too far 815 // ahead, converting characters from the byte stream in 816 // the wrong encoding 817 if (DEBUG_ENCODINGS) { 818 System.out.println("$$$ no longer wrapping reader in OneCharReader"); 819 } 820 //reader = new OneCharReader(reader); 821 } 822 823 // We've seen a new Reader. 824 // Push it on the stack so we can close it later. 825 //fOwnReaders.add(reader); 826 827 // push entity on stack 828 if (fCurrentEntity != null) { 829 fEntityStack.push(fCurrentEntity); 830 } 831 832 // create entity 833 /* if encoding is specified externally, 'encoding' information present 834 * in the prolog of the XML document is not considered. Hence, prolog can 835 * be read in Chunks of data instead of byte by byte. 
836 */ 837 fCurrentEntity = new com.sun.xml.internal.stream.Entity.ScannedEntity(name,new XMLResourceIdentifierImpl(publicId, literalSystemId, baseSystemId, expandedSystemId),stream, reader, encoding, literal, encodingExternallySpecified, isExternal); 838 fCurrentEntity.setEncodingExternallySpecified(encodingExternallySpecified); 839 fEntityScanner.setCurrentEntity(fCurrentEntity); 840 fResourceIdentifier.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 841 if (fLimitAnalyzer != null) { 842 fLimitAnalyzer.startEntity(name); 843 } 844 return encoding; 845 } //setupCurrentEntity(String, XMLInputSource, boolean, boolean): String 846 847 848 /** 849 * Checks whether an entity given by name is external. 850 * 851 * @param entityName The name of the entity to check. 852 * @return True if the entity is external, false otherwise 853 * (including when the entity is not declared). 854 */ 855 public boolean isExternalEntity(String entityName) { 856 857 Entity entity = (Entity)fEntities.get(entityName); 858 if (entity == null) { 859 return false; 860 } 861 return entity.isExternal(); 862 } 863 864 /** 865 * Checks whether the declaration of an entity given by name is 866 * // in the external subset. 867 * 868 * @param entityName The name of the entity to check. 869 * @return True if the entity was declared in the external subset, false otherwise 870 * (including when the entity is not declared). 871 */ 872 public boolean isEntityDeclInExternalSubset(String entityName) { 873 874 Entity entity = (Entity)fEntities.get(entityName); 875 if (entity == null) { 876 return false; 877 } 878 return entity.isEntityDeclInExternalSubset(); 879 } 880 881 882 883 // 884 // Public methods 885 // 886 887 /** 888 * Sets whether the document entity is standalone. 889 * 890 * @param standalone True if document entity is standalone. 
891 */ 892 public void setStandalone(boolean standalone) { 893 fStandalone = standalone; 894 } 895 // setStandalone(boolean) 896 897 /** Returns true if the document entity is standalone. */ 898 public boolean isStandalone() { 899 return fStandalone; 900 } //isStandalone():boolean 901 902 public boolean isDeclaredEntity(String entityName) { 903 904 Entity entity = (Entity)fEntities.get(entityName); 905 return entity != null; 906 } 907 908 public boolean isUnparsedEntity(String entityName) { 909 910 Entity entity = (Entity)fEntities.get(entityName); 911 if (entity == null) { 912 return false; 913 } 914 return entity.isUnparsed(); 915 } 916 917 918 919 // this simply returns the fResourceIdentifier object; 920 // this should only be used with caution by callers that 921 // carefully manage the entity manager's behaviour, so that 922 // this doesn't returning meaningless or misleading data. 923 // @return a reference to the current fResourceIdentifier object 924 public XMLResourceIdentifier getCurrentResourceIdentifier() { 925 return fResourceIdentifier; 926 } 927 928 /** 929 * Sets the entity handler. When an entity starts and ends, the 930 * entity handler is notified of the change. 931 * 932 * @param entityHandler The new entity handler. 
933 */ 934 935 public void setEntityHandler(com.sun.org.apache.xerces.internal.impl.XMLEntityHandler entityHandler) { 936 fEntityHandler = (XMLEntityHandler) entityHandler; 937 } // setEntityHandler(XMLEntityHandler) 938 939 //this function returns StaxXMLInputSource 940 public StaxXMLInputSource resolveEntityAsPerStax(XMLResourceIdentifier resourceIdentifier) throws java.io.IOException{ 941 942 if(resourceIdentifier == null ) return null; 943 944 String publicId = resourceIdentifier.getPublicId(); 945 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 946 String baseSystemId = resourceIdentifier.getBaseSystemId(); 947 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 948 // if no base systemId given, assume that it's relative 949 // to the systemId of the current scanned entity 950 // Sometimes the system id is not (properly) expanded. 951 // We need to expand the system id if: 952 // a. the expanded one was null; or 953 // b. the base system id was null, but becomes non-null from the current entity. 954 boolean needExpand = (expandedSystemId == null); 955 // REVISIT: why would the baseSystemId ever be null? if we 956 // didn't have to make this check we wouldn't have to reuse the 957 // fXMLResourceIdentifier object... 
958 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 959 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 960 if (baseSystemId != null) 961 needExpand = true; 962 } 963 if (needExpand) 964 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 965 966 // give the entity resolver a chance 967 StaxXMLInputSource staxInputSource = null; 968 XMLInputSource xmlInputSource = null; 969 970 XMLResourceIdentifierImpl ri = null; 971 972 if (resourceIdentifier instanceof XMLResourceIdentifierImpl) { 973 ri = (XMLResourceIdentifierImpl)resourceIdentifier; 974 } else { 975 fResourceIdentifier.clear(); 976 ri = fResourceIdentifier; 977 } 978 ri.setValues(publicId, literalSystemId, baseSystemId, expandedSystemId); 979 if(DEBUG_RESOLVER){ 980 System.out.println("BEFORE Calling resolveEntity") ; 981 } 982 983 fISCreatedByResolver = false; 984 //either of Stax or Xerces would be null 985 if(fStaxEntityResolver != null){ 986 staxInputSource = fStaxEntityResolver.resolveEntity(ri); 987 if(staxInputSource != null) { 988 fISCreatedByResolver = true; 989 } 990 } 991 992 if(fEntityResolver != null){ 993 xmlInputSource = fEntityResolver.resolveEntity(ri); 994 if(xmlInputSource != null) { 995 fISCreatedByResolver = true; 996 } 997 } 998 999 if(xmlInputSource != null){ 1000 //wrap this XMLInputSource to StaxInputSource 1001 staxInputSource = new StaxXMLInputSource(xmlInputSource, fISCreatedByResolver); 1002 } 1003 1004 // do default resolution 1005 //this works for both stax & Xerces, if staxInputSource is null, it means parser need to revert to default resolution 1006 if (staxInputSource == null) { 1007 // REVISIT: when systemId is null, I think we should return null. 1008 // is this the right solution? 
-SG 1009 //if (systemId != null) 1010 staxInputSource = new StaxXMLInputSource(new XMLInputSource(publicId, literalSystemId, baseSystemId)); 1011 }else if(staxInputSource.hasXMLStreamOrXMLEventReader()){ 1012 //Waiting for the clarification from EG. - nb 1013 } 1014 1015 if (DEBUG_RESOLVER) { 1016 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1017 System.err.println(" = " + xmlInputSource); 1018 } 1019 1020 return staxInputSource; 1021 1022 } 1023 1024 /** 1025 * Resolves the specified public and system identifiers. This 1026 * method first attempts to resolve the entity based on the 1027 * EntityResolver registered by the application. If no entity 1028 * resolver is registered or if the registered entity handler 1029 * is unable to resolve the entity, then default entity 1030 * resolution will occur. 1031 * 1032 * @param publicId The public identifier of the entity. 1033 * @param systemId The system identifier of the entity. 1034 * @param baseSystemId The base system identifier of the entity. 1035 * This is the system identifier of the current 1036 * entity and is used to expand the system 1037 * identifier when the system identifier is a 1038 * relative URI. 1039 * 1040 * @return Returns an input source that wraps the resolved entity. 1041 * This method will never return null. 1042 * 1043 * @throws IOException Thrown on i/o error. 1044 * @throws XNIException Thrown by entity resolver to signal an error. 
1045 */ 1046 public XMLInputSource resolveEntity(XMLResourceIdentifier resourceIdentifier) throws IOException, XNIException { 1047 if(resourceIdentifier == null ) return null; 1048 String publicId = resourceIdentifier.getPublicId(); 1049 String literalSystemId = resourceIdentifier.getLiteralSystemId(); 1050 String baseSystemId = resourceIdentifier.getBaseSystemId(); 1051 String expandedSystemId = resourceIdentifier.getExpandedSystemId(); 1052 String namespace = resourceIdentifier.getNamespace(); 1053 1054 // if no base systemId given, assume that it's relative 1055 // to the systemId of the current scanned entity 1056 // Sometimes the system id is not (properly) expanded. 1057 // We need to expand the system id if: 1058 // a. the expanded one was null; or 1059 // b. the base system id was null, but becomes non-null from the current entity. 1060 boolean needExpand = (expandedSystemId == null); 1061 // REVISIT: why would the baseSystemId ever be null? if we 1062 // didn't have to make this check we wouldn't have to reuse the 1063 // fXMLResourceIdentifier object... 1064 if (baseSystemId == null && fCurrentEntity != null && fCurrentEntity.entityLocation != null) { 1065 baseSystemId = fCurrentEntity.entityLocation.getExpandedSystemId(); 1066 if (baseSystemId != null) 1067 needExpand = true; 1068 } 1069 if (needExpand) 1070 expandedSystemId = expandSystemId(literalSystemId, baseSystemId,false); 1071 1072 // give the entity resolver a chance 1073 XMLInputSource xmlInputSource = null; 1074 1075 if (fEntityResolver != null) { 1076 resourceIdentifier.setBaseSystemId(baseSystemId); 1077 resourceIdentifier.setExpandedSystemId(expandedSystemId); 1078 xmlInputSource = fEntityResolver.resolveEntity(resourceIdentifier); 1079 } 1080 1081 // do default resolution 1082 // REVISIT: what's the correct behavior if the user provided an entity 1083 // resolver (fEntityResolver != null), but resolveEntity doesn't return 1084 // an input source (xmlInputSource == null)? 
1085 // do we do default resolution, or do we just return null? -SG 1086 if (xmlInputSource == null) { 1087 // REVISIT: when systemId is null, I think we should return null. 1088 // is this the right solution? -SG 1089 //if (systemId != null) 1090 xmlInputSource = new XMLInputSource(publicId, literalSystemId, baseSystemId); 1091 } 1092 1093 if (DEBUG_RESOLVER) { 1094 System.err.println("XMLEntityManager.resolveEntity(" + publicId + ")"); 1095 System.err.println(" = " + xmlInputSource); 1096 } 1097 1098 return xmlInputSource; 1099 1100 } // resolveEntity(XMLResourceIdentifier):XMLInputSource 1101 1102 /** 1103 * Starts a named entity. 1104 * 1105 * @param entityName The name of the entity to start. 1106 * @param literal True if this entity is started within a literal 1107 * value. 1108 * 1109 * @throws IOException Thrown on i/o error. 1110 * @throws XNIException Thrown by entity handler to signal an error. 1111 */ 1112 public void startEntity(String entityName, boolean literal) 1113 throws IOException, XNIException { 1114 1115 // was entity declared? 1116 Entity entity = (Entity)fEntityStorage.getEntity(entityName); 1117 if (entity == null) { 1118 if (fEntityHandler != null) { 1119 String encoding = null; 1120 fResourceIdentifier.clear(); 1121 fEntityAugs.removeAllItems(); 1122 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1123 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1124 fEntityAugs.removeAllItems(); 1125 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1126 fEntityHandler.endEntity(entityName, fEntityAugs); 1127 } 1128 return; 1129 } 1130 1131 // should we skip external entities? 1132 boolean external = entity.isExternal(); 1133 Entity.ExternalEntity externalEntity = null; 1134 String extLitSysId = null, extBaseSysId = null, expandedSystemId = null; 1135 if (external) { 1136 externalEntity = (Entity.ExternalEntity)entity; 1137 extLitSysId = (externalEntity.entityLocation != null ? 
externalEntity.entityLocation.getLiteralSystemId() : null); 1138 extBaseSysId = (externalEntity.entityLocation != null ? externalEntity.entityLocation.getBaseSystemId() : null); 1139 expandedSystemId = expandSystemId(extLitSysId, extBaseSysId); 1140 boolean unparsed = entity.isUnparsed(); 1141 boolean parameter = entityName.startsWith("%"); 1142 boolean general = !parameter; 1143 if (unparsed || (general && !fExternalGeneralEntities) || 1144 (parameter && !fExternalParameterEntities) || 1145 !fSupportDTD || !fSupportExternalEntities) { 1146 1147 if (fEntityHandler != null) { 1148 fResourceIdentifier.clear(); 1149 final String encoding = null; 1150 fResourceIdentifier.setValues( 1151 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1152 extLitSysId, extBaseSysId, expandedSystemId); 1153 fEntityAugs.removeAllItems(); 1154 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1155 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1156 fEntityAugs.removeAllItems(); 1157 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1158 fEntityHandler.endEntity(entityName, fEntityAugs); 1159 } 1160 return; 1161 } 1162 } 1163 1164 // is entity recursive? 1165 int size = fEntityStack.size(); 1166 for (int i = size; i >= 0; i--) { 1167 Entity activeEntity = i == size 1168 ? 
fCurrentEntity 1169 : (Entity)fEntityStack.elementAt(i); 1170 if (activeEntity.name == entityName) { 1171 String path = entityName; 1172 for (int j = i + 1; j < size; j++) { 1173 activeEntity = (Entity)fEntityStack.elementAt(j); 1174 path = path + " -> " + activeEntity.name; 1175 } 1176 path = path + " -> " + fCurrentEntity.name; 1177 path = path + " -> " + entityName; 1178 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1179 "RecursiveReference", 1180 new Object[] { entityName, path }, 1181 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1182 1183 if (fEntityHandler != null) { 1184 fResourceIdentifier.clear(); 1185 final String encoding = null; 1186 if (external) { 1187 fResourceIdentifier.setValues( 1188 (externalEntity.entityLocation != null ? externalEntity.entityLocation.getPublicId() : null), 1189 extLitSysId, extBaseSysId, expandedSystemId); 1190 } 1191 fEntityAugs.removeAllItems(); 1192 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1193 fEntityHandler.startEntity(entityName, fResourceIdentifier, encoding, fEntityAugs); 1194 fEntityAugs.removeAllItems(); 1195 fEntityAugs.putItem(Constants.ENTITY_SKIPPED, Boolean.TRUE); 1196 fEntityHandler.endEntity(entityName, fEntityAugs); 1197 } 1198 1199 return; 1200 } 1201 } 1202 1203 // resolve external entity 1204 StaxXMLInputSource staxInputSource = null; 1205 XMLInputSource xmlInputSource = null ; 1206 1207 if (external) { 1208 staxInputSource = resolveEntityAsPerStax(externalEntity.entityLocation); 1209 /** xxx: Waiting from the EG 1210 * //simply return if there was entity resolver registered and application 1211 * //returns either XMLStreamReader or XMLEventReader. 
1212 * if(staxInputSource.hasXMLStreamOrXMLEventReader()) return ; 1213 */ 1214 xmlInputSource = staxInputSource.getXMLInputSource() ; 1215 if (!fISCreatedByResolver) { 1216 //let the not-LoadExternalDTD or not-SupportDTD process to handle the situation 1217 if (fLoadExternalDTD) { 1218 String accessError = SecuritySupport.checkAccess(expandedSystemId, fAccessExternalDTD, Constants.ACCESS_EXTERNAL_ALL); 1219 if (accessError != null) { 1220 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 1221 "AccessExternalEntity", 1222 new Object[] { SecuritySupport.sanitizePath(expandedSystemId), accessError }, 1223 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1224 } 1225 } 1226 } 1227 } 1228 // wrap internal entity 1229 else { 1230 Entity.InternalEntity internalEntity = (Entity.InternalEntity)entity; 1231 Reader reader = new StringReader(internalEntity.text); 1232 xmlInputSource = new XMLInputSource(null, null, null, reader, null); 1233 } 1234 1235 // start the entity 1236 startEntity(entityName, xmlInputSource, literal, external); 1237 1238 } // startEntity(String,boolean) 1239 1240 /** 1241 * Starts the document entity. The document entity has the "[xml]" 1242 * pseudo-name. 1243 * 1244 * @param xmlInputSource The input source of the document entity. 1245 * 1246 * @throws IOException Thrown on i/o error. 1247 * @throws XNIException Thrown by entity handler to signal an error. 1248 */ 1249 public void startDocumentEntity(XMLInputSource xmlInputSource) 1250 throws IOException, XNIException { 1251 startEntity(XMLEntity, xmlInputSource, false, true); 1252 } // startDocumentEntity(XMLInputSource) 1253 1254 //xxx these methods are not required. 1255 /** 1256 * Starts the DTD entity. The DTD entity has the "[dtd]" 1257 * pseudo-name. 1258 * 1259 * @param xmlInputSource The input source of the DTD entity. 1260 * 1261 * @throws IOException Thrown on i/o error. 1262 * @throws XNIException Thrown by entity handler to signal an error. 
*/
public void startDTDEntity(XMLInputSource xmlInputSource)
throws IOException, XNIException {
    startEntity(DTDEntity, xmlInputSource, false, true);
} // startDTDEntity(XMLInputSource)

// indicate start of external subset so that
// location of entity decls can be tracked
public void startExternalSubset() {
    fInExternalSubset = true;
}

// indicate that the external subset has ended
public void endExternalSubset() {
    fInExternalSubset = false;
}

/**
 * Starts an entity.
 * <p>
 * This method can be used to insert an application defined XML
 * entity stream into the parsing stream.
 *
 * @param name           The name of the entity.
 * @param xmlInputSource The input source of the entity.
 * @param literal        True if this entity is started within a
 *                       literal value.
 * @param isExternal     whether this entity should be treated as an internal or external entity.
 *
 * @throws IOException  Thrown on i/o error.
 * @throws XNIException Thrown by entity handler to signal an error.
 */
public void startEntity(String name,
        XMLInputSource xmlInputSource,
        boolean literal, boolean isExternal)
        throws IOException, XNIException {

    String encoding = setupCurrentEntity(name, xmlInputSource, literal, isExternal);

    // Count this expansion and, when a limit analyzer is installed,
    // record it against the entity-expansion limit. If the security
    // manager reports that the limit has been exceeded, a fatal error
    // is reported.
    fEntityExpansionCount++;
    if (fLimitAnalyzer != null) {
        fLimitAnalyzer.addValue(entityExpansionIndex, name, 1);
    }
    if (fSecurityManager != null && fSecurityManager.isOverLimit(entityExpansionIndex, fLimitAnalyzer)) {
        fSecurityManager.debugPrint(fLimitAnalyzer);
        fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,"EntityExpansionLimitExceeded",
                new Object[]{fSecurityManager.getLimitValueByIndex(entityExpansionIndex)},
                XMLErrorReporter.SEVERITY_FATAL_ERROR );
        // is there anything better to do than reset the counter?
        // at least one can envision debugging applications where this might
        // be useful...
        fEntityExpansionCount = 0;
    }

    // call handler
    if (fEntityHandler != null) {
        fEntityHandler.startEntity(name, fResourceIdentifier, encoding, null);
    }

} // startEntity(String,XMLInputSource)

/**
 * Return the current entity being scanned. Current entity is SET using startEntity function.
 * @return Entity.ScannedEntity
 */
public Entity.ScannedEntity getCurrentEntity(){
    return fCurrentEntity ;
}

/**
 * Return the top level entity handled by this manager, or null
 * if no entity was added.
 */
public Entity.ScannedEntity getTopLevelEntity() {
    return (Entity.ScannedEntity)
        (fEntityStack.empty() ? null : fEntityStack.elementAt(0));
}


/**
 * Close all opened InputStreams and Readers opened by this parser.
 */
public void closeReaders() {
    // this call actually does nothing, readers are closed in the endEntity method
    // through the current entity.
    // The change seems to have happened during the jdk6 development with the
    // addition of StAX
}

/**
 * Ends the current entity: closes it, notifies the entity handler and
 * makes the entity popped from the stack the new current entity.
 * <p>
 * When there is no current entity, nothing is closed and the handler is
 * not called.
 *
 * @throws EOFException Thrown when no entity is left after the pop and
 *                      the entity just ended was not the document entity.
 * @throws IOException  Thrown on i/o error.
 * @throws XNIException Thrown by entity handler to signal an error, or
 *                      wrapping an IOException raised while closing the
 *                      entity.
 */
public void endEntity() throws IOException, XNIException {

    // call handler
    if (DEBUG_BUFFER) {
        System.out.print("(endEntity: ");
        print();
        System.out.println();
    }
    //pop the entity from the stack
    Entity.ScannedEntity entity = fEntityStack.size() > 0 ? (Entity.ScannedEntity)fEntityStack.pop() : null ;

    // need to close the reader first since the program can end
    // prematurely (e.g. fEntityHandler.endEntity may throw exception)
    // leaving the reader open
    if (fCurrentEntity != null) {
        try {
            if (fLimitAnalyzer != null) {
                fLimitAnalyzer.endEntity(XMLSecurityManager.Limit.GENERAL_ENTITY_SIZE_LIMIT, fCurrentEntity.name);
                // the security manager is optional elsewhere in this class,
                // so do not assume it is set just because an analyzer is
                if (fSecurityManager != null && fCurrentEntity.name.equals("[xml]")) {
                    fSecurityManager.debugPrint(fLimitAnalyzer);
                }
            }
            fCurrentEntity.close();
        } catch (IOException ex) {
            throw new XNIException(ex);
        }
    }

    // guarded like the close above: without a current entity there is
    // no name to report
    if (fEntityHandler != null && fCurrentEntity != null) {
        //so this is the last opened entity, signal it to current fEntityHandler using Augmentation
        if (entity == null) {
            fEntityAugs.removeAllItems();
            fEntityAugs.putItem(Constants.LAST_ENTITY, Boolean.TRUE);
            fEntityHandler.endEntity(fCurrentEntity.name, fEntityAugs);
            fEntityAugs.removeAllItems();
        } else {
            fEntityHandler.endEntity(fCurrentEntity.name, null);
        }
    }
    //check if it is a document entity
    // NOTE(review): reference comparison; presumably both names are
    // interned -- confirm before changing to equals().
    boolean documentEntity = fCurrentEntity != null && fCurrentEntity.name == XMLEntity;

    //set popped entity as current entity
    fCurrentEntity = entity;
    fEntityScanner.setCurrentEntity(fCurrentEntity);

    //check if there are any entity left in the stack -- if there are
    //no entries EOF has been reached.
    // throw exception when it is the last entity but it is not a document entity
    if (fCurrentEntity == null && !documentEntity) {
        throw new EOFException() ;
    }

    if (DEBUG_BUFFER) {
        System.out.print(")endEntity: ");
        print();
        System.out.println();
    }

} // endEntity()


//
// XMLComponent methods
//

/**
 * Resets this component from a StAX property manager: re-reads the
 * symbol table, error reporter, StAX entity resolver, DTD/entity
 * support flags, external-DTD access setting and security manager, then
 * clears the entity table and entity stack.
 *
 * @param propertyManager The property manager to read settings from.
 */
public void reset(PropertyManager propertyManager){
    //reset fEntityStorage
    fEntityStorage.reset(propertyManager);
    //reset XMLEntityReaderImpl
    fEntityScanner.reset(propertyManager);
    // xerces properties
    fSymbolTable = (SymbolTable)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY);
    fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY);
    try {
        fStaxEntityResolver = (StaxEntityResolverWrapper)propertyManager.getProperty(STAX_ENTITY_RESOLVER);
    } catch (XMLConfigurationException e) {
        // no resolver available from this property manager
        fStaxEntityResolver = null;
    }

    fSupportDTD = ((Boolean)propertyManager.getProperty(XMLInputFactory.SUPPORT_DTD)).booleanValue();
    fReplaceEntityReferences = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES)).booleanValue();
    fSupportExternalEntities = ((Boolean)propertyManager.getProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES)).booleanValue();

    // Zephyr feature ignore-external-dtd is the opposite of Xerces' load-external-dtd
    fLoadExternalDTD = !((Boolean)propertyManager.getProperty(Constants.ZEPHYR_PROPERTY_PREFIX + Constants.IGNORE_EXTERNAL_DTD)).booleanValue();

    // JAXP 1.5 feature
    XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) propertyManager.getProperty(XML_SECURITY_PROPERTY_MANAGER);
    fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

    fSecurityManager = (XMLSecurityManager)propertyManager.getProperty(SECURITY_MANAGER);

    // initialize state
    //fStandalone = false;
    fEntities.clear();
    fEntityStack.removeAllElements();
    fCurrentEntity = null;
    fValidation = false;
    fExternalGeneralEntities = true;
    fExternalParameterEntities = true;
    fAllowJavaEncodings = true ;
}

/**
 * Resets the component. The component can query the component manager
 * about any features and properties that affect the operation of the
 * component.
 * <p>
 * When the PARSER_SETTINGS feature is false, only the general state, the
 * entity scanner and the entity storage are reset; features and
 * properties are not re-read.
 *
 * @param componentManager The component manager.
 *
 * @throws XMLConfigurationException Thrown by component on
 *                      initialization error, for example if a feature or
 *                      property required for the operation of the
 *                      component is not recognized or not supported by
 *                      the component manager.
 */
public void reset(XMLComponentManager componentManager)
throws XMLConfigurationException {

    boolean parser_settings = componentManager.getFeature(PARSER_SETTINGS, true);

    if (!parser_settings) {
        // parser settings have not been changed
        reset();
        if (fEntityScanner != null) {
            fEntityScanner.reset(componentManager);
        }
        if (fEntityStorage != null) {
            fEntityStorage.reset(componentManager);
        }
        return;
    }

    // sax features
    fValidation = componentManager.getFeature(VALIDATION, false);
    fExternalGeneralEntities = componentManager.getFeature(EXTERNAL_GENERAL_ENTITIES, true);
    fExternalParameterEntities = componentManager.getFeature(EXTERNAL_PARAMETER_ENTITIES, true);

    // xerces features
    fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false);
    fWarnDuplicateEntityDef = componentManager.getFeature(WARN_ON_DUPLICATE_ENTITYDEF, false);
    fStrictURI = componentManager.getFeature(STANDARD_URI_CONFORMANT, false);
    fLoadExternalDTD = componentManager.getFeature(LOAD_EXTERNAL_DTD, true);

    // xerces properties
    fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE);
    fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER);
    fEntityResolver = (XMLEntityResolver)componentManager.getProperty(ENTITY_RESOLVER, null);
    fStaxEntityResolver = (StaxEntityResolverWrapper)componentManager.getProperty(STAX_ENTITY_RESOLVER, null);
    fValidationManager = (ValidationManager)componentManager.getProperty(VALIDATION_MANAGER, null);
    fSecurityManager = (XMLSecurityManager)componentManager.getProperty(SECURITY_MANAGER, null);
    // the property is fetched with a null default, so it may be absent;
    // in that case the previously stored index is left untouched
    if (fSecurityManager != null) {
        entityExpansionIndex = fSecurityManager.getIndex(Constants.JDK_ENTITY_EXPANSION_LIMIT);
    }
    //StAX Property
    fSupportDTD = true;
    fReplaceEntityReferences = true;
    fSupportExternalEntities = true;
    // JAXP 1.5 feature
    XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager) componentManager.getProperty(XML_SECURITY_PROPERTY_MANAGER, null);
    if (spm == null) {
        spm = new XMLSecurityPropertyManager();
    }
    fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD);

    //reset general state
    reset();

    fEntityScanner.reset(componentManager);
    fEntityStorage.reset(componentManager);

} // reset(XMLComponentManager)

// reset general state.  Should not be called other than by
// a class acting as a component manager but not
// implementing that interface for whatever reason.
1536 public void reset() { 1537 1538 // initialize state 1539 fStandalone = false; 1540 fEntities.clear(); 1541 fEntityStack.removeAllElements(); 1542 fEntityExpansionCount = 0; 1543 1544 fCurrentEntity = null; 1545 // reset scanner 1546 if(fXML10EntityScanner != null){ 1547 fXML10EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1548 } 1549 if(fXML11EntityScanner != null) { 1550 fXML11EntityScanner.reset(fSymbolTable, this, fErrorReporter); 1551 } 1552 1553 // DEBUG 1554 if (DEBUG_ENTITIES) { 1555 addInternalEntity("text", "Hello, World."); 1556 addInternalEntity("empty-element", "<foo/>"); 1557 addInternalEntity("balanced-element", "<foo></foo>"); 1558 addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 1559 addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 1560 addInternalEntity("unbalanced-entity", "<foo>"); 1561 addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 1562 addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 1563 addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 1564 try { 1565 addExternalEntity("external-text", null, "external-text.ent", "test/external-text.xml"); 1566 addExternalEntity("external-balanced-element", null, "external-balanced-element.ent", "test/external-balanced-element.xml"); 1567 addExternalEntity("one", null, "ent/one.ent", "test/external-entity.xml"); 1568 addExternalEntity("two", null, "ent/two.ent", "test/ent/one.xml"); 1569 } 1570 catch (IOException ex) { 1571 // should never happen 1572 } 1573 } 1574 1575 fEntityHandler = null; 1576 1577 // reset scanner 1578 //if(fEntityScanner!=null) 1579 // fEntityScanner.reset(fSymbolTable, this,fErrorReporter); 1580 1581 } 1582 /** 1583 * Returns a list of feature identifiers that are recognized by 1584 * this component. This method may return null if no features 1585 * are recognized by this component. 
1586 */ 1587 public String[] getRecognizedFeatures() { 1588 return (String[])(RECOGNIZED_FEATURES.clone()); 1589 } // getRecognizedFeatures():String[] 1590 1591 /** 1592 * Sets the state of a feature. This method is called by the component 1593 * manager any time after reset when a feature changes state. 1594 * <p> 1595 * <strong>Note:</strong> Components should silently ignore features 1596 * that do not affect the operation of the component. 1597 * 1598 * @param featureId The feature identifier. 1599 * @param state The state of the feature. 1600 * 1601 * @throws SAXNotRecognizedException The component should not throw 1602 * this exception. 1603 * @throws SAXNotSupportedException The component should not throw 1604 * this exception. 1605 */ 1606 public void setFeature(String featureId, boolean state) 1607 throws XMLConfigurationException { 1608 1609 // xerces features 1610 if (featureId.startsWith(Constants.XERCES_FEATURE_PREFIX)) { 1611 final int suffixLength = featureId.length() - Constants.XERCES_FEATURE_PREFIX.length(); 1612 if (suffixLength == Constants.ALLOW_JAVA_ENCODINGS_FEATURE.length() && 1613 featureId.endsWith(Constants.ALLOW_JAVA_ENCODINGS_FEATURE)) { 1614 fAllowJavaEncodings = state; 1615 } 1616 if (suffixLength == Constants.LOAD_EXTERNAL_DTD_FEATURE.length() && 1617 featureId.endsWith(Constants.LOAD_EXTERNAL_DTD_FEATURE)) { 1618 fLoadExternalDTD = state; 1619 return; 1620 } 1621 } 1622 1623 } // setFeature(String,boolean) 1624 1625 /** 1626 * Sets the value of a property. This method is called by the component 1627 * manager any time after reset when a property changes value. 1628 * <p> 1629 * <strong>Note:</strong> Components should silently ignore properties 1630 * that do not affect the operation of the component. 1631 * 1632 * @param propertyId The property identifier. 1633 * @param value The value of the property. 1634 * 1635 * @throws SAXNotRecognizedException The component should not throw 1636 * this exception. 
1637 * @throws SAXNotSupportedException The component should not throw 1638 * this exception. 1639 */ 1640 public void setProperty(String propertyId, Object value){ 1641 // Xerces properties 1642 if (propertyId.startsWith(Constants.XERCES_PROPERTY_PREFIX)) { 1643 final int suffixLength = propertyId.length() - Constants.XERCES_PROPERTY_PREFIX.length(); 1644 1645 if (suffixLength == Constants.SYMBOL_TABLE_PROPERTY.length() && 1646 propertyId.endsWith(Constants.SYMBOL_TABLE_PROPERTY)) { 1647 fSymbolTable = (SymbolTable)value; 1648 return; 1649 } 1650 if (suffixLength == Constants.ERROR_REPORTER_PROPERTY.length() && 1651 propertyId.endsWith(Constants.ERROR_REPORTER_PROPERTY)) { 1652 fErrorReporter = (XMLErrorReporter)value; 1653 return; 1654 } 1655 if (suffixLength == Constants.ENTITY_RESOLVER_PROPERTY.length() && 1656 propertyId.endsWith(Constants.ENTITY_RESOLVER_PROPERTY)) { 1657 fEntityResolver = (XMLEntityResolver)value; 1658 return; 1659 } 1660 if (suffixLength == Constants.BUFFER_SIZE_PROPERTY.length() && 1661 propertyId.endsWith(Constants.BUFFER_SIZE_PROPERTY)) { 1662 Integer bufferSize = (Integer)value; 1663 if (bufferSize != null && 1664 bufferSize.intValue() > DEFAULT_XMLDECL_BUFFER_SIZE) { 1665 fBufferSize = bufferSize.intValue(); 1666 fEntityScanner.setBufferSize(fBufferSize); 1667 fBufferPool.setExternalBufferSize(fBufferSize); 1668 } 1669 } 1670 if (suffixLength == Constants.SECURITY_MANAGER_PROPERTY.length() && 1671 propertyId.endsWith(Constants.SECURITY_MANAGER_PROPERTY)) { 1672 fSecurityManager = (XMLSecurityManager)value; 1673 } 1674 } 1675 1676 //JAXP 1.5 properties 1677 if (propertyId.equals(XML_SECURITY_PROPERTY_MANAGER)) 1678 { 1679 XMLSecurityPropertyManager spm = (XMLSecurityPropertyManager)value; 1680 fAccessExternalDTD = spm.getValue(XMLSecurityPropertyManager.Property.ACCESS_EXTERNAL_DTD); 1681 } 1682 } 1683 1684 public void setLimitAnalyzer(XMLLimitAnalyzer fLimitAnalyzer) { 1685 this.fLimitAnalyzer = fLimitAnalyzer; 1686 } 1687 1688 /** 
1689 * Returns a list of property identifiers that are recognized by 1690 * this component. This method may return null if no properties 1691 * are recognized by this component. 1692 */ 1693 public String[] getRecognizedProperties() { 1694 return (String[])(RECOGNIZED_PROPERTIES.clone()); 1695 } // getRecognizedProperties():String[] 1696 /** 1697 * Returns the default state for a feature, or null if this 1698 * component does not want to report a default value for this 1699 * feature. 1700 * 1701 * @param featureId The feature identifier. 1702 * 1703 * @since Xerces 2.2.0 1704 */ 1705 public Boolean getFeatureDefault(String featureId) { 1706 for (int i = 0; i < RECOGNIZED_FEATURES.length; i++) { 1707 if (RECOGNIZED_FEATURES[i].equals(featureId)) { 1708 return FEATURE_DEFAULTS[i]; 1709 } 1710 } 1711 return null; 1712 } // getFeatureDefault(String):Boolean 1713 1714 /** 1715 * Returns the default state for a property, or null if this 1716 * component does not want to report a default value for this 1717 * property. 1718 * 1719 * @param propertyId The property identifier. 1720 * 1721 * @since Xerces 2.2.0 1722 */ 1723 public Object getPropertyDefault(String propertyId) { 1724 for (int i = 0; i < RECOGNIZED_PROPERTIES.length; i++) { 1725 if (RECOGNIZED_PROPERTIES[i].equals(propertyId)) { 1726 return PROPERTY_DEFAULTS[i]; 1727 } 1728 } 1729 return null; 1730 } // getPropertyDefault(String):Object 1731 1732 // 1733 // Public static methods 1734 // 1735 1736 /** 1737 * Expands a system id and returns the system id as a URI, if 1738 * it can be expanded. A return value of null means that the 1739 * identifier is already expanded. An exception thrown 1740 * indicates a failure to expand the id. 1741 * 1742 * @param systemId The systemId to be expanded. 1743 * 1744 * @return Returns the URI string representing the expanded system 1745 * identifier. A null value indicates that the given 1746 * system identifier is already expanded. 
*
*/
public static String expandSystemId(String systemId) {
    // no base system id is supplied
    return expandSystemId(systemId, null);
} // expandSystemId(String):String

//
// Private static data used to escape the "user.dir" path
//

// current value of the "user.dir" property
private static String gUserDir;
// cached URI object for the current value of the escaped "user.dir" property stored as a URI
private static URI gUserDirURI;
// which ASCII characters need to be escaped
private static boolean[] gNeedEscaping = new boolean[128];
// the first hex character if a character needs to be escaped
private static char[] gAfterEscaping1 = new char[128];
// the second hex character if a character needs to be escaped
private static char[] gAfterEscaping2 = new char[128];
// hex digits, indexed by nibble value
private static char[] gHexChs = {'0', '1', '2', '3', '4', '5', '6', '7',
                                 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
// fill in gNeedEscaping, gAfterEscaping1 and gAfterEscaping2
static {
    // control characters 0x00..0x1F
    for (int ctl = 0; ctl < 0x20; ctl++) {
        gNeedEscaping[ctl] = true;
        gAfterEscaping1[ctl] = gHexChs[ctl >> 4];
        gAfterEscaping2[ctl] = gHexChs[ctl & 0xf];
    }
    // DEL
    gNeedEscaping[0x7f] = true;
    gAfterEscaping1[0x7f] = gHexChs[0x7f >> 4];
    gAfterEscaping2[0x7f] = gHexChs[0x7f & 0xf];
    // printable characters that are escaped as well
    final char[] escChs = {' ', '<', '>', '#', '%', '"', '{', '}',
                           '|', '\\', '^', '~', '[', ']', '`'};
    for (char special : escChs) {
        gNeedEscaping[special] = true;
        gAfterEscaping1[special] = gHexChs[special >> 4];
        gAfterEscaping2[special] = gHexChs[special & 0xf];
    }
}

// To escape the "user.dir" system property, by using %HH to represent
// special ASCII characters: 0x00~0x1F, 0x7F and the printable characters
// listed in the static initializer above (' ', '<', '>', '#', '%', '"',
// '{', '}', '|', '\', '^', '~', '[', ']', '`').
// It's a static method, so needs to be synchronized.
// this method looks heavy, but since the system property isn't expected
// to change often, so in most cases, we only need to return the URI
// that was escaped before.
1797 // According to the URI spec, non-ASCII characters (whose value >= 128) 1798 // need to be escaped too. 1799 // REVISIT: don't know how to escape non-ASCII characters, especially 1800 // which encoding to use. Leave them for now. 1801 private static synchronized URI getUserDir() throws URI.MalformedURIException { 1802 // get the user.dir property 1803 String userDir = ""; 1804 try { 1805 userDir = SecuritySupport.getSystemProperty("user.dir"); 1806 } 1807 catch (SecurityException se) { 1808 } 1809 1810 // return empty string if property value is empty string. 1811 if (userDir.length() == 0) 1812 return new URI("file", "", "", null, null); 1813 // compute the new escaped value if the new property value doesn't 1814 // match the previous one 1815 if (gUserDirURI != null && userDir.equals(gUserDir)) { 1816 return gUserDirURI; 1817 } 1818 1819 // record the new value as the global property value 1820 gUserDir = userDir; 1821 1822 char separator = java.io.File.separatorChar; 1823 userDir = userDir.replace(separator, '/'); 1824 1825 int len = userDir.length(), ch; 1826 StringBuilder buffer = new StringBuilder(len*3); 1827 // change C:/blah to /C:/blah 1828 if (len >= 2 && userDir.charAt(1) == ':') { 1829 ch = Character.toUpperCase(userDir.charAt(0)); 1830 if (ch >= 'A' && ch <= 'Z') { 1831 buffer.append('/'); 1832 } 1833 } 1834 1835 // for each character in the path 1836 int i = 0; 1837 for (; i < len; i++) { 1838 ch = userDir.charAt(i); 1839 // if it's not an ASCII character, break here, and use UTF-8 encoding 1840 if (ch >= 128) 1841 break; 1842 if (gNeedEscaping[ch]) { 1843 buffer.append('%'); 1844 buffer.append(gAfterEscaping1[ch]); 1845 buffer.append(gAfterEscaping2[ch]); 1846 // record the fact that it's escaped 1847 } 1848 else { 1849 buffer.append((char)ch); 1850 } 1851 } 1852 1853 // we saw some non-ascii character 1854 if (i < len) { 1855 // get UTF-8 bytes for the remaining sub-string 1856 byte[] bytes = null; 1857 byte b; 1858 try { 1859 bytes = 
userDir.substring(i).getBytes("UTF-8"); 1860 } catch (java.io.UnsupportedEncodingException e) { 1861 // should never happen 1862 return new URI("file", "", userDir, null, null); 1863 } 1864 len = bytes.length; 1865 1866 // for each byte 1867 for (i = 0; i < len; i++) { 1868 b = bytes[i]; 1869 // for non-ascii character: make it positive, then escape 1870 if (b < 0) { 1871 ch = b + 256; 1872 buffer.append('%'); 1873 buffer.append(gHexChs[ch >> 4]); 1874 buffer.append(gHexChs[ch & 0xf]); 1875 } 1876 else if (gNeedEscaping[b]) { 1877 buffer.append('%'); 1878 buffer.append(gAfterEscaping1[b]); 1879 buffer.append(gAfterEscaping2[b]); 1880 } 1881 else { 1882 buffer.append((char)b); 1883 } 1884 } 1885 } 1886 1887 // change blah/blah to blah/blah/ 1888 if (!userDir.endsWith("/")) 1889 buffer.append('/'); 1890 1891 gUserDirURI = new URI("file", "", buffer.toString(), null, null); 1892 1893 return gUserDirURI; 1894 } 1895 1896 public static OutputStream createOutputStream(String uri) throws IOException { 1897 // URI was specified. Handle relative URIs. 1898 final String expanded = XMLEntityManager.expandSystemId(uri, null, true); 1899 final URL url = new URL(expanded != null ? expanded : uri); 1900 OutputStream out = null; 1901 String protocol = url.getProtocol(); 1902 String host = url.getHost(); 1903 // Use FileOutputStream if this URI is for a local file. 1904 if (protocol.equals("file") 1905 && (host == null || host.length() == 0 || host.equals("localhost"))) { 1906 File file = new File(getPathWithoutEscapes(url.getPath())); 1907 if (!file.exists()) { 1908 File parent = file.getParentFile(); 1909 if (parent != null && !parent.exists()) { 1910 parent.mkdirs(); 1911 } 1912 } 1913 out = new FileOutputStream(file); 1914 } 1915 // Try to write to some other kind of URI. Some protocols 1916 // won't support this, though HTTP should work. 
1917 else { 1918 URLConnection urlCon = url.openConnection(); 1919 urlCon.setDoInput(false); 1920 urlCon.setDoOutput(true); 1921 urlCon.setUseCaches(false); // Enable tunneling. 1922 if (urlCon instanceof HttpURLConnection) { 1923 // The DOM L3 REC says if we are writing to an HTTP URI 1924 // it is to be done with an HTTP PUT. 1925 HttpURLConnection httpCon = (HttpURLConnection) urlCon; 1926 httpCon.setRequestMethod("PUT"); 1927 } 1928 out = urlCon.getOutputStream(); 1929 } 1930 return out; 1931 } 1932 1933 private static String getPathWithoutEscapes(String origPath) { 1934 if (origPath != null && origPath.length() != 0 && origPath.indexOf('%') != -1) { 1935 // Locate the escape characters 1936 StringTokenizer tokenizer = new StringTokenizer(origPath, "%"); 1937 StringBuilder result = new StringBuilder(origPath.length()); 1938 int size = tokenizer.countTokens(); 1939 result.append(tokenizer.nextToken()); 1940 for(int i = 1; i < size; ++i) { 1941 String token = tokenizer.nextToken(); 1942 // Decode the 2 digit hexadecimal number following % in '%nn' 1943 result.append((char)Integer.valueOf(token.substring(0, 2), 16).intValue()); 1944 result.append(token.substring(2)); 1945 } 1946 return result.toString(); 1947 } 1948 return origPath; 1949 } 1950 1951 /** 1952 * Absolutizes a URI using the current value 1953 * of the "user.dir" property as the base URI. If 1954 * the URI is already absolute, this is a no-op. 1955 * 1956 * @param uri the URI to absolutize 1957 */ 1958 public static void absolutizeAgainstUserDir(URI uri) 1959 throws URI.MalformedURIException { 1960 uri.absolutize(getUserDir()); 1961 } 1962 1963 /** 1964 * Expands a system id and returns the system id as a URI, if 1965 * it can be expanded. A return value of null means that the 1966 * identifier is already expanded. An exception thrown 1967 * indicates a failure to expand the id. 1968 * 1969 * @param systemId The systemId to be expanded. 
1970 * 1971 * @return Returns the URI string representing the expanded system 1972 * identifier. A null value indicates that the given 1973 * system identifier is already expanded. 1974 * 1975 */ 1976 public static String expandSystemId(String systemId, String baseSystemId) { 1977 1978 // check for bad parameters id 1979 if (systemId == null || systemId.length() == 0) { 1980 return systemId; 1981 } 1982 // if id already expanded, return 1983 try { 1984 URI uri = new URI(systemId); 1985 if (uri != null) { 1986 return systemId; 1987 } 1988 } catch (URI.MalformedURIException e) { 1989 // continue on... 1990 } 1991 // normalize id 1992 String id = fixURI(systemId); 1993 1994 // normalize base 1995 URI base = null; 1996 URI uri = null; 1997 try { 1998 if (baseSystemId == null || baseSystemId.length() == 0 || 1999 baseSystemId.equals(systemId)) { 2000 String dir = getUserDir().toString(); 2001 base = new URI("file", "", dir, null, null); 2002 } else { 2003 try { 2004 base = new URI(fixURI(baseSystemId)); 2005 } catch (URI.MalformedURIException e) { 2006 if (baseSystemId.indexOf(':') != -1) { 2007 // for xml schemas we might have baseURI with 2008 // a specified drive 2009 base = new URI("file", "", fixURI(baseSystemId), null, null); 2010 } else { 2011 String dir = getUserDir().toString(); 2012 dir = dir + fixURI(baseSystemId); 2013 base = new URI("file", "", dir, null, null); 2014 } 2015 } 2016 } 2017 // expand id 2018 uri = new URI(base, id); 2019 } catch (Exception e) { 2020 // let it go through 2021 2022 } 2023 2024 if (uri == null) { 2025 return systemId; 2026 } 2027 return uri.toString(); 2028 2029 } // expandSystemId(String,String):String 2030 2031 /** 2032 * Expands a system id and returns the system id as a URI, if 2033 * it can be expanded. A return value of null means that the 2034 * identifier is already expanded. An exception thrown 2035 * indicates a failure to expand the id. 2036 * 2037 * @param systemId The systemId to be expanded. 
2038 * 2039 * @return Returns the URI string representing the expanded system 2040 * identifier. A null value indicates that the given 2041 * system identifier is already expanded. 2042 * 2043 */ 2044 public static String expandSystemId(String systemId, String baseSystemId, 2045 boolean strict) 2046 throws URI.MalformedURIException { 2047 2048 // check if there is a system id before 2049 // trying to expand it. 2050 if (systemId == null) { 2051 return null; 2052 } 2053 2054 // system id has to be a valid URI 2055 if (strict) { 2056 2057 2058 // check if there is a system id before 2059 // trying to expand it. 2060 if (systemId == null) { 2061 return null; 2062 } 2063 2064 try { 2065 // if it's already an absolute one, return it 2066 new URI(systemId); 2067 return systemId; 2068 } 2069 catch (URI.MalformedURIException ex) { 2070 } 2071 URI base = null; 2072 // if there isn't a base uri, use the working directory 2073 if (baseSystemId == null || baseSystemId.length() == 0) { 2074 base = new URI("file", "", getUserDir().toString(), null, null); 2075 } 2076 // otherwise, use the base uri 2077 else { 2078 try { 2079 base = new URI(baseSystemId); 2080 } 2081 catch (URI.MalformedURIException e) { 2082 // assume "base" is also a relative uri 2083 String dir = getUserDir().toString(); 2084 dir = dir + baseSystemId; 2085 base = new URI("file", "", dir, null, null); 2086 } 2087 } 2088 // absolutize the system id using the base 2089 URI uri = new URI(base, systemId); 2090 // return the string rep of the new uri (an absolute one) 2091 return uri.toString(); 2092 2093 // if any exception is thrown, it'll get thrown to the caller. 2094 } 2095 2096 // Assume the URIs are well-formed. If it turns out they're not, try fixing them up. 
2097 try { 2098 return expandSystemIdStrictOff(systemId, baseSystemId); 2099 } 2100 catch (URI.MalformedURIException e) { 2101 /** Xerces URI rejects unicode, try java.net.URI 2102 * this is not ideal solution, but it covers known cases which either 2103 * Xerces URI or java.net.URI can handle alone 2104 * will file bug against java.net.URI 2105 */ 2106 try { 2107 return expandSystemIdStrictOff1(systemId, baseSystemId); 2108 } catch (URISyntaxException ex) { 2109 // continue on... 2110 } 2111 } 2112 // check for bad parameters id 2113 if (systemId.length() == 0) { 2114 return systemId; 2115 } 2116 2117 // normalize id 2118 String id = fixURI(systemId); 2119 2120 // normalize base 2121 URI base = null; 2122 URI uri = null; 2123 try { 2124 if (baseSystemId == null || baseSystemId.length() == 0 || 2125 baseSystemId.equals(systemId)) { 2126 base = getUserDir(); 2127 } 2128 else { 2129 try { 2130 base = new URI(fixURI(baseSystemId).trim()); 2131 } 2132 catch (URI.MalformedURIException e) { 2133 if (baseSystemId.indexOf(':') != -1) { 2134 // for xml schemas we might have baseURI with 2135 // a specified drive 2136 base = new URI("file", "", fixURI(baseSystemId).trim(), null, null); 2137 } 2138 else { 2139 base = new URI(getUserDir(), fixURI(baseSystemId)); 2140 } 2141 } 2142 } 2143 // expand id 2144 uri = new URI(base, id.trim()); 2145 } 2146 catch (Exception e) { 2147 // let it go through 2148 2149 } 2150 2151 if (uri == null) { 2152 return systemId; 2153 } 2154 return uri.toString(); 2155 2156 } // expandSystemId(String,String,boolean):String 2157 2158 /** 2159 * Helper method for expandSystemId(String,String,boolean):String 2160 */ 2161 private static String expandSystemIdStrictOn(String systemId, String baseSystemId) 2162 throws URI.MalformedURIException { 2163 2164 URI systemURI = new URI(systemId, true); 2165 // If it's already an absolute one, return it 2166 if (systemURI.isAbsoluteURI()) { 2167 return systemId; 2168 } 2169 2170 // If there isn't a base URI, use 
the working directory 2171 URI baseURI = null; 2172 if (baseSystemId == null || baseSystemId.length() == 0) { 2173 baseURI = getUserDir(); 2174 } 2175 else { 2176 baseURI = new URI(baseSystemId, true); 2177 if (!baseURI.isAbsoluteURI()) { 2178 // assume "base" is also a relative uri 2179 baseURI.absolutize(getUserDir()); 2180 } 2181 } 2182 2183 // absolutize the system identifier using the base URI 2184 systemURI.absolutize(baseURI); 2185 2186 // return the string rep of the new uri (an absolute one) 2187 return systemURI.toString(); 2188 2189 // if any exception is thrown, it'll get thrown to the caller. 2190 2191 } // expandSystemIdStrictOn(String,String):String 2192 2193 /** 2194 * Attempt to set whether redirects will be followed for an <code>HttpURLConnection</code>. 2195 * This may fail on earlier JDKs which do not support setting this preference. 2196 */ 2197 public static void setInstanceFollowRedirects(HttpURLConnection urlCon, boolean followRedirects) { 2198 try { 2199 Method method = HttpURLConnection.class.getMethod("setInstanceFollowRedirects", new Class[] {Boolean.TYPE}); 2200 method.invoke(urlCon, new Object[] {followRedirects ? Boolean.TRUE : Boolean.FALSE}); 2201 } 2202 // setInstanceFollowRedirects doesn't exist. 2203 catch (Exception exc) {} 2204 } 2205 2206 2207 /** 2208 * Helper method for expandSystemId(String,String,boolean):String 2209 */ 2210 private static String expandSystemIdStrictOff(String systemId, String baseSystemId) 2211 throws URI.MalformedURIException { 2212 2213 URI systemURI = new URI(systemId, true); 2214 // If it's already an absolute one, return it 2215 if (systemURI.isAbsoluteURI()) { 2216 if (systemURI.getScheme().length() > 1) { 2217 return systemId; 2218 } 2219 /** 2220 * If the scheme's length is only one character, 2221 * it's likely that this was intended as a file 2222 * path. Fixing this up in expandSystemId to 2223 * maintain backwards compatibility. 
2224 */ 2225 throw new URI.MalformedURIException(); 2226 } 2227 2228 // If there isn't a base URI, use the working directory 2229 URI baseURI = null; 2230 if (baseSystemId == null || baseSystemId.length() == 0) { 2231 baseURI = getUserDir(); 2232 } 2233 else { 2234 baseURI = new URI(baseSystemId, true); 2235 if (!baseURI.isAbsoluteURI()) { 2236 // assume "base" is also a relative uri 2237 baseURI.absolutize(getUserDir()); 2238 } 2239 } 2240 2241 // absolutize the system identifier using the base URI 2242 systemURI.absolutize(baseURI); 2243 2244 // return the string rep of the new uri (an absolute one) 2245 return systemURI.toString(); 2246 2247 // if any exception is thrown, it'll get thrown to the caller. 2248 2249 } // expandSystemIdStrictOff(String,String):String 2250 2251 private static String expandSystemIdStrictOff1(String systemId, String baseSystemId) 2252 throws URISyntaxException, URI.MalformedURIException { 2253 2254 java.net.URI systemURI = new java.net.URI(systemId); 2255 // If it's already an absolute one, return it 2256 if (systemURI.isAbsolute()) { 2257 if (systemURI.getScheme().length() > 1) { 2258 return systemId; 2259 } 2260 /** 2261 * If the scheme's length is only one character, 2262 * it's likely that this was intended as a file 2263 * path. Fixing this up in expandSystemId to 2264 * maintain backwards compatibility. 
2265 */ 2266 throw new URISyntaxException(systemId, "the scheme's length is only one character"); 2267 } 2268 2269 // If there isn't a base URI, use the working directory 2270 URI baseURI = null; 2271 if (baseSystemId == null || baseSystemId.length() == 0) { 2272 baseURI = getUserDir(); 2273 } 2274 else { 2275 baseURI = new URI(baseSystemId, true); 2276 if (!baseURI.isAbsoluteURI()) { 2277 // assume "base" is also a relative uri 2278 baseURI.absolutize(getUserDir()); 2279 } 2280 } 2281 2282 // absolutize the system identifier using the base URI 2283// systemURI.absolutize(baseURI); 2284 systemURI = (new java.net.URI(baseURI.toString())).resolve(systemURI); 2285 2286 // return the string rep of the new uri (an absolute one) 2287 return systemURI.toString(); 2288 2289 // if any exception is thrown, it'll get thrown to the caller. 2290 2291 } // expandSystemIdStrictOff(String,String):String 2292 2293 // 2294 // Protected methods 2295 // 2296 2297 2298 /** 2299 * Returns the IANA encoding name that is auto-detected from 2300 * the bytes specified, with the endian-ness of that encoding where appropriate. 2301 * 2302 * @param b4 The first four bytes of the input. 2303 * @param count The number of bytes actually read. 2304 * @return a 2-element array: the first element, an IANA-encoding string, 2305 * the second element a Boolean which is true iff the document is big endian, false 2306 * if it's little-endian, and null if the distinction isn't relevant. 
2307 */ 2308 protected Object[] getEncodingName(byte[] b4, int count) { 2309 2310 if (count < 2) { 2311 return defaultEncoding; 2312 } 2313 2314 // UTF-16, with BOM 2315 int b0 = b4[0] & 0xFF; 2316 int b1 = b4[1] & 0xFF; 2317 if (b0 == 0xFE && b1 == 0xFF) { 2318 // UTF-16, big-endian 2319 return new Object [] {"UTF-16BE", new Boolean(true)}; 2320 } 2321 if (b0 == 0xFF && b1 == 0xFE) { 2322 // UTF-16, little-endian 2323 return new Object [] {"UTF-16LE", new Boolean(false)}; 2324 } 2325 2326 // default to UTF-8 if we don't have enough bytes to make a 2327 // good determination of the encoding 2328 if (count < 3) { 2329 return defaultEncoding; 2330 } 2331 2332 // UTF-8 with a BOM 2333 int b2 = b4[2] & 0xFF; 2334 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2335 return defaultEncoding; 2336 } 2337 2338 // default to UTF-8 if we don't have enough bytes to make a 2339 // good determination of the encoding 2340 if (count < 4) { 2341 return defaultEncoding; 2342 } 2343 2344 // other encodings 2345 int b3 = b4[3] & 0xFF; 2346 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2347 // UCS-4, big endian (1234) 2348 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2349 } 2350 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2351 // UCS-4, little endian (4321) 2352 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2353 } 2354 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2355 // UCS-4, unusual octet order (2143) 2356 // REVISIT: What should this be? 2357 return new Object [] {"ISO-10646-UCS-4", null}; 2358 } 2359 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2360 // UCS-4, unusual octect order (3412) 2361 // REVISIT: What should this be? 2362 return new Object [] {"ISO-10646-UCS-4", null}; 2363 } 2364 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2365 // UTF-16, big-endian, no BOM 2366 // (or could turn out to be UCS-2... 2367 // REVISIT: What should this be? 
2368 return new Object [] {"UTF-16BE", new Boolean(true)}; 2369 } 2370 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2371 // UTF-16, little-endian, no BOM 2372 // (or could turn out to be UCS-2... 2373 return new Object [] {"UTF-16LE", new Boolean(false)}; 2374 } 2375 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2376 // EBCDIC 2377 // a la xerces1, return CP037 instead of EBCDIC here 2378 return new Object [] {"CP037", null}; 2379 } 2380 2381 return defaultEncoding; 2382 2383 } // getEncodingName(byte[],int):Object[] 2384 2385 /** 2386 * Creates a reader capable of reading the given input stream in 2387 * the specified encoding. 2388 * 2389 * @param inputStream The input stream. 2390 * @param encoding The encoding name that the input stream is 2391 * encoded using. If the user has specified that 2392 * Java encoding names are allowed, then the 2393 * encoding name may be a Java encoding name; 2394 * otherwise, it is an ianaEncoding name. 2395 * @param isBigEndian For encodings (like uCS-4), whose names cannot 2396 * specify a byte order, this tells whether the order is bigEndian. null menas 2397 * unknown or not relevant. 2398 * 2399 * @return Returns a reader. 
2400 */ 2401 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 2402 throws IOException { 2403 2404 // normalize encoding name 2405 if (encoding == null) { 2406 encoding = "UTF-8"; 2407 } 2408 2409 // try to use an optimized reader 2410 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 2411 if (ENCODING.equals("UTF-8")) { 2412 if (DEBUG_ENCODINGS) { 2413 System.out.println("$$$ creating UTF8Reader"); 2414 } 2415 return new UTF8Reader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 2416 } 2417 if (ENCODING.equals("US-ASCII")) { 2418 if (DEBUG_ENCODINGS) { 2419 System.out.println("$$$ creating ASCIIReader"); 2420 } 2421 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2422 } 2423 if(ENCODING.equals("ISO-10646-UCS-4")) { 2424 if(isBigEndian != null) { 2425 boolean isBE = isBigEndian.booleanValue(); 2426 if(isBE) { 2427 return new UCSReader(inputStream, UCSReader.UCS4BE); 2428 } else { 2429 return new UCSReader(inputStream, UCSReader.UCS4LE); 2430 } 2431 } else { 2432 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2433 "EncodingByteOrderUnsupported", 2434 new Object[] { encoding }, 2435 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2436 } 2437 } 2438 if(ENCODING.equals("ISO-10646-UCS-2")) { 2439 if(isBigEndian != null) { // sould never happen with this encoding... 
2440 boolean isBE = isBigEndian.booleanValue(); 2441 if(isBE) { 2442 return new UCSReader(inputStream, UCSReader.UCS2BE); 2443 } else { 2444 return new UCSReader(inputStream, UCSReader.UCS2LE); 2445 } 2446 } else { 2447 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2448 "EncodingByteOrderUnsupported", 2449 new Object[] { encoding }, 2450 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2451 } 2452 } 2453 2454 // check for valid name 2455 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2456 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2457 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2458 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2459 "EncodingDeclInvalid", 2460 new Object[] { encoding }, 2461 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2462 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2463 // because every byte is a valid ISO Latin 1 character. 2464 // It may not translate correctly but if we failed on 2465 // the encoding anyway, then we're expecting the content 2466 // of the document to be bad. This will just prevent an 2467 // invalid UTF-8 sequence to be detected. This is only 2468 // important when continue-after-fatal-error is turned 2469 // on. -Ac 2470 encoding = "ISO-8859-1"; 2471 } 2472 2473 // try to use a Java reader 2474 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2475 if (javaEncoding == null) { 2476 if(fAllowJavaEncodings) { 2477 javaEncoding = encoding; 2478 } else { 2479 fErrorReporter.reportError(this.getEntityScanner(),XMLMessageFormatter.XML_DOMAIN, 2480 "EncodingDeclInvalid", 2481 new Object[] { encoding }, 2482 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2483 // see comment above. 
2484 javaEncoding = "ISO8859_1"; 2485 } 2486 } 2487 if (DEBUG_ENCODINGS) { 2488 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2489 if (javaEncoding == encoding) { 2490 System.out.print(" (IANA encoding)"); 2491 } 2492 System.out.println(); 2493 } 2494 return new BufferedReader( new InputStreamReader(inputStream, javaEncoding)); 2495 2496 } // createReader(InputStream,String, Boolean): Reader 2497 2498 2499 /** 2500 * Return the public identifier for the current document event. 2501 * <p> 2502 * The return value is the public identifier of the document 2503 * entity or of the external parsed entity in which the markup 2504 * triggering the event appears. 2505 * 2506 * @return A string containing the public identifier, or 2507 * null if none is available. 2508 */ 2509 public String getPublicId() { 2510 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getPublicId() : null; 2511 } // getPublicId():String 2512 2513 /** 2514 * Return the expanded system identifier for the current document event. 2515 * <p> 2516 * The return value is the expanded system identifier of the document 2517 * entity or of the external parsed entity in which the markup 2518 * triggering the event appears. 2519 * <p> 2520 * If the system identifier is a URL, the parser must resolve it 2521 * fully before passing it to the application. 2522 * 2523 * @return A string containing the expanded system identifier, or null 2524 * if none is available. 
2525 */ 2526 public String getExpandedSystemId() { 2527 if (fCurrentEntity != null) { 2528 if (fCurrentEntity.entityLocation != null && 2529 fCurrentEntity.entityLocation.getExpandedSystemId() != null ) { 2530 return fCurrentEntity.entityLocation.getExpandedSystemId(); 2531 } else { 2532 // search for the first external entity on the stack 2533 int size = fEntityStack.size(); 2534 for (int i = size - 1; i >= 0 ; i--) { 2535 Entity.ScannedEntity externalEntity = 2536 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2537 2538 if (externalEntity.entityLocation != null && 2539 externalEntity.entityLocation.getExpandedSystemId() != null) { 2540 return externalEntity.entityLocation.getExpandedSystemId(); 2541 } 2542 } 2543 } 2544 } 2545 return null; 2546 } // getExpandedSystemId():String 2547 2548 /** 2549 * Return the literal system identifier for the current document event. 2550 * <p> 2551 * The return value is the literal system identifier of the document 2552 * entity or of the external parsed entity in which the markup 2553 * triggering the event appears. 2554 * <p> 2555 * @return A string containing the literal system identifier, or null 2556 * if none is available. 
2557 */ 2558 public String getLiteralSystemId() { 2559 if (fCurrentEntity != null) { 2560 if (fCurrentEntity.entityLocation != null && 2561 fCurrentEntity.entityLocation.getLiteralSystemId() != null ) { 2562 return fCurrentEntity.entityLocation.getLiteralSystemId(); 2563 } else { 2564 // search for the first external entity on the stack 2565 int size = fEntityStack.size(); 2566 for (int i = size - 1; i >= 0 ; i--) { 2567 Entity.ScannedEntity externalEntity = 2568 (Entity.ScannedEntity)fEntityStack.elementAt(i); 2569 2570 if (externalEntity.entityLocation != null && 2571 externalEntity.entityLocation.getLiteralSystemId() != null) { 2572 return externalEntity.entityLocation.getLiteralSystemId(); 2573 } 2574 } 2575 } 2576 } 2577 return null; 2578 } // getLiteralSystemId():String 2579 2580 /** 2581 * Return the line number where the current document event ends. 2582 * <p> 2583 * <strong>Warning:</strong> The return value from the method 2584 * is intended only as an approximation for the sake of error 2585 * reporting; it is not intended to provide sufficient information 2586 * to edit the character content of the original XML document. 2587 * <p> 2588 * The return value is an approximation of the line number 2589 * in the document entity or external parsed entity where the 2590 * markup triggering the event appears. 2591 * <p> 2592 * If possible, the SAX driver should provide the line position 2593 * of the first character after the text associated with the document 2594 * event. The first line in the document is line 1. 2595 * 2596 * @return The line number, or -1 if none is available. 
2597 */ 2598 public int getLineNumber() { 2599 if (fCurrentEntity != null) { 2600 if (fCurrentEntity.isExternal()) { 2601 return fCurrentEntity.lineNumber; 2602 } else { 2603 // search for the first external entity on the stack 2604 int size = fEntityStack.size(); 2605 for (int i=size-1; i>0 ; i--) { 2606 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2607 if (firstExternalEntity.isExternal()) { 2608 return firstExternalEntity.lineNumber; 2609 } 2610 } 2611 } 2612 } 2613 2614 return -1; 2615 2616 } // getLineNumber():int 2617 2618 /** 2619 * Return the column number where the current document event ends. 2620 * <p> 2621 * <strong>Warning:</strong> The return value from the method 2622 * is intended only as an approximation for the sake of error 2623 * reporting; it is not intended to provide sufficient information 2624 * to edit the character content of the original XML document. 2625 * <p> 2626 * The return value is an approximation of the column number 2627 * in the document entity or external parsed entity where the 2628 * markup triggering the event appears. 2629 * <p> 2630 * If possible, the SAX driver should provide the line position 2631 * of the first character after the text associated with the document 2632 * event. 2633 * <p> 2634 * If possible, the SAX driver should provide the line position 2635 * of the first character after the text associated with the document 2636 * event. The first column in each line is column 1. 2637 * 2638 * @return The column number, or -1 if none is available. 
2639 */ 2640 public int getColumnNumber() { 2641 if (fCurrentEntity != null) { 2642 if (fCurrentEntity.isExternal()) { 2643 return fCurrentEntity.columnNumber; 2644 } else { 2645 // search for the first external entity on the stack 2646 int size = fEntityStack.size(); 2647 for (int i=size-1; i>0 ; i--) { 2648 Entity.ScannedEntity firstExternalEntity = (Entity.ScannedEntity)fEntityStack.elementAt(i); 2649 if (firstExternalEntity.isExternal()) { 2650 return firstExternalEntity.columnNumber; 2651 } 2652 } 2653 } 2654 } 2655 2656 return -1; 2657 } // getColumnNumber():int 2658 2659 2660 // 2661 // Protected static methods 2662 // 2663 2664 /** 2665 * Fixes a platform dependent filename to standard URI form. 2666 * 2667 * @param str The string to fix. 2668 * 2669 * @return Returns the fixed URI string. 2670 */ 2671 protected static String fixURI(String str) { 2672 2673 // handle platform dependent strings 2674 str = str.replace(java.io.File.separatorChar, '/'); 2675 2676 // Windows fix 2677 if (str.length() >= 2) { 2678 char ch1 = str.charAt(1); 2679 // change "C:blah" to "/C:blah" 2680 if (ch1 == ':') { 2681 char ch0 = Character.toUpperCase(str.charAt(0)); 2682 if (ch0 >= 'A' && ch0 <= 'Z') { 2683 str = "/" + str; 2684 } 2685 } 2686 // change "//blah" to "file://blah" 2687 else if (ch1 == '/' && str.charAt(0) == '/') { 2688 str = "file:" + str; 2689 } 2690 } 2691 2692 // replace spaces in file names with %20. 2693 // Original comment from JDK5: the following algorithm might not be 2694 // very performant, but people who want to use invalid URI's have to 2695 // pay the price. 2696 int pos = str.indexOf(' '); 2697 if (pos >= 0) { 2698 StringBuilder sb = new StringBuilder(str.length()); 2699 // put characters before ' ' into the string builder 2700 for (int i = 0; i < pos; i++) 2701 sb.append(str.charAt(i)); 2702 // and %20 for the space 2703 sb.append("%20"); 2704 // for the remamining part, also convert ' ' to "%20". 
2705 for (int i = pos+1; i < str.length(); i++) { 2706 if (str.charAt(i) == ' ') 2707 sb.append("%20"); 2708 else 2709 sb.append(str.charAt(i)); 2710 } 2711 str = sb.toString(); 2712 } 2713 2714 // done 2715 return str; 2716 2717 } // fixURI(String):String 2718 2719 2720 // 2721 // Package visible methods 2722 // 2723 /** Prints the contents of the buffer. */ 2724 final void print() { 2725 if (DEBUG_BUFFER) { 2726 if (fCurrentEntity != null) { 2727 System.out.print('['); 2728 System.out.print(fCurrentEntity.count); 2729 System.out.print(' '); 2730 System.out.print(fCurrentEntity.position); 2731 if (fCurrentEntity.count > 0) { 2732 System.out.print(" \""); 2733 for (int i = 0; i < fCurrentEntity.count; i++) { 2734 if (i == fCurrentEntity.position) { 2735 System.out.print('^'); 2736 } 2737 char c = fCurrentEntity.ch[i]; 2738 switch (c) { 2739 case '\n': { 2740 System.out.print("\\n"); 2741 break; 2742 } 2743 case '\r': { 2744 System.out.print("\\r"); 2745 break; 2746 } 2747 case '\t': { 2748 System.out.print("\\t"); 2749 break; 2750 } 2751 case '\\': { 2752 System.out.print("\\\\"); 2753 break; 2754 } 2755 default: { 2756 System.out.print(c); 2757 } 2758 } 2759 } 2760 if (fCurrentEntity.position == fCurrentEntity.count) { 2761 System.out.print('^'); 2762 } 2763 System.out.print('"'); 2764 } 2765 System.out.print(']'); 2766 System.out.print(" @ "); 2767 System.out.print(fCurrentEntity.lineNumber); 2768 System.out.print(','); 2769 System.out.print(fCurrentEntity.columnNumber); 2770 } else { 2771 System.out.print("*NO CURRENT ENTITY*"); 2772 } 2773 } 2774 } // print() 2775 2776 /** 2777 * Buffer used in entity manager to reuse character arrays instead 2778 * of creating new ones every time. 
2779 * 2780 * @xerces.internal 2781 * 2782 * @author Ankit Pasricha, IBM 2783 */ 2784 private static class CharacterBuffer { 2785 2786 /** character buffer */ 2787 private char[] ch; 2788 2789 /** whether the buffer is for an external or internal scanned entity */ 2790 private boolean isExternal; 2791 2792 public CharacterBuffer(boolean isExternal, int size) { 2793 this.isExternal = isExternal; 2794 ch = new char[size]; 2795 } 2796 } 2797 2798 2799 /** 2800 * Stores a number of character buffers and provides it to the entity 2801 * manager to use when an entity is seen. 2802 * 2803 * @xerces.internal 2804 * 2805 * @author Ankit Pasricha, IBM 2806 */ 2807 private static class CharacterBufferPool { 2808 2809 private static final int DEFAULT_POOL_SIZE = 3; 2810 2811 private CharacterBuffer[] fInternalBufferPool; 2812 private CharacterBuffer[] fExternalBufferPool; 2813 2814 private int fExternalBufferSize; 2815 private int fInternalBufferSize; 2816 private int poolSize; 2817 2818 private int fInternalTop; 2819 private int fExternalTop; 2820 2821 public CharacterBufferPool(int externalBufferSize, int internalBufferSize) { 2822 this(DEFAULT_POOL_SIZE, externalBufferSize, internalBufferSize); 2823 } 2824 2825 public CharacterBufferPool(int poolSize, int externalBufferSize, int internalBufferSize) { 2826 fExternalBufferSize = externalBufferSize; 2827 fInternalBufferSize = internalBufferSize; 2828 this.poolSize = poolSize; 2829 init(); 2830 } 2831 2832 /** Initializes buffer pool. **/ 2833 private void init() { 2834 fInternalBufferPool = new CharacterBuffer[poolSize]; 2835 fExternalBufferPool = new CharacterBuffer[poolSize]; 2836 fInternalTop = -1; 2837 fExternalTop = -1; 2838 } 2839 2840 /** Retrieves buffer from pool. 
**/ 2841 public CharacterBuffer getBuffer(boolean external) { 2842 if (external) { 2843 if (fExternalTop > -1) { 2844 return (CharacterBuffer)fExternalBufferPool[fExternalTop--]; 2845 } 2846 else { 2847 return new CharacterBuffer(true, fExternalBufferSize); 2848 } 2849 } 2850 else { 2851 if (fInternalTop > -1) { 2852 return (CharacterBuffer)fInternalBufferPool[fInternalTop--]; 2853 } 2854 else { 2855 return new CharacterBuffer(false, fInternalBufferSize); 2856 } 2857 } 2858 } 2859 2860 /** Returns buffer to pool. **/ 2861 public void returnToPool(CharacterBuffer buffer) { 2862 if (buffer.isExternal) { 2863 if (fExternalTop < fExternalBufferPool.length - 1) { 2864 fExternalBufferPool[++fExternalTop] = buffer; 2865 } 2866 } 2867 else if (fInternalTop < fInternalBufferPool.length - 1) { 2868 fInternalBufferPool[++fInternalTop] = buffer; 2869 } 2870 } 2871 2872 /** Sets the size of external buffers and dumps the old pool. **/ 2873 public void setExternalBufferSize(int bufferSize) { 2874 fExternalBufferSize = bufferSize; 2875 fExternalBufferPool = new CharacterBuffer[poolSize]; 2876 fExternalTop = -1; 2877 } 2878 } 2879 2880 /** 2881 * This class wraps the byte inputstreams we're presented with. 2882 * We need it because java.io.InputStreams don't provide 2883 * functionality to reread processed bytes, and they have a habit 2884 * of reading more than one character when you call their read() 2885 * methods. This means that, once we discover the true (declared) 2886 * encoding of a document, we can neither backtrack to read the 2887 * whole doc again nor start reading where we are with a new 2888 * reader. 2889 * 2890 * This class allows rewinding an inputStream by allowing a mark 2891 * to be set, and the stream reset to that position. 
<strong>The 2892 * class assumes that it needs to read one character per 2893 * invocation when it's read() method is inovked, but uses the 2894 * underlying InputStream's read(char[], offset length) method--it 2895 * won't buffer data read this way!</strong> 2896 * 2897 * @xerces.internal 2898 * 2899 * @author Neil Graham, IBM 2900 * @author Glenn Marcy, IBM 2901 */ 2902 2903 protected final class RewindableInputStream extends InputStream { 2904 2905 private InputStream fInputStream; 2906 private byte[] fData; 2907 private int fStartOffset; 2908 private int fEndOffset; 2909 private int fOffset; 2910 private int fLength; 2911 private int fMark; 2912 2913 public RewindableInputStream(InputStream is) { 2914 fData = new byte[DEFAULT_XMLDECL_BUFFER_SIZE]; 2915 fInputStream = is; 2916 fStartOffset = 0; 2917 fEndOffset = -1; 2918 fOffset = 0; 2919 fLength = 0; 2920 fMark = 0; 2921 } 2922 2923 public void setStartOffset(int offset) { 2924 fStartOffset = offset; 2925 } 2926 2927 public void rewind() { 2928 fOffset = fStartOffset; 2929 } 2930 2931 public int read() throws IOException { 2932 int b = 0; 2933 if (fOffset < fLength) { 2934 return fData[fOffset++] & 0xff; 2935 } 2936 if (fOffset == fEndOffset) { 2937 return -1; 2938 } 2939 if (fOffset == fData.length) { 2940 byte[] newData = new byte[fOffset << 1]; 2941 System.arraycopy(fData, 0, newData, 0, fOffset); 2942 fData = newData; 2943 } 2944 b = fInputStream.read(); 2945 if (b == -1) { 2946 fEndOffset = fOffset; 2947 return -1; 2948 } 2949 fData[fLength++] = (byte)b; 2950 fOffset++; 2951 return b & 0xff; 2952 } 2953 2954 public int read(byte[] b, int off, int len) throws IOException { 2955 int bytesLeft = fLength - fOffset; 2956 if (bytesLeft == 0) { 2957 if (fOffset == fEndOffset) { 2958 return -1; 2959 } 2960 2961 /** 2962 * //System.out.println("fCurrentEntitty = " + fCurrentEntity ); 2963 * //System.out.println("fInputStream = " + fInputStream ); 2964 * // better get some more for the voracious reader... 
*/ 2965 2966 if(fCurrentEntity.mayReadChunks || !fCurrentEntity.xmlDeclChunkRead) { 2967 2968 if (!fCurrentEntity.xmlDeclChunkRead) 2969 { 2970 fCurrentEntity.xmlDeclChunkRead = true; 2971 len = fCurrentEntity.DEFAULT_XMLDECL_BUFFER_SIZE; 2972 } 2973 return fInputStream.read(b, off, len); 2974 } 2975 2976 int returnedVal = read(); 2977 if(returnedVal == -1) { 2978 fEndOffset = fOffset; 2979 return -1; 2980 } 2981 b[off] = (byte)returnedVal; 2982 return 1; 2983 2984 } 2985 if (len < bytesLeft) { 2986 if (len <= 0) { 2987 return 0; 2988 } 2989 } else { 2990 len = bytesLeft; 2991 } 2992 if (b != null) { 2993 System.arraycopy(fData, fOffset, b, off, len); 2994 } 2995 fOffset += len; 2996 return len; 2997 } 2998 2999 public long skip(long n) 3000 throws IOException { 3001 int bytesLeft; 3002 if (n <= 0) { 3003 return 0; 3004 } 3005 bytesLeft = fLength - fOffset; 3006 if (bytesLeft == 0) { 3007 if (fOffset == fEndOffset) { 3008 return 0; 3009 } 3010 return fInputStream.skip(n); 3011 } 3012 if (n <= bytesLeft) { 3013 fOffset += n; 3014 return n; 3015 } 3016 fOffset += bytesLeft; 3017 if (fOffset == fEndOffset) { 3018 return bytesLeft; 3019 } 3020 n -= bytesLeft; 3021 /* 3022 * In a manner of speaking, when this class isn't permitting more 3023 * than one byte at a time to be read, it is "blocking". The 3024 * available() method should indicate how much can be read without 3025 * blocking, so while we're in this mode, it should only indicate 3026 * that bytes in its buffer are available; otherwise, the result of 3027 * available() on the underlying InputStream is appropriate. 3028 */ 3029 return fInputStream.skip(n) + bytesLeft; 3030 } 3031 3032 public int available() throws IOException { 3033 int bytesLeft = fLength - fOffset; 3034 if (bytesLeft == 0) { 3035 if (fOffset == fEndOffset) { 3036 return -1; 3037 } 3038 return fCurrentEntity.mayReadChunks ? 
fInputStream.available() 3039 : 0; 3040 } 3041 return bytesLeft; 3042 } 3043 3044 public void mark(int howMuch) { 3045 fMark = fOffset; 3046 } 3047 3048 public void reset() { 3049 fOffset = fMark; 3050 //test(); 3051 } 3052 3053 public boolean markSupported() { 3054 return true; 3055 } 3056 3057 public void close() throws IOException { 3058 if (fInputStream != null) { 3059 fInputStream.close(); 3060 fInputStream = null; 3061 } 3062 } 3063 } // end of RewindableInputStream class 3064 3065 public void test(){ 3066 //System.out.println("TESTING: Added familytree to entityManager"); 3067 //Usecase1 3068 fEntityStorage.addExternalEntity("entityUsecase1",null, 3069 "/space/home/stax/sun/6thJan2004/zephyr/data/test.txt", 3070 "/space/home/stax/sun/6thJan2004/zephyr/data/entity.xml"); 3071 3072 //Usecase2 3073 fEntityStorage.addInternalEntity("entityUsecase2","<Test>value</Test>"); 3074 fEntityStorage.addInternalEntity("entityUsecase3","value3"); 3075 fEntityStorage.addInternalEntity("text", "Hello World."); 3076 fEntityStorage.addInternalEntity("empty-element", "<foo/>"); 3077 fEntityStorage.addInternalEntity("balanced-element", "<foo></foo>"); 3078 fEntityStorage.addInternalEntity("balanced-element-with-text", "<foo>Hello, World</foo>"); 3079 fEntityStorage.addInternalEntity("balanced-element-with-entity", "<foo>&text;</foo>"); 3080 fEntityStorage.addInternalEntity("unbalanced-entity", "<foo>"); 3081 fEntityStorage.addInternalEntity("recursive-entity", "<foo>&recursive-entity2;</foo>"); 3082 fEntityStorage.addInternalEntity("recursive-entity2", "<bar>&recursive-entity3;</bar>"); 3083 fEntityStorage.addInternalEntity("recursive-entity3", "<baz>&recursive-entity;</baz>"); 3084 fEntityStorage.addInternalEntity("ch","©"); 3085 fEntityStorage.addInternalEntity("ch1","T"); 3086 fEntityStorage.addInternalEntity("% ch2","param"); 3087 } 3088 3089} // class XMLEntityManager 3090