1/* 2 * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. 3 */ 4 5/* 6 * Licensed to the Apache Software Foundation (ASF) under one or more 7 * contributor license agreements. See the NOTICE file distributed with 8 * this work for additional information regarding copyright ownership. 9 * The ASF licenses this file to You under the Apache License, Version 2.0 10 * (the "License"); you may not use this file except in compliance with 11 * the License. You may obtain a copy of the License at 12 * 13 * http://www.apache.org/licenses/LICENSE-2.0 14 * 15 * Unless required by applicable law or agreed to in writing, software 16 * distributed under the License is distributed on an "AS IS" BASIS, 17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 * See the License for the specific language governing permissions and 19 * limitations under the License. 20 */ 21 22package com.sun.org.apache.xerces.internal.impl; 23 24import com.sun.org.apache.xerces.internal.impl.XMLScanner.NameType; 25import com.sun.org.apache.xerces.internal.impl.io.ASCIIReader; 26import com.sun.org.apache.xerces.internal.impl.io.UCSReader; 27import com.sun.org.apache.xerces.internal.impl.io.UTF8Reader; 28import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter; 29import com.sun.org.apache.xerces.internal.util.EncodingMap; 30import com.sun.org.apache.xerces.internal.util.SymbolTable; 31import com.sun.org.apache.xerces.internal.util.XMLChar; 32import com.sun.org.apache.xerces.internal.util.XMLStringBuffer; 33import com.sun.org.apache.xerces.internal.utils.XMLLimitAnalyzer; 34import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager; 35import com.sun.org.apache.xerces.internal.utils.XMLSecurityManager.Limit; 36import com.sun.org.apache.xerces.internal.xni.*; 37import com.sun.org.apache.xerces.internal.xni.parser.XMLComponentManager; 38import com.sun.org.apache.xerces.internal.xni.parser.XMLConfigurationException; 39import 
com.sun.xml.internal.stream.Entity;
import com.sun.xml.internal.stream.Entity.ScannedEntity;
import com.sun.xml.internal.stream.XMLBufferListener;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Locale;

/**
 * Implements the entity scanner methods.
 *
 * @author Neeraj Bajaj, Sun Microsystems
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author K.Venugopal Sun Microsystems
 *
 */
public class XMLEntityScanner implements XMLLocator {

    /** Entity currently being scanned; {@code null} when none has been set. */
    protected Entity.ScannedEntity fCurrentEntity = null;

    /** Buffer size last passed to {@code setBufferSize(int)}. */
    protected int fBufferSize = XMLEntityManager.DEFAULT_BUFFER_SIZE;

    /** Entity manager; the reset methods copy its limit analyzer and security manager. */
    protected XMLEntityManager fEntityManager;

    /** Security manager. */
    protected XMLSecurityManager fSecurityManager = null;

    /** Limit analyzer. */
    protected XMLLimitAnalyzer fLimitAnalyzer = null;

    /** Debug switching readers for encodings. */
    private static final boolean DEBUG_ENCODINGS = false;

    /** Listeners which should know when load is being called */
    private ArrayList<XMLBufferListener> listeners = new ArrayList<>();

    /**
     * Lookup table, indexed by character code, for characters below 127 that
     * the name scanning loops accept as name characters. Filled in by the
     * static initializer below.
     */
    private static final boolean [] VALID_NAMES = new boolean[127];

    /**
     * Debug printing of buffer. This debugging flag works best when you
     * resize the DEFAULT_BUFFER_SIZE down to something reasonable like
     * 64 characters.
     */
    private static final boolean DEBUG_BUFFER = false;
    private static final boolean DEBUG_SKIP_STRING = false;
    /**
     * To signal the end of the document entity, this exception will be thrown.
     * {@code fillInStackTrace} is overridden to return the instance unchanged,
     * so this shared instance does not record a stack trace.
     */
    private static final EOFException END_OF_DOCUMENT_ENTITY = new EOFException() {
        private static final long serialVersionUID = 980337771224675268L;
        public Throwable fillInStackTrace() {
            return this;
        }
    };

    /** Symbol table used to intern scanned names. */
    protected SymbolTable fSymbolTable = null;

    /** Error reporter used to report limit violations. */
    protected XMLErrorReporter fErrorReporter = null;

    // NOTE(review): whitespace bookkeeping; the code that fills and reads these
    // three fields is not visible in this part of the file.
    int [] whiteSpaceLookup = new int[100];
    int whiteSpaceLen = 0;
    boolean whiteSpaceInfoNeeded = true;

    /**
     * Allow Java encoding names. This feature identifier is:
     * http://apache.org/xml/features/allow-java-encodings
     */
    protected boolean fAllowJavaEncodings;

    //Will be used only during internal subsets.
    //for appending data.

    /** Property identifier: symbol table. */
    protected static final String SYMBOL_TABLE =
            Constants.XERCES_PROPERTY_PREFIX + Constants.SYMBOL_TABLE_PROPERTY;

    /** Property identifier: error reporter. */
    protected static final String ERROR_REPORTER =
            Constants.XERCES_PROPERTY_PREFIX + Constants.ERROR_REPORTER_PROPERTY;

    /** Feature identifier: allow Java encodings. */
    protected static final String ALLOW_JAVA_ENCODINGS =
            Constants.XERCES_FEATURE_PREFIX + Constants.ALLOW_JAVA_ENCODINGS_FEATURE;

    protected PropertyManager fPropertyManager = null ;

    /** Cached result of {@code fCurrentEntity.isExternal()}, refreshed by {@code setCurrentEntity}. */
    boolean isExternal = false;
    static {

        // 'A'..'Z'
        for(int i=0x0041;i<=0x005A ; i++){
            VALID_NAMES[i]=true;
        }
        // 'a'..'z'
        for(int i=0x0061;i<=0x007A; i++){
            VALID_NAMES[i]=true;
        }
        // '0'..'9'
        for(int i=0x0030;i<=0x0039; i++){
            VALID_NAMES[i]=true;
        }
        VALID_NAMES[45]=true;   // '-'
        VALID_NAMES[46]=true;   // '.'
        VALID_NAMES[58]=true;   // ':'
        VALID_NAMES[95]=true;   // '_'
    }

    // Remember, that the XML version has explicitly been set,
    // so that XMLStreamReader.getVersion() can find that out.
146 protected boolean xmlVersionSetExplicitly = false; 147 148 // indicates that the operation is for detecting XML version 149 boolean detectingVersion = false; 150 151 // 152 // Constructors 153 // 154 155 /** Default constructor. */ 156 public XMLEntityScanner() { 157 } // <init>() 158 159 160 /** private constructor, this class can only be instantiated within this class. Instance of this class should 161 * be obtained using getEntityScanner() or getEntityScanner(ScannedEntity scannedEntity) 162 * @see getEntityScanner() 163 * @see getEntityScanner(ScannedEntity) 164 */ 165 public XMLEntityScanner(PropertyManager propertyManager, XMLEntityManager entityManager) { 166 fEntityManager = entityManager ; 167 reset(propertyManager); 168 } // <init>() 169 170 171 // set buffer size: 172 public final void setBufferSize(int size) { 173 // REVISIT: Buffer size passed to entity scanner 174 // was not being kept in synch with the actual size 175 // of the buffers in each scanned entity. If any 176 // of the buffers were actually resized, it was possible 177 // that the parser would throw an ArrayIndexOutOfBoundsException 178 // for documents which contained names which are longer than 179 // the current buffer size. Conceivably the buffer size passed 180 // to entity scanner could be used to determine a minimum size 181 // for resizing, if doubling its size is smaller than this 182 // minimum. -- mrglavas 183 fBufferSize = size; 184 } 185 186 /** 187 * Resets the components. 188 */ 189 public void reset(PropertyManager propertyManager){ 190 fSymbolTable = (SymbolTable)propertyManager.getProperty(SYMBOL_TABLE) ; 191 fErrorReporter = (XMLErrorReporter)propertyManager.getProperty(ERROR_REPORTER) ; 192 resetCommon(); 193 } 194 195 /** 196 * Resets the component. The component can query the component manager 197 * about any features and properties that affect the operation of the 198 * component. 199 * 200 * @param componentManager The component manager. 
201 * 202 * @throws SAXException Thrown by component on initialization error. 203 * For example, if a feature or property is 204 * required for the operation of the component, the 205 * component manager may throw a 206 * SAXNotRecognizedException or a 207 * SAXNotSupportedException. 208 */ 209 public void reset(XMLComponentManager componentManager) 210 throws XMLConfigurationException { 211 // xerces features 212 fAllowJavaEncodings = componentManager.getFeature(ALLOW_JAVA_ENCODINGS, false); 213 214 //xerces properties 215 fSymbolTable = (SymbolTable)componentManager.getProperty(SYMBOL_TABLE); 216 fErrorReporter = (XMLErrorReporter)componentManager.getProperty(ERROR_REPORTER); 217 resetCommon(); 218 } // reset(XMLComponentManager) 219 220 221 public final void reset(SymbolTable symbolTable, XMLEntityManager entityManager, 222 XMLErrorReporter reporter) { 223 fCurrentEntity = null; 224 fSymbolTable = symbolTable; 225 fEntityManager = entityManager; 226 fErrorReporter = reporter; 227 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 228 fSecurityManager = fEntityManager.fSecurityManager; 229 } 230 231 private void resetCommon() { 232 fCurrentEntity = null; 233 whiteSpaceLen = 0; 234 whiteSpaceInfoNeeded = true; 235 listeners.clear(); 236 fLimitAnalyzer = fEntityManager.fLimitAnalyzer; 237 fSecurityManager = fEntityManager.fSecurityManager; 238 } 239 240 /** 241 * Returns the XML version of the current entity. This will normally be the 242 * value from the XML or text declaration or defaulted by the parser. Note that 243 * that this value may be different than the version of the processing rules 244 * applied to the current entity. For instance, an XML 1.1 document may refer to 245 * XML 1.0 entities. In such a case the rules of XML 1.1 are applied to the entire 246 * document. 
Also note that, for a given entity, this value can only be considered 247 * final once the XML or text declaration has been read or once it has been 248 * determined that there is no such declaration. 249 */ 250 public final String getXMLVersion() { 251 if (fCurrentEntity != null) { 252 return fCurrentEntity.xmlVersion; 253 } 254 return null; 255 } // getXMLVersion():String 256 257 /** 258 * Sets the XML version. This method is used by the 259 * scanners to report the value of the version pseudo-attribute 260 * in an XML or text declaration. 261 * 262 * @param xmlVersion the XML version of the current entity 263 */ 264 public final void setXMLVersion(String xmlVersion) { 265 xmlVersionSetExplicitly = true; 266 fCurrentEntity.xmlVersion = xmlVersion; 267 } // setXMLVersion(String) 268 269 270 /** set the instance of current scanned entity. 271 * @param ScannedEntity 272 */ 273 274 public final void setCurrentEntity(Entity.ScannedEntity scannedEntity){ 275 fCurrentEntity = scannedEntity ; 276 if(fCurrentEntity != null){ 277 isExternal = fCurrentEntity.isExternal(); 278 if(DEBUG_BUFFER) 279 System.out.println("Current Entity is "+scannedEntity.name); 280 } 281 } 282 283 public Entity.ScannedEntity getCurrentEntity(){ 284 return fCurrentEntity ; 285 } 286 // 287 // XMLEntityReader methods 288 // 289 290 /** 291 * Returns the base system identifier of the currently scanned 292 * entity, or null if none is available. 293 */ 294 public final String getBaseSystemId() { 295 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 296 } // getBaseSystemId():String 297 298 /** 299 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setBaseSystemId(String) 300 */ 301 public void setBaseSystemId(String systemId) { 302 //no-op 303 } 304 305 ///////////// Locator methods start. 
306 public final int getLineNumber(){ 307 //if the entity is closed, we should return -1 308 //xxx at first place why such call should be there... 309 return fCurrentEntity != null ? fCurrentEntity.lineNumber : -1 ; 310 } 311 312 /** 313 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setLineNumber(int) 314 */ 315 public void setLineNumber(int line) { 316 //no-op 317 } 318 319 320 public final int getColumnNumber(){ 321 //if the entity is closed, we should return -1 322 //xxx at first place why such call should be there... 323 return fCurrentEntity != null ? fCurrentEntity.columnNumber : -1 ; 324 } 325 326 /** 327 * @see com.sun.org.apache.xerces.internal.xni.XMLLocator#setColumnNumber(int) 328 */ 329 public void setColumnNumber(int col) { 330 // no-op 331 } 332 333 334 public final int getCharacterOffset(){ 335 return fCurrentEntity != null ? fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.position : -1 ; 336 } 337 338 /** Returns the expanded system identifier. */ 339 public final String getExpandedSystemId() { 340 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getExpandedSystemId() : null; 341 } 342 343 /** 344 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setExpandedSystemId(String) 345 */ 346 public void setExpandedSystemId(String systemId) { 347 //no-op 348 } 349 350 /** Returns the literal system identifier. */ 351 public final String getLiteralSystemId() { 352 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? fCurrentEntity.entityLocation.getLiteralSystemId() : null; 353 } 354 355 /** 356 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setLiteralSystemId(String) 357 */ 358 public void setLiteralSystemId(String systemId) { 359 //no-op 360 } 361 362 /** Returns the public identifier. */ 363 public final String getPublicId() { 364 return (fCurrentEntity != null && fCurrentEntity.entityLocation != null) ? 
fCurrentEntity.entityLocation.getPublicId() : null; 365 } 366 367 /** 368 * @see com.sun.org.apache.xerces.internal.xni.XMLResourceIdentifier#setPublicId(String) 369 */ 370 public void setPublicId(String publicId) { 371 //no-op 372 } 373 374 ///////////////// Locator methods finished. 375 376 /** the version of the current entity being scanned */ 377 public void setVersion(String version){ 378 fCurrentEntity.version = version; 379 } 380 381 public String getVersion(){ 382 if (fCurrentEntity != null) 383 return fCurrentEntity.version ; 384 return null; 385 } 386 387 /** 388 * Returns the encoding of the current entity. 389 * Note that, for a given entity, this value can only be 390 * considered final once the encoding declaration has been read (or once it 391 * has been determined that there is no such declaration) since, no encoding 392 * having been specified on the XMLInputSource, the parser 393 * will make an initial "guess" which could be in error. 394 */ 395 public final String getEncoding() { 396 if (fCurrentEntity != null) { 397 return fCurrentEntity.encoding; 398 } 399 return null; 400 } // getEncoding():String 401 402 /** 403 * Sets the encoding of the scanner. This method is used by the 404 * scanners if the XMLDecl or TextDecl line contains an encoding 405 * pseudo-attribute. 406 * <p> 407 * <strong>Note:</strong> The underlying character reader on the 408 * current entity will be changed to accomodate the new encoding. 409 * However, the new encoding is ignored if the current reader was 410 * not constructed from an input stream (e.g. an external entity 411 * that is resolved directly to the appropriate java.io.Reader 412 * object). 413 * 414 * @param encoding The IANA encoding name of the new encoding. 415 * 416 * @throws IOException Thrown if the new encoding is not supported. 
417 * 418 * @see com.sun.org.apache.xerces.internal.util.EncodingMap 419 */ 420 public final void setEncoding(String encoding) throws IOException { 421 422 if (DEBUG_ENCODINGS) { 423 System.out.println("$$$ setEncoding: "+encoding); 424 } 425 426 if (fCurrentEntity.stream != null) { 427 // if the encoding is the same, don't change the reader and 428 // re-use the original reader used by the OneCharReader 429 // NOTE: Besides saving an object, this overcomes deficiencies 430 // in the UTF-16 reader supplied with the standard Java 431 // distribution (up to and including 1.3). The UTF-16 432 // decoder buffers 8K blocks even when only asked to read 433 // a single char! -Ac 434 if (fCurrentEntity.encoding == null || 435 !fCurrentEntity.encoding.equals(encoding)) { 436 // UTF-16 is a bit of a special case. If the encoding is UTF-16, 437 // and we know the endian-ness, we shouldn't change readers. 438 // If it's ISO-10646-UCS-(2|4), then we'll have to deduce 439 // the endian-ness from the encoding we presently have. 
440 if(fCurrentEntity.encoding != null && fCurrentEntity.encoding.startsWith("UTF-16")) { 441 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 442 if(ENCODING.equals("UTF-16")) return; 443 if(ENCODING.equals("ISO-10646-UCS-4")) { 444 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 445 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4BE); 446 } else { 447 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS4LE); 448 } 449 return; 450 } 451 if(ENCODING.equals("ISO-10646-UCS-2")) { 452 if(fCurrentEntity.encoding.equals("UTF-16BE")) { 453 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2BE); 454 } else { 455 fCurrentEntity.reader = new UCSReader(fCurrentEntity.stream, UCSReader.UCS2LE); 456 } 457 return; 458 } 459 } 460 // wrap a new reader around the input stream, changing 461 // the encoding 462 if (DEBUG_ENCODINGS) { 463 System.out.println("$$$ creating new reader from stream: "+ 464 fCurrentEntity.stream); 465 } 466 //fCurrentEntity.stream.reset(); 467 fCurrentEntity.reader = createReader(fCurrentEntity.stream, encoding, null); 468 fCurrentEntity.encoding = encoding; 469 470 } else { 471 if (DEBUG_ENCODINGS) 472 System.out.println("$$$ reusing old reader on stream"); 473 } 474 } 475 476 } // setEncoding(String) 477 478 /** Returns true if the current entity being scanned is external. */ 479 public final boolean isExternal() { 480 return fCurrentEntity.isExternal(); 481 } // isExternal():boolean 482 483 public int getChar(int relative) throws IOException{ 484 if(arrangeCapacity(relative + 1, false)){ 485 return fCurrentEntity.ch[fCurrentEntity.position + relative]; 486 }else{ 487 return -1; 488 } 489 }//getChar() 490 491 /** 492 * Returns the next character on the input. 493 * <p> 494 * <strong>Note:</strong> The character is <em>not</em> consumed. 495 * 496 * @throws IOException Thrown if i/o error occurs. 497 * @throws EOFException Thrown on end of file. 
498 */ 499 public int peekChar() throws IOException { 500 if (DEBUG_BUFFER) { 501 System.out.print("(peekChar: "); 502 print(); 503 System.out.println(); 504 } 505 506 // load more characters, if needed 507 if (fCurrentEntity.position == fCurrentEntity.count) { 508 load(0, true, true); 509 } 510 511 // peek at character 512 int c = fCurrentEntity.ch[fCurrentEntity.position]; 513 514 // return peeked character 515 if (DEBUG_BUFFER) { 516 System.out.print(")peekChar: "); 517 print(); 518 if (isExternal) { 519 System.out.println(" -> '"+(c!='\r'?(char)c:'\n')+"'"); 520 } else { 521 System.out.println(" -> '"+(char)c+"'"); 522 } 523 } 524 if (isExternal) { 525 return c != '\r' ? c : '\n'; 526 } else { 527 return c; 528 } 529 530 } // peekChar():int 531 532 /** 533 * Returns the next character on the input. 534 * <p> 535 * <strong>Note:</strong> The character is consumed. 536 * 537 * @param nt The type of the name (element or attribute) 538 * 539 * @throws IOException Thrown if i/o error occurs. 540 * @throws EOFException Thrown on end of file. 
*/
    protected int scanChar(NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanChar: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan character
        // offset remembers where this scan started so the number of consumed
        // characters can be reported to the limit check at the end
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[fCurrentEntity.position++];
        // a line feed, or a carriage return in an external entity, starts a new line
        if (c == '\n' || (c == '\r' && isExternal)) {
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            // the newline was the last buffered character: keep it at index 0
            // and load behind it (presumably so a following '\n' can be
            // inspected below -- the behavior of load() is not visible here)
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)c;
                load(1, false, false);
                offset = 0;
            }
            // external entities: "\r\n" is consumed as one unit and a lone
            // '\r' is kept alone; either way the caller sees '\n'
            if (c == '\r' && isExternal) {
                if (fCurrentEntity.ch[fCurrentEntity.position++] != '\n') {
                    fCurrentEntity.position--;
                }
                c = '\n';
            }
        }

        // return character that was scanned
        if (DEBUG_BUFFER) {
            System.out.print(")scanChar: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        fCurrentEntity.columnNumber++;
        // limit accounting is skipped while only the XML version is being detected
        if (!detectingVersion) {
            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
        }
        return c;

    } // scanChar():int

    /**
     * Returns a string matching the NMTOKEN production appearing immediately
     * on the input as a symbol, or null if no NMTOKEN string is present.
     * <p>
     * <strong>Note:</strong> The NMTOKEN characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
*
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     */
    protected String scanNmtoken() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanNmtoken: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan nmtoken
        int offset = fCurrentEntity.position;
        boolean vc = false;
        char c;
        while (true){
            //while (XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // characters below 127 use the precomputed table, all others XMLChar
            if(c < 127){
                vc = VALID_NAMES[c];
            }else{
                vc = XMLChar.isName(c);
            }
            if(!vc)break;

            // end of buffered data reached in the middle of the token
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                int length = fCurrentEntity.position - offset;
                invokeListeners(length);
                if (length == fCurrentEntity.fBufferSize) {
                    // bad luck we have to resize our buffer
                    char[] tmp = new char[fCurrentEntity.fBufferSize * 2];
                    System.arraycopy(fCurrentEntity.ch, offset,
                            tmp, 0, length);
                    fCurrentEntity.ch = tmp;
                    fCurrentEntity.fBufferSize *= 2;
                } else {
                    // move the partial token to the front of the buffer
                    System.arraycopy(fCurrentEntity.ch, offset,
                            fCurrentEntity.ch, 0, length);
                }
                offset = 0;
                // NOTE(review): a true result from load() ends the scan here;
                // presumably it signals that no more data is available -- confirm
                if (load(length, false, false)) {
                    break;
                }
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return nmtoken
        String symbol = null;
        if (length > 0) {
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanNmtoken: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanNmtoken():String

    /**
     * Returns a string matching the Name production appearing immediately
     * on the input as a symbol, or null if no Name string is
* present.
     * <p>
     * <strong>Note:</strong> The Name characters are consumed.
     * <p>
     * <strong>Note:</strong> The string returned must be a symbol. The
     * SymbolTable can be used for this purpose.
     *
     * @param nt The type of the name (element or attribute)
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    protected String scanName(NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanName: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan name
        int offset = fCurrentEntity.position;
        int length;
        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // the name start character was the last buffered character: keep
            // it at index 0 and load behind it
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;
                // NOTE(review): when load() returns true the name is taken to
                // be this single character; presumably true means no more
                // data could be read -- confirm. No limit check is made on
                // this early-return path.
                if (load(1, false, false)) {
                    fCurrentEntity.columnNumber++;
                    String symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);

                    if (DEBUG_BUFFER) {
                        System.out.print(")scanName: ");
                        print();
                        System.out.println(" -> "+String.valueOf(symbol));
                    }
                    return symbol;
                }
            }
            boolean vc =false;
            while (true ){
                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // characters below 127 use the precomputed table, all others XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                // checkBeforeLoad advances the position; a positive result
                // means the buffer end was hit and the partial name now
                // starts at index 0
                if ((length = checkBeforeLoad(fCurrentEntity, offset, offset)) > 0) {
                    offset = 0;
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
        }
        length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length;

        // return name
        String symbol;
        if (length > 0) {
            // enforce the name length and entity size limits before interning
            checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
            checkEntityLimit(nt, fCurrentEntity, offset, length);
            symbol = fSymbolTable.addSymbol(fCurrentEntity.ch, offset, length);
        } else
            symbol = null;
        if (DEBUG_BUFFER) {
            System.out.print(")scanName: ");
            print();
            System.out.println(" -> "+String.valueOf(symbol));
        }
        return symbol;

    } // scanName():String

    /**
     * Scans a qualified name from the input, setting the fields of the
     * QName structure appropriately.
     * <p>
     * <strong>Note:</strong> The qualified name characters are consumed.
     * <p>
     * <strong>Note:</strong> The strings used to set the values of the
     * QName structure must be symbols. The SymbolTable can be used for
     * this purpose.
     *
     * @param qname The qualified name structure to fill.
     * @param nt    The type of the name (element or attribute)
     *
     * @return Returns true if a qualified name appeared immediately on
     *         the input and was scanned, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
*
     * @see com.sun.org.apache.xerces.internal.util.SymbolTable
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isName
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isNameStart
     */
    protected boolean scanQName(QName qname, NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanQName, "+qname+": ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // scan qualified name
        int offset = fCurrentEntity.position;

        //making a check if if the specified character is a valid name start character
        //as defined by production [5] in the XML 1.0  specification.
        // Name ::= (Letter | '_' | ':') (NameChar)*

        if (XMLChar.isNameStart(fCurrentEntity.ch[offset])) {
            // the name start character was the last buffered character: keep
            // it at index 0 and load behind it
            if (++fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = fCurrentEntity.ch[offset];
                offset = 0;

                // NOTE(review): when load() returns true the name is taken to
                // be this single character; presumably true means no more
                // data could be read -- confirm
                if (load(1, false, false)) {
                    fCurrentEntity.columnNumber++;
                    //adding into symbol table.
                    //XXX We are trying to add single character in SymbolTable??????
                    String name = fSymbolTable.addSymbol(fCurrentEntity.ch, 0, 1);
                    qname.setValues(null, name, name, null);
                    if (DEBUG_BUFFER) {
                        System.out.print(")scanQName, "+qname+": ");
                        print();
                        System.out.println(" -> true");
                    }
                    checkEntityLimit(nt, fCurrentEntity, 0, 1);
                    return true;
                }
            }
            // index is the buffer position of the first ':' seen, or -1
            int index = -1;
            boolean vc = false;
            int length;
            while ( true){

                //XMLChar.isName(fCurrentEntity.ch[fCurrentEntity.position])) ;
                char c = fCurrentEntity.ch[fCurrentEntity.position];
                // characters below 127 use the precomputed table, all others XMLChar
                if(c < 127){
                    vc = VALID_NAMES[c];
                }else{
                    vc = XMLChar.isName(c);
                }
                if(!vc)break;
                if (c == ':') {
                    // a second colon ends the name; it is not consumed
                    if (index != -1) {
                        break;
                    }
                    index = fCurrentEntity.position;
                    //check prefix before further read
                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, index - offset);
                }
                // checkBeforeLoad advances the position; a positive result
                // means the buffer end was hit and the partial name was moved
                // to index 0, so the colon index must be rebased as well
                if ((length = checkBeforeLoad(fCurrentEntity, offset, index)) > 0) {
                    if (index != -1) {
                        index = index - offset;
                    }
                    offset = 0;
                    if (load(length, false, false)) {
                        break;
                    }
                }
            }
            length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length;
            if (length > 0) {
                String prefix = null;
                String localpart = null;
                String rawname = fSymbolTable.addSymbol(fCurrentEntity.ch,
                        offset, length);

                if (index != -1) {
                    // split "prefix:localpart" at the recorded colon
                    int prefixLength = index - offset;
                    //check the result: prefix
                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, prefixLength);
                    prefix = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            offset, prefixLength);
                    int len = length - prefixLength - 1;
                    //check the result: localpart
                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, index + 1, len);
                    localpart = fSymbolTable.addSymbol(fCurrentEntity.ch,
                            index + 1, len);

                } else {
                    localpart = rawname;
                    //check the result: localpart
                    checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
                }
                qname.setValues(prefix, localpart, rawname, null);
                if (DEBUG_BUFFER) {
                    System.out.print(")scanQName, "+qname+": ");
                    print();
                    System.out.println(" -> true");
                }
                checkEntityLimit(nt, fCurrentEntity, offset, length);
                return true;
            }
        }

        // no qualified name found
        if (DEBUG_BUFFER) {
            System.out.print(")scanQName, "+qname+": ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // scanQName(QName):boolean

    /**
     * Checks whether the end of the entity buffer has been reached. If yes,
     * checks against the limit and buffer size before loading more characters.
     * <p>
     * <strong>Note:</strong> This method always advances
     * {@code entity.position} by one before testing for the end of the buffer.
     * When the end is reached, the characters scanned so far are moved to the
     * start of the buffer, which is replaced by one of twice
     * {@code entity.fBufferSize} when they fill the whole array.
     *
     * @param entity the current entity
     * @param offset the offset from which the current read was started
     * @param nameOffset the offset from which the current name starts
     * @return the length of characters scanned before the end of the buffer,
     * zero if there is more to be read in the buffer
     */
    protected int checkBeforeLoad(Entity.ScannedEntity entity, int offset,
            int nameOffset) throws IOException {
        int length = 0;
        if (++entity.position == entity.count) {
            length = entity.position - offset;
            int nameLength = length;
            if (nameOffset != -1) {
                // only the part from nameOffset onwards counts toward the limit
                nameOffset = nameOffset - offset;
                nameLength = length - nameOffset;
            } else {
                nameOffset = offset;
            }
            //check limit before loading more data
            checkLimit(Limit.MAX_NAME_LIMIT, entity, nameOffset, nameLength);
            invokeListeners(length);
            if (length == entity.ch.length) {
                // bad luck we have to resize our buffer
                char[] tmp = new char[entity.fBufferSize * 2];
                System.arraycopy(entity.ch, offset, tmp, 0, length);
                entity.ch = tmp;
                entity.fBufferSize *= 2;
            }
            else {
                // move the scanned characters to the front of the buffer
                System.arraycopy(entity.ch, offset, entity.ch, 0, length);
            }
        }
        return length;
    }

    /**
     * If the current entity is an Entity reference, check the accumulated size
     * against the limit.
936 * 937 * @param nt type of name (element, attribute or entity) 938 * @param entity The current entity 939 * @param offset The index of the first byte 940 * @param length The length of the entity scanned 941 */ 942 protected void checkEntityLimit(NameType nt, ScannedEntity entity, int offset, int length) { 943 if (entity == null || !entity.isGE) { 944 return; 945 } 946 947 if (nt != NameType.REFERENCE) { 948 checkLimit(Limit.GENERAL_ENTITY_SIZE_LIMIT, entity, offset, length); 949 } 950 if (nt == NameType.ELEMENTSTART || nt == NameType.ATTRIBUTENAME) { 951 checkNodeCount(entity); 952 } 953 } 954 955 /** 956 * If the current entity is an Entity reference, counts the total nodes in 957 * the entity and checks the accumulated value against the limit. 958 * 959 * @param entity The current entity 960 */ 961 protected void checkNodeCount(ScannedEntity entity) { 962 if (entity != null && entity.isGE) { 963 checkLimit(Limit.ENTITY_REPLACEMENT_LIMIT, entity, 0, 1); 964 } 965 } 966 967 /** 968 * Checks whether the value of the specified Limit exceeds its limit 969 * 970 * @param limit The Limit to be checked 971 * @param entity The current entity 972 * @param offset The index of the first byte 973 * @param length The length of the entity scanned 974 */ 975 protected void checkLimit(Limit limit, ScannedEntity entity, int offset, int length) { 976 fLimitAnalyzer.addValue(limit, entity.name, length); 977 if (fSecurityManager.isOverLimit(limit, fLimitAnalyzer)) { 978 fSecurityManager.debugPrint(fLimitAnalyzer); 979 Object[] e = (limit == Limit.ENTITY_REPLACEMENT_LIMIT) ? 
980 new Object[]{fLimitAnalyzer.getValue(limit), 981 fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)} : 982 new Object[]{entity.name, fLimitAnalyzer.getValue(limit), 983 fSecurityManager.getLimit(limit), fSecurityManager.getStateLiteral(limit)}; 984 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, limit.key(), 985 e, XMLErrorReporter.SEVERITY_FATAL_ERROR); 986 } 987 if (fSecurityManager.isOverLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT, fLimitAnalyzer)) { 988 fSecurityManager.debugPrint(fLimitAnalyzer); 989 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "TotalEntitySizeLimit", 990 new Object[]{fLimitAnalyzer.getTotalValue(Limit.TOTAL_ENTITY_SIZE_LIMIT), 991 fSecurityManager.getLimit(Limit.TOTAL_ENTITY_SIZE_LIMIT), 992 fSecurityManager.getStateLiteral(Limit.TOTAL_ENTITY_SIZE_LIMIT)}, 993 XMLErrorReporter.SEVERITY_FATAL_ERROR); 994 } 995 } 996 997 /** 998 * CHANGED: 999 * Scans a range of parsed character data, This function appends the character data to 1000 * the supplied buffer. 1001 * <p> 1002 * <strong>Note:</strong> The characters are consumed. 1003 * <p> 1004 * <strong>Note:</strong> This method does not guarantee to return 1005 * the longest run of parsed character data. This method may return 1006 * before markup due to reaching the end of the input buffer or any 1007 * other reason. 1008 * <p> 1009 * 1010 * @param content The content structure to fill. 1011 * 1012 * @return Returns the next character on the input, if known. This 1013 * value may be -1 but this does <em>note</em> designate 1014 * end of file. 1015 * 1016 * @throws IOException Thrown if i/o error occurs. 1017 * @throws EOFException Thrown on end of file. 
*/
    protected int scanContent(XMLString content) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanContent: ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one character left: carry it to the front of the buffer
            // and read the rest in behind it, so there is lookahead available
            invokeListeners(1);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        // set when the leading newlines were already reported to
        // checkEntityLimit before a reload that crossed an entity boundary
        boolean counted = false;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: reserve room for the newlines seen
                        // so far at the front and load the rest after them
                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            counted = true;
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: skip the '\n' and drop one slot from the
                        // output range so the pair yields a single newline
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        checkEntityLimit(null, fCurrentEntity, offset, newlines);
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            counted = true;
                            break;
                        }
                    }
                } else {
                    // not a newline: un-read it and stop normalizing
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            // overwrite the whole leading newline run with '\n'
            for (int i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
            }
            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character remains after the newlines: hand back the
                // newlines alone and let the next call deal with the rest
                checkEntityLimit(null, fCurrentEntity, offset, length);
                // the scanned range is set on (not appended to) the XMLString;
                // buffering across calls is left to the caller
                content.setValues(fCurrentEntity.ch, offset, length);
                //content.append(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // consume characters until the first one XMLChar does not classify
        // as content, or until the buffer runs out
        while (fCurrentEntity.position < fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position++];
            if (!XMLChar.isContent(c)) {
                fCurrentEntity.position--;
                break;
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;
        if (!counted) {
            checkEntityLimit(null, fCurrentEntity, offset, length);
        }

        // the scanned range is set on (not appended to) the XMLString;
        // buffering across calls is left to the caller
        content.setValues(fCurrentEntity.ch, offset, length);
        //content.append(fCurrentEntity.ch, offset, length);
        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // REVISIT: Does this need to be updated to fix the
            //          #x0D ^#x0A newline normalization problem? -Ac
            if (c == '\r' && isExternal) {
                c = '\n';
            }
        } else {
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanContent: ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanContent(XMLString):int

    /**
     * Scans a range of attribute value data, setting the fields of the
     * XMLString structure, appropriately.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of attribute value data. This method may return
     * before the quote character due to reaching the end of the input
     * buffer or any other reason.
     * <p>
     * <strong>Note:</strong> The fields contained in the XMLString
     * structure are not guaranteed to remain valid upon subsequent calls
     * to the entity scanner. Therefore, the caller is responsible for
     * immediately using the returned character data or making a copy of
     * the character data.
     *
     * @param quote   The quote character that signifies the end of the
     *                attribute value data.
     * @param content The content structure to fill.
     * @param isNSURI a flag indicating whether the content is a Namespace URI;
     *                when set, the scanned length is also checked against the
     *                maximum name limit
     *
     * @return Returns the next character on the input, if known. This
     *         value may be -1 but this does <em>not</em> designate
     *         end of file.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
*/
    protected int scanLiteral(int quote, XMLString content, boolean isNSURI)
    throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println();
        }
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        } else if (fCurrentEntity.position == fCurrentEntity.count - 1) {
            // exactly one character left: carry it to the front of the buffer
            // and read the rest in behind it
            invokeListeners(1);
            fCurrentEntity.ch[0] = fCurrentEntity.ch[fCurrentEntity.count - 1];
            load(1, false, false);
            fCurrentEntity.position = 0;
        }

        // normalize newlines
        int offset = fCurrentEntity.position;
        int c = fCurrentEntity.ch[offset];
        int newlines = 0;
        // start a fresh list of whitespace positions for this scan
        if(whiteSpaceInfoNeeded)
            whiteSpaceLen=0;
        if (c == '\n' || (c == '\r' && isExternal)) {
            if (DEBUG_BUFFER) {
                System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
            do {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == '\r' && isExternal) {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        // buffer exhausted: reserve room for the newlines seen
                        // so far at the front and load the rest after them
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                        // "\r\n" pair: skip the '\n' and drop one slot from the
                        // output range so the pair yields a single newline
                        fCurrentEntity.position++;
                        offset++;
                    }
                    /*** NEWLINE NORMALIZATION ***/
                    else {
                        newlines++;
                    }
                    /***/
                } else if (c == '\n') {
                    newlines++;
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count) {
                        offset = 0;
                        fCurrentEntity.position = newlines;
                        if (load(newlines, false, true)) {
                            break;
                        }
                    }
                    /*** NEWLINE NORMALIZATION ***
                     * if (fCurrentEntity.ch[fCurrentEntity.position] == '\r'
                     * && external) {
                     * fCurrentEntity.position++;
                     * offset++;
                     * }
                     * /***/
                } else {
                    // not a newline: un-read it and stop normalizing
                    fCurrentEntity.position--;
                    break;
                }
            } while (fCurrentEntity.position < fCurrentEntity.count - 1);
            int i=0;
            // overwrite the leading newline run with '\n' and record each
            // position (storeWhiteSpace is called regardless of
            // whiteSpaceInfoNeeded here)
            for ( i = offset; i < fCurrentEntity.position; i++) {
                fCurrentEntity.ch[i] = '\n';
                storeWhiteSpace(i);
            }

            int length = fCurrentEntity.position - offset;
            if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                // only one character remains after the newlines: hand back the
                // newlines alone and let the next call deal with the rest
                content.setValues(fCurrentEntity.ch, offset, length);
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                return -1;
            }
            if (DEBUG_BUFFER) {
                System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                print();
                System.out.println();
            }
        }

        // scan literal value: stop at the closing quote (unless this entity is
        // a literal being expanded internally), at '%', or at any character
        // XMLChar does not classify as content
        for (; fCurrentEntity.position<fCurrentEntity.count; fCurrentEntity.position++) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            if ((c == quote &&
                    (!fCurrentEntity.literal || isExternal)) ||
                    c == '%' || !XMLChar.isContent(c)) {
                break;
            }
            if (whiteSpaceInfoNeeded && c == '\t') {
                storeWhiteSpace(fCurrentEntity.position);
            }
        }
        int length = fCurrentEntity.position - offset;
        fCurrentEntity.columnNumber += length - newlines;

        checkEntityLimit(null, fCurrentEntity, offset, length);
        if (isNSURI) {
            checkLimit(Limit.MAX_NAME_LIMIT, fCurrentEntity, offset, length);
        }
        content.setValues(fCurrentEntity.ch, offset, length);

        // return next character
        if (fCurrentEntity.position != fCurrentEntity.count) {
            c = fCurrentEntity.ch[fCurrentEntity.position];
            // NOTE: We don't want to accidentally signal the
            //       end of the literal if we're expanding an
            //       entity appearing in the literal. -Ac
            if (c == quote && fCurrentEntity.literal) {
                c = -1;
            }
        } else {
            c = -1;
        }
        if (DEBUG_BUFFER) {
            System.out.print(")scanLiteral, '"+(char)quote+"': ");
            print();
            System.out.println(" -> '"+(char)c+"'");
        }
        return c;

    } // scanLiteral(int,XMLString):int

    /**
     * Save whitespace information. Increase the whitespace buffer by 100
     * when needed.
     *
     * For XML 1.0, legal characters below 0x20 are 0x09 (TAB), 0x0A (LF) and 0x0D (CR).
     *
     * @param whiteSpacePos position of a whitespace in the scanner entity buffer
     */
    private void storeWhiteSpace(int whiteSpacePos) {
        // grow the lookup array by a fixed 100 slots once it is full
        if (whiteSpaceLen >= whiteSpaceLookup.length) {
            int [] tmp = new int[whiteSpaceLookup.length + 100];
            System.arraycopy(whiteSpaceLookup, 0, tmp, 0, whiteSpaceLookup.length);
            whiteSpaceLookup = tmp;
        }

        whiteSpaceLookup[whiteSpaceLen++] = whiteSpacePos;
    }

    /**
     * Scans a range of character data up to the specified delimiter,
     * appending the scanned characters to the supplied XMLStringBuffer.
     * <p>
     * <strong>Note:</strong> The characters are consumed.
     * <p>
     * <strong>Note:</strong> This assumes that the delimiter contains at
     * least one character.
     * <p>
     * <strong>Note:</strong> This method does not guarantee to return
     * the longest run of character data. This method may return before
     * the delimiter due to reaching the end of the input buffer or any
     * other reason.
     * <p>
     * @param delimiter The string that signifies the end of the character
     *                  data to be scanned.
     * @param buffer    The XMLStringBuffer to fill.
     * @param chunkLimit the size limit of the data to be scanned. Zero by default
     * indicating no limit.
     *
     * @return Returns true if there is more data to scan, false otherwise.
     *
     * @throws IOException  Thrown if i/o error occurs.
* @throws EOFException Thrown on end of file.
     */
    protected boolean scanData(String delimiter, XMLStringBuffer buffer, int chunkLimit)
    throws IOException {

        boolean done = false;
        int delimLen = delimiter.length();
        char charAt0 = delimiter.charAt(0);
        // each pass scans one buffer-load; with a chunk limit only a single
        // pass is made (see the loop condition at the bottom)
        do {
            if (DEBUG_BUFFER) {
                System.out.print("(scanData: ");
                print();
                System.out.println();
            }

            // load more characters, if needed

            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }

            boolean bNextEntity = false;

            // fewer than delimLen characters remain: slide them to the front
            // and keep loading until a whole delimiter could fit or the
            // entity ends
            while ((fCurrentEntity.position > fCurrentEntity.count - delimLen)
                && (!bNextEntity))
            {
                System.arraycopy(fCurrentEntity.ch,
                                 fCurrentEntity.position,
                                 fCurrentEntity.ch,
                                 0,
                                 fCurrentEntity.count - fCurrentEntity.position);

                bNextEntity = load(fCurrentEntity.count - fCurrentEntity.position, false, false);
                fCurrentEntity.position = 0;
                fCurrentEntity.startPosition = 0;
            }

            if (fCurrentEntity.position > fCurrentEntity.count - delimLen) {
                // something must be wrong with the input:  e.g., file ends in an unterminated comment
                // flush what is left to the buffer and report "no more data"
                int length = fCurrentEntity.count - fCurrentEntity.position;
                checkEntityLimit(NameType.COMMENT, fCurrentEntity, fCurrentEntity.position, length);
                buffer.append (fCurrentEntity.ch, fCurrentEntity.position, length);
                fCurrentEntity.columnNumber += fCurrentEntity.count;
                fCurrentEntity.baseCharOffset += (fCurrentEntity.position - fCurrentEntity.startPosition);
                fCurrentEntity.position = fCurrentEntity.count;
                fCurrentEntity.startPosition = fCurrentEntity.count;
                load(0, true, false);
                return false;
            }

            // normalize newlines
            int offset = fCurrentEntity.position;
            int c = fCurrentEntity.ch[offset];
            int newlines = 0;
            if (c == '\n' || (c == '\r' && isExternal)) {
                if (DEBUG_BUFFER) {
                    System.out.print("[newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
                do {
                    c = fCurrentEntity.ch[fCurrentEntity.position++];
                    if (c == '\r' && isExternal) {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // buffer exhausted: reserve room for the newlines
                            // seen so far and load the rest after them
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                        if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                            // "\r\n" pair: skip the '\n' and drop one slot from
                            // the output range so the pair yields one newline
                            fCurrentEntity.position++;
                            offset++;
                        }
                        /*** NEWLINE NORMALIZATION ***/
                        else {
                            newlines++;
                        }
                    } else if (c == '\n') {
                        newlines++;
                        fCurrentEntity.lineNumber++;
                        fCurrentEntity.columnNumber = 1;
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            offset = 0;
                            fCurrentEntity.position = newlines;
                            // NOTE(review): unlike the '\r' branch above, count
                            // is also reset here before loading -- confirm the
                            // asymmetry is intended
                            fCurrentEntity.count = newlines;
                            if (load(newlines, false, true)) {
                                break;
                            }
                        }
                    } else {
                        // not a newline: un-read it and stop normalizing
                        fCurrentEntity.position--;
                        break;
                    }
                } while (fCurrentEntity.position < fCurrentEntity.count - 1);
                // overwrite the whole leading newline run with '\n'
                for (int i = offset; i < fCurrentEntity.position; i++) {
                    fCurrentEntity.ch[i] = '\n';
                }
                int length = fCurrentEntity.position - offset;
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    // only one character remains after the newlines: emit the
                    // newlines and ask the caller to call again
                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
                    buffer.append(fCurrentEntity.ch, offset, length);
                    if (DEBUG_BUFFER) {
                        System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                        print();
                        System.out.println();
                    }
                    return true;
                }
                if (DEBUG_BUFFER) {
                    System.out.print("]newline, "+offset+", "+fCurrentEntity.position+": ");
                    print();
                    System.out.println();
                }
            }

            // iterate over buffer looking for delimiter
            OUTER: while (fCurrentEntity.position < fCurrentEntity.count) {
                c = fCurrentEntity.ch[fCurrentEntity.position++];
                if (c == charAt0) {
                    // looks like we just hit the delimiter
                    int delimOffset = fCurrentEntity.position - 1;
                    for (int i = 1; i < delimLen; i++) {
                        if (fCurrentEntity.position == fCurrentEntity.count) {
                            // delimiter may straddle the buffer end: back up to
                            // its first character and stop this pass
                            fCurrentEntity.position -= i;
                            break OUTER;
                        }
                        c = fCurrentEntity.ch[fCurrentEntity.position++];
                        if (delimiter.charAt(i) != c) {
                            // mismatch: resume just after the first character
                            fCurrentEntity.position -= i;
                            break;
                        }
                    }
                    if (fCurrentEntity.position == delimOffset + delimLen) {
                        done = true;
                        break;
                    }
                } else if (c == '\n' || (isExternal && c == '\r')) {
                    // leave the newline for the normalization code of the next pass
                    fCurrentEntity.position--;
                    break;
                } else if (XMLChar.isInvalid(c)) {
                    // stop in front of the invalid character; the caller is
                    // expected to deal with it
                    fCurrentEntity.position--;
                    int length = fCurrentEntity.position - offset;
                    fCurrentEntity.columnNumber += length - newlines;
                    checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
                    buffer.append(fCurrentEntity.ch, offset, length);
                    return true;
                }
                if (chunkLimit > 0 &&
                        (buffer.length + fCurrentEntity.position - offset) >= chunkLimit) {
                    break;
                }
            }
            int length = fCurrentEntity.position - offset;
            fCurrentEntity.columnNumber += length - newlines;
            checkEntityLimit(NameType.COMMENT, fCurrentEntity, offset, length);
            if (done) {
                // the delimiter is consumed but not handed to the caller
                length -= delimLen;
            }
            buffer.append(fCurrentEntity.ch, offset, length);

            // return true if string was skipped
            if (DEBUG_BUFFER) {
                System.out.print(")scanData: ");
                print();
                System.out.println(" -> " + done);
            }
            if (chunkLimit > 0 && buffer.length >= chunkLimit) {
                break;
            }
        } while (!done && chunkLimit == 0);
        return !done;

    } // scanData(String, XMLStringBuffer)

    /**
     * Skips a character appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The character is consumed only if it matches
     * the specified character. When a newline is requested and the entity
     * is external, a carriage return (optionally followed by a line feed)
     * on the input is accepted as a match as well.
     *
     * @param c The character to skip.
* @param nt The type of the name (element or attribute)
     *
     * @return Returns true if the character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     */
    protected boolean skipChar(int c, NameType nt) throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipChar, '"+(char)c+"': ");
            print();
            System.out.println();
        }

        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        // skip character
        int offset = fCurrentEntity.position;
        int cc = fCurrentEntity.ch[fCurrentEntity.position];
        if (cc == c) {
            fCurrentEntity.position++;
            if (c == '\n') {
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
            } else {
                fCurrentEntity.columnNumber++;
            }
            if (DEBUG_BUFFER) {
                System.out.print(")skipChar, '"+(char)c+"': ");
                print();
                System.out.println(" -> true");
            }
            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
            return true;
        } else if (c == '\n' && cc == '\r' && isExternal) {
            // handle newlines
            // NOTE(review): this guard repeats the position == count test that
            // the load at the top of the method has presumably already
            // resolved, and the ch[position] read after the increment below is
            // not checked against count -- confirm the intended boundary
            // handling when '\r' is the last buffered character
            if (fCurrentEntity.position == fCurrentEntity.count) {
                invokeListeners(1);
                fCurrentEntity.ch[0] = (char)cc;
                load(1, false, false);
            }
            fCurrentEntity.position++;
            // swallow the '\n' of a "\r\n" pair
            if (fCurrentEntity.ch[fCurrentEntity.position] == '\n') {
                fCurrentEntity.position++;
            }
            fCurrentEntity.lineNumber++;
            fCurrentEntity.columnNumber = 1;
            if (DEBUG_BUFFER) {
                System.out.print(")skipChar, '"+(char)c+"': ");
                print();
                System.out.println(" -> true");
            }
            checkEntityLimit(nt, fCurrentEntity, offset, fCurrentEntity.position - offset);
            return true;
        }

        // character was not skipped
        if (DEBUG_BUFFER) {
            System.out.print(")skipChar, '"+(char)c+"': ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipChar(int):boolean

    /**
     * Tells whether the character is one of the four whitespace characters
     * space, line feed, tab or carriage return.
     *
     * @param ch the character to test
     * @return true if {@code ch} is ' ', '\n', '\t' or '\r'
     */
    public boolean isSpace(char ch){
        return (ch == ' ') || (ch == '\n') || (ch == '\t') || (ch == '\r');
    }
    /**
     * Skips space characters appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if they are
     * space characters.
     *
     * @return Returns true if at least one space character was skipped.
     *
     * @throws IOException  Thrown if i/o error occurs.
     * @throws EOFException Thrown on end of file.
     *
     * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
     */
    protected boolean skipSpaces() throws IOException {
        if (DEBUG_BUFFER) {
            System.out.print("(skipSpaces: ");
            print();
            System.out.println();
        }
        //boolean entityChanged = false;
        // load more characters, if needed
        if (fCurrentEntity.position == fCurrentEntity.count) {
            load(0, true, true);
        }

        //we are doing this check only in skipSpace() because it is called by
        //fMiscDispatcher and we want the parser to exit gracefully when document
        //is well-formed.
        //it is possible that end of document is reached and
        //fCurrentEntity becomes null
        //nothing was read so entity changed 'false' should be returned.
        if(fCurrentEntity == null){
            return false ;
        }

        // skip spaces
        int c = fCurrentEntity.ch[fCurrentEntity.position];
        // start one before the current position so that the first
        // checkEntityLimit call below accounts for the first space
        int offset = fCurrentEntity.position - 1;
        if (XMLChar.isSpace(c)) {
            do {
                boolean entityChanged = false;
                // handle newlines
                if (c == '\n' || (isExternal && c == '\r')) {
                    fCurrentEntity.lineNumber++;
                    fCurrentEntity.columnNumber = 1;
                    if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                        // newline is the last buffered character: carry it to
                        // the front and load more so a following '\n' is visible
                        invokeListeners(1);
                        fCurrentEntity.ch[0] = (char)c;
                        entityChanged = load(1, true, false);
                        if (!entityChanged){
                            // the load change the position to be 1,
                            // need to restore it when entity not changed
                            fCurrentEntity.position = 0;
                        }else if(fCurrentEntity == null){
                            return true ;
                        }
                    }
                    if (c == '\r' && isExternal) {
                        // REVISIT: Does this need to be updated to fix the
                        //          #x0D ^#x0A newline normalization problem? -Ac
                        // step onto a '\n' that follows the '\r', otherwise stay put
                        if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                            fCurrentEntity.position--;
                        }
                    }
                } else {
                    fCurrentEntity.columnNumber++;
                }

                //If this is a general entity, spaces within a start element should be counted
                checkEntityLimit(null, fCurrentEntity, offset, fCurrentEntity.position - offset);
                offset = fCurrentEntity.position;

                // load more characters, if needed
                if (!entityChanged){
                    fCurrentEntity.position++;
                }

                if (fCurrentEntity.position == fCurrentEntity.count) {
                    load(0, true, true);

                    //we are doing this check only in skipSpace() because it is called by
                    //fMiscDispatcher and we want the parser to exit gracefully when document
                    //is well-formed.

                    //it is possible that end of document is reached and
                    //fCurrentEntity becomes null
                    //spaces were already skipped at this point, so 'true' is returned.
                    if(fCurrentEntity == null){
                        return true ;
                    }

                }
            } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
            if (DEBUG_BUFFER) {
                System.out.print(")skipSpaces: ");
                print();
                System.out.println(" -> true");
            }
            return true;
        }

        // no spaces were found
        if (DEBUG_BUFFER) {
            System.out.print(")skipSpaces: ");
            print();
            System.out.println(" -> false");
        }
        return false;

    } // skipSpaces():boolean


    /**
     * Makes sure the given number of characters is available in the
     * underlying buffer, without changing the current entity.
     *
     * @param length the number of characters that must be available
     * @return This function returns true if capacity asked is available.
     * @throws IOException Thrown if i/o error occurs.
     */
    public boolean arrangeCapacity(int length) throws IOException{
        return arrangeCapacity(length, false);
    }

    /**
     * Makes sure the given number of characters is available in the
     * underlying buffer, starting at the current position, loading more
     * characters if necessary. The current position still refers to the
     * same character afterwards, although its index may change when the
     * unread characters are moved to the front of the buffer.
     *
     * @param length the number of characters that must be available
     * @param changeEntity a flag to indicate that the underlying function should change the entity
     * @return This function returns true if capacity asked is available.
     * @throws IOException Thrown if i/o error occurs.
     *
     */
    public boolean arrangeCapacity(int length, boolean changeEntity) throws IOException{
        //check if the capacity is available in the current buffer
        //count is no. of characters in the buffer   [x][m][l]
        //position is '0' based
        //System.out.println("fCurrent Entity " + fCurrentEntity);
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        }
        if(DEBUG_SKIP_STRING){
            System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
            System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
            System.out.println("length = " + length);
        }
        boolean entityChanged = false;
        //load more characters -- this function shouldn't change the entity
        while((fCurrentEntity.count - fCurrentEntity.position) < length){
            if( (fCurrentEntity.ch.length - fCurrentEntity.position) < length){
                // not enough room behind the current position: move the
                // unread characters to the front of the buffer
                invokeListeners(0);
                System.arraycopy(fCurrentEntity.ch, fCurrentEntity.position, fCurrentEntity.ch,0,fCurrentEntity.count - fCurrentEntity.position);
                fCurrentEntity.count = fCurrentEntity.count - fCurrentEntity.position;
                fCurrentEntity.position = 0;
            }

            if((fCurrentEntity.count - fCurrentEntity.position) < length){
                // append to what is already buffered; load moves the position,
                // so remember it and put it back afterwards
                int pos = fCurrentEntity.position;
                invokeListeners(pos);
                entityChanged = load(fCurrentEntity.count, changeEntity, false);
                fCurrentEntity.position = pos;
                if(entityChanged)break;
            }
            if(DEBUG_SKIP_STRING){
                System.out.println("fCurrentEntity.count = " + fCurrentEntity.count);
                System.out.println("fCurrentEntity.position = " + fCurrentEntity.position);
                System.out.println("length = " + length);
            }
        }
        //load changes the position.. set it back to the point where we started.

        //after loading check again.
        if((fCurrentEntity.count - fCurrentEntity.position) >= length) {
            return true;
        } else {
            return false;
        }
    }

    /**
     * Skips the specified string appearing immediately on the input.
     * <p>
     * <strong>Note:</strong> The characters are consumed only if all
     * the characters are skipped.
1795 * 1796 * @param s The string to skip. 1797 * 1798 * @return Returns true if the string was skipped. 1799 * 1800 * @throws IOException Thrown if i/o error occurs. 1801 * @throws EOFException Thrown on end of file. 1802 */ 1803 protected boolean skipString(String s) throws IOException { 1804 1805 final int length = s.length(); 1806 1807 //first make sure that required capacity is avaible 1808 if(arrangeCapacity(length, false)){ 1809 final int beforeSkip = fCurrentEntity.position ; 1810 int afterSkip = fCurrentEntity.position + length - 1 ; 1811 if(DEBUG_SKIP_STRING){ 1812 System.out.println("skipString,length = " + s + "," + length); 1813 System.out.println("Buffer string to be skipped = " + new String(fCurrentEntity.ch, beforeSkip, length)); 1814 } 1815 1816 //s.charAt() indexes are 0 to 'Length -1' based. 1817 int i = length - 1 ; 1818 //check from reverse 1819 while(s.charAt(i--) == fCurrentEntity.ch[afterSkip]){ 1820 if(afterSkip-- == beforeSkip){ 1821 fCurrentEntity.position = fCurrentEntity.position + length ; 1822 fCurrentEntity.columnNumber += length; 1823 if (!detectingVersion) { 1824 checkEntityLimit(null, fCurrentEntity, beforeSkip, length); 1825 } 1826 return true; 1827 } 1828 } 1829 } 1830 1831 return false; 1832 } // skipString(String):boolean 1833 1834 protected boolean skipString(char [] s) throws IOException { 1835 1836 final int length = s.length; 1837 //first make sure that required capacity is avaible 1838 if(arrangeCapacity(length, false)){ 1839 int beforeSkip = fCurrentEntity.position; 1840 1841 if(DEBUG_SKIP_STRING){ 1842 System.out.println("skipString,length = " + new String(s) + "," + length); 1843 System.out.println("skipString,length = " + new String(s) + "," + length); 1844 } 1845 1846 for(int i=0;i<length;i++){ 1847 if(!(fCurrentEntity.ch[beforeSkip++]==s[i])){ 1848 return false; 1849 } 1850 } 1851 fCurrentEntity.position = fCurrentEntity.position + length ; 1852 fCurrentEntity.columnNumber += length; 1853 if (!detectingVersion) { 
1854 checkEntityLimit(null, fCurrentEntity, beforeSkip, length); 1855 } 1856 return true; 1857 1858 } 1859 1860 return false; 1861 } 1862 1863 // 1864 // Locator methods 1865 // 1866 // 1867 // Private methods 1868 // 1869 1870 /** 1871 * Loads a chunk of text. 1872 * 1873 * @param offset The offset into the character buffer to 1874 * read the next batch of characters. 1875 * @param changeEntity True if the load should change entities 1876 * at the end of the entity, otherwise leave 1877 * the current entity in place and the entity 1878 * boundary will be signaled by the return 1879 * value. 1880 * @param notify Determine whether to notify listeners of 1881 * the event 1882 * 1883 * @returns Returns true if the entity changed as a result of this 1884 * load operation. 1885 */ 1886 final boolean load(int offset, boolean changeEntity, boolean notify) 1887 throws IOException { 1888 if (DEBUG_BUFFER) { 1889 System.out.print("(load, "+offset+": "); 1890 print(); 1891 System.out.println(); 1892 } 1893 if (notify) { 1894 invokeListeners(offset); 1895 } 1896 //maintaing the count till last load 1897 fCurrentEntity.fTotalCountTillLastLoad = fCurrentEntity.fTotalCountTillLastLoad + fCurrentEntity.fLastCount ; 1898 // read characters 1899 int length = fCurrentEntity.ch.length - offset; 1900 if (!fCurrentEntity.mayReadChunks && length > XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE) { 1901 length = XMLEntityManager.DEFAULT_XMLDECL_BUFFER_SIZE; 1902 } 1903 if (DEBUG_BUFFER) System.out.println(" length to try to read: "+length); 1904 int count = fCurrentEntity.reader.read(fCurrentEntity.ch, offset, length); 1905 if (DEBUG_BUFFER) System.out.println(" length actually read: "+count); 1906 1907 // reset count and position 1908 boolean entityChanged = false; 1909 if (count != -1) { 1910 if (count != 0) { 1911 // record the last count 1912 fCurrentEntity.fLastCount = count; 1913 fCurrentEntity.count = count + offset; 1914 fCurrentEntity.position = offset; 1915 } 1916 } 1917 // end of 
this entity 1918 else { 1919 fCurrentEntity.count = offset; 1920 fCurrentEntity.position = offset; 1921 entityChanged = true; 1922 1923 if (changeEntity) { 1924 //notify the entity manager about the end of entity 1925 fEntityManager.endEntity(); 1926 //return if the current entity becomes null 1927 if(fCurrentEntity == null){ 1928 throw END_OF_DOCUMENT_ENTITY; 1929 } 1930 // handle the trailing edges 1931 if (fCurrentEntity.position == fCurrentEntity.count) { 1932 load(0, true, false); 1933 } 1934 } 1935 1936 } 1937 if (DEBUG_BUFFER) { 1938 System.out.print(")load, "+offset+": "); 1939 print(); 1940 System.out.println(); 1941 } 1942 1943 return entityChanged; 1944 1945 } // load(int, boolean):boolean 1946 1947 /** 1948 * Creates a reader capable of reading the given input stream in 1949 * the specified encoding. 1950 * 1951 * @param inputStream The input stream. 1952 * @param encoding The encoding name that the input stream is 1953 * encoded using. If the user has specified that 1954 * Java encoding names are allowed, then the 1955 * encoding name may be a Java encoding name; 1956 * otherwise, it is an ianaEncoding name. 1957 * @param isBigEndian For encodings (like uCS-4), whose names cannot 1958 * specify a byte order, this tells whether the order is bigEndian. null menas 1959 * unknown or not relevant. 1960 * 1961 * @return Returns a reader. 
1962 */ 1963 protected Reader createReader(InputStream inputStream, String encoding, Boolean isBigEndian) 1964 throws IOException { 1965 1966 // normalize encoding name 1967 if (encoding == null) { 1968 encoding = "UTF-8"; 1969 } 1970 1971 // try to use an optimized reader 1972 String ENCODING = encoding.toUpperCase(Locale.ENGLISH); 1973 if (ENCODING.equals("UTF-8")) { 1974 if (DEBUG_ENCODINGS) { 1975 System.out.println("$$$ creating UTF8Reader"); 1976 } 1977 return new UTF8Reader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale() ); 1978 } 1979 if (ENCODING.equals("US-ASCII")) { 1980 if (DEBUG_ENCODINGS) { 1981 System.out.println("$$$ creating ASCIIReader"); 1982 } 1983 return new ASCIIReader(inputStream, fCurrentEntity.fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 1984 } 1985 if(ENCODING.equals("ISO-10646-UCS-4")) { 1986 if(isBigEndian != null) { 1987 boolean isBE = isBigEndian.booleanValue(); 1988 if(isBE) { 1989 return new UCSReader(inputStream, UCSReader.UCS4BE); 1990 } else { 1991 return new UCSReader(inputStream, UCSReader.UCS4LE); 1992 } 1993 } else { 1994 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 1995 "EncodingByteOrderUnsupported", 1996 new Object[] { encoding }, 1997 XMLErrorReporter.SEVERITY_FATAL_ERROR); 1998 } 1999 } 2000 if(ENCODING.equals("ISO-10646-UCS-2")) { 2001 if(isBigEndian != null) { // sould never happen with this encoding... 
2002 boolean isBE = isBigEndian.booleanValue(); 2003 if(isBE) { 2004 return new UCSReader(inputStream, UCSReader.UCS2BE); 2005 } else { 2006 return new UCSReader(inputStream, UCSReader.UCS2LE); 2007 } 2008 } else { 2009 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 2010 "EncodingByteOrderUnsupported", 2011 new Object[] { encoding }, 2012 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2013 } 2014 } 2015 2016 // check for valid name 2017 boolean validIANA = XMLChar.isValidIANAEncoding(encoding); 2018 boolean validJava = XMLChar.isValidJavaEncoding(encoding); 2019 if (!validIANA || (fAllowJavaEncodings && !validJava)) { 2020 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 2021 "EncodingDeclInvalid", 2022 new Object[] { encoding }, 2023 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2024 // NOTE: AndyH suggested that, on failure, we use ISO Latin 1 2025 // because every byte is a valid ISO Latin 1 character. 2026 // It may not translate correctly but if we failed on 2027 // the encoding anyway, then we're expecting the content 2028 // of the document to be bad. This will just prevent an 2029 // invalid UTF-8 sequence to be detected. This is only 2030 // important when continue-after-fatal-error is turned 2031 // on. -Ac 2032 encoding = "ISO-8859-1"; 2033 } 2034 2035 // try to use a Java reader 2036 String javaEncoding = EncodingMap.getIANA2JavaMapping(ENCODING); 2037 if (javaEncoding == null) { 2038 if(fAllowJavaEncodings) { 2039 javaEncoding = encoding; 2040 } else { 2041 fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, 2042 "EncodingDeclInvalid", 2043 new Object[] { encoding }, 2044 XMLErrorReporter.SEVERITY_FATAL_ERROR); 2045 // see comment above. 
2046 javaEncoding = "ISO8859_1"; 2047 } 2048 } 2049 else if (javaEncoding.equals("ASCII")) { 2050 if (DEBUG_ENCODINGS) { 2051 System.out.println("$$$ creating ASCIIReader"); 2052 } 2053 return new ASCIIReader(inputStream, fBufferSize, fErrorReporter.getMessageFormatter(XMLMessageFormatter.XML_DOMAIN), fErrorReporter.getLocale()); 2054 } 2055 2056 if (DEBUG_ENCODINGS) { 2057 System.out.print("$$$ creating Java InputStreamReader: encoding="+javaEncoding); 2058 if (javaEncoding == encoding) { 2059 System.out.print(" (IANA encoding)"); 2060 } 2061 System.out.println(); 2062 } 2063 return new InputStreamReader(inputStream, javaEncoding); 2064 2065 } // createReader(InputStream,String, Boolean): Reader 2066 2067 /** 2068 * Returns the IANA encoding name that is auto-detected from 2069 * the bytes specified, with the endian-ness of that encoding where appropriate. 2070 * 2071 * @param b4 The first four bytes of the input. 2072 * @param count The number of bytes actually read. 2073 * @return a 2-element array: the first element, an IANA-encoding string, 2074 * the second element a Boolean which is true iff the document is big endian, false 2075 * if it's little-endian, and null if the distinction isn't relevant. 
2076 */ 2077 protected Object[] getEncodingName(byte[] b4, int count) { 2078 2079 if (count < 2) { 2080 return new Object[]{"UTF-8", null}; 2081 } 2082 2083 // UTF-16, with BOM 2084 int b0 = b4[0] & 0xFF; 2085 int b1 = b4[1] & 0xFF; 2086 if (b0 == 0xFE && b1 == 0xFF) { 2087 // UTF-16, big-endian 2088 return new Object [] {"UTF-16BE", new Boolean(true)}; 2089 } 2090 if (b0 == 0xFF && b1 == 0xFE) { 2091 // UTF-16, little-endian 2092 return new Object [] {"UTF-16LE", new Boolean(false)}; 2093 } 2094 2095 // default to UTF-8 if we don't have enough bytes to make a 2096 // good determination of the encoding 2097 if (count < 3) { 2098 return new Object [] {"UTF-8", null}; 2099 } 2100 2101 // UTF-8 with a BOM 2102 int b2 = b4[2] & 0xFF; 2103 if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { 2104 return new Object [] {"UTF-8", null}; 2105 } 2106 2107 // default to UTF-8 if we don't have enough bytes to make a 2108 // good determination of the encoding 2109 if (count < 4) { 2110 return new Object [] {"UTF-8", null}; 2111 } 2112 2113 // other encodings 2114 int b3 = b4[3] & 0xFF; 2115 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { 2116 // UCS-4, big endian (1234) 2117 return new Object [] {"ISO-10646-UCS-4", new Boolean(true)}; 2118 } 2119 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { 2120 // UCS-4, little endian (4321) 2121 return new Object [] {"ISO-10646-UCS-4", new Boolean(false)}; 2122 } 2123 if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { 2124 // UCS-4, unusual octet order (2143) 2125 // REVISIT: What should this be? 2126 return new Object [] {"ISO-10646-UCS-4", null}; 2127 } 2128 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { 2129 // UCS-4, unusual octect order (3412) 2130 // REVISIT: What should this be? 2131 return new Object [] {"ISO-10646-UCS-4", null}; 2132 } 2133 if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { 2134 // UTF-16, big-endian, no BOM 2135 // (or could turn out to be UCS-2... 
2136 // REVISIT: What should this be? 2137 return new Object [] {"UTF-16BE", new Boolean(true)}; 2138 } 2139 if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { 2140 // UTF-16, little-endian, no BOM 2141 // (or could turn out to be UCS-2... 2142 return new Object [] {"UTF-16LE", new Boolean(false)}; 2143 } 2144 if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { 2145 // EBCDIC 2146 // a la xerces1, return CP037 instead of EBCDIC here 2147 return new Object [] {"CP037", null}; 2148 } 2149 2150 // default encoding 2151 return new Object [] {"UTF-8", null}; 2152 2153 } // getEncodingName(byte[],int):Object[] 2154 2155 /** 2156 * xxx not removing endEntity() so that i remember that we need to implement it. 2157 * Ends an entity. 2158 * 2159 * @throws XNIException Thrown by entity handler to signal an error. 2160 */ 2161 // 2162 /** Prints the contents of the buffer. */ 2163 final void print() { 2164 if (DEBUG_BUFFER) { 2165 if (fCurrentEntity != null) { 2166 System.out.print('['); 2167 System.out.print(fCurrentEntity.count); 2168 System.out.print(' '); 2169 System.out.print(fCurrentEntity.position); 2170 if (fCurrentEntity.count > 0) { 2171 System.out.print(" \""); 2172 for (int i = 0; i < fCurrentEntity.count; i++) { 2173 if (i == fCurrentEntity.position) { 2174 System.out.print('^'); 2175 } 2176 char c = fCurrentEntity.ch[i]; 2177 switch (c) { 2178 case '\n': { 2179 System.out.print("\\n"); 2180 break; 2181 } 2182 case '\r': { 2183 System.out.print("\\r"); 2184 break; 2185 } 2186 case '\t': { 2187 System.out.print("\\t"); 2188 break; 2189 } 2190 case '\\': { 2191 System.out.print("\\\\"); 2192 break; 2193 } 2194 default: { 2195 System.out.print(c); 2196 } 2197 } 2198 } 2199 if (fCurrentEntity.position == fCurrentEntity.count) { 2200 System.out.print('^'); 2201 } 2202 System.out.print('"'); 2203 } 2204 System.out.print(']'); 2205 System.out.print(" @ "); 2206 System.out.print(fCurrentEntity.lineNumber); 2207 System.out.print(','); 2208 
System.out.print(fCurrentEntity.columnNumber); 2209 } else { 2210 System.out.print("*NO CURRENT ENTITY*"); 2211 } 2212 } 2213 } 2214 2215 /** 2216 * Registers the listener object and provides callback. 2217 * @param listener listener to which call back should be provided when scanner buffer 2218 * is being changed. 2219 */ 2220 public void registerListener(XMLBufferListener listener) { 2221 if (!listeners.contains(listener)) { 2222 listeners.add(listener); 2223 } 2224 } 2225 2226 /** 2227 * 2228 * @param loadPos Starting position from which new data is being loaded into scanner buffer. 2229 */ 2230 public void invokeListeners(int loadPos){ 2231 for (int i=0; i<listeners.size(); i++) { 2232 listeners.get(i).refresh(loadPos); 2233 } 2234 } 2235 2236 /** 2237 * Skips space characters appearing immediately on the input that would 2238 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line 2239 * normalization is performed. This is useful when scanning structures 2240 * such as the XMLDecl and TextDecl that can only contain US-ASCII 2241 * characters. 2242 * <p> 2243 * <strong>Note:</strong> The characters are consumed only if they would 2244 * match non-terminal S before end of line normalization is performed. 2245 * 2246 * @return Returns true if at least one space character was skipped. 2247 * 2248 * @throws IOException Thrown if i/o error occurs. 2249 * @throws EOFException Thrown on end of file. 
/**
 * Skips space characters appearing immediately on the input that would
 * match non-terminal S (0x09, 0x0A, 0x0D, 0x20) before end of line
 * normalization is performed. This is useful when scanning structures
 * such as the XMLDecl and TextDecl that can only contain US-ASCII
 * characters.
 * <p>
 * <strong>Note:</strong> The characters are consumed only if they would
 * match non-terminal S before end of line normalization is performed.
 * <p>
 * While skipping, the current entity's line and column numbers are kept
 * up to date: a line feed (and, in an external entity, a carriage
 * return) starts a new line, every other space advances the column.
 *
 * @return Returns true if at least one space character was skipped.
 *
 * @throws IOException  Thrown if i/o error occurs.
 * @throws EOFException Thrown on end of file (presumably raised through
 *                      {@code load} when the document entity ends;
 *                      nothing in this method throws it directly).
 *
 * @see com.sun.org.apache.xerces.internal.util.XMLChar#isSpace
 */
protected final boolean skipDeclSpaces() throws IOException {
    if (DEBUG_BUFFER) {
        System.out.print("(skipDeclSpaces: ");
        //XMLEntityManager.print(fCurrentEntity);
        System.out.println();
    }

    // load more characters, if needed
    if (fCurrentEntity.position == fCurrentEntity.count) {
        load(0, true, false);
    }

    // skip spaces
    int c = fCurrentEntity.ch[fCurrentEntity.position];
    if (XMLChar.isSpace(c)) {
        // carriage returns only count as line breaks in external entities
        boolean external = fCurrentEntity.isExternal();
        do {
            // set when the reload below reports the end of the entity
            boolean entityChanged = false;
            // handle newlines
            if (c == '\n' || (external && c == '\r')) {
                fCurrentEntity.lineNumber++;
                fCurrentEntity.columnNumber = 1;
                // the line break is the last buffered character: keep it
                // in slot 0 and refill the buffer behind it
                if (fCurrentEntity.position == fCurrentEntity.count - 1) {
                    fCurrentEntity.ch[0] = (char)c;
                    entityChanged = load(1, true, false);
                    if (!entityChanged)
                        // the load change the position to be 1,
                        // need to restore it when entity not changed
                        fCurrentEntity.position = 0;
                }
                if (c == '\r' && external) {
                    // REVISIT: Does this need to be updated to fix the
                    //          #x0D ^#x0A newline normalization problem? -Ac
                    // peek at the next character: a '\n' right after the
                    // '\r' is consumed with it, anything else is put back
                    if (fCurrentEntity.ch[++fCurrentEntity.position] != '\n') {
                        fCurrentEntity.position--;
                    }
                }
                /*** NEWLINE NORMALIZATION ***
                 * else {
                 *     if (fCurrentEntity.ch[fCurrentEntity.position + 1] == '\r'
                 *         && external) {
                 *         fCurrentEntity.position++;
                 *     }
                 * }
                 * /***/
            } else {
                fCurrentEntity.columnNumber++;
            }
            // load more characters, if needed
            // (the position is only advanced when the reload above did
            // not report the end of the entity)
            if (!entityChanged)
                fCurrentEntity.position++;
            if (fCurrentEntity.position == fCurrentEntity.count) {
                load(0, true, false);
            }
        } while (XMLChar.isSpace(c = fCurrentEntity.ch[fCurrentEntity.position]));
        if (DEBUG_BUFFER) {
            System.out.print(")skipDeclSpaces: ");
            // XMLEntityManager.print(fCurrentEntity);
            System.out.println(" -> true");
        }
        return true;
    }

    // no spaces were found
    if (DEBUG_BUFFER) {
        System.out.print(")skipDeclSpaces: ");
        //XMLEntityManager.print(fCurrentEntity);
        System.out.println(" -> false");
    }
    return false;

} // skipDeclSpaces():boolean


} // class XMLEntityScanner