1/* 2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26package com.sun.tools.internal.xjc.reader.internalizer; 27 28import com.sun.istack.internal.NotNull; 29import com.sun.istack.internal.XMLStreamReaderToContentHandler; 30import com.sun.tools.internal.xjc.ErrorReceiver; 31import com.sun.tools.internal.xjc.Options; 32import com.sun.tools.internal.xjc.reader.Const; 33import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker; 34import com.sun.tools.internal.xjc.util.ErrorReceiverFilter; 35import com.sun.xml.internal.bind.marshaller.DataWriter; 36import com.sun.xml.internal.bind.v2.util.XmlFactory; 37import com.sun.xml.internal.xsom.parser.JAXPParser; 38import com.sun.xml.internal.xsom.parser.XMLParser; 39import org.w3c.dom.Document; 40import org.w3c.dom.Element; 41import org.xml.sax.*; 42import org.xml.sax.helpers.XMLFilterImpl; 43 44import javax.xml.parsers.DocumentBuilder; 45import javax.xml.parsers.DocumentBuilderFactory; 46import javax.xml.parsers.ParserConfigurationException; 47import javax.xml.parsers.SAXParserFactory; 48import javax.xml.stream.XMLStreamException; 49import javax.xml.stream.XMLStreamReader; 50import javax.xml.transform.Source; 51import javax.xml.transform.Transformer; 52import javax.xml.transform.TransformerException; 53import javax.xml.transform.TransformerFactory; 54import javax.xml.transform.dom.DOMSource; 55import javax.xml.transform.sax.SAXResult; 56import javax.xml.transform.sax.SAXSource; 57import javax.xml.validation.SchemaFactory; 58import java.io.IOException; 59import java.io.OutputStream; 60import java.io.OutputStreamWriter; 61import java.util.*; 62 63import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess; 64import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI; 65 66 67/** 68 * Builds a DOM forest and maintains association from 69 * system IDs to DOM trees. 70 * 71 * <p> 72 * A forest is a transitive reflexive closure of referenced documents. 73 * IOW, if a document is in a forest, all the documents referenced from 74 * it is in a forest, too. To support this semantics, {@link DOMForest} 75 * uses {@link InternalizationLogic} to find referenced documents. 76 * 77 * <p> 78 * Some documents are marked as "root"s, meaning those documents were 79 * put into a forest explicitly, not because it is referenced from another 80 * document. (However, a root document can be referenced from other 81 * documents, too.) 82 * 83 * @author 84 * Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com) 85 */ 86public final class DOMForest { 87 /** actual data storage {@code map<SystemId,Document>}. */ 88 private final Map<String,Document> core = new LinkedHashMap<>(); 89 90 /** 91 * To correctly feed documents to a schema parser, we need to remember 92 * which documents (of the forest) were given as the root 93 * documents, and which of them are read as included/imported 94 * documents. 95 * 96 * <p> 97 * Set of system ids as strings. 98 */ 99 private final Set<String> rootDocuments = new LinkedHashSet<String>(); 100 101 /** Stores location information for all the trees in this forest. */ 102 public final LocatorTable locatorTable = new LocatorTable(); 103 104 /** Stores all the outer-most {@code <jaxb:bindings>} customizations. */ 105 public final Set<Element> outerMostBindings = new HashSet<Element>(); 106 107 /** Used to resolve references to other schema documents. */ 108 private EntityResolver entityResolver = null; 109 110 /** Errors encountered during the parsing will be sent to this object. */ 111 private ErrorReceiver errorReceiver = null; 112 113 /** Schema language dependent part of the processing. */ 114 protected final InternalizationLogic logic; 115 116 private final SAXParserFactory parserFactory; 117 private final DocumentBuilder documentBuilder; 118 119 private final Options options; 120 121 public DOMForest( 122 SAXParserFactory parserFactory, DocumentBuilder documentBuilder, 123 InternalizationLogic logic ) { 124 125 this.parserFactory = parserFactory; 126 this.documentBuilder = documentBuilder; 127 this.logic = logic; 128 this.options = null; 129 } 130 131 public DOMForest( InternalizationLogic logic, Options opt ) { 132 133 if (opt == null) throw new AssertionError("Options object null"); 134 this.options = opt; 135 136 try { 137 DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity); 138 this.documentBuilder = dbf.newDocumentBuilder(); 139 this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity); 140 } catch( ParserConfigurationException e ) { 141 throw new AssertionError(e); 142 } 143 144 this.logic = logic; 145 } 146 147 /** 148 * Gets the DOM tree associated with the specified system ID, 149 * or null if none is found. 150 */ 151 public Document get( String systemId ) { 152 Document doc = core.get(systemId); 153 154 if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { 155 // As of JDK1.4, java.net.URL.toExternal method returns URLs like 156 // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. 157 // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), 158 // and this descripancy breaks DOM look up by system ID. 159 160 // this extra check solves this problem. 161 doc = core.get( "file://"+systemId.substring(5) ); 162 } 163 164 if( doc==null && systemId.startsWith("file:") ) { 165 // on Windows, filenames are case insensitive. 166 // perform case-insensitive search for improved user experience 167 String systemPath = getPath(systemId); 168 for (String key : core.keySet()) { 169 if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { 170 doc = core.get(key); 171 break; 172 } 173 } 174 } 175 176 return doc; 177 } 178 179 /** 180 * Strips off the leading 'file:///' portion from an URL. 181 */ 182 private String getPath(String key) { 183 key = key.substring(5); // skip 'file:' 184 while(key.length()>0 && key.charAt(0)=='/') { 185 key = key.substring(1); 186 } 187 return key; 188 } 189 190 /** 191 * Returns a read-only set of root document system IDs. 192 */ 193 public Set<String> getRootDocuments() { 194 return Collections.unmodifiableSet(rootDocuments); 195 } 196 197 /** 198 * Picks one document at random and returns it. 199 */ 200 public Document getOneDocument() { 201 for (Document dom : core.values()) { 202 if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 203 return dom; 204 } 205 // we should have caught this error very early on 206 throw new AssertionError(); 207 } 208 209 /** 210 * Checks the correctness of the XML Schema documents and return true 211 * if it's OK. 212 * 213 * <p> 214 * This method performs a weaker version of the tests where error messages 215 * are provided without line number information. So whenever possible 216 * use {@link SchemaConstraintChecker}. 217 * 218 * @see SchemaConstraintChecker 219 */ 220 public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { 221 try { 222 boolean disableXmlSecurity = false; 223 if (options != null) { 224 disableXmlSecurity = options.disableXmlSecurity; 225 } 226 SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity); 227 ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); 228 sf.setErrorHandler(filter); 229 Set<String> roots = getRootDocuments(); 230 Source[] sources = new Source[roots.size()]; 231 int i=0; 232 for (String root : roots) { 233 sources[i++] = new DOMSource(get(root),root); 234 } 235 sf.newSchema(sources); 236 return !filter.hadError(); 237 } catch (SAXException e) { 238 // the errors should have been reported 239 return false; 240 } 241 } 242 243 /** 244 * Gets the system ID from which the given DOM is parsed. 245 * <p> 246 * Poor-man's base URI. 247 */ 248 public String getSystemId( Document dom ) { 249 for (Map.Entry<String,Document> e : core.entrySet()) { 250 if (e.getValue() == dom) 251 return e.getKey(); 252 } 253 return null; 254 } 255 256 public Document parse( InputSource source, boolean root ) throws SAXException { 257 if( source.getSystemId()==null ) 258 throw new IllegalArgumentException(); 259 260 return parse( source.getSystemId(), source, root ); 261 } 262 263 /** 264 * Parses an XML at the given location ( 265 * and XMLs referenced by it) into DOM trees 266 * and stores them to this forest. 267 * 268 * @return the parsed DOM document object. 269 */ 270 public Document parse( String systemId, boolean root ) throws SAXException, IOException { 271 272 systemId = Options.normalizeSystemId(systemId); 273 274 if( core.containsKey(systemId) ) 275 // this document has already been parsed. Just ignore. 276 return core.get(systemId); 277 278 InputSource is=null; 279 280 // allow entity resolver to find the actual byte stream. 281 if( entityResolver!=null ) 282 is = entityResolver.resolveEntity(null,systemId); 283 if( is==null ) 284 is = new InputSource(systemId); 285 286 // but we still use the original system Id as the key. 287 return parse( systemId, is, root ); 288 } 289 290 /** 291 * Returns a {@link ContentHandler} to feed SAX events into. 292 * 293 * <p> 294 * The client of this class can feed SAX events into the handler 295 * to parse a document into this DOM forest. 296 * 297 * This version requires that the DOM object to be created and registered 298 * to the map beforehand. 299 */ 300 private ContentHandler getParserHandler( Document dom ) { 301 ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); 302 handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); 303 handler = new VersionChecker(handler,errorReceiver,entityResolver); 304 305 // insert the reference finder so that 306 // included/imported schemas will be also parsed 307 XMLFilterImpl f = logic.createExternalReferenceFinder(this); 308 f.setContentHandler(handler); 309 310 if(errorReceiver!=null) 311 f.setErrorHandler(errorReceiver); 312 if(entityResolver!=null) 313 f.setEntityResolver(entityResolver); 314 315 return f; 316 } 317 318 public interface Handler extends ContentHandler { 319 /** 320 * Gets the DOM that was built. 321 */ 322 public Document getDocument(); 323 } 324 325 private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { 326 } 327 328 /** 329 * Returns a {@link ContentHandler} to feed SAX events into. 330 * 331 * <p> 332 * The client of this class can feed SAX events into the handler 333 * to parse a document into this DOM forest. 334 */ 335 public Handler getParserHandler( String systemId, boolean root ) { 336 final Document dom = documentBuilder.newDocument(); 337 core.put( systemId, dom ); 338 if(root) 339 rootDocuments.add(systemId); 340 341 ContentHandler handler = getParserHandler(dom); 342 343 // we will register the DOM to the map once the system ID becomes available. 344 // but the SAX allows the event source to not to provide that information, 345 // so be prepared for such case. 346 HandlerImpl x = new HandlerImpl() { 347 public Document getDocument() { 348 return dom; 349 } 350 }; 351 x.setContentHandler(handler); 352 353 return x; 354 } 355 356 /** 357 * Parses the given document and add it to the DOM forest. 358 * 359 * @return 360 * null if there was a parse error. otherwise non-null. 361 */ 362 public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { 363 Document dom = documentBuilder.newDocument(); 364 365 systemId = Options.normalizeSystemId(systemId); 366 367 // put into the map before growing a tree, to 368 // prevent recursive reference from causing infinite loop. 369 core.put( systemId, dom ); 370 if(root) 371 rootDocuments.add(systemId); 372 373 try { 374 XMLReader reader = parserFactory.newSAXParser().getXMLReader(); 375 reader.setContentHandler(getParserHandler(dom)); 376 if(errorReceiver!=null) 377 reader.setErrorHandler(errorReceiver); 378 if(entityResolver!=null) 379 reader.setEntityResolver(entityResolver); 380 reader.parse(inputSource); 381 } catch( ParserConfigurationException e ) { 382 // in practice, this exception won't happen. 383 errorReceiver.error(e.getMessage(),e); 384 core.remove(systemId); 385 rootDocuments.remove(systemId); 386 return null; 387 } catch( IOException e ) { 388 errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); 389 core.remove(systemId); 390 rootDocuments.remove(systemId); 391 return null; 392 } 393 394 return dom; 395 } 396 397 public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { 398 Document dom = documentBuilder.newDocument(); 399 400 systemId = Options.normalizeSystemId(systemId); 401 402 if(root) 403 rootDocuments.add(systemId); 404 405 if(systemId==null) 406 throw new IllegalArgumentException("system id cannot be null"); 407 core.put( systemId, dom ); 408 409 new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); 410 411 return dom; 412 } 413 414 /** 415 * Performs internalization. 416 * 417 * This method should be called only once, only after all the 418 * schemas are parsed. 419 * 420 * @return 421 * the returned bindings need to be applied after schema 422 * components are built. 423 */ 424 public SCDBasedBindingSet transform(boolean enableSCD) { 425 return Internalizer.transform(this, enableSCD, options.disableXmlSecurity); 426 } 427 428 /** 429 * Performs the schema correctness check by using JAXP 1.3. 430 * 431 * <p> 432 * This is "weak", because {@link SchemaFactory#newSchema(Source[])} 433 * doesn't handle inclusions very correctly (it ends up parsing it 434 * from its original source, not in this tree), and because 435 * it doesn't handle two documents for the same namespace very 436 * well. 437 * 438 * <p> 439 * We should eventually fix JAXP (and Xerces), but meanwhile 440 * this weaker and potentially wrong correctness check is still 441 * better than nothing when used inside JAX-WS (JAXB CLI and Ant 442 * does a better job of checking this.) 443 * 444 * <p> 445 * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. 446 */ 447 public void weakSchemaCorrectnessCheck(SchemaFactory sf) { 448 List<SAXSource> sources = new ArrayList<SAXSource>(); 449 for( String systemId : getRootDocuments() ) { 450 Document dom = get(systemId); 451 if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) 452 continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error 453 454 SAXSource ss = createSAXSource(systemId); 455 try { 456 ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); 457 } catch (SAXException e) { 458 throw new AssertionError(e); // Xerces wants this. See 6395322. 459 } 460 sources.add(ss); 461 } 462 463 try { 464 allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0])); 465 } catch (SAXException e) { 466 // error should have been reported. 467 } catch (RuntimeException re) { 468 // JAXP RI isn't very trustworthy when it comes to schema error check, 469 // and we know some cases where it just dies with NPE. So handle it gracefully. 470 // this masks a bug in the JAXP RI, but we need a release that we have to make. 471 try { 472 sf.getErrorHandler().warning( 473 new SAXParseException(Messages.format( 474 Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()), 475 null,null,-1,-1,re)); 476 } catch (SAXException e) { 477 // ignore 478 } 479 } 480 } 481 482 /** 483 * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} 484 * (instead of parsing the original source identified by the system ID.) 485 */ 486 public @NotNull SAXSource createSAXSource(String systemId) { 487 ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { 488 // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect 489 // handlers, since SAX allows handlers to be changed while parsing. 490 @Override 491 public void parse(InputSource input) throws SAXException, IOException { 492 createParser().parse(input, this, this, this); 493 } 494 495 @Override 496 public void parse(String systemId) throws SAXException, IOException { 497 parse(new InputSource(systemId)); 498 } 499 }); 500 501 return new SAXSource(reader,new InputSource(systemId)); 502 } 503 504 /** 505 * Creates {@link XMLParser} for XSOM which reads documents from 506 * this DOMForest rather than doing a fresh parse. 507 * 508 * The net effect is that XSOM will read transformed XML Schemas 509 * instead of the original documents. 510 */ 511 public XMLParser createParser() { 512 return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity))); 513 } 514 515 public EntityResolver getEntityResolver() { 516 return entityResolver; 517 } 518 519 public void setEntityResolver(EntityResolver entityResolver) { 520 this.entityResolver = entityResolver; 521 } 522 523 public ErrorReceiver getErrorHandler() { 524 return errorReceiver; 525 } 526 527 public void setErrorHandler(ErrorReceiver errorHandler) { 528 this.errorReceiver = errorHandler; 529 } 530 531 /** 532 * Gets all the parsed documents. 533 */ 534 public Document[] listDocuments() { 535 return core.values().toArray(new Document[core.size()]); 536 } 537 538 /** 539 * Gets all the system IDs of the documents. 540 */ 541 public String[] listSystemIDs() { 542 return core.keySet().toArray(new String[core.keySet().size()]); 543 } 544 545 /** 546 * Dumps the contents of the forest to the specified stream. 547 * 548 * This is a debug method. As such, error handling is sloppy. 549 */ 550 @SuppressWarnings("CallToThreadDumpStack") 551 public void dump( OutputStream out ) throws IOException { 552 try { 553 // create identity transformer 554 boolean disableXmlSecurity = false; 555 if (options != null) { 556 disableXmlSecurity = options.disableXmlSecurity; 557 } 558 TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity); 559 Transformer it = tf.newTransformer(); 560 561 for (Map.Entry<String, Document> e : core.entrySet()) { 562 out.write( ("---<< "+e.getKey()+'\n').getBytes() ); 563 564 DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); 565 dw.setIndentStep(" "); 566 it.transform( new DOMSource(e.getValue()), 567 new SAXResult(dw)); 568 569 out.write( "\n\n\n".getBytes() ); 570 } 571 } catch( TransformerException e ) { 572 e.printStackTrace(); 573 } 574 } 575} 576