1/*
2 * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.  Oracle designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Oracle in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
22 * or visit www.oracle.com if you need additional information or have any
23 * questions.
24 */
25
26package com.sun.tools.internal.xjc.reader.internalizer;
27
28import com.sun.istack.internal.NotNull;
29import com.sun.istack.internal.XMLStreamReaderToContentHandler;
30import com.sun.tools.internal.xjc.ErrorReceiver;
31import com.sun.tools.internal.xjc.Options;
32import com.sun.tools.internal.xjc.reader.Const;
33import com.sun.tools.internal.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
34import com.sun.tools.internal.xjc.util.ErrorReceiverFilter;
35import com.sun.xml.internal.bind.marshaller.DataWriter;
36import com.sun.xml.internal.bind.v2.util.XmlFactory;
37import com.sun.xml.internal.xsom.parser.JAXPParser;
38import com.sun.xml.internal.xsom.parser.XMLParser;
39import org.w3c.dom.Document;
40import org.w3c.dom.Element;
41import org.xml.sax.*;
42import org.xml.sax.helpers.XMLFilterImpl;
43
44import javax.xml.parsers.DocumentBuilder;
45import javax.xml.parsers.DocumentBuilderFactory;
46import javax.xml.parsers.ParserConfigurationException;
47import javax.xml.parsers.SAXParserFactory;
48import javax.xml.stream.XMLStreamException;
49import javax.xml.stream.XMLStreamReader;
50import javax.xml.transform.Source;
51import javax.xml.transform.Transformer;
52import javax.xml.transform.TransformerException;
53import javax.xml.transform.TransformerFactory;
54import javax.xml.transform.dom.DOMSource;
55import javax.xml.transform.sax.SAXResult;
56import javax.xml.transform.sax.SAXSource;
57import javax.xml.validation.SchemaFactory;
58import java.io.IOException;
59import java.io.OutputStream;
60import java.io.OutputStreamWriter;
61import java.util.*;
62
63import static com.sun.xml.internal.bind.v2.util.XmlFactory.allowExternalAccess;
64import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
65
66
67/**
68 * Builds a DOM forest and maintains association from
69 * system IDs to DOM trees.
70 *
71 * <p>
72 * A forest is a transitive reflexive closure of referenced documents.
73 * IOW, if a document is in a forest, all the documents referenced from
74 * it is in a forest, too. To support this semantics, {@link DOMForest}
75 * uses {@link InternalizationLogic} to find referenced documents.
76 *
77 * <p>
78 * Some documents are marked as "root"s, meaning those documents were
79 * put into a forest explicitly, not because it is referenced from another
80 * document. (However, a root document can be referenced from other
81 * documents, too.)
82 *
83 * @author
84 *     Kohsuke Kawaguchi (kohsuke.kawaguchi@sun.com)
85 */
86public final class DOMForest {
87    /** actual data storage {@code map<SystemId,Document>}. */
88    private final Map<String,Document> core = new LinkedHashMap<>();
89
90    /**
91     * To correctly feed documents to a schema parser, we need to remember
92     * which documents (of the forest) were given as the root
93     * documents, and which of them are read as included/imported
94     * documents.
95     *
96     * <p>
97     * Set of system ids as strings.
98     */
99    private final Set<String> rootDocuments = new LinkedHashSet<String>();
100
101    /** Stores location information for all the trees in this forest. */
102    public final LocatorTable locatorTable = new LocatorTable();
103
104    /** Stores all the outer-most {@code <jaxb:bindings>} customizations. */
105    public final Set<Element> outerMostBindings = new HashSet<Element>();
106
107    /** Used to resolve references to other schema documents. */
108    private EntityResolver entityResolver = null;
109
110    /** Errors encountered during the parsing will be sent to this object. */
111    private ErrorReceiver errorReceiver = null;
112
113    /** Schema language dependent part of the processing. */
114    protected final InternalizationLogic logic;
115
116    private final SAXParserFactory parserFactory;
117    private final DocumentBuilder documentBuilder;
118
119    private final Options options;
120
121    public DOMForest(
122        SAXParserFactory parserFactory, DocumentBuilder documentBuilder,
123        InternalizationLogic logic ) {
124
125        this.parserFactory = parserFactory;
126        this.documentBuilder = documentBuilder;
127        this.logic = logic;
128        this.options = null;
129    }
130
131    public DOMForest( InternalizationLogic logic, Options opt ) {
132
133        if (opt == null) throw new AssertionError("Options object null");
134        this.options = opt;
135
136        try {
137            DocumentBuilderFactory dbf = XmlFactory.createDocumentBuilderFactory(opt.disableXmlSecurity);
138            this.documentBuilder = dbf.newDocumentBuilder();
139            this.parserFactory = XmlFactory.createParserFactory(opt.disableXmlSecurity);
140        } catch( ParserConfigurationException e ) {
141            throw new AssertionError(e);
142        }
143
144        this.logic = logic;
145    }
146
147    /**
148     * Gets the DOM tree associated with the specified system ID,
149     * or null if none is found.
150     */
151    public Document get( String systemId ) {
152        Document doc = core.get(systemId);
153
154        if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) {
155            // As of JDK1.4, java.net.URL.toExternal method returns URLs like
156            // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738.
157            // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"),
158            // and this descripancy breaks DOM look up by system ID.
159
160            // this extra check solves this problem.
161            doc = core.get( "file://"+systemId.substring(5) );
162        }
163
164        if( doc==null && systemId.startsWith("file:") ) {
165            // on Windows, filenames are case insensitive.
166            // perform case-insensitive search for improved user experience
167            String systemPath = getPath(systemId);
168            for (String key : core.keySet()) {
169                if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) {
170                    doc = core.get(key);
171                    break;
172                }
173            }
174        }
175
176        return doc;
177    }
178
179    /**
180     * Strips off the leading 'file:///' portion from an URL.
181     */
182    private String getPath(String key) {
183        key = key.substring(5); // skip 'file:'
184        while(key.length()>0 && key.charAt(0)=='/') {
185            key = key.substring(1);
186        }
187        return key;
188    }
189
190    /**
191     * Returns a read-only set of root document system IDs.
192     */
193    public Set<String> getRootDocuments() {
194        return Collections.unmodifiableSet(rootDocuments);
195    }
196
197    /**
198     * Picks one document at random and returns it.
199     */
200    public Document getOneDocument() {
201        for (Document dom : core.values()) {
202            if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
203                return dom;
204        }
205        // we should have caught this error very early on
206        throw new AssertionError();
207    }
208
209    /**
210     * Checks the correctness of the XML Schema documents and return true
211     * if it's OK.
212     *
213     * <p>
214     * This method performs a weaker version of the tests where error messages
215     * are provided without line number information. So whenever possible
216     * use {@link SchemaConstraintChecker}.
217     *
218     * @see SchemaConstraintChecker
219     */
220    public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) {
221        try {
222            boolean disableXmlSecurity = false;
223            if (options != null) {
224                disableXmlSecurity = options.disableXmlSecurity;
225            }
226            SchemaFactory sf = XmlFactory.createSchemaFactory(W3C_XML_SCHEMA_NS_URI, disableXmlSecurity);
227            ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler);
228            sf.setErrorHandler(filter);
229            Set<String> roots = getRootDocuments();
230            Source[] sources = new Source[roots.size()];
231            int i=0;
232            for (String root : roots) {
233                sources[i++] = new DOMSource(get(root),root);
234            }
235            sf.newSchema(sources);
236            return !filter.hadError();
237        } catch (SAXException e) {
238            // the errors should have been reported
239            return false;
240        }
241    }
242
243    /**
244     * Gets the system ID from which the given DOM is parsed.
245     * <p>
246     * Poor-man's base URI.
247     */
248    public String getSystemId( Document dom ) {
249        for (Map.Entry<String,Document> e : core.entrySet()) {
250            if (e.getValue() == dom)
251                return e.getKey();
252        }
253        return null;
254    }
255
256    public Document parse( InputSource source, boolean root ) throws SAXException {
257        if( source.getSystemId()==null )
258            throw new IllegalArgumentException();
259
260        return parse( source.getSystemId(), source, root );
261    }
262
263    /**
264     * Parses an XML at the given location (
265     * and XMLs referenced by it) into DOM trees
266     * and stores them to this forest.
267     *
268     * @return the parsed DOM document object.
269     */
270    public Document parse( String systemId, boolean root ) throws SAXException, IOException {
271
272        systemId = Options.normalizeSystemId(systemId);
273
274        if( core.containsKey(systemId) )
275            // this document has already been parsed. Just ignore.
276            return core.get(systemId);
277
278        InputSource is=null;
279
280        // allow entity resolver to find the actual byte stream.
281        if( entityResolver!=null )
282            is = entityResolver.resolveEntity(null,systemId);
283        if( is==null )
284            is = new InputSource(systemId);
285
286        // but we still use the original system Id as the key.
287        return parse( systemId, is, root );
288    }
289
290    /**
291     * Returns a {@link ContentHandler} to feed SAX events into.
292     *
293     * <p>
294     * The client of this class can feed SAX events into the handler
295     * to parse a document into this DOM forest.
296     *
297     * This version requires that the DOM object to be created and registered
298     * to the map beforehand.
299     */
300    private ContentHandler getParserHandler( Document dom ) {
301        ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings);
302        handler = new WhitespaceStripper(handler,errorReceiver,entityResolver);
303        handler = new VersionChecker(handler,errorReceiver,entityResolver);
304
305        // insert the reference finder so that
306        // included/imported schemas will be also parsed
307        XMLFilterImpl f = logic.createExternalReferenceFinder(this);
308        f.setContentHandler(handler);
309
310        if(errorReceiver!=null)
311            f.setErrorHandler(errorReceiver);
312        if(entityResolver!=null)
313            f.setEntityResolver(entityResolver);
314
315        return f;
316    }
317
318    public interface Handler extends ContentHandler {
319        /**
320         * Gets the DOM that was built.
321         */
322        public Document getDocument();
323    }
324
325    private static abstract class HandlerImpl extends XMLFilterImpl implements Handler {
326    }
327
328    /**
329     * Returns a {@link ContentHandler} to feed SAX events into.
330     *
331     * <p>
332     * The client of this class can feed SAX events into the handler
333     * to parse a document into this DOM forest.
334     */
335    public Handler getParserHandler( String systemId, boolean root ) {
336        final Document dom = documentBuilder.newDocument();
337        core.put( systemId, dom );
338        if(root)
339            rootDocuments.add(systemId);
340
341        ContentHandler handler = getParserHandler(dom);
342
343        // we will register the DOM to the map once the system ID becomes available.
344        // but the SAX allows the event source to not to provide that information,
345        // so be prepared for such case.
346        HandlerImpl x = new HandlerImpl() {
347            public Document getDocument() {
348                return dom;
349            }
350        };
351        x.setContentHandler(handler);
352
353        return x;
354   }
355
356    /**
357     * Parses the given document and add it to the DOM forest.
358     *
359     * @return
360     *      null if there was a parse error. otherwise non-null.
361     */
362    public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException {
363        Document dom = documentBuilder.newDocument();
364
365        systemId = Options.normalizeSystemId(systemId);
366
367        // put into the map before growing a tree, to
368        // prevent recursive reference from causing infinite loop.
369        core.put( systemId, dom );
370        if(root)
371            rootDocuments.add(systemId);
372
373        try {
374            XMLReader reader = parserFactory.newSAXParser().getXMLReader();
375            reader.setContentHandler(getParserHandler(dom));
376            if(errorReceiver!=null)
377                reader.setErrorHandler(errorReceiver);
378            if(entityResolver!=null)
379                reader.setEntityResolver(entityResolver);
380            reader.parse(inputSource);
381        } catch( ParserConfigurationException e ) {
382            // in practice, this exception won't happen.
383            errorReceiver.error(e.getMessage(),e);
384            core.remove(systemId);
385            rootDocuments.remove(systemId);
386            return null;
387        } catch( IOException e ) {
388            errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e);
389            core.remove(systemId);
390            rootDocuments.remove(systemId);
391            return null;
392        }
393
394        return dom;
395    }
396
397    public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException {
398        Document dom = documentBuilder.newDocument();
399
400        systemId = Options.normalizeSystemId(systemId);
401
402        if(root)
403            rootDocuments.add(systemId);
404
405        if(systemId==null)
406            throw new IllegalArgumentException("system id cannot be null");
407        core.put( systemId, dom );
408
409        new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge();
410
411        return dom;
412    }
413
414    /**
415     * Performs internalization.
416     *
417     * This method should be called only once, only after all the
418     * schemas are parsed.
419     *
420     * @return
421     *      the returned bindings need to be applied after schema
422     *      components are built.
423     */
424    public SCDBasedBindingSet transform(boolean enableSCD) {
425        return Internalizer.transform(this, enableSCD, options.disableXmlSecurity);
426    }
427
428    /**
429     * Performs the schema correctness check by using JAXP 1.3.
430     *
431     * <p>
432     * This is "weak", because {@link SchemaFactory#newSchema(Source[])}
433     * doesn't handle inclusions very correctly (it ends up parsing it
434     * from its original source, not in this tree), and because
435     * it doesn't handle two documents for the same namespace very
436     * well.
437     *
438     * <p>
439     * We should eventually fix JAXP (and Xerces), but meanwhile
440     * this weaker and potentially wrong correctness check is still
441     * better than nothing when used inside JAX-WS (JAXB CLI and Ant
442     * does a better job of checking this.)
443     *
444     * <p>
445     * To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}.
446     */
447    public void weakSchemaCorrectnessCheck(SchemaFactory sf) {
448        List<SAXSource> sources = new ArrayList<SAXSource>();
449        for( String systemId : getRootDocuments() ) {
450            Document dom = get(systemId);
451            if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI))
452                continue;   // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error
453
454            SAXSource ss = createSAXSource(systemId);
455            try {
456                ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true);
457            } catch (SAXException e) {
458                throw new AssertionError(e);    // Xerces wants this. See 6395322.
459            }
460            sources.add(ss);
461        }
462
463        try {
464            allowExternalAccess(sf, "file,http", options.disableXmlSecurity).newSchema(sources.toArray(new SAXSource[0]));
465        } catch (SAXException e) {
466            // error should have been reported.
467        } catch (RuntimeException re) {
468            // JAXP RI isn't very trustworthy when it comes to schema error check,
469            // and we know some cases where it just dies with NPE. So handle it gracefully.
470            // this masks a bug in the JAXP RI, but we need a release that we have to make.
471            try {
472                sf.getErrorHandler().warning(
473                    new SAXParseException(Messages.format(
474                        Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,re.getMessage()),
475                        null,null,-1,-1,re));
476            } catch (SAXException e) {
477                // ignore
478            }
479        }
480    }
481
482    /**
483     * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest}
484     * (instead of parsing the original source identified by the system ID.)
485     */
486    public @NotNull SAXSource createSAXSource(String systemId) {
487        ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() {
488            // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect
489            // handlers, since SAX allows handlers to be changed while parsing.
490            @Override
491            public void parse(InputSource input) throws SAXException, IOException {
492                createParser().parse(input, this, this, this);
493            }
494
495            @Override
496            public void parse(String systemId) throws SAXException, IOException {
497                parse(new InputSource(systemId));
498            }
499        });
500
501        return new SAXSource(reader,new InputSource(systemId));
502    }
503
504    /**
505     * Creates {@link XMLParser} for XSOM which reads documents from
506     * this DOMForest rather than doing a fresh parse.
507     *
508     * The net effect is that XSOM will read transformed XML Schemas
509     * instead of the original documents.
510     */
511    public XMLParser createParser() {
512        return new DOMForestParser(this, new JAXPParser(XmlFactory.createParserFactory(options.disableXmlSecurity)));
513    }
514
515    public EntityResolver getEntityResolver() {
516        return entityResolver;
517    }
518
519    public void setEntityResolver(EntityResolver entityResolver) {
520        this.entityResolver = entityResolver;
521    }
522
523    public ErrorReceiver getErrorHandler() {
524        return errorReceiver;
525    }
526
527    public void setErrorHandler(ErrorReceiver errorHandler) {
528        this.errorReceiver = errorHandler;
529    }
530
531    /**
532     * Gets all the parsed documents.
533     */
534    public Document[] listDocuments() {
535        return core.values().toArray(new Document[core.size()]);
536    }
537
538    /**
539     * Gets all the system IDs of the documents.
540     */
541    public String[] listSystemIDs() {
542        return core.keySet().toArray(new String[core.keySet().size()]);
543    }
544
545    /**
546     * Dumps the contents of the forest to the specified stream.
547     *
548     * This is a debug method. As such, error handling is sloppy.
549     */
550    @SuppressWarnings("CallToThreadDumpStack")
551    public void dump( OutputStream out ) throws IOException {
552        try {
553            // create identity transformer
554            boolean disableXmlSecurity = false;
555            if (options != null) {
556                disableXmlSecurity = options.disableXmlSecurity;
557            }
558            TransformerFactory tf = XmlFactory.createTransformerFactory(disableXmlSecurity);
559            Transformer it = tf.newTransformer();
560
561            for (Map.Entry<String, Document> e : core.entrySet()) {
562                out.write( ("---<< "+e.getKey()+'\n').getBytes() );
563
564                DataWriter dw = new DataWriter(new OutputStreamWriter(out),null);
565                dw.setIndentStep("  ");
566                it.transform( new DOMSource(e.getValue()),
567                    new SAXResult(dw));
568
569                out.write( "\n\n\n".getBytes() );
570            }
571        } catch( TransformerException e ) {
572            e.printStackTrace();
573        }
574    }
575}
576