1/*
2 * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
3 */
4/*
5 * Licensed to the Apache Software Foundation (ASF) under one or more
6 * contributor license agreements.  See the NOTICE file distributed with
7 * this work for additional information regarding copyright ownership.
8 * The ASF licenses this file to You under the Apache License, Version 2.0
9 * (the "License"); you may not use this file except in compliance with
10 * the License.  You may obtain a copy of the License at
11 *
12 *      http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing, software
15 * distributed under the License is distributed on an "AS IS" BASIS,
16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 * See the License for the specific language governing permissions and
18 * limitations under the License.
19 */
20
21package com.sun.org.apache.xml.internal.resolver.readers;
22
23import com.sun.org.apache.xml.internal.resolver.Catalog;
24import com.sun.org.apache.xml.internal.resolver.CatalogException;
25import com.sun.org.apache.xml.internal.resolver.helpers.Namespaces;
26import java.io.IOException;
27import java.io.InputStream;
28import java.net.MalformedURLException;
29import java.net.URL;
30import java.net.URLConnection;
31import java.util.HashMap;
32import java.util.Map;
33import javax.xml.parsers.DocumentBuilder;
34import javax.xml.parsers.DocumentBuilderFactory;
35import javax.xml.parsers.ParserConfigurationException;
36import org.w3c.dom.*;
37import org.xml.sax.SAXException;
38import sun.reflect.misc.ReflectUtil;
39
40/**
41 * A DOM-based CatalogReader.
42 *
43 * <p>This class is used to read XML Catalogs using the DOM. This reader
44 * has an advantage over the SAX-based reader that it can analyze the
45 * DOM tree rather than simply a series of SAX events. It has the disadvantage
46 * that it requires all of the code necessary to build and walk a DOM
47 * tree.</p>
48 *
49 * <p>Since the choice of CatalogReaders (in the InputStream case) can only
50 * be made on the basis of MIME type, the following problem occurs: only
51 * one CatalogReader can exist for all XML mime types. In order to get
52 * around this problem, the DOMCatalogReader relies on a set of external
53 * CatalogParsers to actually build the catalog.</p>
54 *
55 * <p>The selection of CatalogParsers is made on the basis of the QName
56 * of the root element of the document.</p>
57 *
58 *
59 * @see Catalog
60 * @see CatalogReader
61 * @see SAXCatalogReader
62 * @see TextCatalogReader
63 * @see DOMCatalogParser
64 *
65 * @author Norman Walsh
66 * <a href="mailto:Norman.Walsh@Sun.COM">Norman.Walsh@Sun.COM</a>
67 *
68 */
69public class DOMCatalogReader implements CatalogReader {
70  /**
71   * Mapping table from QNames to CatalogParser classes.
72   *
73   * <p>Each key in this hash table has the form "elementname"
74   * or "{namespaceuri}elementname". The former is used if the
75   * namespace URI is null.</p>
76   */
77  protected Map<String, String> namespaceMap = new HashMap<>();
78
79  /**
80   * Add a new parser to the reader.
81   *
82   * <p>This method associates the specified parserClass with the
83   * namespaceURI/rootElement names specified.</p>
84   *
85   * @param namespaceURI The namespace URI. <em>Not</em> the prefix.
86   * @param rootElement The name of the root element.
87   * @param parserClass The name of the parserClass to instantiate
88   * for this kind of catalog.
89   */
90  public void setCatalogParser(String namespaceURI,
91                               String rootElement,
92                               String parserClass) {
93    if (namespaceURI == null) {
94      namespaceMap.put(rootElement, parserClass);
95    } else {
96      namespaceMap.put("{"+namespaceURI+"}"+rootElement, parserClass);
97    }
98  }
99
100  /**
101   * Get the name of the parser class for a given catalog type.
102   *
103   * <p>This method returns the parserClass associated with the
104   * namespaceURI/rootElement names specified.</p>
105   *
106   * @param namespaceURI The namespace URI. <em>Not</em> the prefix.
107   * @param rootElement The name of the root element.
108   * @return The parser class.
109   */
110  public String getCatalogParser(String namespaceURI,
111                                 String rootElement) {
112    if (namespaceURI == null) {
113      return namespaceMap.get(rootElement);
114    } else {
115      return namespaceMap.get("{"+namespaceURI+"}"+rootElement);
116    }
117  }
118
119  /**
120   * Null constructor; something for subclasses to call.
121   */
122  public DOMCatalogReader() { }
123
124  /**
125   * Read a catalog from an input stream.
126   *
127   * <p>This class reads a catalog from an input stream:</p>
128   *
129   * <ul>
130   * <li>Based on the QName of the root element, it determines which
131   * parser to instantiate for this catalog.</li>
132   * <li>It constructs a DOM Document from the catalog and</li>
133   * <li>For each child of the root node, it calls the parser's
134   * parseCatalogEntry method. This method is expected to make
135   * appropriate calls back into the catalog to add entries for the
136   * entries in the catalog. It is free to do this in whatever manner
137   * is appropriate (perhaps using just the node passed in, perhaps
138   * wandering arbitrarily throughout the tree).</li>
139   * </ul>
140   *
141   * @param catalog The catalog for which this reader is called.
142   * @param is The input stream that is to be read.
143   * @throws IOException if the URL cannot be read.
144   * @throws UnknownCatalogFormatException if the catalog format is
145   * not recognized.
146   * @throws UnparseableCatalogException if the catalog cannot be parsed.
147   * (For example, if it is supposed to be XML and isn't well-formed or
148   * if the parser class cannot be instantiated.)
149   */
150  public void readCatalog(Catalog catalog, InputStream is)
151    throws IOException, CatalogException {
152
153    DocumentBuilderFactory factory = null;
154    DocumentBuilder builder = null;
155
156    factory = DocumentBuilderFactory.newInstance();
157    factory.setNamespaceAware(false);
158    factory.setValidating(false);
159    try {
160      builder = factory.newDocumentBuilder();
161    } catch (ParserConfigurationException pce) {
162      throw new CatalogException(CatalogException.UNPARSEABLE);
163    }
164
165    Document doc = null;
166
167    try {
168      doc = builder.parse(is);
169    } catch (SAXException se) {
170      throw new CatalogException(CatalogException.UNKNOWN_FORMAT);
171    }
172
173    Element root = doc.getDocumentElement();
174
175    String namespaceURI = Namespaces.getNamespaceURI(root);
176    String localName    = Namespaces.getLocalName(root);
177
178    String domParserClass = getCatalogParser(namespaceURI,
179                                             localName);
180
181    if (domParserClass == null) {
182      if (namespaceURI == null) {
183        catalog.getCatalogManager().debug.message(1, "No Catalog parser for "
184                                                  + localName);
185      } else {
186        catalog.getCatalogManager().debug.message(1, "No Catalog parser for "
187                                                  + "{" + namespaceURI + "}"
188                                                  + localName);
189      }
190      return;
191    }
192
193    DOMCatalogParser domParser = null;
194
195    try {
196      domParser = (DOMCatalogParser) ReflectUtil.forName(domParserClass).newInstance();
197    } catch (ClassNotFoundException cnfe) {
198      catalog.getCatalogManager().debug.message(1, "Cannot load XML Catalog Parser class", domParserClass);
199      throw new CatalogException(CatalogException.UNPARSEABLE);
200    } catch (InstantiationException ie) {
201      catalog.getCatalogManager().debug.message(1, "Cannot instantiate XML Catalog Parser class", domParserClass);
202      throw new CatalogException(CatalogException.UNPARSEABLE);
203    } catch (IllegalAccessException iae) {
204      catalog.getCatalogManager().debug.message(1, "Cannot access XML Catalog Parser class", domParserClass);
205      throw new CatalogException(CatalogException.UNPARSEABLE);
206    } catch (ClassCastException cce ) {
207      catalog.getCatalogManager().debug.message(1, "Cannot cast XML Catalog Parser class", domParserClass);
208      throw new CatalogException(CatalogException.UNPARSEABLE);
209    }
210
211    Node node = root.getFirstChild();
212    while (node != null) {
213      domParser.parseCatalogEntry(catalog, node);
214      node = node.getNextSibling();
215    }
216  }
217
218  /**
219   * Read the catalog behind the specified URL.
220   *
221   * @see #readCatalog(Catalog, InputStream)
222   *
223   * @param catalog The catalog for which we are reading.
224   * @param fileUrl The URL of the document that should be read.
225   *
226   * @throws MalformedURLException if the specified URL cannot be
227   * turned into a URL object.
228   * @throws IOException if the URL cannot be read.
229   * @throws UnknownCatalogFormatException if the catalog format is
230   * not recognized.
231   * @throws UnparseableCatalogException if the catalog cannot be parsed.
232   * (For example, if it is supposed to be XML and isn't well-formed.)
233   */
234  public void readCatalog(Catalog catalog, String fileUrl)
235    throws MalformedURLException, IOException, CatalogException {
236    URL url = new URL(fileUrl);
237    URLConnection urlCon = url.openConnection();
238    readCatalog(catalog, urlCon.getInputStream());
239  }
240}
241