1/* 2 * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. 3 */ 4/* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20package com.sun.org.apache.xml.internal.utils; 21 22import com.sun.org.apache.xalan.internal.utils.SecuritySupport; 23import java.io.File; 24 25import org.w3c.dom.Comment; 26import org.w3c.dom.Element; 27import org.w3c.dom.EntityReference; 28import org.w3c.dom.NamedNodeMap; 29import org.w3c.dom.Node; 30import org.w3c.dom.ProcessingInstruction; 31import org.w3c.dom.Text; 32 33import org.xml.sax.ContentHandler; 34import org.xml.sax.Locator; 35import org.xml.sax.ext.LexicalHandler; 36import org.xml.sax.helpers.LocatorImpl; 37 38/** 39 * This class does a pre-order walk of the DOM tree, calling a ContentHandler 40 * interface as it goes. 41 * @xsl.usage advanced 42 */ 43 44public class TreeWalker 45{ 46 47 /** Local reference to a ContentHandler */ 48 private ContentHandler m_contentHandler = null; 49 50 // ARGHH!! JAXP Uses Xerces without setting the namespace processing to ON! 51 // DOM2Helper m_dh = new DOM2Helper(); 52 53 /** DomHelper for this TreeWalker */ 54 protected DOMHelper m_dh; 55 56 /** Locator object for this TreeWalker */ 57 private LocatorImpl m_locator = new LocatorImpl(); 58 59 /** 60 * Get the ContentHandler used for the tree walk. 61 * 62 * @return the ContentHandler used for the tree walk 63 */ 64 public ContentHandler getContentHandler() 65 { 66 return m_contentHandler; 67 } 68 69 /** 70 * Get the ContentHandler used for the tree walk. 71 * 72 * @return the ContentHandler used for the tree walk 73 */ 74 public void setContentHandler(ContentHandler ch) 75 { 76 m_contentHandler = ch; 77 } 78 79 /** 80 * Constructor. 81 * @param contentHandler The implementation of the 82 * @param systemId System identifier for the document. 83 * contentHandler operation (toXMLString, digest, ...) 84 */ 85 public TreeWalker(ContentHandler contentHandler, DOMHelper dh, String systemId) 86 { 87 this.m_contentHandler = contentHandler; 88 m_contentHandler.setDocumentLocator(m_locator); 89 if (systemId != null) { 90 m_locator.setSystemId(systemId); 91 } 92 m_dh = dh; 93 } 94 95 /** 96 * Constructor. 97 * @param contentHandler The implementation of the 98 * contentHandler operation (toXMLString, digest, ...) 99 */ 100 public TreeWalker(ContentHandler contentHandler, DOMHelper dh) 101 { 102 this.m_contentHandler = contentHandler; 103 m_contentHandler.setDocumentLocator(m_locator); 104 m_dh = dh; 105 } 106 107 /** 108 * Constructor. 109 * @param contentHandler The implementation of the 110 * contentHandler operation (toXMLString, digest, ...) 111 */ 112 public TreeWalker(ContentHandler contentHandler) 113 { 114 this.m_contentHandler = contentHandler; 115 if (m_contentHandler != null) { 116 m_contentHandler.setDocumentLocator(m_locator); 117 } 118 m_dh = new DOM2Helper(); 119 } 120 121 /** 122 * Perform a pre-order traversal non-recursive style. 123 * 124 * Note that TreeWalker assumes that the subtree is intended to represent 125 * a complete (though not necessarily well-formed) document and, during a 126 * traversal, startDocument and endDocument will always be issued to the 127 * SAX listener. 128 * 129 * @param pos Node in the tree where to start traversal 130 * 131 * @throws TransformerException 132 */ 133 public void traverse(Node pos) throws org.xml.sax.SAXException 134 { 135 this.m_contentHandler.startDocument(); 136 137 traverseFragment(pos); 138 139 this.m_contentHandler.endDocument(); 140 } 141 142 /** 143 * Perform a pre-order traversal non-recursive style. 144 * 145 * In contrast to the traverse() method this method will not issue 146 * startDocument() and endDocument() events to the SAX listener. 147 * 148 * @param pos Node in the tree where to start traversal 149 * 150 * @throws TransformerException 151 */ 152 public void traverseFragment(Node pos) throws org.xml.sax.SAXException 153 { 154 Node top = pos; 155 156 while (null != pos) 157 { 158 startNode(pos); 159 160 Node nextNode = pos.getFirstChild(); 161 162 while (null == nextNode) 163 { 164 endNode(pos); 165 166 if (top.equals(pos)) 167 break; 168 169 nextNode = pos.getNextSibling(); 170 171 if (null == nextNode) 172 { 173 pos = pos.getParentNode(); 174 175 if ((null == pos) || (top.equals(pos))) 176 { 177 if (null != pos) 178 endNode(pos); 179 180 nextNode = null; 181 182 break; 183 } 184 } 185 } 186 187 pos = nextNode; 188 } 189 } 190 191 /** 192 * Perform a pre-order traversal non-recursive style. 193 194 * Note that TreeWalker assumes that the subtree is intended to represent 195 * a complete (though not necessarily well-formed) document and, during a 196 * traversal, startDocument and endDocument will always be issued to the 197 * SAX listener. 198 * 199 * @param pos Node in the tree where to start traversal 200 * @param top Node in the tree where to end traversal 201 * 202 * @throws TransformerException 203 */ 204 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException 205 { 206 207 this.m_contentHandler.startDocument(); 208 209 while (null != pos) 210 { 211 startNode(pos); 212 213 Node nextNode = pos.getFirstChild(); 214 215 while (null == nextNode) 216 { 217 endNode(pos); 218 219 if ((null != top) && top.equals(pos)) 220 break; 221 222 nextNode = pos.getNextSibling(); 223 224 if (null == nextNode) 225 { 226 pos = pos.getParentNode(); 227 228 if ((null == pos) || ((null != top) && top.equals(pos))) 229 { 230 nextNode = null; 231 232 break; 233 } 234 } 235 } 236 237 pos = nextNode; 238 } 239 this.m_contentHandler.endDocument(); 240 } 241 242 /** Flag indicating whether following text to be processed is raw text */ 243 boolean nextIsRaw = false; 244 245 /** 246 * Optimized dispatch of characters. 247 */ 248 private final void dispatachChars(Node node) 249 throws org.xml.sax.SAXException 250 { 251 if(m_contentHandler instanceof com.sun.org.apache.xml.internal.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler) 252 { 253 ((com.sun.org.apache.xml.internal.dtm.ref.dom2dtm.DOM2DTM.CharacterNodeHandler)m_contentHandler).characters(node); 254 } 255 else 256 { 257 String data = ((Text) node).getData(); 258 this.m_contentHandler.characters(data.toCharArray(), 0, data.length()); 259 } 260 } 261 262 /** 263 * Start processing given node 264 * 265 * 266 * @param node Node to process 267 * 268 * @throws org.xml.sax.SAXException 269 */ 270 protected void startNode(Node node) throws org.xml.sax.SAXException 271 { 272 273 if (m_contentHandler instanceof NodeConsumer) 274 { 275 ((NodeConsumer) m_contentHandler).setOriginatingNode(node); 276 } 277 278 if (node instanceof Locator) 279 { 280 Locator loc = (Locator)node; 281 m_locator.setColumnNumber(loc.getColumnNumber()); 282 m_locator.setLineNumber(loc.getLineNumber()); 283 m_locator.setPublicId(loc.getPublicId()); 284 m_locator.setSystemId(loc.getSystemId()); 285 } 286 else 287 { 288 m_locator.setColumnNumber(0); 289 m_locator.setLineNumber(0); 290 } 291 292 switch (node.getNodeType()) 293 { 294 case Node.COMMENT_NODE : 295 { 296 String data = ((Comment) node).getData(); 297 298 if (m_contentHandler instanceof LexicalHandler) 299 { 300 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 301 302 lh.comment(data.toCharArray(), 0, data.length()); 303 } 304 } 305 break; 306 case Node.DOCUMENT_FRAGMENT_NODE : 307 308 // ??; 309 break; 310 case Node.DOCUMENT_NODE : 311 312 break; 313 case Node.ELEMENT_NODE : 314 NamedNodeMap atts = ((Element) node).getAttributes(); 315 int nAttrs = atts.getLength(); 316 // System.out.println("TreeWalker#startNode: "+node.getNodeName()); 317 318 for (int i = 0; i < nAttrs; i++) 319 { 320 Node attr = atts.item(i); 321 String attrName = attr.getNodeName(); 322 323 // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue()); 324 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 325 { 326 // System.out.println("TreeWalker#startNode: attr["+i+"] = "+attrName+", "+attr.getNodeValue()); 327 int index; 328 // Use "" instead of null, as Xerces likes "" for the 329 // name of the default namespace. Fix attributed 330 // to "Steven Murray" <smurray@ebt.com>. 331 String prefix = (index = attrName.indexOf(":")) < 0 332 ? "" : attrName.substring(index + 1); 333 334 this.m_contentHandler.startPrefixMapping(prefix, 335 attr.getNodeValue()); 336 } 337 338 } 339 340 // System.out.println("m_dh.getNamespaceOfNode(node): "+m_dh.getNamespaceOfNode(node)); 341 // System.out.println("m_dh.getLocalNameOfNode(node): "+m_dh.getLocalNameOfNode(node)); 342 String ns = m_dh.getNamespaceOfNode(node); 343 if(null == ns) 344 ns = ""; 345 this.m_contentHandler.startElement(ns, 346 m_dh.getLocalNameOfNode(node), 347 node.getNodeName(), 348 new AttList(atts, m_dh)); 349 break; 350 case Node.PROCESSING_INSTRUCTION_NODE : 351 { 352 ProcessingInstruction pi = (ProcessingInstruction) node; 353 String name = pi.getNodeName(); 354 355 // String data = pi.getData(); 356 if (name.equals("xslt-next-is-raw")) 357 { 358 nextIsRaw = true; 359 } 360 else 361 { 362 this.m_contentHandler.processingInstruction(pi.getNodeName(), 363 pi.getData()); 364 } 365 } 366 break; 367 case Node.CDATA_SECTION_NODE : 368 { 369 boolean isLexH = (m_contentHandler instanceof LexicalHandler); 370 LexicalHandler lh = isLexH 371 ? ((LexicalHandler) this.m_contentHandler) : null; 372 373 if (isLexH) 374 { 375 lh.startCDATA(); 376 } 377 378 dispatachChars(node); 379 380 { 381 if (isLexH) 382 { 383 lh.endCDATA(); 384 } 385 } 386 } 387 break; 388 case Node.TEXT_NODE : 389 { 390 //String data = ((Text) node).getData(); 391 392 if (nextIsRaw) 393 { 394 nextIsRaw = false; 395 396 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, ""); 397 dispatachChars(node); 398 m_contentHandler.processingInstruction(javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, ""); 399 } 400 else 401 { 402 dispatachChars(node); 403 } 404 } 405 break; 406 case Node.ENTITY_REFERENCE_NODE : 407 { 408 EntityReference eref = (EntityReference) node; 409 410 if (m_contentHandler instanceof LexicalHandler) 411 { 412 ((LexicalHandler) this.m_contentHandler).startEntity( 413 eref.getNodeName()); 414 } 415 else 416 { 417 418 // warning("Can not output entity to a pure SAX ContentHandler"); 419 } 420 } 421 break; 422 default : 423 } 424 } 425 426 /** 427 * End processing of given node 428 * 429 * 430 * @param node Node we just finished processing 431 * 432 * @throws org.xml.sax.SAXException 433 */ 434 protected void endNode(Node node) throws org.xml.sax.SAXException 435 { 436 437 switch (node.getNodeType()) 438 { 439 case Node.DOCUMENT_NODE : 440 break; 441 442 case Node.ELEMENT_NODE : 443 String ns = m_dh.getNamespaceOfNode(node); 444 if(null == ns) 445 ns = ""; 446 this.m_contentHandler.endElement(ns, 447 m_dh.getLocalNameOfNode(node), 448 node.getNodeName()); 449 450 NamedNodeMap atts = ((Element) node).getAttributes(); 451 int nAttrs = atts.getLength(); 452 453 for (int i = 0; i < nAttrs; i++) 454 { 455 Node attr = atts.item(i); 456 String attrName = attr.getNodeName(); 457 458 if (attrName.equals("xmlns") || attrName.startsWith("xmlns:")) 459 { 460 int index; 461 // Use "" instead of null, as Xerces likes "" for the 462 // name of the default namespace. Fix attributed 463 // to "Steven Murray" <smurray@ebt.com>. 464 String prefix = (index = attrName.indexOf(":")) < 0 465 ? "" : attrName.substring(index + 1); 466 467 this.m_contentHandler.endPrefixMapping(prefix); 468 } 469 } 470 break; 471 case Node.CDATA_SECTION_NODE : 472 break; 473 case Node.ENTITY_REFERENCE_NODE : 474 { 475 EntityReference eref = (EntityReference) node; 476 477 if (m_contentHandler instanceof LexicalHandler) 478 { 479 LexicalHandler lh = ((LexicalHandler) this.m_contentHandler); 480 481 lh.endEntity(eref.getNodeName()); 482 } 483 } 484 break; 485 default : 486 } 487 } 488} //TreeWalker 489