1/* 2 * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. 3 */ 4/* 5 * Licensed to the Apache Software Foundation (ASF) under one or more 6 * contributor license agreements. See the NOTICE file distributed with 7 * this work for additional information regarding copyright ownership. 8 * The ASF licenses this file to You under the Apache License, Version 2.0 9 * (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * 12 * http://www.apache.org/licenses/LICENSE-2.0 13 * 14 * Unless required by applicable law or agreed to in writing, software 15 * distributed under the License is distributed on an "AS IS" BASIS, 16 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 * See the License for the specific language governing permissions and 18 * limitations under the License. 19 */ 20 21package com.sun.org.apache.xml.internal.serializer.dom3; 22 23import com.sun.org.apache.xerces.internal.util.XML11Char; 24import com.sun.org.apache.xerces.internal.util.XMLChar; 25import com.sun.org.apache.xml.internal.serializer.OutputPropertiesFactory; 26import com.sun.org.apache.xml.internal.serializer.SerializationHandler; 27import com.sun.org.apache.xml.internal.serializer.utils.MsgKey; 28import com.sun.org.apache.xml.internal.serializer.utils.Utils; 29import java.io.IOException; 30import java.io.Writer; 31import java.util.Collections; 32import java.util.Enumeration; 33import java.util.HashMap; 34import java.util.Map; 35import java.util.Properties; 36import org.w3c.dom.Attr; 37import org.w3c.dom.CDATASection; 38import org.w3c.dom.Comment; 39import org.w3c.dom.DOMError; 40import org.w3c.dom.DOMErrorHandler; 41import org.w3c.dom.Document; 42import org.w3c.dom.DocumentType; 43import org.w3c.dom.Element; 44import org.w3c.dom.Entity; 45import org.w3c.dom.EntityReference; 46import org.w3c.dom.NamedNodeMap; 47import org.w3c.dom.Node; 48import org.w3c.dom.NodeList; 49import org.w3c.dom.ProcessingInstruction; 50import org.w3c.dom.Text; 51import org.w3c.dom.ls.LSSerializerFilter; 52import org.w3c.dom.traversal.NodeFilter; 53import org.xml.sax.Locator; 54import org.xml.sax.SAXException; 55import org.xml.sax.ext.LexicalHandler; 56import org.xml.sax.helpers.LocatorImpl; 57 58/** 59 * Built on org.apache.xml.serializer.TreeWalker and adds functionality to 60 * traverse and serialize a DOM Node (Level 2 or Level 3) as specified in 61 * the DOM Level 3 LS Recommedation by evaluating and applying DOMConfiguration 62 * parameters and filters if any during serialization. 63 * 64 * @xsl.usage internal 65 */ 66final class DOM3TreeWalker { 67 68 /** 69 * The SerializationHandler, it extends ContentHandler and when 70 * this class is instantiated via the constructor provided, a 71 * SerializationHandler object is passed to it. 72 */ 73 private SerializationHandler fSerializer = null; 74 75 /** We do not need DOM2Helper since DOM Level 3 LS applies to DOM Level 2 or newer */ 76 77 /** Locator object for this TreeWalker */ 78 private LocatorImpl fLocator = new LocatorImpl(); 79 80 /** ErrorHandler */ 81 private DOMErrorHandler fErrorHandler = null; 82 83 /** LSSerializerFilter */ 84 private LSSerializerFilter fFilter = null; 85 86 /** If the serializer is an instance of a LexicalHandler */ 87 private LexicalHandler fLexicalHandler = null; 88 89 private int fWhatToShowFilter; 90 91 /** New Line character to use in serialization */ 92 private String fNewLine = null; 93 94 /** DOMConfiguration Properties */ 95 private Properties fDOMConfigProperties = null; 96 97 /** Keeps track if we are in an entity reference when entities=true */ 98 private boolean fInEntityRef = false; 99 100 /** Stores the version of the XML document to be serialize */ 101 private String fXMLVersion = null; 102 103 /** XML Version, default 1.0 */ 104 private boolean fIsXMLVersion11 = false; 105 106 /** Is the Node a Level 3 DOM node */ 107 private boolean fIsLevel3DOM = false; 108 109 /** DOM Configuration Parameters */ 110 private int fFeatures = 0; 111 112 /** Flag indicating whether following text to be processed is raw text */ 113 boolean fNextIsRaw = false; 114 115 // 116 private static final String XMLNS_URI = "http://www.w3.org/2000/xmlns/"; 117 118 // 119 private static final String XMLNS_PREFIX = "xmlns"; 120 121 // 122 private static final String XML_URI = "http://www.w3.org/XML/1998/namespace"; 123 124 // 125 private static final String XML_PREFIX = "xml"; 126 127 /** stores namespaces in scope */ 128 protected NamespaceSupport fNSBinder; 129 130 /** stores all namespace bindings on the current element */ 131 protected NamespaceSupport fLocalNSBinder; 132 133 /** stores the current element depth */ 134 private int fElementDepth = 0; 135 136 // *********************************************************************** 137 // DOMConfiguration paramter settings 138 // *********************************************************************** 139 // Parameter canonical-form, true [optional] - NOT SUPPORTED 140 private final static int CANONICAL = 0x1 << 0; 141 142 // Parameter cdata-sections, true [required] (default) 143 private final static int CDATA = 0x1 << 1; 144 145 // Parameter check-character-normalization, true [optional] - NOT SUPPORTED 146 private final static int CHARNORMALIZE = 0x1 << 2; 147 148 // Parameter comments, true [required] (default) 149 private final static int COMMENTS = 0x1 << 3; 150 151 // Parameter datatype-normalization, true [optional] - NOT SUPPORTED 152 private final static int DTNORMALIZE = 0x1 << 4; 153 154 // Parameter element-content-whitespace, true [required] (default) - value - false [optional] NOT SUPPORTED 155 private final static int ELEM_CONTENT_WHITESPACE = 0x1 << 5; 156 157 // Parameter entities, true [required] (default) 158 private final static int ENTITIES = 0x1 << 6; 159 160 // Parameter infoset, true [required] (default), false has no effect --> True has no effect for the serializer 161 private final static int INFOSET = 0x1 << 7; 162 163 // Parameter namespaces, true [required] (default) 164 private final static int NAMESPACES = 0x1 << 8; 165 166 // Parameter namespace-declarations, true [required] (default) 167 private final static int NAMESPACEDECLS = 0x1 << 9; 168 169 // Parameter normalize-characters, true [optional] - NOT SUPPORTED 170 private final static int NORMALIZECHARS = 0x1 << 10; 171 172 // Parameter split-cdata-sections, true [required] (default) 173 private final static int SPLITCDATA = 0x1 << 11; 174 175 // Parameter validate, true [optional] - NOT SUPPORTED 176 private final static int VALIDATE = 0x1 << 12; 177 178 // Parameter validate-if-schema, true [optional] - NOT SUPPORTED 179 private final static int SCHEMAVALIDATE = 0x1 << 13; 180 181 // Parameter split-cdata-sections, true [required] (default) 182 private final static int WELLFORMED = 0x1 << 14; 183 184 // Parameter discard-default-content, true [required] (default) 185 // Not sure how this will be used in level 2 Documents 186 private final static int DISCARDDEFAULT = 0x1 << 15; 187 188 // Parameter format-pretty-print, true [optional] 189 private final static int PRETTY_PRINT = 0x1 << 16; 190 191 // Parameter ignore-unknown-character-denormalizations, true [required] (default) 192 // We currently do not support XML 1.1 character normalization 193 private final static int IGNORE_CHAR_DENORMALIZE = 0x1 << 17; 194 195 // Parameter discard-default-content, true [required] (default) 196 private final static int XMLDECL = 0x1 << 18; 197 198 /** 199 * Constructor. 200 * @param contentHandler serialHandler The implemention of the SerializationHandler interface 201 */ 202 DOM3TreeWalker( 203 SerializationHandler serialHandler, 204 DOMErrorHandler errHandler, 205 LSSerializerFilter filter, 206 String newLine) { 207 fSerializer = serialHandler; 208 //fErrorHandler = errHandler == null ? new DOMErrorHandlerImpl() : errHandler; // Should we be using the default? 209 fErrorHandler = errHandler; 210 fFilter = filter; 211 fLexicalHandler = null; 212 fNewLine = newLine; 213 214 fNSBinder = new NamespaceSupport(); 215 fLocalNSBinder = new NamespaceSupport(); 216 217 fDOMConfigProperties = fSerializer.getOutputFormat(); 218 fSerializer.setDocumentLocator(fLocator); 219 initProperties(fDOMConfigProperties); 220 } 221 222 /** 223 * Perform a pre-order traversal non-recursive style. 224 * 225 * Note that TreeWalker assumes that the subtree is intended to represent 226 * a complete (though not necessarily well-formed) document and, during a 227 * traversal, startDocument and endDocument will always be issued to the 228 * SAX listener. 229 * 230 * @param pos Node in the tree where to start traversal 231 * 232 * @throws TransformerException 233 */ 234 public void traverse(Node pos) throws org.xml.sax.SAXException { 235 this.fSerializer.startDocument(); 236 237 // Determine if the Node is a DOM Level 3 Core Node. 238 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 239 Document ownerDoc = pos.getOwnerDocument(); 240 if (ownerDoc != null 241 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 242 fIsLevel3DOM = true; 243 } 244 } else { 245 if (((Document) pos) 246 .getImplementation() 247 .hasFeature("Core", "3.0")) { 248 fIsLevel3DOM = true; 249 } 250 } 251 252 if (fSerializer instanceof LexicalHandler) { 253 fLexicalHandler = ((LexicalHandler) this.fSerializer); 254 } 255 256 if (fFilter != null) 257 fWhatToShowFilter = fFilter.getWhatToShow(); 258 259 Node top = pos; 260 261 while (null != pos) { 262 startNode(pos); 263 264 Node nextNode = null; 265 266 nextNode = pos.getFirstChild(); 267 268 while (null == nextNode) { 269 endNode(pos); 270 271 if (top.equals(pos)) 272 break; 273 274 nextNode = pos.getNextSibling(); 275 276 if (null == nextNode) { 277 pos = pos.getParentNode(); 278 279 if ((null == pos) || (top.equals(pos))) { 280 if (null != pos) 281 endNode(pos); 282 283 nextNode = null; 284 285 break; 286 } 287 } 288 } 289 290 pos = nextNode; 291 } 292 this.fSerializer.endDocument(); 293 } 294 295 /** 296 * Perform a pre-order traversal non-recursive style. 297 298 * Note that TreeWalker assumes that the subtree is intended to represent 299 * a complete (though not necessarily well-formed) document and, during a 300 * traversal, startDocument and endDocument will always be issued to the 301 * SAX listener. 302 * 303 * @param pos Node in the tree where to start traversal 304 * @param top Node in the tree where to end traversal 305 * 306 * @throws TransformerException 307 */ 308 public void traverse(Node pos, Node top) throws org.xml.sax.SAXException { 309 310 this.fSerializer.startDocument(); 311 312 // Determine if the Node is a DOM Level 3 Core Node. 313 if (pos.getNodeType() != Node.DOCUMENT_NODE) { 314 Document ownerDoc = pos.getOwnerDocument(); 315 if (ownerDoc != null 316 && ownerDoc.getImplementation().hasFeature("Core", "3.0")) { 317 fIsLevel3DOM = true; 318 } 319 } else { 320 if (((Document) pos) 321 .getImplementation() 322 .hasFeature("Core", "3.0")) { 323 fIsLevel3DOM = true; 324 } 325 } 326 327 if (fSerializer instanceof LexicalHandler) { 328 fLexicalHandler = ((LexicalHandler) this.fSerializer); 329 } 330 331 if (fFilter != null) 332 fWhatToShowFilter = fFilter.getWhatToShow(); 333 334 while (null != pos) { 335 startNode(pos); 336 337 Node nextNode = null; 338 339 nextNode = pos.getFirstChild(); 340 341 while (null == nextNode) { 342 endNode(pos); 343 344 if ((null != top) && top.equals(pos)) 345 break; 346 347 nextNode = pos.getNextSibling(); 348 349 if (null == nextNode) { 350 pos = pos.getParentNode(); 351 352 if ((null == pos) || ((null != top) && top.equals(pos))) { 353 nextNode = null; 354 355 break; 356 } 357 } 358 } 359 360 pos = nextNode; 361 } 362 this.fSerializer.endDocument(); 363 } 364 365 /** 366 * Optimized dispatch of characters. 367 */ 368 private final void dispatachChars(Node node) 369 throws org.xml.sax.SAXException { 370 if (fSerializer != null) { 371 String data = ((Text) node).getData(); 372 this.fSerializer.characters(data.toCharArray(), 0, data.length()); 373 } 374 } 375 376 /** 377 * Start processing given node 378 * 379 * @param node Node to process 380 * 381 * @throws org.xml.sax.SAXException 382 */ 383 protected void startNode(Node node) throws org.xml.sax.SAXException { 384 if (node instanceof Locator) { 385 Locator loc = (Locator) node; 386 fLocator.setColumnNumber(loc.getColumnNumber()); 387 fLocator.setLineNumber(loc.getLineNumber()); 388 fLocator.setPublicId(loc.getPublicId()); 389 fLocator.setSystemId(loc.getSystemId()); 390 } else { 391 fLocator.setColumnNumber(0); 392 fLocator.setLineNumber(0); 393 } 394 395 switch (node.getNodeType()) { 396 case Node.DOCUMENT_TYPE_NODE : 397 serializeDocType((DocumentType) node, true); 398 break; 399 case Node.COMMENT_NODE : 400 serializeComment((Comment) node); 401 break; 402 case Node.DOCUMENT_FRAGMENT_NODE : 403 // Children are traversed 404 break; 405 case Node.DOCUMENT_NODE : 406 break; 407 case Node.ELEMENT_NODE : 408 serializeElement((Element) node, true); 409 break; 410 case Node.PROCESSING_INSTRUCTION_NODE : 411 serializePI((ProcessingInstruction) node); 412 break; 413 case Node.CDATA_SECTION_NODE : 414 serializeCDATASection((CDATASection) node); 415 break; 416 case Node.TEXT_NODE : 417 serializeText((Text) node); 418 break; 419 case Node.ENTITY_REFERENCE_NODE : 420 serializeEntityReference((EntityReference) node, true); 421 break; 422 default : 423 } 424 } 425 426 /** 427 * End processing of given node 428 * 429 * 430 * @param node Node we just finished processing 431 * 432 * @throws org.xml.sax.SAXException 433 */ 434 protected void endNode(Node node) throws org.xml.sax.SAXException { 435 436 switch (node.getNodeType()) { 437 case Node.DOCUMENT_NODE : 438 break; 439 case Node.DOCUMENT_TYPE_NODE : 440 serializeDocType((DocumentType) node, false); 441 break; 442 case Node.ELEMENT_NODE : 443 serializeElement((Element) node, false); 444 break; 445 case Node.CDATA_SECTION_NODE : 446 break; 447 case Node.ENTITY_REFERENCE_NODE : 448 serializeEntityReference((EntityReference) node, false); 449 break; 450 default : 451 } 452 } 453 454 // *********************************************************************** 455 // Node serialization methods 456 // *********************************************************************** 457 /** 458 * Applies a filter on the node to serialize 459 * 460 * @param node The Node to serialize 461 * @return True if the node is to be serialized else false if the node 462 * is to be rejected or skipped. 463 */ 464 protected boolean applyFilter(Node node, int nodeType) { 465 if (fFilter != null && (fWhatToShowFilter & nodeType) != 0) { 466 467 short code = fFilter.acceptNode(node); 468 switch (code) { 469 case NodeFilter.FILTER_REJECT : 470 case NodeFilter.FILTER_SKIP : 471 return false; // skip the node 472 default : // fall through.. 473 } 474 } 475 return true; 476 } 477 478 /** 479 * Serializes a Document Type Node. 480 * 481 * @param node The Docuemnt Type Node to serialize 482 * @param bStart Invoked at the start or end of node. Default true. 483 */ 484 protected void serializeDocType(DocumentType node, boolean bStart) 485 throws SAXException { 486 // The DocType and internalSubset can not be modified in DOM and is 487 // considered to be well-formed as the outcome of successful parsing. 488 String docTypeName = node.getNodeName(); 489 String publicId = node.getPublicId(); 490 String systemId = node.getSystemId(); 491 String internalSubset = node.getInternalSubset(); 492 493 //DocumentType nodes are never passed to the filter 494 495 if (internalSubset != null && !"".equals(internalSubset)) { 496 497 if (bStart) { 498 try { 499 // The Serializer does not provide a way to write out the 500 // DOCTYPE internal subset via an event call, so we write it 501 // out here. 502 Writer writer = fSerializer.getWriter(); 503 StringBuffer dtd = new StringBuffer(); 504 505 dtd.append("<!DOCTYPE "); 506 dtd.append(docTypeName); 507 if (null != publicId) { 508 dtd.append(" PUBLIC \""); 509 dtd.append(publicId); 510 dtd.append('\"'); 511 } 512 513 if (null != systemId) { 514 if (null == publicId) { 515 dtd.append(" SYSTEM \""); 516 } else { 517 dtd.append(" \""); 518 } 519 dtd.append(systemId); 520 dtd.append('\"'); 521 } 522 523 dtd.append(" [ "); 524 525 dtd.append(fNewLine); 526 dtd.append(internalSubset); 527 dtd.append("]>"); 528 dtd.append(fNewLine); 529 530 writer.write(dtd.toString()); 531 writer.flush(); 532 533 } catch (IOException e) { 534 throw new SAXException(Utils.messages.createMessage( 535 MsgKey.ER_WRITING_INTERNAL_SUBSET, null), e); 536 } 537 } // else if !bStart do nothing 538 539 } else { 540 541 if (bStart) { 542 if (fLexicalHandler != null) { 543 fLexicalHandler.startDTD(docTypeName, publicId, systemId); 544 } 545 } else { 546 if (fLexicalHandler != null) { 547 fLexicalHandler.endDTD(); 548 } 549 } 550 } 551 } 552 553 /** 554 * Serializes a Comment Node. 555 * 556 * @param node The Comment Node to serialize 557 */ 558 protected void serializeComment(Comment node) throws SAXException { 559 // comments=true 560 if ((fFeatures & COMMENTS) != 0) { 561 String data = node.getData(); 562 563 // well-formed=true 564 if ((fFeatures & WELLFORMED) != 0) { 565 isCommentWellFormed(data); 566 } 567 568 if (fLexicalHandler != null) { 569 // apply the LSSerializer filter after the operations requested by the 570 // DOMConfiguration parameters have been applied 571 if (!applyFilter(node, NodeFilter.SHOW_COMMENT)) { 572 return; 573 } 574 575 fLexicalHandler.comment(data.toCharArray(), 0, data.length()); 576 } 577 } 578 } 579 580 /** 581 * Serializes an Element Node. 582 * 583 * @param node The Element Node to serialize 584 * @param bStart Invoked at the start or end of node. 585 */ 586 protected void serializeElement(Element node, boolean bStart) 587 throws SAXException { 588 if (bStart) { 589 fElementDepth++; 590 591 // We use the Xalan specific startElement and starPrefixMapping calls 592 // (and addAttribute and namespaceAfterStartElement) as opposed to 593 // SAX specific, for performance reasons as they reduce the overhead 594 // of creating an AttList object upfront. 595 596 // well-formed=true 597 if ((fFeatures & WELLFORMED) != 0) { 598 isElementWellFormed(node); 599 } 600 601 // REVISIT: We apply the LSSerializer filter for elements before 602 // namesapce fixup 603 if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) { 604 return; 605 } 606 607 // namespaces=true, record and fixup namspaced element 608 if ((fFeatures & NAMESPACES) != 0) { 609 fNSBinder.pushContext(); 610 fLocalNSBinder.reset(); 611 612 recordLocalNSDecl(node); 613 fixupElementNS(node); 614 } 615 616 // Namespace normalization 617 fSerializer.startElement( 618 node.getNamespaceURI(), 619 node.getLocalName(), 620 node.getNodeName()); 621 622 serializeAttList(node); 623 624 } else { 625 fElementDepth--; 626 627 // apply the LSSerializer filter 628 if (!applyFilter(node, NodeFilter.SHOW_ELEMENT)) { 629 return; 630 } 631 632 this.fSerializer.endElement( 633 node.getNamespaceURI(), 634 node.getLocalName(), 635 node.getNodeName()); 636 // since endPrefixMapping was not used by SerializationHandler it was removed 637 // for performance reasons. 638 639 if ((fFeatures & NAMESPACES) != 0 ) { 640 fNSBinder.popContext(); 641 } 642 643 } 644 } 645 646 /** 647 * Serializes the Attr Nodes of an Element. 648 * 649 * @param node The OwnerElement whose Attr Nodes are to be serialized. 650 */ 651 protected void serializeAttList(Element node) throws SAXException { 652 NamedNodeMap atts = node.getAttributes(); 653 int nAttrs = atts.getLength(); 654 655 for (int i = 0; i < nAttrs; i++) { 656 Node attr = atts.item(i); 657 658 String localName = attr.getLocalName(); 659 String attrName = attr.getNodeName(); 660 String attrPrefix = attr.getPrefix() == null ? "" : attr.getPrefix(); 661 String attrValue = attr.getNodeValue(); 662 663 // Determine the Attr's type. 664 String type = null; 665 if (fIsLevel3DOM) { 666 type = ((Attr) attr).getSchemaTypeInfo().getTypeName(); 667 } 668 type = type == null ? "CDATA" : type; 669 670 String attrNS = attr.getNamespaceURI(); 671 if (attrNS !=null && attrNS.length() == 0) { 672 attrNS=null; 673 // we must remove prefix for this attribute 674 attrName=attr.getLocalName(); 675 } 676 677 boolean isSpecified = ((Attr) attr).getSpecified(); 678 boolean addAttr = true; 679 boolean applyFilter = false; 680 boolean xmlnsAttr = 681 attrName.equals("xmlns") || attrName.startsWith("xmlns:"); 682 683 // well-formed=true 684 if ((fFeatures & WELLFORMED) != 0) { 685 isAttributeWellFormed(attr); 686 } 687 688 //----------------------------------------------------------------- 689 // start Attribute namespace fixup 690 //----------------------------------------------------------------- 691 // namespaces=true, normalize all non-namespace attributes 692 // Step 3. Attribute 693 if ((fFeatures & NAMESPACES) != 0 && !xmlnsAttr) { 694 695 // If the Attr has a namespace URI 696 if (attrNS != null) { 697 attrPrefix = attrPrefix == null ? "" : attrPrefix; 698 699 String declAttrPrefix = fNSBinder.getPrefix(attrNS); 700 String declAttrNS = fNSBinder.getURI(attrPrefix); 701 702 // attribute has no prefix (default namespace decl does not apply to 703 // attributes) 704 // OR 705 // attribute prefix is not declared 706 // OR 707 // conflict: attribute has a prefix that conflicts with a binding 708 if ("".equals(attrPrefix) || "".equals(declAttrPrefix) 709 || !attrPrefix.equals(declAttrPrefix)) { 710 711 // namespaceURI matches an in scope declaration of one or 712 // more prefixes 713 if (declAttrPrefix != null && !"".equals(declAttrPrefix)) { 714 // pick the prefix that was found and change attribute's 715 // prefix and nodeName. 716 attrPrefix = declAttrPrefix; 717 718 if (declAttrPrefix.length() > 0 ) { 719 attrName = declAttrPrefix + ":" + localName; 720 } else { 721 attrName = localName; 722 } 723 } else { 724 // The current prefix is not null and it has no in scope 725 // declaration 726 if (attrPrefix != null && !"".equals(attrPrefix) 727 && declAttrNS == null) { 728 // declare this prefix 729 if ((fFeatures & NAMESPACEDECLS) != 0) { 730 fSerializer.addAttribute(XMLNS_URI, attrPrefix, 731 XMLNS_PREFIX + ":" + attrPrefix, "CDATA", 732 attrNS); 733 fNSBinder.declarePrefix(attrPrefix, attrNS); 734 fLocalNSBinder.declarePrefix(attrPrefix, attrNS); 735 } 736 } else { 737 // find a prefix following the pattern "NS" +index 738 // (starting at 1) 739 // make sure this prefix is not declared in the current 740 // scope. 741 int counter = 1; 742 attrPrefix = "NS" + counter++; 743 744 while (fLocalNSBinder.getURI(attrPrefix) != null) { 745 attrPrefix = "NS" + counter++; 746 } 747 // change attribute's prefix and Name 748 attrName = attrPrefix + ":" + localName; 749 750 // create a local namespace declaration attribute 751 // Add the xmlns declaration attribute 752 if ((fFeatures & NAMESPACEDECLS) != 0) { 753 754 fSerializer.addAttribute(XMLNS_URI, attrPrefix, 755 XMLNS_PREFIX + ":" + attrPrefix, "CDATA", 756 attrNS); 757 fNSBinder.declarePrefix(attrPrefix, attrNS); 758 fLocalNSBinder.declarePrefix(attrPrefix, attrNS); 759 } 760 } 761 } 762 } 763 764 } else { // if the Attr has no namespace URI 765 // Attr has no localName 766 if (localName == null) { 767 // DOM Level 1 node! 768 String msg = Utils.messages.createMessage( 769 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 770 new Object[] { attrName }); 771 772 if (fErrorHandler != null) { 773 fErrorHandler 774 .handleError(new DOMErrorImpl( 775 DOMError.SEVERITY_ERROR, msg, 776 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, null, 777 null, null)); 778 } 779 780 } else { // uri=null and no colon 781 // attr has no namespace URI and no prefix 782 // no action is required, since attrs don't use default 783 } 784 } 785 786 } 787 788 789 // discard-default-content=true 790 // Default attr's are not passed to the filter and this contraint 791 // is applied only when discard-default-content=true 792 // What about default xmlns attributes???? check for xmlnsAttr 793 if ((((fFeatures & DISCARDDEFAULT) != 0) && isSpecified) 794 || ((fFeatures & DISCARDDEFAULT) == 0)) { 795 applyFilter = true; 796 } else { 797 addAttr = false; 798 } 799 800 if (applyFilter) { 801 // apply the filter for Attributes that are not default attributes 802 // or namespace decl attributes 803 if (fFilter != null 804 && (fFilter.getWhatToShow() & NodeFilter.SHOW_ATTRIBUTE) 805 != 0) { 806 807 if (!xmlnsAttr) { 808 short code = fFilter.acceptNode(attr); 809 switch (code) { 810 case NodeFilter.FILTER_REJECT : 811 case NodeFilter.FILTER_SKIP : 812 addAttr = false; 813 break; 814 default : //fall through.. 815 } 816 } 817 } 818 } 819 820 // if the node is a namespace node 821 if (addAttr && xmlnsAttr) { 822 // If namespace-declarations=true, add the node , else don't add it 823 if ((fFeatures & NAMESPACEDECLS) != 0) { 824 // The namespace may have been fixed up, in that case don't add it. 825 if (localName != null && !"".equals(localName)) { 826 fSerializer.addAttribute(attrNS, localName, attrName, type, attrValue); 827 } 828 } 829 } else if ( 830 addAttr && !xmlnsAttr) { // if the node is not a namespace node 831 // If namespace-declarations=true, add the node with the Attr nodes namespaceURI 832 // else add the node setting it's namespace to null or else the serializer will later 833 // attempt to add a xmlns attr for the prefixed attribute 834 if (((fFeatures & NAMESPACEDECLS) != 0) && (attrNS != null)) { 835 fSerializer.addAttribute( 836 attrNS, 837 localName, 838 attrName, 839 type, 840 attrValue); 841 } else { 842 fSerializer.addAttribute( 843 "", 844 localName, 845 attrName, 846 type, 847 attrValue); 848 } 849 } 850 851 // 852 if (xmlnsAttr && ((fFeatures & NAMESPACEDECLS) != 0)) { 853 int index; 854 // Use "" instead of null, as Xerces likes "" for the 855 // name of the default namespace. Fix attributed 856 // to "Steven Murray" <smurray@ebt.com>. 857 String prefix = 858 (index = attrName.indexOf(":")) < 0 859 ? "" 860 : attrName.substring(index + 1); 861 862 if (!"".equals(prefix)) { 863 fSerializer.namespaceAfterStartElement(prefix, attrValue); 864 } 865 } 866 } 867 868 } 869 870 /** 871 * Serializes an ProcessingInstruction Node. 872 * 873 * @param node The ProcessingInstruction Node to serialize 874 */ 875 protected void serializePI(ProcessingInstruction node) 876 throws SAXException { 877 ProcessingInstruction pi = node; 878 String name = pi.getNodeName(); 879 880 // well-formed=true 881 if ((fFeatures & WELLFORMED) != 0) { 882 isPIWellFormed(node); 883 } 884 885 // apply the LSSerializer filter 886 if (!applyFilter(node, NodeFilter.SHOW_PROCESSING_INSTRUCTION)) { 887 return; 888 } 889 890 // String data = pi.getData(); 891 if (name.equals("xslt-next-is-raw")) { 892 fNextIsRaw = true; 893 } else { 894 this.fSerializer.processingInstruction(name, pi.getData()); 895 } 896 } 897 898 /** 899 * Serializes an CDATASection Node. 900 * 901 * @param node The CDATASection Node to serialize 902 */ 903 protected void serializeCDATASection(CDATASection node) 904 throws SAXException { 905 // well-formed=true 906 if ((fFeatures & WELLFORMED) != 0) { 907 isCDATASectionWellFormed(node); 908 } 909 910 // cdata-sections = true 911 if ((fFeatures & CDATA) != 0) { 912 913 // split-cdata-sections = true 914 // Assumption: This parameter has an effect only when 915 // cdata-sections=true 916 // ToStream, by default splits cdata-sections. Hence the check 917 // below. 918 String nodeValue = node.getNodeValue(); 919 int endIndex = nodeValue.indexOf("]]>"); 920 if ((fFeatures & SPLITCDATA) != 0) { 921 if (endIndex >= 0) { 922 // The first node split will contain the ]] markers 923 String relatedData = nodeValue.substring(0, endIndex + 2); 924 925 String msg = 926 Utils.messages.createMessage( 927 MsgKey.ER_CDATA_SECTIONS_SPLIT, 928 null); 929 930 if (fErrorHandler != null) { 931 fErrorHandler.handleError( 932 new DOMErrorImpl( 933 DOMError.SEVERITY_WARNING, 934 msg, 935 MsgKey.ER_CDATA_SECTIONS_SPLIT, 936 null, 937 relatedData, 938 null)); 939 } 940 } 941 } else { 942 if (endIndex >= 0) { 943 // The first node split will contain the ]] markers 944 String relatedData = nodeValue.substring(0, endIndex + 2); 945 946 String msg = 947 Utils.messages.createMessage( 948 MsgKey.ER_CDATA_SECTIONS_SPLIT, 949 null); 950 951 if (fErrorHandler != null) { 952 fErrorHandler.handleError( 953 new DOMErrorImpl( 954 DOMError.SEVERITY_ERROR, 955 msg, 956 MsgKey.ER_CDATA_SECTIONS_SPLIT)); 957 } 958 // Report an error and return. What error??? 959 return; 960 } 961 } 962 963 // apply the LSSerializer filter 964 if (!applyFilter(node, NodeFilter.SHOW_CDATA_SECTION)) { 965 return; 966 } 967 968 // splits the cdata-section 969 if (fLexicalHandler != null) { 970 fLexicalHandler.startCDATA(); 971 } 972 dispatachChars(node); 973 if (fLexicalHandler != null) { 974 fLexicalHandler.endCDATA(); 975 } 976 } else { 977 dispatachChars(node); 978 } 979 } 980 981 /** 982 * Serializes an Text Node. 983 * 984 * @param node The Text Node to serialize 985 */ 986 protected void serializeText(Text node) throws SAXException { 987 if (fNextIsRaw) { 988 fNextIsRaw = false; 989 fSerializer.processingInstruction( 990 javax.xml.transform.Result.PI_DISABLE_OUTPUT_ESCAPING, 991 ""); 992 dispatachChars(node); 993 fSerializer.processingInstruction( 994 javax.xml.transform.Result.PI_ENABLE_OUTPUT_ESCAPING, 995 ""); 996 } else { 997 // keep track of dispatch or not to avoid duplicaiton of filter code 998 boolean bDispatch = false; 999 1000 // well-formed=true 1001 if ((fFeatures & WELLFORMED) != 0) { 1002 isTextWellFormed(node); 1003 } 1004 1005 // if the node is whitespace 1006 // Determine the Attr's type. 1007 boolean isElementContentWhitespace = false; 1008 if (fIsLevel3DOM) { 1009 isElementContentWhitespace = 1010 node.isElementContentWhitespace(); 1011 } 1012 1013 if (isElementContentWhitespace) { 1014 // element-content-whitespace=true 1015 if ((fFeatures & ELEM_CONTENT_WHITESPACE) != 0) { 1016 bDispatch = true; 1017 } 1018 } else { 1019 bDispatch = true; 1020 } 1021 1022 // apply the LSSerializer filter 1023 if (!applyFilter(node, NodeFilter.SHOW_TEXT)) { 1024 return; 1025 } 1026 1027 if (bDispatch 1028 && (!fSerializer.getIndent() || !node.getData().replace('\n', ' ').trim().isEmpty())) { 1029 dispatachChars(node); 1030 } 1031 } 1032 } 1033 1034 /** 1035 * Serializes an EntityReference Node. 1036 * 1037 * @param node The EntityReference Node to serialize 1038 * @param bStart Inicates if called from start or endNode 1039 */ 1040 protected void serializeEntityReference( 1041 EntityReference node, 1042 boolean bStart) 1043 throws SAXException { 1044 if (bStart) { 1045 EntityReference eref = node; 1046 // entities=true 1047 if ((fFeatures & ENTITIES) != 0) { 1048 1049 // perform well-formedness and other checking only if 1050 // entities = true 1051 1052 // well-formed=true 1053 if ((fFeatures & WELLFORMED) != 0) { 1054 isEntityReferneceWellFormed(node); 1055 } 1056 1057 // check "unbound-prefix-in-entity-reference" [fatal] 1058 // Raised if the configuration parameter "namespaces" is set to true 1059 if ((fFeatures & NAMESPACES) != 0) { 1060 checkUnboundPrefixInEntRef(node); 1061 } 1062 1063 // The filter should not apply in this case, since the 1064 // EntityReference is not being expanded. 1065 // should we pass entity reference nodes to the filter??? 1066 } 1067 1068 // if "entities" is true, or EntityReference node has no children, 1069 // it will be serialized as the form "&entityName;" in the output. 1070 if (fLexicalHandler != null && ((fFeatures & ENTITIES) != 0 || !node.hasChildNodes())) { 1071 1072 // startEntity outputs only Text but not Element, Attr, Comment 1073 // and PI child nodes. It does so by setting the m_inEntityRef 1074 // in ToStream and using this to decide if a node is to be 1075 // serialized or not. 1076 fLexicalHandler.startEntity(eref.getNodeName()); 1077 } 1078 1079 } else { 1080 EntityReference eref = node; 1081 // entities=true or false, 1082 if (fLexicalHandler != null) { 1083 fLexicalHandler.endEntity(eref.getNodeName()); 1084 } 1085 } 1086 } 1087 1088 1089 // *********************************************************************** 1090 // Methods to check well-formedness 1091 // *********************************************************************** 1092 /** 1093 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1094 * 1095 * Check the string against XML's definition of acceptable names for 1096 * elements and attributes and so on using the XMLCharacterProperties 1097 * utility class 1098 */ 1099 protected boolean isXMLName(String s, boolean xml11Version) { 1100 1101 if (s == null) { 1102 return false; 1103 } 1104 if (!xml11Version) 1105 return XMLChar.isValidName(s); 1106 else 1107 return XML11Char.isXML11ValidName(s); 1108 } 1109 1110 /** 1111 * Taken from org.apache.xerces.dom.CoreDocumentImpl 1112 * 1113 * Checks if the given qualified name is legal with respect 1114 * to the version of XML to which this document must conform. 1115 * 1116 * @param prefix prefix of qualified name 1117 * @param local local part of qualified name 1118 */ 1119 protected boolean isValidQName( 1120 String prefix, 1121 String local, 1122 boolean xml11Version) { 1123 1124 // check that both prefix and local part match NCName 1125 if (local == null) 1126 return false; 1127 boolean validNCName = false; 1128 1129 if (!xml11Version) { 1130 validNCName = 1131 (prefix == null || XMLChar.isValidNCName(prefix)) 1132 && XMLChar.isValidNCName(local); 1133 } else { 1134 validNCName = 1135 (prefix == null || XML11Char.isXML11ValidNCName(prefix)) 1136 && XML11Char.isXML11ValidNCName(local); 1137 } 1138 1139 return validNCName; 1140 } 1141 1142 /** 1143 * Checks if a XML character is well-formed 1144 * 1145 * @param characters A String of characters to be checked for Well-Formedness 1146 * @param refInvalidChar A reference to the character to be returned that was determined invalid. 1147 */ 1148 protected boolean isWFXMLChar(String chardata, Character refInvalidChar) { 1149 if (chardata == null || (chardata.length() == 0)) { 1150 return true; 1151 } 1152 1153 char[] dataarray = chardata.toCharArray(); 1154 int datalength = dataarray.length; 1155 1156 // version of the document is XML 1.1 1157 if (fIsXMLVersion11) { 1158 //we need to check all characters as per production rules of XML11 1159 int i = 0; 1160 while (i < datalength) { 1161 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1162 // check if this is a supplemental character 1163 char ch = dataarray[i - 1]; 1164 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1165 char ch2 = dataarray[i++]; 1166 if (XMLChar.isLowSurrogate(ch2) 1167 && XMLChar.isSupplemental( 1168 XMLChar.supplemental(ch, ch2))) { 1169 continue; 1170 } 1171 } 1172 // Reference to invalid character which is returned 1173 refInvalidChar = ch; 1174 return false; 1175 } 1176 } 1177 } // version of the document is XML 1.0 1178 else { 1179 // we need to check all characters as per production rules of XML 1.0 1180 int i = 0; 1181 while (i < datalength) { 1182 if (XMLChar.isInvalid(dataarray[i++])) { 1183 // check if this is a supplemental character 1184 char ch = dataarray[i - 1]; 1185 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1186 char ch2 = dataarray[i++]; 1187 if (XMLChar.isLowSurrogate(ch2) 1188 && XMLChar.isSupplemental( 1189 XMLChar.supplemental(ch, ch2))) { 1190 continue; 1191 } 1192 } 1193 // Reference to invalid character which is returned 1194 refInvalidChar = ch; 1195 return false; 1196 } 1197 } 1198 } // end-else fDocument.isXMLVersion() 1199 1200 return true; 1201 } // isXMLCharWF 1202 1203 /** 1204 * Checks if a XML character is well-formed. If there is a problem with 1205 * the character a non-null Character is returned else null is returned. 1206 * 1207 * @param characters A String of characters to be checked for Well-Formedness 1208 * @return Character A reference to the character to be returned that was determined invalid. 1209 */ 1210 protected Character isWFXMLChar(String chardata) { 1211 Character refInvalidChar; 1212 if (chardata == null || (chardata.length() == 0)) { 1213 return null; 1214 } 1215 1216 char[] dataarray = chardata.toCharArray(); 1217 int datalength = dataarray.length; 1218 1219 // version of the document is XML 1.1 1220 if (fIsXMLVersion11) { 1221 //we need to check all characters as per production rules of XML11 1222 int i = 0; 1223 while (i < datalength) { 1224 if (XML11Char.isXML11Invalid(dataarray[i++])) { 1225 // check if this is a supplemental character 1226 char ch = dataarray[i - 1]; 1227 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1228 char ch2 = dataarray[i++]; 1229 if (XMLChar.isLowSurrogate(ch2) 1230 && XMLChar.isSupplemental( 1231 XMLChar.supplemental(ch, ch2))) { 1232 continue; 1233 } 1234 } 1235 // Reference to invalid character which is returned 1236 refInvalidChar = ch; 1237 return refInvalidChar; 1238 } 1239 } 1240 } // version of the document is XML 1.0 1241 else { 1242 // we need to check all characters as per production rules of XML 1.0 1243 int i = 0; 1244 while (i < datalength) { 1245 if (XMLChar.isInvalid(dataarray[i++])) { 1246 // check if this is a supplemental character 1247 char ch = dataarray[i - 1]; 1248 if (XMLChar.isHighSurrogate(ch) && i < datalength) { 1249 char ch2 = dataarray[i++]; 1250 if (XMLChar.isLowSurrogate(ch2) 1251 && XMLChar.isSupplemental( 1252 XMLChar.supplemental(ch, ch2))) { 1253 continue; 1254 } 1255 } 1256 // Reference to invalid character which is returned 1257 refInvalidChar = ch; 1258 return refInvalidChar; 1259 } 1260 } 1261 } // end-else fDocument.isXMLVersion() 1262 1263 return null; 1264 } // isXMLCharWF 1265 1266 /** 1267 * Checks if a comment node is well-formed 1268 * 1269 * @param data The contents of the comment node 1270 * @return a boolean indiacating if the comment is well-formed or not. 1271 */ 1272 protected void isCommentWellFormed(String data) { 1273 if (data == null || (data.length() == 0)) { 1274 return; 1275 } 1276 1277 char[] dataarray = data.toCharArray(); 1278 int datalength = dataarray.length; 1279 1280 // version of the document is XML 1.1 1281 if (fIsXMLVersion11) { 1282 // we need to check all chracters as per production rules of XML11 1283 int i = 0; 1284 while (i < datalength) { 1285 char c = dataarray[i++]; 1286 if (XML11Char.isXML11Invalid(c)) { 1287 // check if this is a supplemental character 1288 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1289 char c2 = dataarray[i++]; 1290 if (XMLChar.isLowSurrogate(c2) 1291 && XMLChar.isSupplemental( 1292 XMLChar.supplemental(c, c2))) { 1293 continue; 1294 } 1295 } 1296 String msg = 1297 Utils.messages.createMessage( 1298 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1299 new Object[] { c}); 1300 1301 if (fErrorHandler != null) { 1302 fErrorHandler.handleError( 1303 new DOMErrorImpl( 1304 DOMError.SEVERITY_FATAL_ERROR, 1305 msg, 1306 MsgKey.ER_WF_INVALID_CHARACTER, 1307 null, 1308 null, 1309 null)); 1310 } 1311 } else if (c == '-' && i < datalength && dataarray[i] == '-') { 1312 String msg = 1313 Utils.messages.createMessage( 1314 MsgKey.ER_WF_DASH_IN_COMMENT, 1315 null); 1316 1317 if (fErrorHandler != null) { 1318 fErrorHandler.handleError( 1319 new DOMErrorImpl( 1320 DOMError.SEVERITY_FATAL_ERROR, 1321 msg, 1322 MsgKey.ER_WF_INVALID_CHARACTER, 1323 null, 1324 null, 1325 null)); 1326 } 1327 } 1328 } 1329 } // version of the document is XML 1.0 1330 else { 1331 // we need to check all chracters as per production rules of XML 1.0 1332 int i = 0; 1333 while (i < datalength) { 1334 char c = dataarray[i++]; 1335 if (XMLChar.isInvalid(c)) { 1336 // check if this is a supplemental character 1337 if (XMLChar.isHighSurrogate(c) && i < datalength) { 1338 char c2 = dataarray[i++]; 1339 if (XMLChar.isLowSurrogate(c2) 1340 && XMLChar.isSupplemental( 1341 XMLChar.supplemental(c, c2))) { 1342 continue; 1343 } 1344 } 1345 String msg = 1346 Utils.messages.createMessage( 1347 MsgKey.ER_WF_INVALID_CHARACTER_IN_COMMENT, 1348 new Object[] { c}); 1349 1350 if (fErrorHandler != null) { 1351 fErrorHandler.handleError( 1352 new DOMErrorImpl( 1353 DOMError.SEVERITY_FATAL_ERROR, 1354 msg, 1355 MsgKey.ER_WF_INVALID_CHARACTER, 1356 null, 1357 null, 1358 null)); 1359 } 1360 } else if (c == '-' && i < datalength && dataarray[i] == '-') { 1361 String msg = 1362 Utils.messages.createMessage( 1363 MsgKey.ER_WF_DASH_IN_COMMENT, 1364 null); 1365 1366 if (fErrorHandler != null) { 1367 fErrorHandler.handleError( 1368 new DOMErrorImpl( 1369 DOMError.SEVERITY_FATAL_ERROR, 1370 msg, 1371 MsgKey.ER_WF_INVALID_CHARACTER, 1372 null, 1373 null, 1374 null)); 1375 } 1376 } 1377 } 1378 } 1379 return; 1380 } 1381 1382 /** 1383 * Checks if an element node is well-formed, by checking its Name for well-formedness. 1384 * 1385 * @param data The contents of the comment node 1386 * @return a boolean indiacating if the comment is well-formed or not. 1387 */ 1388 protected void isElementWellFormed(Node node) { 1389 boolean isNameWF = false; 1390 if ((fFeatures & NAMESPACES) != 0) { 1391 isNameWF = 1392 isValidQName( 1393 node.getPrefix(), 1394 node.getLocalName(), 1395 fIsXMLVersion11); 1396 } else { 1397 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1398 } 1399 1400 if (!isNameWF) { 1401 String msg = 1402 Utils.messages.createMessage( 1403 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1404 new Object[] { "Element", node.getNodeName()}); 1405 1406 if (fErrorHandler != null) { 1407 fErrorHandler.handleError( 1408 new DOMErrorImpl( 1409 DOMError.SEVERITY_FATAL_ERROR, 1410 msg, 1411 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1412 null, 1413 null, 1414 null)); 1415 } 1416 } 1417 } 1418 1419 /** 1420 * Checks if an attr node is well-formed, by checking it's Name and value 1421 * for well-formedness. 1422 * 1423 * @param data The contents of the comment node 1424 * @return a boolean indiacating if the comment is well-formed or not. 1425 */ 1426 protected void isAttributeWellFormed(Node node) { 1427 boolean isNameWF = false; 1428 if ((fFeatures & NAMESPACES) != 0) { 1429 isNameWF = 1430 isValidQName( 1431 node.getPrefix(), 1432 node.getLocalName(), 1433 fIsXMLVersion11); 1434 } else { 1435 isNameWF = isXMLName(node.getNodeName(), fIsXMLVersion11); 1436 } 1437 1438 if (!isNameWF) { 1439 String msg = 1440 Utils.messages.createMessage( 1441 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1442 new Object[] { "Attr", node.getNodeName()}); 1443 1444 if (fErrorHandler != null) { 1445 fErrorHandler.handleError( 1446 new DOMErrorImpl( 1447 DOMError.SEVERITY_FATAL_ERROR, 1448 msg, 1449 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1450 null, 1451 null, 1452 null)); 1453 } 1454 } 1455 1456 // Check the Attr's node value 1457 // WFC: No < in Attribute Values 1458 String value = node.getNodeValue(); 1459 if (value.indexOf('<') >= 0) { 1460 String msg = 1461 Utils.messages.createMessage( 1462 MsgKey.ER_WF_LT_IN_ATTVAL, 1463 new Object[] { 1464 ((Attr) node).getOwnerElement().getNodeName(), 1465 node.getNodeName()}); 1466 1467 if (fErrorHandler != null) { 1468 fErrorHandler.handleError( 1469 new DOMErrorImpl( 1470 DOMError.SEVERITY_FATAL_ERROR, 1471 msg, 1472 MsgKey.ER_WF_LT_IN_ATTVAL, 1473 null, 1474 null, 1475 null)); 1476 } 1477 } 1478 1479 // we need to loop through the children of attr nodes and check their values for 1480 // well-formedness 1481 NodeList children = node.getChildNodes(); 1482 for (int i = 0; i < children.getLength(); i++) { 1483 Node child = children.item(i); 1484 // An attribute node with no text or entity ref child for example 1485 // doc.createAttributeNS("http://www.w3.org/2000/xmlns/", "xmlns:ns"); 1486 // followes by 1487 // element.setAttributeNodeNS(attribute); 1488 // can potentially lead to this situation. If the attribute 1489 // was a prefix Namespace attribute declaration then then DOM Core 1490 // should have some exception defined for this. 1491 if (child == null) { 1492 // we should probably report an error 1493 continue; 1494 } 1495 switch (child.getNodeType()) { 1496 case Node.TEXT_NODE : 1497 isTextWellFormed((Text) child); 1498 break; 1499 case Node.ENTITY_REFERENCE_NODE : 1500 isEntityReferneceWellFormed((EntityReference) child); 1501 break; 1502 default : 1503 } 1504 } 1505 1506 // TODO: 1507 // WFC: Check if the attribute prefix is bound to 1508 // http://www.w3.org/2000/xmlns/ 1509 1510 // WFC: Unique Att Spec 1511 // Perhaps pass a seen boolean value to this method. serializeAttList will determine 1512 // if the attr was seen before. 1513 } 1514 1515 /** 1516 * Checks if a PI node is well-formed, by checking it's Name and data 1517 * for well-formedness. 1518 * 1519 * @param data The contents of the comment node 1520 */ 1521 protected void isPIWellFormed(ProcessingInstruction node) { 1522 // Is the PI Target a valid XML name 1523 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1524 String msg = 1525 Utils.messages.createMessage( 1526 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1527 new Object[] { "ProcessingInstruction", node.getTarget()}); 1528 1529 if (fErrorHandler != null) { 1530 fErrorHandler.handleError( 1531 new DOMErrorImpl( 1532 DOMError.SEVERITY_FATAL_ERROR, 1533 msg, 1534 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1535 null, 1536 null, 1537 null)); 1538 } 1539 } 1540 1541 // Does the PI Data carry valid XML characters 1542 1543 // REVISIT: Should we check if the PI DATA contains a ?> ??? 1544 Character invalidChar = isWFXMLChar(node.getData()); 1545 if (invalidChar != null) { 1546 String msg = 1547 Utils.messages.createMessage( 1548 MsgKey.ER_WF_INVALID_CHARACTER_IN_PI, 1549 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1550 1551 if (fErrorHandler != null) { 1552 fErrorHandler.handleError( 1553 new DOMErrorImpl( 1554 DOMError.SEVERITY_FATAL_ERROR, 1555 msg, 1556 MsgKey.ER_WF_INVALID_CHARACTER, 1557 null, 1558 null, 1559 null)); 1560 } 1561 } 1562 } 1563 1564 /** 1565 * Checks if an CDATASection node is well-formed, by checking it's data 1566 * for well-formedness. Note that the presence of a CDATA termination mark 1567 * in the contents of a CDATASection is handled by the parameter 1568 * spli-cdata-sections 1569 * 1570 * @param data The contents of the comment node 1571 */ 1572 protected void isCDATASectionWellFormed(CDATASection node) { 1573 // Does the data valid XML character data 1574 Character invalidChar = isWFXMLChar(node.getData()); 1575 //if (!isWFXMLChar(node.getData(), invalidChar)) { 1576 if (invalidChar != null) { 1577 String msg = 1578 Utils.messages.createMessage( 1579 MsgKey.ER_WF_INVALID_CHARACTER_IN_CDATA, 1580 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1581 1582 if (fErrorHandler != null) { 1583 fErrorHandler.handleError( 1584 new DOMErrorImpl( 1585 DOMError.SEVERITY_FATAL_ERROR, 1586 msg, 1587 MsgKey.ER_WF_INVALID_CHARACTER, 1588 null, 1589 null, 1590 null)); 1591 } 1592 } 1593 } 1594 1595 /** 1596 * Checks if an Text node is well-formed, by checking if it contains invalid 1597 * XML characters. 1598 * 1599 * @param data The contents of the comment node 1600 */ 1601 protected void isTextWellFormed(Text node) { 1602 // Does the data valid XML character data 1603 Character invalidChar = isWFXMLChar(node.getData()); 1604 if (invalidChar != null) { 1605 String msg = 1606 Utils.messages.createMessage( 1607 MsgKey.ER_WF_INVALID_CHARACTER_IN_TEXT, 1608 new Object[] { Integer.toHexString(Character.getNumericValue(invalidChar.charValue())) }); 1609 1610 if (fErrorHandler != null) { 1611 fErrorHandler.handleError( 1612 new DOMErrorImpl( 1613 DOMError.SEVERITY_FATAL_ERROR, 1614 msg, 1615 MsgKey.ER_WF_INVALID_CHARACTER, 1616 null, 1617 null, 1618 null)); 1619 } 1620 } 1621 } 1622 1623 /** 1624 * Checks if an EntityRefernece node is well-formed, by checking it's node name. Then depending 1625 * on whether it is referenced in Element content or in an Attr Node, checks if the EntityReference 1626 * references an unparsed entity or a external entity and if so throws raises the 1627 * appropriate well-formedness error. 1628 * 1629 * @param data The contents of the comment node 1630 * @parent The parent of the EntityReference Node 1631 */ 1632 protected void isEntityReferneceWellFormed(EntityReference node) { 1633 // Is the EntityReference name a valid XML name 1634 if (!isXMLName(node.getNodeName(), fIsXMLVersion11)) { 1635 String msg = 1636 Utils.messages.createMessage( 1637 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1638 new Object[] { "EntityReference", node.getNodeName()}); 1639 1640 if (fErrorHandler != null) { 1641 fErrorHandler.handleError( 1642 new DOMErrorImpl( 1643 DOMError.SEVERITY_FATAL_ERROR, 1644 msg, 1645 MsgKey.ER_WF_INVALID_CHARACTER_IN_NODE_NAME, 1646 null, 1647 null, 1648 null)); 1649 } 1650 } 1651 1652 // determine the parent node 1653 Node parent = node.getParentNode(); 1654 1655 // Traverse the declared entities and check if the nodeName and namespaceURI 1656 // of the EntityReference matches an Entity. If so, check the if the notationName 1657 // is not null, if so, report an error. 1658 DocumentType docType = node.getOwnerDocument().getDoctype(); 1659 if (docType != null) { 1660 NamedNodeMap entities = docType.getEntities(); 1661 for (int i = 0; i < entities.getLength(); i++) { 1662 Entity ent = (Entity) entities.item(i); 1663 1664 String nodeName = 1665 node.getNodeName() == null ? "" : node.getNodeName(); 1666 String nodeNamespaceURI = 1667 node.getNamespaceURI() == null 1668 ? "" 1669 : node.getNamespaceURI(); 1670 String entName = 1671 ent.getNodeName() == null ? "" : ent.getNodeName(); 1672 String entNamespaceURI = 1673 ent.getNamespaceURI() == null ? "" : ent.getNamespaceURI(); 1674 // If referenced in Element content 1675 // WFC: Parsed Entity 1676 if (parent.getNodeType() == Node.ELEMENT_NODE) { 1677 if (entNamespaceURI.equals(nodeNamespaceURI) 1678 && entName.equals(nodeName)) { 1679 1680 if (ent.getNotationName() != null) { 1681 String msg = 1682 Utils.messages.createMessage( 1683 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1684 new Object[] { node.getNodeName()}); 1685 1686 if (fErrorHandler != null) { 1687 fErrorHandler.handleError( 1688 new DOMErrorImpl( 1689 DOMError.SEVERITY_FATAL_ERROR, 1690 msg, 1691 MsgKey.ER_WF_REF_TO_UNPARSED_ENT, 1692 null, 1693 null, 1694 null)); 1695 } 1696 } 1697 } 1698 } // end if WFC: Parsed Entity 1699 1700 // If referenced in an Attr value 1701 // WFC: No External Entity References 1702 if (parent.getNodeType() == Node.ATTRIBUTE_NODE) { 1703 if (entNamespaceURI.equals(nodeNamespaceURI) 1704 && entName.equals(nodeName)) { 1705 1706 if (ent.getPublicId() != null 1707 || ent.getSystemId() != null 1708 || ent.getNotationName() != null) { 1709 String msg = 1710 Utils.messages.createMessage( 1711 MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1712 new Object[] { node.getNodeName()}); 1713 1714 if (fErrorHandler != null) { 1715 fErrorHandler.handleError( 1716 new DOMErrorImpl( 1717 DOMError.SEVERITY_FATAL_ERROR, 1718 msg, 1719 MsgKey.ER_WF_REF_TO_EXTERNAL_ENT, 1720 null, 1721 null, 1722 null)); 1723 } 1724 } 1725 } 1726 } //end if WFC: No External Entity References 1727 } 1728 } 1729 } // isEntityReferneceWellFormed 1730 1731 /** 1732 * If the configuration parameter "namespaces" is set to true, this methods 1733 * checks if an entity whose replacement text contains unbound namespace 1734 * prefixes is referenced in a location where there are no bindings for 1735 * the namespace prefixes and if so raises a LSException with the error-type 1736 * "unbound-prefix-in-entity-reference" 1737 * 1738 * @param Node, The EntityReference nodes whose children are to be checked 1739 */ 1740 protected void checkUnboundPrefixInEntRef(Node node) { 1741 Node child, next; 1742 for (child = node.getFirstChild(); child != null; child = next) { 1743 next = child.getNextSibling(); 1744 1745 if (child.getNodeType() == Node.ELEMENT_NODE) { 1746 1747 //If a NamespaceURI is not declared for the current 1748 //node's prefix, raise a fatal error. 1749 String prefix = child.getPrefix(); 1750 if (prefix != null 1751 && fNSBinder.getURI(prefix) == null) { 1752 String msg = 1753 Utils.messages.createMessage( 1754 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1755 new Object[] { 1756 node.getNodeName(), 1757 child.getNodeName(), 1758 prefix }); 1759 1760 if (fErrorHandler != null) { 1761 fErrorHandler.handleError( 1762 new DOMErrorImpl( 1763 DOMError.SEVERITY_FATAL_ERROR, 1764 msg, 1765 MsgKey.ER_ELEM_UNBOUND_PREFIX_IN_ENTREF, 1766 null, 1767 null, 1768 null)); 1769 } 1770 } 1771 1772 NamedNodeMap attrs = child.getAttributes(); 1773 1774 for (int i = 0; i < attrs.getLength(); i++) { 1775 String attrPrefix = attrs.item(i).getPrefix(); 1776 if (attrPrefix != null 1777 && fNSBinder.getURI(attrPrefix) == null) { 1778 String msg = 1779 Utils.messages.createMessage( 1780 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1781 new Object[] { 1782 node.getNodeName(), 1783 child.getNodeName(), 1784 attrs.item(i)}); 1785 1786 if (fErrorHandler != null) { 1787 fErrorHandler.handleError( 1788 new DOMErrorImpl( 1789 DOMError.SEVERITY_FATAL_ERROR, 1790 msg, 1791 MsgKey.ER_ATTR_UNBOUND_PREFIX_IN_ENTREF, 1792 null, 1793 null, 1794 null)); 1795 } 1796 } 1797 } 1798 } 1799 1800 if (child.hasChildNodes()) { 1801 checkUnboundPrefixInEntRef(child); 1802 } 1803 } 1804 } 1805 1806 // *********************************************************************** 1807 // Namespace normalization 1808 // *********************************************************************** 1809 /** 1810 * Records local namespace declarations, to be used for normalization later 1811 * 1812 * @param Node, The element node, whose namespace declarations are to be recorded 1813 */ 1814 protected void recordLocalNSDecl(Node node) { 1815 NamedNodeMap atts = ((Element) node).getAttributes(); 1816 int length = atts.getLength(); 1817 1818 for (int i = 0; i < length; i++) { 1819 Node attr = atts.item(i); 1820 1821 String localName = attr.getLocalName(); 1822 String attrPrefix = attr.getPrefix(); 1823 String attrValue = attr.getNodeValue(); 1824 String attrNS = attr.getNamespaceURI(); 1825 1826 localName = 1827 localName == null 1828 || XMLNS_PREFIX.equals(localName) ? "" : localName; 1829 attrPrefix = attrPrefix == null ? "" : attrPrefix; 1830 attrValue = attrValue == null ? "" : attrValue; 1831 attrNS = attrNS == null ? "" : attrNS; 1832 1833 // check if attribute is a namespace decl 1834 if (XMLNS_URI.equals(attrNS)) { 1835 1836 // No prefix may be bound to http://www.w3.org/2000/xmlns/. 1837 if (XMLNS_URI.equals(attrValue)) { 1838 String msg = 1839 Utils.messages.createMessage( 1840 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1841 new Object[] { attrPrefix, XMLNS_URI }); 1842 1843 if (fErrorHandler != null) { 1844 fErrorHandler.handleError( 1845 new DOMErrorImpl( 1846 DOMError.SEVERITY_ERROR, 1847 msg, 1848 MsgKey.ER_NS_PREFIX_CANNOT_BE_BOUND, 1849 null, 1850 null, 1851 null)); 1852 } 1853 } else { 1854 // store the namespace-declaration 1855 if (XMLNS_PREFIX.equals(attrPrefix) ) { 1856 // record valid decl 1857 if (attrValue.length() != 0) { 1858 fNSBinder.declarePrefix(localName, attrValue); 1859 } else { 1860 // Error; xmlns:prefix="" 1861 } 1862 } else { // xmlns 1863 // empty prefix is always bound ("" or some string) 1864 fNSBinder.declarePrefix("", attrValue); 1865 } 1866 } 1867 1868 } 1869 } 1870 } 1871 1872 /** 1873 * Fixes an element's namespace 1874 * 1875 * @param Node, The element node, whose namespace is to be fixed 1876 */ 1877 protected void fixupElementNS(Node node) throws SAXException { 1878 String namespaceURI = ((Element) node).getNamespaceURI(); 1879 String prefix = ((Element) node).getPrefix(); 1880 String localName = ((Element) node).getLocalName(); 1881 1882 if (namespaceURI != null) { 1883 //if ( Element's prefix/namespace pair (or default namespace, 1884 // if no prefix) are within the scope of a binding ) 1885 prefix = prefix == null ? "" : prefix; 1886 String inScopeNamespaceURI = fNSBinder.getURI(prefix); 1887 1888 if ((inScopeNamespaceURI != null 1889 && inScopeNamespaceURI.equals(namespaceURI))) { 1890 // do nothing, declaration in scope is inherited 1891 1892 } else { 1893 // Create a local namespace declaration attr for this namespace, 1894 // with Element's current prefix (or a default namespace, if 1895 // no prefix). If there's a conflicting local declaration 1896 // already present, change its value to use this namespace. 1897 1898 // Add the xmlns declaration attribute 1899 //fNSBinder.pushNamespace(prefix, namespaceURI, fElementDepth); 1900 if ((fFeatures & NAMESPACEDECLS) != 0) { 1901 if ("".equals(prefix) || "".equals(namespaceURI)) { 1902 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, namespaceURI); 1903 } else { 1904 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX + ":" + prefix, namespaceURI); 1905 } 1906 } 1907 fLocalNSBinder.declarePrefix(prefix, namespaceURI); 1908 fNSBinder.declarePrefix(prefix, namespaceURI); 1909 1910 } 1911 } else { 1912 // Element has no namespace 1913 // DOM Level 1 1914 if (localName == null || "".equals(localName)) { 1915 // DOM Level 1 node! 1916 String msg = 1917 Utils.messages.createMessage( 1918 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1919 new Object[] { node.getNodeName()}); 1920 1921 if (fErrorHandler != null) { 1922 fErrorHandler.handleError( 1923 new DOMErrorImpl( 1924 DOMError.SEVERITY_ERROR, 1925 msg, 1926 MsgKey.ER_NULL_LOCAL_ELEMENT_NAME, 1927 null, 1928 null, 1929 null)); 1930 } 1931 } else { 1932 namespaceURI = fNSBinder.getURI(""); 1933 if (namespaceURI !=null && namespaceURI.length() > 0) { 1934 ((Element)node).setAttributeNS(XMLNS_URI, XMLNS_PREFIX, ""); 1935 fLocalNSBinder.declarePrefix("", ""); 1936 fNSBinder.declarePrefix("", ""); 1937 } 1938 } 1939 } 1940 } 1941 /** 1942 * This table is a quick lookup of a property key (String) to the integer that 1943 * is the bit to flip in the fFeatures field, so the integers should have 1944 * values 1,2,4,8,16... 1945 * 1946 */ 1947 private static final Map<String, Integer> fFeatureMap; 1948 static { 1949 1950 // Initialize the mappings of property keys to bit values (Integer objects) 1951 // or mappings to a String object "", which indicates we are interested 1952 // in the property, but it does not have a simple bit value to flip 1953 1954 Map<String, Integer> featureMap = new HashMap<>(); 1955 // cdata-sections 1956 featureMap.put( 1957 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_CDATA_SECTIONS, 1958 CDATA); 1959 1960 // comments 1961 featureMap.put( 1962 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_COMMENTS, 1963 COMMENTS); 1964 1965 // element-content-whitespace 1966 featureMap.put( 1967 DOMConstants.S_DOM3_PROPERTIES_NS 1968 + DOMConstants.DOM_ELEMENT_CONTENT_WHITESPACE, 1969 ELEM_CONTENT_WHITESPACE); 1970 1971 // entities 1972 featureMap.put( 1973 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_ENTITIES, 1974 ENTITIES); 1975 1976 // namespaces 1977 featureMap.put( 1978 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_NAMESPACES, 1979 NAMESPACES); 1980 1981 // namespace-declarations 1982 featureMap.put( 1983 DOMConstants.S_DOM3_PROPERTIES_NS 1984 + DOMConstants.DOM_NAMESPACE_DECLARATIONS, 1985 NAMESPACEDECLS); 1986 1987 // split-cdata-sections 1988 featureMap.put( 1989 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_SPLIT_CDATA, 1990 SPLITCDATA); 1991 1992 // discard-default-content 1993 featureMap.put( 1994 DOMConstants.S_DOM3_PROPERTIES_NS + DOMConstants.DOM_WELLFORMED, 1995 WELLFORMED); 1996 1997 // discard-default-content 1998 featureMap.put( 1999 DOMConstants.S_DOM3_PROPERTIES_NS 2000 + DOMConstants.DOM_DISCARD_DEFAULT_CONTENT, 2001 DISCARDDEFAULT); 2002 2003 fFeatureMap = Collections.unmodifiableMap(featureMap); 2004 } 2005 2006 /** 2007 * Initializes fFeatures based on the DOMConfiguration Parameters set. 2008 * 2009 * @param properties DOMConfiguraiton properties that were set and which are 2010 * to be used while serializing the DOM. 2011 */ 2012 protected void initProperties(Properties properties) { 2013 2014 for (Enumeration keys = properties.keys(); keys.hasMoreElements();) { 2015 2016 final String key = (String) keys.nextElement(); 2017 2018 // caonical-form 2019 // Other features will be enabled or disabled when this is set to true or false. 2020 2021 // error-handler; set via the constructor 2022 2023 // infoset 2024 // Other features will be enabled or disabled when this is set to true 2025 2026 // A quick lookup for the given set of properties (cdata-sections ...) 2027 final Integer bitFlag = fFeatureMap.get(key); 2028 if (bitFlag != null) { 2029 // Dealing with a property that has a simple bit value that 2030 // we need to set 2031 2032 // cdata-sections 2033 // comments 2034 // element-content-whitespace 2035 // entities 2036 // namespaces 2037 // namespace-declarations 2038 // split-cdata-sections 2039 // well-formed 2040 // discard-default-content 2041 if ((properties.getProperty(key).endsWith("yes"))) { 2042 fFeatures = fFeatures | bitFlag; 2043 } else { 2044 fFeatures = fFeatures & ~bitFlag; 2045 } 2046 } else { 2047 /** 2048 * Other properties that have a bit more complex value 2049 * than the features in the above map. 2050 */ 2051 if ((DOMConstants.S_DOM3_PROPERTIES_NS 2052 + DOMConstants.DOM_FORMAT_PRETTY_PRINT) 2053 .equals(key)) { 2054 // format-pretty-print; set internally on the serializers via xsl:output properties in LSSerializer 2055 if ((properties.getProperty(key).endsWith("yes"))) { 2056 fSerializer.setIndent(true); 2057 fSerializer.setIndentAmount(4); 2058 } else { 2059 fSerializer.setIndent(false); 2060 } 2061 } else if ((DOMConstants.S_XSL_OUTPUT_OMIT_XML_DECL).equals(key)) { 2062 // omit-xml-declaration; set internally on the serializers via xsl:output properties in LSSerializer 2063 if ((properties.getProperty(key).endsWith("yes"))) { 2064 fSerializer.setOmitXMLDeclaration(true); 2065 } else { 2066 fSerializer.setOmitXMLDeclaration(false); 2067 } 2068 } else if ((DOMConstants.S_XERCES_PROPERTIES_NS 2069 + DOMConstants.S_XML_VERSION).equals(key)) { 2070 // Retreive the value of the XML Version attribute via the xml-version 2071 String version = properties.getProperty(key); 2072 if ("1.1".equals(version)) { 2073 fIsXMLVersion11 = true; 2074 fSerializer.setVersion(version); 2075 } else { 2076 fSerializer.setVersion("1.0"); 2077 } 2078 } else if ((DOMConstants.S_XSL_OUTPUT_ENCODING).equals(key)) { 2079 // Retreive the value of the XML Encoding attribute 2080 String encoding = properties.getProperty(key); 2081 if (encoding != null) { 2082 fSerializer.setEncoding(encoding); 2083 } 2084 } else if ((OutputPropertiesFactory.S_KEY_ENTITIES).equals(key)) { 2085 // Retreive the value of the XML Encoding attribute 2086 String entities = properties.getProperty(key); 2087 if (DOMConstants.S_XSL_VALUE_ENTITIES.equals(entities)) { 2088 fSerializer.setDTDEntityExpansion(false); 2089 } 2090 } 2091 } 2092 } 2093 // Set the newLine character to use 2094 if (fNewLine != null) { 2095 fSerializer.setOutputProperty(OutputPropertiesFactory.S_KEY_LINE_SEPARATOR, fNewLine); 2096 } 2097 } 2098 2099} //TreeWalker 2100