1require "rexml/parent" 2require "rexml/namespace" 3require "rexml/attribute" 4require "rexml/cdata" 5require "rexml/xpath" 6require "rexml/parseexception" 7 8module REXML 9 # An implementation note about namespaces: 10 # As we parse, when we find namespaces we put them in a hash and assign 11 # them a unique ID. We then convert the namespace prefix for the node 12 # to the unique ID. This makes namespace lookup much faster for the 13 # cost of extra memory use. We save the namespace prefix for the 14 # context node and convert it back when we write it. 15 @@namespaces = {} 16 17 # Represents a tagged XML element. Elements are characterized by 18 # having children, attributes, and names, and can themselves be 19 # children. 20 class Element < Parent 21 include Namespace 22 23 UNDEFINED = "UNDEFINED"; # The default name 24 25 # Mechanisms for accessing attributes and child elements of this 26 # element. 27 attr_reader :attributes, :elements 28 # The context holds information about the processing environment, such as 29 # whitespace handling. 30 attr_accessor :context 31 32 # Constructor 33 # arg:: 34 # if not supplied, will be set to the default value. 35 # If a String, the name of this object will be set to the argument. 36 # If an Element, the object will be shallowly cloned; name, 37 # attributes, and namespaces will be copied. Children will +not+ be 38 # copied. 39 # parent:: 40 # if supplied, must be a Parent, and will be used as 41 # the parent of this object. 42 # context:: 43 # If supplied, must be a hash containing context items. Context items 44 # include: 45 # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of 46 # strings being the names of the elements to respect 47 # whitespace for. Defaults to :+all+. 48 # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of 49 # strings being the names of the elements to ignore whitespace on. 50 # Overrides :+respect_whitespace+. 51 # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array 52 # of strings being the names of the elements in which to ignore 53 # whitespace-only nodes. If this is set, Text nodes which contain only 54 # whitespace will not be added to the document tree. 55 # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of 56 # the elements to process in raw mode. In raw mode, special 57 # characters in text is not converted to or from entities. 58 def initialize( arg = UNDEFINED, parent=nil, context=nil ) 59 super(parent) 60 61 @elements = Elements.new(self) 62 @attributes = Attributes.new(self) 63 @context = context 64 65 if arg.kind_of? String 66 self.name = arg 67 elsif arg.kind_of? Element 68 self.name = arg.expanded_name 69 arg.attributes.each_attribute{ |attribute| 70 @attributes << Attribute.new( attribute ) 71 } 72 @context = arg.context 73 end 74 end 75 76 def inspect 77 rv = "<#@expanded_name" 78 79 @attributes.each_attribute do |attr| 80 rv << " " 81 attr.write( rv, 0 ) 82 end 83 84 if children.size > 0 85 rv << "> ... </>" 86 else 87 rv << "/>" 88 end 89 end 90 91 92 # Creates a shallow copy of self. 93 # d = Document.new "<a><b/><b/><c><d/></c></a>" 94 # new_a = d.root.clone 95 # puts new_a # => "<a/>" 96 def clone 97 self.class.new self 98 end 99 100 # Evaluates to the root node of the document that this element 101 # belongs to. If this element doesn't belong to a document, but does 102 # belong to another Element, the parent's root will be returned, until the 103 # earliest ancestor is found. 104 # 105 # Note that this is not the same as the document element. 106 # In the following example, <a> is the document element, and the root 107 # node is the parent node of the document element. You may ask yourself 108 # why the root node is useful: consider the doctype and XML declaration, 109 # and any processing instructions before the document element... they 110 # are children of the root node, or siblings of the document element. 111 # The only time this isn't true is when an Element is created that is 112 # not part of any Document. In this case, the ancestor that has no 113 # parent acts as the root node. 114 # d = Document.new '<a><b><c/></b></a>' 115 # a = d[1] ; c = a[1][1] 116 # d.root_node == d # TRUE 117 # a.root_node # namely, d 118 # c.root_node # again, d 119 def root_node 120 parent.nil? ? self : parent.root_node 121 end 122 123 def root 124 return elements[1] if self.kind_of? Document 125 return self if parent.kind_of? Document or parent.nil? 126 return parent.root 127 end 128 129 # Evaluates to the document to which this element belongs, or nil if this 130 # element doesn't belong to a document. 131 def document 132 rt = root 133 rt.parent if rt 134 end 135 136 # Evaluates to +true+ if whitespace is respected for this element. This 137 # is the case if: 138 # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value 139 # 2. The context has :+respect_whitespace+ set to :+all+ or 140 # an array containing the name of this element, and 141 # :+compress_whitespace+ isn't set to :+all+ or an array containing the 142 # name of this element. 143 # The evaluation is tested against +expanded_name+, and so is namespace 144 # sensitive. 145 def whitespace 146 @whitespace = nil 147 if @context 148 if @context[:respect_whitespace] 149 @whitespace = (@context[:respect_whitespace] == :all or 150 @context[:respect_whitespace].include? expanded_name) 151 end 152 @whitespace = false if (@context[:compress_whitespace] and 153 (@context[:compress_whitespace] == :all or 154 @context[:compress_whitespace].include? expanded_name) 155 ) 156 end 157 @whitespace = true unless @whitespace == false 158 @whitespace 159 end 160 161 def ignore_whitespace_nodes 162 @ignore_whitespace_nodes = false 163 if @context 164 if @context[:ignore_whitespace_nodes] 165 @ignore_whitespace_nodes = 166 (@context[:ignore_whitespace_nodes] == :all or 167 @context[:ignore_whitespace_nodes].include? expanded_name) 168 end 169 end 170 end 171 172 # Evaluates to +true+ if raw mode is set for this element. This 173 # is the case if the context has :+raw+ set to :+all+ or 174 # an array containing the name of this element. 175 # 176 # The evaluation is tested against +expanded_name+, and so is namespace 177 # sensitive. 178 def raw 179 @raw = (@context and @context[:raw] and 180 (@context[:raw] == :all or 181 @context[:raw].include? expanded_name)) 182 @raw 183 end 184 185 #once :whitespace, :raw, :ignore_whitespace_nodes 186 187 ################################################# 188 # Namespaces # 189 ################################################# 190 191 # Evaluates to an +Array+ containing the prefixes (names) of all defined 192 # namespaces at this context node. 193 # doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") 194 # doc.elements['//b'].prefixes # -> ['x', 'y'] 195 def prefixes 196 prefixes = [] 197 prefixes = parent.prefixes if parent 198 prefixes |= attributes.prefixes 199 return prefixes 200 end 201 202 def namespaces 203 namespaces = {} 204 namespaces = parent.namespaces if parent 205 namespaces = namespaces.merge( attributes.namespaces ) 206 return namespaces 207 end 208 209 # Evalutas to the URI for a prefix, or the empty string if no such 210 # namespace is declared for this element. Evaluates recursively for 211 # ancestors. Returns the default namespace, if there is one. 212 # prefix:: 213 # the prefix to search for. If not supplied, returns the default 214 # namespace if one exists 215 # Returns:: 216 # the namespace URI as a String, or nil if no such namespace 217 # exists. If the namespace is undefined, returns an empty string 218 # doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>") 219 # b = doc.elements['//b'] 220 # b.namespace # -> '1' 221 # b.namespace("y") # -> '2' 222 def namespace(prefix=nil) 223 if prefix.nil? 224 prefix = prefix() 225 end 226 if prefix == '' 227 prefix = "xmlns" 228 else 229 prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns' 230 end 231 ns = attributes[ prefix ] 232 ns = parent.namespace(prefix) if ns.nil? and parent 233 ns = '' if ns.nil? and prefix == 'xmlns' 234 return ns 235 end 236 237 # Adds a namespace to this element. 238 # prefix:: 239 # the prefix string, or the namespace URI if +uri+ is not 240 # supplied 241 # uri:: 242 # the namespace URI. May be nil, in which +prefix+ is used as 243 # the URI 244 # Evaluates to: this Element 245 # a = Element.new("a") 246 # a.add_namespace("xmlns:foo", "bar" ) 247 # a.add_namespace("foo", "bar") # shorthand for previous line 248 # a.add_namespace("twiddle") 249 # puts a #-> <a xmlns:foo='bar' xmlns='twiddle'/> 250 def add_namespace( prefix, uri=nil ) 251 unless uri 252 @attributes["xmlns"] = prefix 253 else 254 prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/ 255 @attributes[ prefix ] = uri 256 end 257 self 258 end 259 260 # Removes a namespace from this node. This only works if the namespace is 261 # actually declared in this node. If no argument is passed, deletes the 262 # default namespace. 263 # 264 # Evaluates to: this element 265 # doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>" 266 # doc.root.delete_namespace 267 # puts doc # -> <a xmlns:foo='bar'/> 268 # doc.root.delete_namespace 'foo' 269 # puts doc # -> <a/> 270 def delete_namespace namespace="xmlns" 271 namespace = "xmlns:#{namespace}" unless namespace == 'xmlns' 272 attribute = attributes.get_attribute(namespace) 273 attribute.remove unless attribute.nil? 274 self 275 end 276 277 ################################################# 278 # Elements # 279 ################################################# 280 281 # Adds a child to this element, optionally setting attributes in 282 # the element. 283 # element:: 284 # optional. If Element, the element is added. 285 # Otherwise, a new Element is constructed with the argument (see 286 # Element.initialize). 287 # attrs:: 288 # If supplied, must be a Hash containing String name,value 289 # pairs, which will be used to set the attributes of the new Element. 290 # Returns:: the Element that was added 291 # el = doc.add_element 'my-tag' 292 # el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'} 293 # el = Element.new 'my-tag' 294 # doc.add_element el 295 def add_element element, attrs=nil 296 raise "First argument must be either an element name, or an Element object" if element.nil? 297 el = @elements.add(element) 298 attrs.each do |key, value| 299 el.attributes[key]=value 300 end if attrs.kind_of? Hash 301 el 302 end 303 304 # Deletes a child element. 305 # element:: 306 # Must be an +Element+, +String+, or +Integer+. If Element, 307 # the element is removed. If String, the element is found (via XPath) 308 # and removed. <em>This means that any parent can remove any 309 # descendant.<em> If Integer, the Element indexed by that number will be 310 # removed. 311 # Returns:: the element that was removed. 312 # doc.delete_element "/a/b/c[@id='4']" 313 # doc.delete_element doc.elements["//k"] 314 # doc.delete_element 1 315 def delete_element element 316 @elements.delete element 317 end 318 319 # Evaluates to +true+ if this element has at least one child Element 320 # doc = Document.new "<a><b/><c>Text</c></a>" 321 # doc.root.has_elements # -> true 322 # doc.elements["/a/b"].has_elements # -> false 323 # doc.elements["/a/c"].has_elements # -> false 324 def has_elements? 325 !@elements.empty? 326 end 327 328 # Iterates through the child elements, yielding for each Element that 329 # has a particular attribute set. 330 # key:: 331 # the name of the attribute to search for 332 # value:: 333 # the value of the attribute 334 # max:: 335 # (optional) causes this method to return after yielding 336 # for this number of matching children 337 # name:: 338 # (optional) if supplied, this is an XPath that filters 339 # the children to check. 340 # 341 # doc = Document.new "<a><b @id='1'/><c @id='2'/><d @id='1'/><e/></a>" 342 # # Yields b, c, d 343 # doc.root.each_element_with_attribute( 'id' ) {|e| p e} 344 # # Yields b, d 345 # doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e} 346 # # Yields b 347 # doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e} 348 # # Yields d 349 # doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e} 350 def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element 351 each_with_something( proc {|child| 352 if value.nil? 353 child.attributes[key] != nil 354 else 355 child.attributes[key]==value 356 end 357 }, max, name, &block ) 358 end 359 360 # Iterates through the children, yielding for each Element that 361 # has a particular text set. 362 # text:: 363 # the text to search for. If nil, or not supplied, will iterate 364 # over all +Element+ children that contain at least one +Text+ node. 365 # max:: 366 # (optional) causes this method to return after yielding 367 # for this number of matching children 368 # name:: 369 # (optional) if supplied, this is an XPath that filters 370 # the children to check. 371 # 372 # doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>' 373 # # Yields b, c, d 374 # doc.each_element_with_text {|e|p e} 375 # # Yields b, c 376 # doc.each_element_with_text('b'){|e|p e} 377 # # Yields b 378 # doc.each_element_with_text('b', 1){|e|p e} 379 # # Yields d 380 # doc.each_element_with_text(nil, 0, 'd'){|e|p e} 381 def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element 382 each_with_something( proc {|child| 383 if text.nil? 384 child.has_text? 385 else 386 child.text == text 387 end 388 }, max, name, &block ) 389 end 390 391 # Synonym for Element.elements.each 392 def each_element( xpath=nil, &block ) # :yields: Element 393 @elements.each( xpath, &block ) 394 end 395 396 # Synonym for Element.to_a 397 # This is a little slower than calling elements.each directly. 398 # xpath:: any XPath by which to search for elements in the tree 399 # Returns:: an array of Elements that match the supplied path 400 def get_elements( xpath ) 401 @elements.to_a( xpath ) 402 end 403 404 # Returns the next sibling that is an element, or nil if there is 405 # no Element sibling after this one 406 # doc = Document.new '<a><b/>text<c/></a>' 407 # doc.root.elements['b'].next_element #-> <c/> 408 # doc.root.elements['c'].next_element #-> nil 409 def next_element 410 element = next_sibling 411 element = element.next_sibling until element.nil? or element.kind_of? Element 412 return element 413 end 414 415 # Returns the previous sibling that is an element, or nil if there is 416 # no Element sibling prior to this one 417 # doc = Document.new '<a><b/>text<c/></a>' 418 # doc.root.elements['c'].previous_element #-> <b/> 419 # doc.root.elements['b'].previous_element #-> nil 420 def previous_element 421 element = previous_sibling 422 element = element.previous_sibling until element.nil? or element.kind_of? Element 423 return element 424 end 425 426 427 ################################################# 428 # Text # 429 ################################################# 430 431 # Evaluates to +true+ if this element has at least one Text child 432 def has_text? 433 not text().nil? 434 end 435 436 # A convenience method which returns the String value of the _first_ 437 # child text element, if one exists, and +nil+ otherwise. 438 # 439 # <em>Note that an element may have multiple Text elements, perhaps 440 # separated by other children</em>. Be aware that this method only returns 441 # the first Text node. 442 # 443 # This method returns the +value+ of the first text child node, which 444 # ignores the +raw+ setting, so always returns normalized text. See 445 # the Text::value documentation. 446 # 447 # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" 448 # # The element 'p' has two text elements, "some text " and " more text". 449 # doc.root.text #-> "some text " 450 def text( path = nil ) 451 rv = get_text(path) 452 return rv.value unless rv.nil? 453 nil 454 end 455 456 # Returns the first child Text node, if any, or +nil+ otherwise. 457 # This method returns the actual +Text+ node, rather than the String content. 458 # doc = Document.new "<p>some text <b>this is bold!</b> more text</p>" 459 # # The element 'p' has two text elements, "some text " and " more text". 460 # doc.root.get_text.value #-> "some text " 461 def get_text path = nil 462 rv = nil 463 if path 464 element = @elements[ path ] 465 rv = element.get_text unless element.nil? 466 else 467 rv = @children.find { |node| node.kind_of? Text } 468 end 469 return rv 470 end 471 472 # Sets the first Text child of this object. See text() for a 473 # discussion about Text children. 474 # 475 # If a Text child already exists, the child is replaced by this 476 # content. This means that Text content can be deleted by calling 477 # this method with a nil argument. In this case, the next Text 478 # child becomes the first Text child. In no case is the order of 479 # any siblings disturbed. 480 # text:: 481 # If a String, a new Text child is created and added to 482 # this Element as the first Text child. If Text, the text is set 483 # as the first Child element. If nil, then any existing first Text 484 # child is removed. 485 # Returns:: this Element. 486 # doc = Document.new '<a><b/></a>' 487 # doc.root.text = 'Sean' #-> '<a><b/>Sean</a>' 488 # doc.root.text = 'Elliott' #-> '<a><b/>Elliott</a>' 489 # doc.root.add_element 'c' #-> '<a><b/>Elliott<c/></a>' 490 # doc.root.text = 'Russell' #-> '<a><b/>Russell<c/></a>' 491 # doc.root.text = nil #-> '<a><b/><c/></a>' 492 def text=( text ) 493 if text.kind_of? String 494 text = Text.new( text, whitespace(), nil, raw() ) 495 elsif !text.nil? and !text.kind_of? Text 496 text = Text.new( text.to_s, whitespace(), nil, raw() ) 497 end 498 old_text = get_text 499 if text.nil? 500 old_text.remove unless old_text.nil? 501 else 502 if old_text.nil? 503 self << text 504 else 505 old_text.replace_with( text ) 506 end 507 end 508 return self 509 end 510 511 # A helper method to add a Text child. Actual Text instances can 512 # be added with regular Parent methods, such as add() and <<() 513 # text:: 514 # if a String, a new Text instance is created and added 515 # to the parent. If Text, the object is added directly. 516 # Returns:: this Element 517 # e = Element.new('a') #-> <e/> 518 # e.add_text 'foo' #-> <e>foo</e> 519 # e.add_text Text.new(' bar') #-> <e>foo bar</e> 520 # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e' 521 # element and <b>2</b> Text node children. 522 def add_text( text ) 523 if text.kind_of? String 524 if @children[-1].kind_of? Text 525 @children[-1] << text 526 return 527 end 528 text = Text.new( text, whitespace(), nil, raw() ) 529 end 530 self << text unless text.nil? 531 return self 532 end 533 534 def node_type 535 :element 536 end 537 538 def xpath 539 path_elements = [] 540 cur = self 541 path_elements << __to_xpath_helper( self ) 542 while cur.parent 543 cur = cur.parent 544 path_elements << __to_xpath_helper( cur ) 545 end 546 return path_elements.reverse.join( "/" ) 547 end 548 549 ################################################# 550 # Attributes # 551 ################################################# 552 553 def attribute( name, namespace=nil ) 554 prefix = nil 555 if namespaces.respond_to? :key 556 prefix = namespaces.key(namespace) if namespace 557 else 558 prefix = namespaces.index(namespace) if namespace 559 end 560 prefix = nil if prefix == 'xmlns' 561 562 ret_val = 563 attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" ) 564 565 return ret_val unless ret_val.nil? 566 return nil if prefix.nil? 567 568 # now check that prefix'es namespace is not the same as the 569 # default namespace 570 return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] ) 571 572 attributes.get_attribute( name ) 573 574 end 575 576 # Evaluates to +true+ if this element has any attributes set, false 577 # otherwise. 578 def has_attributes? 579 return !@attributes.empty? 580 end 581 582 # Adds an attribute to this element, overwriting any existing attribute 583 # by the same name. 584 # key:: 585 # can be either an Attribute or a String. If an Attribute, 586 # the attribute is added to the list of Element attributes. If String, 587 # the argument is used as the name of the new attribute, and the value 588 # parameter must be supplied. 589 # value:: 590 # Required if +key+ is a String, and ignored if the first argument is 591 # an Attribute. This is a String, and is used as the value 592 # of the new Attribute. This should be the unnormalized value of the 593 # attribute (without entities). 594 # Returns:: the Attribute added 595 # e = Element.new 'e' 596 # e.add_attribute( 'a', 'b' ) #-> <e a='b'/> 597 # e.add_attribute( 'x:a', 'c' ) #-> <e a='b' x:a='c'/> 598 # e.add_attribute Attribute.new('b', 'd') #-> <e a='b' x:a='c' b='d'/> 599 def add_attribute( key, value=nil ) 600 if key.kind_of? Attribute 601 @attributes << key 602 else 603 @attributes[key] = value 604 end 605 end 606 607 # Add multiple attributes to this element. 608 # hash:: is either a hash, or array of arrays 609 # el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} ) 610 # el.add_attributes( [ ["name1","value1"], ["name2"=>"value2"] ] ) 611 def add_attributes hash 612 if hash.kind_of? Hash 613 hash.each_pair {|key, value| @attributes[key] = value } 614 elsif hash.kind_of? Array 615 hash.each { |value| @attributes[ value[0] ] = value[1] } 616 end 617 end 618 619 # Removes an attribute 620 # key:: 621 # either an Attribute or a String. In either case, the 622 # attribute is found by matching the attribute name to the argument, 623 # and then removed. If no attribute is found, no action is taken. 624 # Returns:: 625 # the attribute removed, or nil if this Element did not contain 626 # a matching attribute 627 # e = Element.new('E') 628 # e.add_attribute( 'name', 'Sean' ) #-> <E name='Sean'/> 629 # r = e.add_attribute( 'sur:name', 'Russell' ) #-> <E name='Sean' sur:name='Russell'/> 630 # e.delete_attribute( 'name' ) #-> <E sur:name='Russell'/> 631 # e.delete_attribute( r ) #-> <E/> 632 def delete_attribute(key) 633 attr = @attributes.get_attribute(key) 634 attr.remove unless attr.nil? 635 end 636 637 ################################################# 638 # Other Utilities # 639 ################################################# 640 641 # Get an array of all CData children. 642 # IMMUTABLE 643 def cdatas 644 find_all { |child| child.kind_of? CData }.freeze 645 end 646 647 # Get an array of all Comment children. 648 # IMMUTABLE 649 def comments 650 find_all { |child| child.kind_of? Comment }.freeze 651 end 652 653 # Get an array of all Instruction children. 654 # IMMUTABLE 655 def instructions 656 find_all { |child| child.kind_of? Instruction }.freeze 657 end 658 659 # Get an array of all Text children. 660 # IMMUTABLE 661 def texts 662 find_all { |child| child.kind_of? Text }.freeze 663 end 664 665 # == DEPRECATED 666 # See REXML::Formatters 667 # 668 # Writes out this element, and recursively, all children. 669 # output:: 670 # output an object which supports '<< string'; this is where the 671 # document will be written. 672 # indent:: 673 # An integer. If -1, no indenting will be used; otherwise, the 674 # indentation will be this number of spaces, and children will be 675 # indented an additional amount. Defaults to -1 676 # transitive:: 677 # If transitive is true and indent is >= 0, then the output will be 678 # pretty-printed in such a way that the added whitespace does not affect 679 # the parse tree of the document 680 # ie_hack:: 681 # Internet Explorer is the worst piece of crap to have ever been 682 # written, with the possible exception of Windows itself. Since IE is 683 # unable to parse proper XML, we have to provide a hack to generate XML 684 # that IE's limited abilities can handle. This hack inserts a space 685 # before the /> on empty tags. Defaults to false 686 # 687 # out = '' 688 # doc.write( out ) #-> doc is written to the string 'out' 689 # doc.write( $stdout ) #-> doc written to the console 690 def write(output=$stdout, indent=-1, transitive=false, ie_hack=false) 691 Kernel.warn("#{self.class.name}.write is deprecated. See REXML::Formatters") 692 formatter = if indent > -1 693 if transitive 694 require "rexml/formatters/transitive" 695 REXML::Formatters::Transitive.new( indent, ie_hack ) 696 else 697 REXML::Formatters::Pretty.new( indent, ie_hack ) 698 end 699 else 700 REXML::Formatters::Default.new( ie_hack ) 701 end 702 formatter.write( self, output ) 703 end 704 705 706 private 707 def __to_xpath_helper node 708 rv = node.expanded_name.clone 709 if node.parent 710 results = node.parent.find_all {|n| 711 n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name 712 } 713 if results.length > 1 714 idx = results.index( node ) 715 rv << "[#{idx+1}]" 716 end 717 end 718 rv 719 end 720 721 # A private helper method 722 def each_with_something( test, max=0, name=nil ) 723 num = 0 724 @elements.each( name ){ |child| 725 yield child if test.call(child) and num += 1 726 return if max>0 and num == max 727 } 728 end 729 end 730 731 ######################################################################## 732 # ELEMENTS # 733 ######################################################################## 734 735 # A class which provides filtering of children for Elements, and 736 # XPath search support. You are expected to only encounter this class as 737 # the <tt>element.elements</tt> object. Therefore, you are 738 # _not_ expected to instantiate this yourself. 739 class Elements 740 include Enumerable 741 # Constructor 742 # parent:: the parent Element 743 def initialize parent 744 @element = parent 745 end 746 747 # Fetches a child element. Filters only Element children, regardless of 748 # the XPath match. 749 # index:: 750 # the search parameter. This is either an Integer, which 751 # will be used to find the index'th child Element, or an XPath, 752 # which will be used to search for the Element. <em>Because 753 # of the nature of XPath searches, any element in the connected XML 754 # document can be fetched through any other element.</em> <b>The 755 # Integer index is 1-based, not 0-based.</b> This means that the first 756 # child element is at index 1, not 0, and the +n+th element is at index 757 # +n+, not <tt>n-1</tt>. This is because XPath indexes element children 758 # starting from 1, not 0, and the indexes should be the same. 759 # name:: 760 # optional, and only used in the first argument is an 761 # Integer. In that case, the index'th child Element that has the 762 # supplied name will be returned. Note again that the indexes start at 1. 763 # Returns:: the first matching Element, or nil if no child matched 764 # doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>' 765 # doc.root.elements[1] #-> <b/> 766 # doc.root.elements['c'] #-> <c id="1"/> 767 # doc.root.elements[2,'c'] #-> <c id="2"/> 768 def []( index, name=nil) 769 if index.kind_of? Integer 770 raise "index (#{index}) must be >= 1" if index < 1 771 name = literalize(name) if name 772 num = 0 773 @element.find { |child| 774 child.kind_of? Element and 775 (name.nil? ? true : child.has_name?( name )) and 776 (num += 1) == index 777 } 778 else 779 return XPath::first( @element, index ) 780 #{ |element| 781 # return element if element.kind_of? Element 782 #} 783 #return nil 784 end 785 end 786 787 # Sets an element, replacing any previous matching element. If no 788 # existing element is found ,the element is added. 789 # index:: Used to find a matching element to replace. See [](). 790 # element:: 791 # The element to replace the existing element with 792 # the previous element 793 # Returns:: nil if no previous element was found. 794 # 795 # doc = Document.new '<a/>' 796 # doc.root.elements[10] = Element.new('b') #-> <a><b/></a> 797 # doc.root.elements[1] #-> <b/> 798 # doc.root.elements[1] = Element.new('c') #-> <a><c/></a> 799 # doc.root.elements['c'] = Element.new('d') #-> <a><d/></a> 800 def []=( index, element ) 801 previous = self[index] 802 if previous.nil? 803 @element.add element 804 else 805 previous.replace_with element 806 end 807 return previous 808 end 809 810 # Returns +true+ if there are no +Element+ children, +false+ otherwise 811 def empty? 812 @element.find{ |child| child.kind_of? Element}.nil? 813 end 814 815 # Returns the index of the supplied child (starting at 1), or -1 if 816 # the element is not a child 817 # element:: an +Element+ child 818 def index element 819 rv = 0 820 found = @element.find do |child| 821 child.kind_of? Element and 822 (rv += 1) and 823 child == element 824 end 825 return rv if found == element 826 return -1 827 end 828 829 # Deletes a child Element 830 # element:: 831 # Either an Element, which is removed directly; an 832 # xpath, where the first matching child is removed; or an Integer, 833 # where the n'th Element is removed. 834 # Returns:: the removed child 835 # doc = Document.new '<a><b/><c/><c id="1"/></a>' 836 # b = doc.root.elements[1] 837 # doc.root.elements.delete b #-> <a><c/><c id="1"/></a> 838 # doc.elements.delete("a/c[@id='1']") #-> <a><c/></a> 839 # doc.root.elements.delete 1 #-> <a/> 840 def delete element 841 if element.kind_of? Element 842 @element.delete element 843 else 844 el = self[element] 845 el.remove if el 846 end 847 end 848 849 # Removes multiple elements. Filters for Element children, regardless of 850 # XPath matching. 851 # xpath:: all elements matching this String path are removed. 852 # Returns:: an Array of Elements that have been removed 853 # doc = Document.new '<a><c/><c/><c/><c/></a>' 854 # deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>] 855 def delete_all( xpath ) 856 rv = [] 857 XPath::each( @element, xpath) {|element| 858 rv << element if element.kind_of? Element 859 } 860 rv.each do |element| 861 @element.delete element 862 element.remove 863 end 864 return rv 865 end 866 867 # Adds an element 868 # element:: 869 # if supplied, is either an Element, String, or 870 # Source (see Element.initialize). If not supplied or nil, a 871 # new, default Element will be constructed 872 # Returns:: the added Element 873 # a = Element.new('a') 874 # a.elements.add(Element.new('b')) #-> <a><b/></a> 875 # a.elements.add('c') #-> <a><b/><c/></a> 876 def add element=nil 877 if element.nil? 878 Element.new("", self, @element.context) 879 elsif not element.kind_of?(Element) 880 Element.new(element, self, @element.context) 881 else 882 @element << element 883 element.context = @element.context 884 element 885 end 886 end 887 888 alias :<< :add 889 890 # Iterates through all of the child Elements, optionally filtering 891 # them by a given XPath 892 # xpath:: 893 # optional. If supplied, this is a String XPath, and is used to 894 # filter the children, so that only matching children are yielded. Note 895 # that XPaths are automatically filtered for Elements, so that 896 # non-Element children will not be yielded 897 # doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>' 898 # doc.root.elements.each {|e|p e} #-> Yields b, c, d, b, c, d elements 899 # doc.root.elements.each('b') {|e|p e} #-> Yields b, b elements 900 # doc.root.elements.each('child::node()') {|e|p e} 901 # #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/> 902 # XPath.each(doc.root, 'child::node()', &block) 903 # #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/> 904 def each( xpath=nil ) 905 XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element } 906 end 907 908 def collect( xpath=nil ) 909 collection = [] 910 XPath::each( @element, xpath ) {|e| 911 collection << yield(e) if e.kind_of?(Element) 912 } 913 collection 914 end 915 916 def inject( xpath=nil, initial=nil ) 917 first = true 918 XPath::each( @element, xpath ) {|e| 919 if (e.kind_of? Element) 920 if (first and initial == nil) 921 initial = e 922 first = false 923 else 924 initial = yield( initial, e ) if e.kind_of? Element 925 end 926 end 927 } 928 initial 929 end 930 931 # Returns the number of +Element+ children of the parent object. 932 # doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>' 933 # doc.root.size #-> 6, 3 element and 3 text nodes 934 # doc.root.elements.size #-> 3 935 def size 936 count = 0 937 @element.each {|child| count+=1 if child.kind_of? Element } 938 count 939 end 940 941 # Returns an Array of Element children. An XPath may be supplied to 942 # filter the children. Only Element children are returned, even if the 943 # supplied XPath matches non-Element children. 944 # doc = Document.new '<a>sean<b/>elliott<c/></a>' 945 # doc.root.elements.to_a #-> [ <b/>, <c/> ] 946 # doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ] 947 # XPath.match(doc.root, "child::node()") #-> [ sean, <b/>, elliott, <c/> ] 948 def to_a( xpath=nil ) 949 rv = XPath.match( @element, xpath ) 950 return rv.find_all{|e| e.kind_of? Element} if xpath 951 rv 952 end 953 954 private 955 # Private helper class. Removes quotes from quoted strings 956 def literalize name 957 name = name[1..-2] if name[0] == ?' or name[0] == ?" #' 958 name 959 end 960 end 961 962 ######################################################################## 963 # ATTRIBUTES # 964 ######################################################################## 965 966 # A class that defines the set of Attributes of an Element and provides 967 # operations for accessing elements in that set. 968 class Attributes < Hash 969 # Constructor 970 # element:: the Element of which this is an Attribute 971 def initialize element 972 @element = element 973 end 974 975 # Fetches an attribute value. If you want to get the Attribute itself, 976 # use get_attribute() 977 # name:: an XPath attribute name. Namespaces are relevant here. 978 # Returns:: 979 # the String value of the matching attribute, or +nil+ if no 980 # matching attribute was found. This is the unnormalized value 981 # (with entities expanded). 982 # 983 # doc = Document.new "<a foo:att='1' bar:att='2' att='<'/>" 984 # doc.root.attributes['att'] #-> '<' 985 # doc.root.attributes['bar:att'] #-> '2' 986 def [](name) 987 attr = get_attribute(name) 988 return attr.value unless attr.nil? 989 return nil 990 end 991 992 def to_a 993 values.flatten 994 end 995 996 # Returns the number of attributes the owning Element contains. 997 # doc = Document "<a x='1' y='2' foo:x='3'/>" 998 # doc.root.attributes.length #-> 3 999 def length 1000 c = 0 1001 each_attribute { c+=1 } 1002 c 1003 end 1004 alias :size :length 1005 1006 # Iterates over the attributes of an Element. Yields actual Attribute 1007 # nodes, not String values. 1008 # 1009 # doc = Document.new '<a x="1" y="2"/>' 1010 # doc.root.attributes.each_attribute {|attr| 1011 # p attr.expanded_name+" => "+attr.value 1012 # } 1013 def each_attribute # :yields: attribute 1014 each_value do |val| 1015 if val.kind_of? Attribute 1016 yield val 1017 else 1018 val.each_value { |atr| yield atr } 1019 end 1020 end 1021 end 1022 1023 # Iterates over each attribute of an Element, yielding the expanded name 1024 # and value as a pair of Strings. 1025 # 1026 # doc = Document.new '<a x="1" y="2"/>' 1027 # doc.root.attributes.each {|name, value| p name+" => "+value } 1028 def each 1029 each_attribute do |attr| 1030 yield [attr.expanded_name, attr.value] 1031 end 1032 end 1033 1034 # Fetches an attribute 1035 # name:: 1036 # the name by which to search for the attribute. Can be a 1037 # <tt>prefix:name</tt> namespace name. 1038 # Returns:: The first matching attribute, or nil if there was none. This 1039 # value is an Attribute node, not the String value of the attribute. 1040 # doc = Document.new '<a x:foo="1" foo="2" bar="3"/>' 1041 # doc.root.attributes.get_attribute("foo").value #-> "2" 1042 # doc.root.attributes.get_attribute("x:foo").value #-> "1" 1043 def get_attribute( name ) 1044 attr = fetch( name, nil ) 1045 if attr.nil? 1046 return nil if name.nil? 1047 # Look for prefix 1048 name =~ Namespace::NAMESPLIT 1049 prefix, n = $1, $2 1050 if prefix 1051 attr = fetch( n, nil ) 1052 # check prefix 1053 if attr == nil 1054 elsif attr.kind_of? Attribute 1055 return attr if prefix == attr.prefix 1056 else 1057 attr = attr[ prefix ] 1058 return attr 1059 end 1060 end 1061 element_document = @element.document 1062 if element_document and element_document.doctype 1063 expn = @element.expanded_name 1064 expn = element_document.doctype.name if expn.size == 0 1065 attr_val = element_document.doctype.attribute_of(expn, name) 1066 return Attribute.new( name, attr_val ) if attr_val 1067 end 1068 return nil 1069 end 1070 if attr.kind_of? Hash 1071 attr = attr[ @element.prefix ] 1072 end 1073 return attr 1074 end 1075 1076 # Sets an attribute, overwriting any existing attribute value by the 1077 # same name. Namespace is significant. 1078 # name:: the name of the attribute 1079 # value:: 1080 # (optional) If supplied, the value of the attribute. If 1081 # nil, any existing matching attribute is deleted. 1082 # Returns:: 1083 # Owning element 1084 # doc = Document.new "<a x:foo='1' foo='3'/>" 1085 # doc.root.attributes['y:foo'] = '2' 1086 # doc.root.attributes['foo'] = '4' 1087 # doc.root.attributes['x:foo'] = nil 1088 def []=( name, value ) 1089 if value.nil? # Delete the named attribute 1090 attr = get_attribute(name) 1091 delete attr 1092 return 1093 end 1094 1095 unless value.kind_of? Attribute 1096 if @element.document and @element.document.doctype 1097 value = Text::normalize( value, @element.document.doctype ) 1098 else 1099 value = Text::normalize( value, nil ) 1100 end 1101 value = Attribute.new(name, value) 1102 end 1103 value.element = @element 1104 old_attr = fetch(value.name, nil) 1105 if old_attr.nil? 1106 store(value.name, value) 1107 elsif old_attr.kind_of? Hash 1108 old_attr[value.prefix] = value 1109 elsif old_attr.prefix != value.prefix 1110 # Check for conflicting namespaces 1111 raise ParseException.new( 1112 "Namespace conflict in adding attribute \"#{value.name}\": "+ 1113 "Prefix \"#{old_attr.prefix}\" = "+ 1114 "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+ 1115 "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if 1116 value.prefix != "xmlns" and old_attr.prefix != "xmlns" and 1117 @element.namespace( old_attr.prefix ) == 1118 @element.namespace( value.prefix ) 1119 store value.name, { old_attr.prefix => old_attr, 1120 value.prefix => value } 1121 else 1122 store value.name, value 1123 end 1124 return @element 1125 end 1126 1127 # Returns an array of Strings containing all of the prefixes declared 1128 # by this set of # attributes. The array does not include the default 1129 # namespace declaration, if one exists. 1130 # doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+ 1131 # "z='glorp' p:k='gru'/>") 1132 # prefixes = doc.root.attributes.prefixes #-> ['x', 'y'] 1133 def prefixes 1134 ns = [] 1135 each_attribute do |attribute| 1136 ns << attribute.name if attribute.prefix == 'xmlns' 1137 end 1138 if @element.document and @element.document.doctype 1139 expn = @element.expanded_name 1140 expn = @element.document.doctype.name if expn.size == 0 1141 @element.document.doctype.attributes_of(expn).each { 1142 |attribute| 1143 ns << attribute.name if attribute.prefix == 'xmlns' 1144 } 1145 end 1146 ns 1147 end 1148 1149 def namespaces 1150 namespaces = {} 1151 each_attribute do |attribute| 1152 namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' 1153 end 1154 if @element.document and @element.document.doctype 1155 expn = @element.expanded_name 1156 expn = @element.document.doctype.name if expn.size == 0 1157 @element.document.doctype.attributes_of(expn).each { 1158 |attribute| 1159 namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns' 1160 } 1161 end 1162 namespaces 1163 end 1164 1165 # Removes an attribute 1166 # attribute:: 1167 # either a String, which is the name of the attribute to remove -- 1168 # namespaces are significant here -- or the attribute to remove. 1169 # Returns:: the owning element 1170 # doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>" 1171 # doc.root.attributes.delete 'foo' #-> <a y:foo='0' x:foo='1' z:foo='4'/>" 1172 # doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>" 1173 # attr = doc.root.attributes.get_attribute('y:foo') 1174 # doc.root.attributes.delete attr #-> <a z:foo='4'/>" 1175 def delete( attribute ) 1176 name = nil 1177 prefix = nil 1178 if attribute.kind_of? Attribute 1179 name = attribute.name 1180 prefix = attribute.prefix 1181 else 1182 attribute =~ Namespace::NAMESPLIT 1183 prefix, name = $1, $2 1184 prefix = '' unless prefix 1185 end 1186 old = fetch(name, nil) 1187 attr = nil 1188 if old.kind_of? Hash # the supplied attribute is one of many 1189 attr = old.delete(prefix) 1190 if old.size == 1 1191 repl = nil 1192 old.each_value{|v| repl = v} 1193 store name, repl 1194 end 1195 elsif old.nil? 1196 return @element 1197 else # the supplied attribute is a top-level one 1198 attr = old 1199 super(name) 1200 end 1201 @element 1202 end 1203 1204 # Adds an attribute, overriding any existing attribute by the 1205 # same name. Namespaces are significant. 1206 # attribute:: An Attribute 1207 def add( attribute ) 1208 self[attribute.name] = attribute 1209 end 1210 1211 alias :<< :add 1212 1213 # Deletes all attributes matching a name. Namespaces are significant. 1214 # name:: 1215 # A String; all attributes that match this path will be removed 1216 # Returns:: an Array of the Attributes that were removed 1217 def delete_all( name ) 1218 rv = [] 1219 each_attribute { |attribute| 1220 rv << attribute if attribute.expanded_name == name 1221 } 1222 rv.each{ |attr| attr.remove } 1223 return rv 1224 end 1225 1226 # The +get_attribute_ns+ method retrieves a method by its namespace 1227 # and name. Thus it is possible to reliably identify an attribute 1228 # even if an XML processor has changed the prefix. 1229 # 1230 # Method contributed by Henrik Martensson 1231 def get_attribute_ns(namespace, name) 1232 result = nil 1233 each_attribute() { |attribute| 1234 if name == attribute.name && 1235 namespace == attribute.namespace() && 1236 ( !namespace.empty? || !attribute.fully_expanded_name.index(':') ) 1237 # foo will match xmlns:foo, but only if foo isn't also an attribute 1238 result = attribute if !result or !namespace.empty? or 1239 !attribute.fully_expanded_name.index(':') 1240 end 1241 } 1242 result 1243 end 1244 end 1245end 1246