1require "rexml/parent"
2require "rexml/namespace"
3require "rexml/attribute"
4require "rexml/cdata"
5require "rexml/xpath"
6require "rexml/parseexception"
7
8module REXML
9  # An implementation note about namespaces:
10  # As we parse, when we find namespaces we put them in a hash and assign
11  # them a unique ID.  We then convert the namespace prefix for the node
12  # to the unique ID.  This makes namespace lookup much faster for the
13  # cost of extra memory use.  We save the namespace prefix for the
14  # context node and convert it back when we write it.
15  @@namespaces = {}
16
17  # Represents a tagged XML element.  Elements are characterized by
18  # having children, attributes, and names, and can themselves be
19  # children.
20  class Element < Parent
21    include Namespace
22
23    UNDEFINED = "UNDEFINED";            # The default name
24
25    # Mechanisms for accessing attributes and child elements of this
26    # element.
27    attr_reader :attributes, :elements
28    # The context holds information about the processing environment, such as
29    # whitespace handling.
30    attr_accessor :context
31
32    # Constructor
33    # arg::
34    #   if not supplied, will be set to the default value.
35    #   If a String, the name of this object will be set to the argument.
36    #   If an Element, the object will be shallowly cloned; name,
37    #   attributes, and namespaces will be copied.  Children will +not+ be
38    #   copied.
39    # parent::
40    #   if supplied, must be a Parent, and will be used as
41    #   the parent of this object.
42    # context::
43    #   If supplied, must be a hash containing context items.  Context items
44    #   include:
45    # * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
46    #   strings being the names of the elements to respect
47    #   whitespace for.  Defaults to :+all+.
48    # * <tt>:compress_whitespace</tt> the value can be :+all+ or an array of
49    #   strings being the names of the elements to ignore whitespace on.
50    #   Overrides :+respect_whitespace+.
51    # * <tt>:ignore_whitespace_nodes</tt> the value can be :+all+ or an array
52    #   of strings being the names of the elements in which to ignore
53    #   whitespace-only nodes.  If this is set, Text nodes which contain only
54    #   whitespace will not be added to the document tree.
55    # * <tt>:raw</tt> can be :+all+, or an array of strings being the names of
56    #   the elements to process in raw mode.  In raw mode, special
    #   characters in text are not converted to or from entities.
58    def initialize( arg = UNDEFINED, parent=nil, context=nil )
59      super(parent)
60
61      @elements = Elements.new(self)
62      @attributes = Attributes.new(self)
63      @context = context
64
65      if arg.kind_of? String
66        self.name = arg
67      elsif arg.kind_of? Element
68        self.name = arg.expanded_name
69        arg.attributes.each_attribute{ |attribute|
70          @attributes << Attribute.new( attribute )
71        }
72        @context = arg.context
73      end
74    end
75
76    def inspect
77      rv = "<#@expanded_name"
78
79      @attributes.each_attribute do |attr|
80        rv << " "
81        attr.write( rv, 0 )
82      end
83
84      if children.size > 0
85        rv << "> ... </>"
86      else
87        rv << "/>"
88      end
89    end
90
91
92    # Creates a shallow copy of self.
93    #   d = Document.new "<a><b/><b/><c><d/></c></a>"
94    #   new_a = d.root.clone
95    #   puts new_a  # => "<a/>"
96    def clone
97      self.class.new self
98    end
99
100    # Evaluates to the root node of the document that this element
101    # belongs to. If this element doesn't belong to a document, but does
102    # belong to another Element, the parent's root will be returned, until the
103    # earliest ancestor is found.
104    #
105    # Note that this is not the same as the document element.
106    # In the following example, <a> is the document element, and the root
107    # node is the parent node of the document element.  You may ask yourself
108    # why the root node is useful: consider the doctype and XML declaration,
109    # and any processing instructions before the document element... they
110    # are children of the root node, or siblings of the document element.
111    # The only time this isn't true is when an Element is created that is
112    # not part of any Document.  In this case, the ancestor that has no
113    # parent acts as the root node.
114    #  d = Document.new '<a><b><c/></b></a>'
115    #  a = d[1] ; c = a[1][1]
116    #  d.root_node == d   # TRUE
117    #  a.root_node        # namely, d
118    #  c.root_node        # again, d
119    def root_node
120      parent.nil? ? self : parent.root_node
121    end
122
123    def root
124      return elements[1] if self.kind_of? Document
125      return self if parent.kind_of? Document or parent.nil?
126      return parent.root
127    end
128
129    # Evaluates to the document to which this element belongs, or nil if this
130    # element doesn't belong to a document.
131    def document
132      rt = root
133      rt.parent if rt
134    end
135
136    # Evaluates to +true+ if whitespace is respected for this element.  This
137    # is the case if:
138    # 1. Neither :+respect_whitespace+ nor :+compress_whitespace+ has any value
139    # 2. The context has :+respect_whitespace+ set to :+all+ or
140    #    an array containing the name of this element, and
141    #    :+compress_whitespace+ isn't set to :+all+ or an array containing the
142    #    name of this element.
143    # The evaluation is tested against +expanded_name+, and so is namespace
144    # sensitive.
145    def whitespace
146      @whitespace = nil
147      if @context
148        if @context[:respect_whitespace]
149          @whitespace = (@context[:respect_whitespace] == :all or
150                         @context[:respect_whitespace].include? expanded_name)
151        end
152        @whitespace = false if (@context[:compress_whitespace] and
153                                (@context[:compress_whitespace] == :all or
154                                 @context[:compress_whitespace].include? expanded_name)
155                               )
156      end
157      @whitespace = true unless @whitespace == false
158      @whitespace
159    end
160
161    def ignore_whitespace_nodes
162      @ignore_whitespace_nodes = false
163      if @context
164        if @context[:ignore_whitespace_nodes]
165          @ignore_whitespace_nodes =
166            (@context[:ignore_whitespace_nodes] == :all or
167             @context[:ignore_whitespace_nodes].include? expanded_name)
168        end
169      end
170    end
171
172    # Evaluates to +true+ if raw mode is set for this element.  This
173    # is the case if the context has :+raw+ set to :+all+ or
174    # an array containing the name of this element.
175    #
176    # The evaluation is tested against +expanded_name+, and so is namespace
177    # sensitive.
178    def raw
179      @raw = (@context and @context[:raw] and
180              (@context[:raw] == :all or
181               @context[:raw].include? expanded_name))
182               @raw
183    end
184
185    #once :whitespace, :raw, :ignore_whitespace_nodes
186
187    #################################################
188    # Namespaces                                    #
189    #################################################
190
191    # Evaluates to an +Array+ containing the prefixes (names) of all defined
192    # namespaces at this context node.
193    #  doc = Document.new("<a xmlns:x='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
194    #  doc.elements['//b'].prefixes # -> ['x', 'y']
195    def prefixes
196      prefixes = []
197      prefixes = parent.prefixes if parent
198      prefixes |= attributes.prefixes
199      return prefixes
200    end
201
202    def namespaces
203      namespaces = {}
204      namespaces = parent.namespaces if parent
205      namespaces = namespaces.merge( attributes.namespaces )
206      return namespaces
207    end
208
    # Evaluates to the URI for a prefix, or the empty string if no such
210    # namespace is declared for this element. Evaluates recursively for
211    # ancestors.  Returns the default namespace, if there is one.
212    # prefix::
213    #   the prefix to search for.  If not supplied, returns the default
214    #   namespace if one exists
215    # Returns::
216    #   the namespace URI as a String, or nil if no such namespace
217    #   exists.  If the namespace is undefined, returns an empty string
218    #  doc = Document.new("<a xmlns='1' xmlns:y='2'><b/><c xmlns:z='3'/></a>")
219    #  b = doc.elements['//b']
220    #  b.namespace           # -> '1'
221    #  b.namespace("y")      # -> '2'
222    def namespace(prefix=nil)
223      if prefix.nil?
224        prefix = prefix()
225      end
226      if prefix == ''
227        prefix = "xmlns"
228      else
229        prefix = "xmlns:#{prefix}" unless prefix[0,5] == 'xmlns'
230      end
231      ns = attributes[ prefix ]
232      ns = parent.namespace(prefix) if ns.nil? and parent
233      ns = '' if ns.nil? and prefix == 'xmlns'
234      return ns
235    end
236
237    # Adds a namespace to this element.
238    # prefix::
239    #   the prefix string, or the namespace URI if +uri+ is not
240    #   supplied
241    # uri::
    #   the namespace URI.  May be nil, in which case +prefix+ is used as
    #   the URI of the default namespace
244    # Evaluates to: this Element
245    #  a = Element.new("a")
246    #  a.add_namespace("xmlns:foo", "bar" )
247    #  a.add_namespace("foo", "bar")  # shorthand for previous line
248    #  a.add_namespace("twiddle")
249    #  puts a   #-> <a xmlns:foo='bar' xmlns='twiddle'/>
250    def add_namespace( prefix, uri=nil )
251      unless uri
252        @attributes["xmlns"] = prefix
253      else
254        prefix = "xmlns:#{prefix}" unless prefix =~ /^xmlns:/
255        @attributes[ prefix ] = uri
256      end
257      self
258    end
259
260    # Removes a namespace from this node.  This only works if the namespace is
261    # actually declared in this node.  If no argument is passed, deletes the
262    # default namespace.
263    #
264    # Evaluates to: this element
265    #  doc = Document.new "<a xmlns:foo='bar' xmlns='twiddle'/>"
266    #  doc.root.delete_namespace
267    #  puts doc     # -> <a xmlns:foo='bar'/>
268    #  doc.root.delete_namespace 'foo'
269    #  puts doc     # -> <a/>
270    def delete_namespace namespace="xmlns"
271      namespace = "xmlns:#{namespace}" unless namespace == 'xmlns'
272      attribute = attributes.get_attribute(namespace)
273      attribute.remove unless attribute.nil?
274      self
275    end
276
277    #################################################
278    # Elements                                      #
279    #################################################
280
281    # Adds a child to this element, optionally setting attributes in
282    # the element.
283    # element::
284    #   optional.  If Element, the element is added.
285    #   Otherwise, a new Element is constructed with the argument (see
286    #   Element.initialize).
287    # attrs::
288    #   If supplied, must be a Hash containing String name,value
289    #   pairs, which will be used to set the attributes of the new Element.
290    # Returns:: the Element that was added
291    #  el = doc.add_element 'my-tag'
292    #  el = doc.add_element 'my-tag', {'attr1'=>'val1', 'attr2'=>'val2'}
293    #  el = Element.new 'my-tag'
294    #  doc.add_element el
295    def add_element element, attrs=nil
296      raise "First argument must be either an element name, or an Element object" if element.nil?
297      el = @elements.add(element)
298      attrs.each do |key, value|
299        el.attributes[key]=value
300      end       if attrs.kind_of? Hash
301      el
302    end
303
304    # Deletes a child element.
305    # element::
306    #   Must be an +Element+, +String+, or +Integer+.  If Element,
307    #   the element is removed.  If String, the element is found (via XPath)
308    #   and removed.  <em>This means that any parent can remove any
    #   descendant.</em>  If Integer, the Element indexed by that number will be
310    #   removed.
311    # Returns:: the element that was removed.
312    #  doc.delete_element "/a/b/c[@id='4']"
313    #  doc.delete_element doc.elements["//k"]
314    #  doc.delete_element 1
315    def delete_element element
316      @elements.delete element
317    end
318
319    # Evaluates to +true+ if this element has at least one child Element
320    #  doc = Document.new "<a><b/><c>Text</c></a>"
    #  doc.root.has_elements?               # -> true
    #  doc.elements["/a/b"].has_elements?   # -> false
    #  doc.elements["/a/c"].has_elements?   # -> false
324    def has_elements?
325      !@elements.empty?
326    end
327
328    # Iterates through the child elements, yielding for each Element that
329    # has a particular attribute set.
330    # key::
331    #   the name of the attribute to search for
332    # value::
333    #   the value of the attribute
334    # max::
335    #   (optional) causes this method to return after yielding
336    #   for this number of matching children
337    # name::
338    #   (optional) if supplied, this is an XPath that filters
339    #   the children to check.
340    #
    #  doc = Document.new "<a><b id='1'/><c id='2'/><d id='1'/><e/></a>"
342    #  # Yields b, c, d
343    #  doc.root.each_element_with_attribute( 'id' ) {|e| p e}
344    #  # Yields b, d
345    #  doc.root.each_element_with_attribute( 'id', '1' ) {|e| p e}
346    #  # Yields b
347    #  doc.root.each_element_with_attribute( 'id', '1', 1 ) {|e| p e}
348    #  # Yields d
349    #  doc.root.each_element_with_attribute( 'id', '1', 0, 'd' ) {|e| p e}
350    def each_element_with_attribute( key, value=nil, max=0, name=nil, &block ) # :yields: Element
351      each_with_something( proc {|child|
352        if value.nil?
353          child.attributes[key] != nil
354        else
355          child.attributes[key]==value
356        end
357      }, max, name, &block )
358    end
359
360    # Iterates through the children, yielding for each Element that
361    # has a particular text set.
362    # text::
363    #   the text to search for.  If nil, or not supplied, will iterate
364    #   over all +Element+ children that contain at least one +Text+ node.
365    # max::
366    #   (optional) causes this method to return after yielding
367    #   for this number of matching children
368    # name::
369    #   (optional) if supplied, this is an XPath that filters
370    #   the children to check.
371    #
372    #  doc = Document.new '<a><b>b</b><c>b</c><d>d</d><e/></a>'
373    #  # Yields b, c, d
374    #  doc.each_element_with_text {|e|p e}
375    #  # Yields b, c
376    #  doc.each_element_with_text('b'){|e|p e}
377    #  # Yields b
378    #  doc.each_element_with_text('b', 1){|e|p e}
379    #  # Yields d
380    #  doc.each_element_with_text(nil, 0, 'd'){|e|p e}
381    def each_element_with_text( text=nil, max=0, name=nil, &block ) # :yields: Element
382      each_with_something( proc {|child|
383        if text.nil?
384          child.has_text?
385        else
386          child.text == text
387        end
388      }, max, name, &block )
389    end
390
391    # Synonym for Element.elements.each
392    def each_element( xpath=nil, &block ) # :yields: Element
393      @elements.each( xpath, &block )
394    end
395
396    # Synonym for Element.to_a
397    # This is a little slower than calling elements.each directly.
398    # xpath:: any XPath by which to search for elements in the tree
399    # Returns:: an array of Elements that match the supplied path
400    def get_elements( xpath )
401      @elements.to_a( xpath )
402    end
403
404    # Returns the next sibling that is an element, or nil if there is
405    # no Element sibling after this one
406    #  doc = Document.new '<a><b/>text<c/></a>'
407    #  doc.root.elements['b'].next_element          #-> <c/>
408    #  doc.root.elements['c'].next_element          #-> nil
409    def next_element
410      element = next_sibling
411      element = element.next_sibling until element.nil? or element.kind_of? Element
412      return element
413    end
414
415    # Returns the previous sibling that is an element, or nil if there is
416    # no Element sibling prior to this one
417    #  doc = Document.new '<a><b/>text<c/></a>'
418    #  doc.root.elements['c'].previous_element          #-> <b/>
419    #  doc.root.elements['b'].previous_element          #-> nil
420    def previous_element
421      element = previous_sibling
422      element = element.previous_sibling until element.nil? or element.kind_of? Element
423      return element
424    end
425
426
427    #################################################
428    # Text                                          #
429    #################################################
430
431    # Evaluates to +true+ if this element has at least one Text child
432    def has_text?
433      not text().nil?
434    end
435
436    # A convenience method which returns the String value of the _first_
437    # child text element, if one exists, and +nil+ otherwise.
438    #
439    # <em>Note that an element may have multiple Text elements, perhaps
440    # separated by other children</em>.  Be aware that this method only returns
441    # the first Text node.
442    #
443    # This method returns the +value+ of the first text child node, which
444    # ignores the +raw+ setting, so always returns normalized text. See
445    # the Text::value documentation.
446    #
447    #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
448    #  # The element 'p' has two text elements, "some text " and " more text".
449    #  doc.root.text              #-> "some text "
450    def text( path = nil )
451      rv = get_text(path)
452      return rv.value unless rv.nil?
453      nil
454    end
455
456    # Returns the first child Text node, if any, or +nil+ otherwise.
457    # This method returns the actual +Text+ node, rather than the String content.
458    #  doc = Document.new "<p>some text <b>this is bold!</b> more text</p>"
459    #  # The element 'p' has two text elements, "some text " and " more text".
460    #  doc.root.get_text.value            #-> "some text "
461    def get_text path = nil
462      rv = nil
463      if path
464        element = @elements[ path ]
465        rv = element.get_text unless element.nil?
466      else
467        rv = @children.find { |node| node.kind_of? Text }
468      end
469      return rv
470    end
471
472    # Sets the first Text child of this object.  See text() for a
473    # discussion about Text children.
474    #
475    # If a Text child already exists, the child is replaced by this
476    # content.  This means that Text content can be deleted by calling
477    # this method with a nil argument.  In this case, the next Text
478    # child becomes the first Text child.  In no case is the order of
479    # any siblings disturbed.
480    # text::
481    #   If a String, a new Text child is created and added to
482    #   this Element as the first Text child.  If Text, the text is set
483    #   as the first Child element.  If nil, then any existing first Text
484    #   child is removed.
485    # Returns:: this Element.
486    #  doc = Document.new '<a><b/></a>'
487    #  doc.root.text = 'Sean'      #-> '<a><b/>Sean</a>'
488    #  doc.root.text = 'Elliott'   #-> '<a><b/>Elliott</a>'
489    #  doc.root.add_element 'c'    #-> '<a><b/>Elliott<c/></a>'
490    #  doc.root.text = 'Russell'   #-> '<a><b/>Russell<c/></a>'
491    #  doc.root.text = nil         #-> '<a><b/><c/></a>'
492    def text=( text )
493      if text.kind_of? String
494        text = Text.new( text, whitespace(), nil, raw() )
495      elsif !text.nil? and !text.kind_of? Text
496        text = Text.new( text.to_s, whitespace(), nil, raw() )
497      end
498      old_text = get_text
499      if text.nil?
500        old_text.remove unless old_text.nil?
501      else
502        if old_text.nil?
503          self << text
504        else
505          old_text.replace_with( text )
506        end
507      end
508      return self
509    end
510
511    # A helper method to add a Text child.  Actual Text instances can
512    # be added with regular Parent methods, such as add() and <<()
513    # text::
514    #   if a String, a new Text instance is created and added
515    #   to the parent.  If Text, the object is added directly.
516    # Returns:: this Element
    #  e = Element.new('a')          #-> <a/>
    #  e.add_text 'foo'              #-> <a>foo</a>
    #  e.add_text Text.new(' bar')    #-> <a>foo bar</a>
520    # Note that at the end of this example, the branch has <b>3</b> nodes; the 'e'
521    # element and <b>2</b> Text node children.
522    def add_text( text )
523      if text.kind_of? String
524        if @children[-1].kind_of? Text
525          @children[-1] << text
526          return
527        end
528        text = Text.new( text, whitespace(), nil, raw() )
529      end
530      self << text unless text.nil?
531      return self
532    end
533
534    def node_type
535      :element
536    end
537
538    def xpath
539      path_elements = []
540      cur = self
541      path_elements << __to_xpath_helper( self )
542      while cur.parent
543        cur = cur.parent
544        path_elements << __to_xpath_helper( cur )
545      end
546      return path_elements.reverse.join( "/" )
547    end
548
549    #################################################
550    # Attributes                                    #
551    #################################################
552
553    def attribute( name, namespace=nil )
554      prefix = nil
555      if namespaces.respond_to? :key
556        prefix = namespaces.key(namespace) if namespace
557      else
558        prefix = namespaces.index(namespace) if namespace
559      end
560      prefix = nil if prefix == 'xmlns'
561
562      ret_val =
563        attributes.get_attribute( "#{prefix ? prefix + ':' : ''}#{name}" )
564
565      return ret_val unless ret_val.nil?
566      return nil if prefix.nil?
567
568      # now check that prefix'es namespace is not the same as the
569      # default namespace
570      return nil unless ( namespaces[ prefix ] == namespaces[ 'xmlns' ] )
571
572      attributes.get_attribute( name )
573
574    end
575
576    # Evaluates to +true+ if this element has any attributes set, false
577    # otherwise.
578    def has_attributes?
579      return !@attributes.empty?
580    end
581
582    # Adds an attribute to this element, overwriting any existing attribute
583    # by the same name.
584    # key::
585    #   can be either an Attribute or a String.  If an Attribute,
586    #   the attribute is added to the list of Element attributes.  If String,
587    #   the argument is used as the name of the new attribute, and the value
588    #   parameter must be supplied.
589    # value::
590    #   Required if +key+ is a String, and ignored if the first argument is
591    #   an Attribute.  This is a String, and is used as the value
592    #   of the new Attribute.  This should be the unnormalized value of the
593    #   attribute (without entities).
594    # Returns:: the Attribute added
595    #  e = Element.new 'e'
596    #  e.add_attribute( 'a', 'b' )               #-> <e a='b'/>
597    #  e.add_attribute( 'x:a', 'c' )             #-> <e a='b' x:a='c'/>
598    #  e.add_attribute Attribute.new('b', 'd')   #-> <e a='b' x:a='c' b='d'/>
599    def add_attribute( key, value=nil )
600      if key.kind_of? Attribute
601        @attributes << key
602      else
603        @attributes[key] = value
604      end
605    end
606
607    # Add multiple attributes to this element.
608    # hash:: is either a hash, or array of arrays
609    #  el.add_attributes( {"name1"=>"value1", "name2"=>"value2"} )
    #  el.add_attributes( [ ["name1","value1"], ["name2","value2"] ] )
611    def add_attributes hash
612      if hash.kind_of? Hash
613        hash.each_pair {|key, value| @attributes[key] = value }
614      elsif hash.kind_of? Array
615        hash.each { |value| @attributes[ value[0] ] = value[1] }
616      end
617    end
618
619    # Removes an attribute
620    # key::
621    #   either an Attribute or a String.  In either case, the
622    #   attribute is found by matching the attribute name to the argument,
623    #   and then removed.  If no attribute is found, no action is taken.
624    # Returns::
625    #   the attribute removed, or nil if this Element did not contain
626    #   a matching attribute
627    #  e = Element.new('E')
628    #  e.add_attribute( 'name', 'Sean' )             #-> <E name='Sean'/>
629    #  r = e.add_attribute( 'sur:name', 'Russell' )  #-> <E name='Sean' sur:name='Russell'/>
630    #  e.delete_attribute( 'name' )                  #-> <E sur:name='Russell'/>
631    #  e.delete_attribute( r )                       #-> <E/>
632    def delete_attribute(key)
633      attr = @attributes.get_attribute(key)
634      attr.remove unless attr.nil?
635    end
636
637    #################################################
638    # Other Utilities                               #
639    #################################################
640
641    # Get an array of all CData children.
642    # IMMUTABLE
643    def cdatas
644      find_all { |child| child.kind_of? CData }.freeze
645    end
646
647    # Get an array of all Comment children.
648    # IMMUTABLE
649    def comments
650      find_all { |child| child.kind_of? Comment }.freeze
651    end
652
653    # Get an array of all Instruction children.
654    # IMMUTABLE
655    def instructions
656      find_all { |child| child.kind_of? Instruction }.freeze
657    end
658
659    # Get an array of all Text children.
660    # IMMUTABLE
661    def texts
662      find_all { |child| child.kind_of? Text }.freeze
663    end
664
665    # == DEPRECATED
666    # See REXML::Formatters
667    #
668    # Writes out this element, and recursively, all children.
669    # output::
    #   an object which supports '<< string'; this is where the
    #   document will be written.
672    # indent::
673    #   An integer.  If -1, no indenting will be used; otherwise, the
674    #   indentation will be this number of spaces, and children will be
675    #   indented an additional amount.  Defaults to -1
676    # transitive::
677    #   If transitive is true and indent is >= 0, then the output will be
678    #   pretty-printed in such a way that the added whitespace does not affect
679    #   the parse tree of the document
680    # ie_hack::
681    #   Internet Explorer is the worst piece of crap to have ever been
682    #   written, with the possible exception of Windows itself.  Since IE is
683    #   unable to parse proper XML, we have to provide a hack to generate XML
684    #   that IE's limited abilities can handle.  This hack inserts a space
685    #   before the /> on empty tags.  Defaults to false
686    #
687    #  out = ''
688    #  doc.write( out )     #-> doc is written to the string 'out'
689    #  doc.write( $stdout ) #-> doc written to the console
690    def write(output=$stdout, indent=-1, transitive=false, ie_hack=false)
691      Kernel.warn("#{self.class.name}.write is deprecated.  See REXML::Formatters")
692      formatter = if indent > -1
693          if transitive
694            require "rexml/formatters/transitive"
695            REXML::Formatters::Transitive.new( indent, ie_hack )
696          else
697            REXML::Formatters::Pretty.new( indent, ie_hack )
698          end
699        else
700          REXML::Formatters::Default.new( ie_hack )
701        end
702      formatter.write( self, output )
703    end
704
705
706    private
707    def __to_xpath_helper node
708      rv = node.expanded_name.clone
709      if node.parent
710        results = node.parent.find_all {|n|
711          n.kind_of?(REXML::Element) and n.expanded_name == node.expanded_name
712        }
713        if results.length > 1
714          idx = results.index( node )
715          rv << "[#{idx+1}]"
716        end
717      end
718      rv
719    end
720
721    # A private helper method
722    def each_with_something( test, max=0, name=nil )
723      num = 0
724      @elements.each( name ){ |child|
725        yield child if test.call(child) and num += 1
726        return if max>0 and num == max
727      }
728    end
729  end
730
731  ########################################################################
732  # ELEMENTS                                                             #
733  ########################################################################
734
735  # A class which provides filtering of children for Elements, and
736  # XPath search support.  You are expected to only encounter this class as
737  # the <tt>element.elements</tt> object.  Therefore, you are
738  # _not_ expected to instantiate this yourself.
739  class Elements
740    include Enumerable
741    # Constructor
742    # parent:: the parent Element
743    def initialize parent
744      @element = parent
745    end
746
747    # Fetches a child element.  Filters only Element children, regardless of
748    # the XPath match.
749    # index::
750    #   the search parameter.  This is either an Integer, which
751    #   will be used to find the index'th child Element, or an XPath,
752    #   which will be used to search for the Element.  <em>Because
753    #   of the nature of XPath searches, any element in the connected XML
754    #   document can be fetched through any other element.</em>  <b>The
755    #   Integer index is 1-based, not 0-based.</b>  This means that the first
756    #   child element is at index 1, not 0, and the +n+th element is at index
757    #   +n+, not <tt>n-1</tt>.  This is because XPath indexes element children
758    #   starting from 1, not 0, and the indexes should be the same.
759    # name::
    #   optional, and only used if the first argument is an
761    #   Integer.  In that case, the index'th child Element that has the
762    #   supplied name will be returned.  Note again that the indexes start at 1.
763    # Returns:: the first matching Element, or nil if no child matched
764    #  doc = Document.new '<a><b/><c id="1"/><c id="2"/><d/></a>'
765    #  doc.root.elements[1]       #-> <b/>
766    #  doc.root.elements['c']     #-> <c id="1"/>
767    #  doc.root.elements[2,'c']   #-> <c id="2"/>
768    def []( index, name=nil)
769      if index.kind_of? Integer
770        raise "index (#{index}) must be >= 1" if index < 1
771        name = literalize(name) if name
772        num = 0
773        @element.find { |child|
774          child.kind_of? Element and
775          (name.nil? ? true : child.has_name?( name )) and
776          (num += 1) == index
777        }
778      else
779        return XPath::first( @element, index )
780        #{ |element|
781        #       return element if element.kind_of? Element
782        #}
783        #return nil
784      end
785    end
786
787    # Sets an element, replacing any previous matching element.  If no
    # existing element is found, the element is added.
789    # index:: Used to find a matching element to replace.  See []().
790    # element::
    #   The element to replace the existing element with
    # Returns:: the previous element, or nil if no previous element was found.
794    #
795    #  doc = Document.new '<a/>'
796    #  doc.root.elements[10] = Element.new('b')    #-> <a><b/></a>
797    #  doc.root.elements[1]                        #-> <b/>
798    #  doc.root.elements[1] = Element.new('c')     #-> <a><c/></a>
799    #  doc.root.elements['c'] = Element.new('d')   #-> <a><d/></a>
800    def []=( index, element )
801      previous = self[index]
802      if previous.nil?
803        @element.add element
804      else
805        previous.replace_with element
806      end
807      return previous
808    end
809
810    # Returns +true+ if there are no +Element+ children, +false+ otherwise
811    def empty?
812      @element.find{ |child| child.kind_of? Element}.nil?
813    end
814
815    # Returns the index of the supplied child (starting at 1), or -1 if
816    # the element is not a child
817    # element:: an +Element+ child
818    def index element
819      rv = 0
820      found = @element.find do |child|
821        child.kind_of? Element and
822        (rv += 1) and
823        child == element
824      end
825      return rv if found == element
826      return -1
827    end
828
829    # Deletes a child Element
830    # element::
831    #   Either an Element, which is removed directly; an
832    #   xpath, where the first matching child is removed; or an Integer,
833    #   where the n'th Element is removed.
834    # Returns:: the removed child
835    #  doc = Document.new '<a><b/><c/><c id="1"/></a>'
836    #  b = doc.root.elements[1]
837    #  doc.root.elements.delete b           #-> <a><c/><c id="1"/></a>
838    #  doc.elements.delete("a/c[@id='1']")  #-> <a><c/></a>
839    #  doc.root.elements.delete 1           #-> <a/>
840    def delete element
841      if element.kind_of? Element
842        @element.delete element
843      else
844        el = self[element]
845        el.remove if el
846      end
847    end
848
849    # Removes multiple elements.  Filters for Element children, regardless of
850    # XPath matching.
851    # xpath:: all elements matching this String path are removed.
852    # Returns:: an Array of Elements that have been removed
853    #  doc = Document.new '<a><c/><c/><c/><c/></a>'
854    #  deleted = doc.elements.delete_all 'a/c' #-> [<c/>, <c/>, <c/>, <c/>]
855    def delete_all( xpath )
856      rv = []
857      XPath::each( @element, xpath) {|element|
858        rv << element if element.kind_of? Element
859      }
860      rv.each do |element|
861        @element.delete element
862        element.remove
863      end
864      return rv
865    end
866
867    # Adds an element
868    # element::
869    #   if supplied, is either an Element, String, or
870    #   Source (see Element.initialize).  If not supplied or nil, a
871    #   new, default Element will be constructed
872    # Returns:: the added Element
873    #  a = Element.new('a')
874    #  a.elements.add(Element.new('b'))  #-> <a><b/></a>
875    #  a.elements.add('c')               #-> <a><b/><c/></a>
876    def add element=nil
877      if element.nil?
878        Element.new("", self, @element.context)
879      elsif not element.kind_of?(Element)
880        Element.new(element, self, @element.context)
881      else
882        @element << element
883        element.context = @element.context
884        element
885      end
886    end
887
888    alias :<< :add
889
890    # Iterates through all of the child Elements, optionally filtering
891    # them by a given XPath
892    # xpath::
893    #   optional.  If supplied, this is a String XPath, and is used to
894    #   filter the children, so that only matching children are yielded.  Note
895    #   that XPaths are automatically filtered for Elements, so that
896    #   non-Element children will not be yielded
897    #  doc = Document.new '<a><b/><c/><d/>sean<b/><c/><d/></a>'
898    #  doc.root.elements.each {|e|p e}       #-> Yields b, c, d, b, c, d elements
899    #  doc.root.elements.each('b') {|e|p e}  #-> Yields b, b elements
900    #  doc.root.elements.each('child::node()')  {|e|p e}
901    #  #-> Yields <b/>, <c/>, <d/>, <b/>, <c/>, <d/>
902    #  XPath.each(doc.root, 'child::node()', &block)
903    #  #-> Yields <b/>, <c/>, <d/>, sean, <b/>, <c/>, <d/>
904    def each( xpath=nil )
905      XPath::each( @element, xpath ) {|e| yield e if e.kind_of? Element }
906    end
907
908    def collect( xpath=nil )
909      collection = []
910      XPath::each( @element, xpath ) {|e|
911        collection << yield(e)  if e.kind_of?(Element)
912      }
913      collection
914    end
915
916    def inject( xpath=nil, initial=nil )
917      first = true
918      XPath::each( @element, xpath ) {|e|
919        if (e.kind_of? Element)
920          if (first and initial == nil)
921            initial = e
922            first = false
923          else
924            initial = yield( initial, e ) if e.kind_of? Element
925          end
926        end
927      }
928      initial
929    end
930
931    # Returns the number of +Element+ children of the parent object.
932    #  doc = Document.new '<a>sean<b/>elliott<b/>russell<b/></a>'
933    #  doc.root.size            #-> 6, 3 element and 3 text nodes
934    #  doc.root.elements.size   #-> 3
935    def size
936      count = 0
937      @element.each {|child| count+=1 if child.kind_of? Element }
938      count
939    end
940
941    # Returns an Array of Element children.  An XPath may be supplied to
942    # filter the children.  Only Element children are returned, even if the
943    # supplied XPath matches non-Element children.
944    #  doc = Document.new '<a>sean<b/>elliott<c/></a>'
945    #  doc.root.elements.to_a                  #-> [ <b/>, <c/> ]
946    #  doc.root.elements.to_a("child::node()") #-> [ <b/>, <c/> ]
947    #  XPath.match(doc.root, "child::node()")  #-> [ sean, <b/>, elliott, <c/> ]
948    def to_a( xpath=nil )
949      rv = XPath.match( @element, xpath )
950      return rv.find_all{|e| e.kind_of? Element} if xpath
951      rv
952    end
953
954    private
    # Private helper method.  Removes quotes from quoted strings
956    def literalize name
957      name = name[1..-2] if name[0] == ?' or name[0] == ?"               #'
958      name
959    end
960  end
961
962  ########################################################################
963  # ATTRIBUTES                                                           #
964  ########################################################################
965
966  # A class that defines the set of Attributes of an Element and provides
967  # operations for accessing elements in that set.
968  class Attributes < Hash
969    # Constructor
970    # element:: the Element of which this is an Attribute
971    def initialize element
972      @element = element
973    end
974
975    # Fetches an attribute value.  If you want to get the Attribute itself,
976    # use get_attribute()
977    # name:: an XPath attribute name.  Namespaces are relevant here.
978    # Returns::
979    #   the String value of the matching attribute, or +nil+ if no
980    #   matching attribute was found.  This is the unnormalized value
981    #   (with entities expanded).
982    #
983    #  doc = Document.new "<a foo:att='1' bar:att='2' att='&lt;'/>"
984    #  doc.root.attributes['att']         #-> '<'
985    #  doc.root.attributes['bar:att']     #-> '2'
986    def [](name)
987      attr = get_attribute(name)
988      return attr.value unless attr.nil?
989      return nil
990    end
991
992    def to_a
993      values.flatten
994    end
995
996    # Returns the number of attributes the owning Element contains.
997    #  doc = Document "<a x='1' y='2' foo:x='3'/>"
998    #  doc.root.attributes.length        #-> 3
999    def length
1000      c = 0
1001      each_attribute { c+=1 }
1002      c
1003    end
1004    alias :size :length
1005
1006    # Iterates over the attributes of an Element.  Yields actual Attribute
1007    # nodes, not String values.
1008    #
1009    #  doc = Document.new '<a x="1" y="2"/>'
1010    #  doc.root.attributes.each_attribute {|attr|
1011    #    p attr.expanded_name+" => "+attr.value
1012    #  }
1013    def each_attribute # :yields: attribute
1014      each_value do |val|
1015        if val.kind_of? Attribute
1016          yield val
1017        else
1018          val.each_value { |atr| yield atr }
1019        end
1020      end
1021    end
1022
1023    # Iterates over each attribute of an Element, yielding the expanded name
1024    # and value as a pair of Strings.
1025    #
1026    #  doc = Document.new '<a x="1" y="2"/>'
1027    #  doc.root.attributes.each {|name, value| p name+" => "+value }
1028    def each
1029      each_attribute do |attr|
1030        yield [attr.expanded_name, attr.value]
1031      end
1032    end
1033
1034    # Fetches an attribute
1035    # name::
1036    #   the name by which to search for the attribute.  Can be a
1037    #   <tt>prefix:name</tt> namespace name.
1038    # Returns:: The first matching attribute, or nil if there was none.  This
1039    # value is an Attribute node, not the String value of the attribute.
1040    #  doc = Document.new '<a x:foo="1" foo="2" bar="3"/>'
1041    #  doc.root.attributes.get_attribute("foo").value    #-> "2"
1042    #  doc.root.attributes.get_attribute("x:foo").value  #-> "1"
1043    def get_attribute( name )
1044      attr = fetch( name, nil )
1045      if attr.nil?
1046        return nil if name.nil?
1047        # Look for prefix
1048        name =~ Namespace::NAMESPLIT
1049        prefix, n = $1, $2
1050        if prefix
1051          attr = fetch( n, nil )
1052          # check prefix
1053          if attr == nil
1054          elsif attr.kind_of? Attribute
1055            return attr if prefix == attr.prefix
1056          else
1057            attr = attr[ prefix ]
1058            return attr
1059          end
1060        end
1061        element_document = @element.document
1062        if element_document and element_document.doctype
1063          expn = @element.expanded_name
1064          expn = element_document.doctype.name if expn.size == 0
1065          attr_val = element_document.doctype.attribute_of(expn, name)
1066          return Attribute.new( name, attr_val ) if attr_val
1067        end
1068        return nil
1069      end
1070      if attr.kind_of? Hash
1071        attr = attr[ @element.prefix ]
1072      end
1073      return attr
1074    end
1075
1076    # Sets an attribute, overwriting any existing attribute value by the
1077    # same name.  Namespace is significant.
1078    # name:: the name of the attribute
1079    # value::
1080    #   (optional) If supplied, the value of the attribute.  If
1081    #   nil, any existing matching attribute is deleted.
1082    # Returns::
1083    #   Owning element
1084    #  doc = Document.new "<a x:foo='1' foo='3'/>"
1085    #  doc.root.attributes['y:foo'] = '2'
1086    #  doc.root.attributes['foo'] = '4'
1087    #  doc.root.attributes['x:foo'] = nil
1088    def []=( name, value )
1089      if value.nil?             # Delete the named attribute
1090        attr = get_attribute(name)
1091        delete attr
1092        return
1093      end
1094
1095      unless value.kind_of? Attribute
1096        if @element.document and @element.document.doctype
1097          value = Text::normalize( value, @element.document.doctype )
1098        else
1099          value = Text::normalize( value, nil )
1100        end
1101        value = Attribute.new(name, value)
1102      end
1103      value.element = @element
1104      old_attr = fetch(value.name, nil)
1105      if old_attr.nil?
1106        store(value.name, value)
1107      elsif old_attr.kind_of? Hash
1108        old_attr[value.prefix] = value
1109      elsif old_attr.prefix != value.prefix
1110        # Check for conflicting namespaces
1111        raise ParseException.new(
1112          "Namespace conflict in adding attribute \"#{value.name}\": "+
1113          "Prefix \"#{old_attr.prefix}\" = "+
1114          "\"#{@element.namespace(old_attr.prefix)}\" and prefix "+
1115          "\"#{value.prefix}\" = \"#{@element.namespace(value.prefix)}\"") if
1116          value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
1117          @element.namespace( old_attr.prefix ) ==
1118            @element.namespace( value.prefix )
1119          store value.name, { old_attr.prefix   => old_attr,
1120            value.prefix                => value }
1121      else
1122        store value.name, value
1123      end
1124      return @element
1125    end
1126
1127    # Returns an array of Strings containing all of the prefixes declared
1128    # by this set of # attributes.  The array does not include the default
1129    # namespace declaration, if one exists.
1130    #  doc = Document.new("<a xmlns='foo' xmlns:x='bar' xmlns:y='twee' "+
1131    #        "z='glorp' p:k='gru'/>")
1132    #  prefixes = doc.root.attributes.prefixes    #-> ['x', 'y']
1133    def prefixes
1134      ns = []
1135      each_attribute do |attribute|
1136        ns << attribute.name if attribute.prefix == 'xmlns'
1137      end
1138      if @element.document and @element.document.doctype
1139        expn = @element.expanded_name
1140        expn = @element.document.doctype.name if expn.size == 0
1141        @element.document.doctype.attributes_of(expn).each {
1142          |attribute|
1143          ns << attribute.name if attribute.prefix == 'xmlns'
1144        }
1145      end
1146      ns
1147    end
1148
1149    def namespaces
1150      namespaces = {}
1151      each_attribute do |attribute|
1152        namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
1153      end
1154      if @element.document and @element.document.doctype
1155        expn = @element.expanded_name
1156        expn = @element.document.doctype.name if expn.size == 0
1157        @element.document.doctype.attributes_of(expn).each {
1158          |attribute|
1159          namespaces[attribute.name] = attribute.value if attribute.prefix == 'xmlns' or attribute.name == 'xmlns'
1160        }
1161      end
1162      namespaces
1163    end
1164
1165    # Removes an attribute
1166    # attribute::
1167    #   either a String, which is the name of the attribute to remove --
1168    #   namespaces are significant here -- or the attribute to remove.
1169    # Returns:: the owning element
1170    #  doc = Document.new "<a y:foo='0' x:foo='1' foo='3' z:foo='4'/>"
1171    #  doc.root.attributes.delete 'foo'   #-> <a y:foo='0' x:foo='1' z:foo='4'/>"
1172    #  doc.root.attributes.delete 'x:foo' #-> <a y:foo='0' z:foo='4'/>"
1173    #  attr = doc.root.attributes.get_attribute('y:foo')
1174    #  doc.root.attributes.delete attr    #-> <a z:foo='4'/>"
1175    def delete( attribute )
1176      name = nil
1177      prefix = nil
1178      if attribute.kind_of? Attribute
1179        name = attribute.name
1180        prefix = attribute.prefix
1181      else
1182        attribute =~ Namespace::NAMESPLIT
1183        prefix, name = $1, $2
1184        prefix = '' unless prefix
1185      end
1186      old = fetch(name, nil)
1187      attr = nil
1188      if old.kind_of? Hash # the supplied attribute is one of many
1189        attr = old.delete(prefix)
1190        if old.size == 1
1191          repl = nil
1192          old.each_value{|v| repl = v}
1193          store name, repl
1194        end
1195      elsif old.nil?
1196        return @element
1197      else # the supplied attribute is a top-level one
1198        attr = old
1199        super(name)
1200      end
1201      @element
1202    end
1203
1204    # Adds an attribute, overriding any existing attribute by the
1205    # same name.  Namespaces are significant.
1206    # attribute:: An Attribute
1207    def add( attribute )
1208      self[attribute.name] = attribute
1209    end
1210
1211    alias :<< :add
1212
1213    # Deletes all attributes matching a name.  Namespaces are significant.
1214    # name::
1215    #   A String; all attributes that match this path will be removed
1216    # Returns:: an Array of the Attributes that were removed
1217    def delete_all( name )
1218      rv = []
1219      each_attribute { |attribute|
1220        rv << attribute if attribute.expanded_name == name
1221      }
1222      rv.each{ |attr| attr.remove }
1223      return rv
1224    end
1225
1226    # The +get_attribute_ns+ method retrieves a method by its namespace
1227    # and name. Thus it is possible to reliably identify an attribute
1228    # even if an XML processor has changed the prefix.
1229    #
1230    # Method contributed by Henrik Martensson
1231    def get_attribute_ns(namespace, name)
1232      result = nil
1233      each_attribute() { |attribute|
1234        if name == attribute.name &&
1235          namespace == attribute.namespace() &&
1236          ( !namespace.empty? || !attribute.fully_expanded_name.index(':') )
1237          # foo will match xmlns:foo, but only if foo isn't also an attribute
1238          result = attribute if !result or !namespace.empty? or
1239                                !attribute.fully_expanded_name.index(':')
1240        end
1241      }
1242      result
1243    end
1244  end
1245end
1246