1require 'rexml/formatters/default'
2
module REXML
  module Formatters
    # Pretty-prints an XML document.  This destroys whitespace in text nodes
    # and will insert line breaks and indentation.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible: an element whose children are all text is
      # printed on a single line when that line is shorter than +width+.
      attr_accessor :compact
      # The width of a page.  Used for wrapping text.
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0.  The indentation of each level will be
      #   this number of spaces.  If this is < 1, the behavior of this object
      #   is undefined.  Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      #
      # The page width starts at 80 and compact mode starts disabled; both
      # can be changed through their accessors.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected
      # Writes +node+ (an element) to +output+, indented by the current
      # level.  Children are written one per line at the next indentation
      # level, unless compact mode manages to fit them on the same line.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        node.attributes.each_attribute do |attr|
          output << " "
          attr.write( output )
        end unless node.attributes.empty?

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line
          skip = false
          if compact
            if node.children.all? { |c| c.kind_of?(Text) }
              string = ""
              # Render the text without indentation into a scratch buffer so
              # its length can be measured before committing to one line.
              old_level = @level
              @level = 0
              node.children.each { |child| write( child, string ) }
              @level = old_level
              if string.length < @width
                output << string
                skip = true
              end
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each { |child|
              # Whitespace-only text nodes are dropped entirely.
              next if child.kind_of?(Text) and child.to_s.strip.length == 0
              write( child, output )
              output << "\n"
            }
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes a text node: every whitespace character becomes a space, runs
      # of spaces are collapsed to one, and the result is wrapped to the
      # remaining page width and indented to the current level.
      #
      # The string returned by <tt>node.to_s</tt> is never modified in place;
      # the normalisation works on copies so the node's own content is left
      # untouched (and a frozen string is acceptable).
      def write_text( node, output )
        s = node.to_s.gsub(/\s/, ' ').squeeze(' ')
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes a comment, prefixed with the current indentation.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes a CDATA section, prefixed with the current indentation.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the children of a document, separated by newlines.
      def write_document( node, output )
        # Ok, this is a bit odd.  All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document.  If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each { |child|
          # A Text child comparing equal to the last child is not written.
          next if child == node.children[-1] and child.instance_of?(Text)
          # No separator before the first child, before text, or before the
          # second child when the first one does not write itself.
          unless child == node.children[0] or child.instance_of?(Text) or
            (child == node.children[1] and !node.children[0].writethis)
            output << "\n"
          end
          write( child, output )
        }
      end

      private
      # Returns +string+ with +level+ copies of +style+ inserted after every
      # newline, so that continuation lines are indented.  The first line is
      # not touched here (callers prepend its indentation themselves), and
      # +indentfirstline+ is currently unused.  A negative +level+ returns
      # +string+ unchanged.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines by replacing spaces with newlines.  While
      # the remaining text is longer than +width+, it is split at the last
      # space found at or before index +width+; text with no such space is
      # left as a single (over-long) line.
      def wrap(string, width)
        parts = []
        while string.length > width and place = string.rindex(' ', width)
          parts << string[0...place]
          string = string[place+1..-1]
        end
        parts << string
        parts.join("\n")
      end

    end
  end
end
141
142