require 'rexml/formatters/default'

module REXML
  module Formatters
    # Pretty-prints an XML document. Whitespace inside text nodes is not
    # preserved in the output: runs of whitespace are collapsed, and line
    # breaks and indentation are inserted.
    #
    # TODO: Add an option to print attributes on new lines
    class Pretty < Default

      # If compact is set to true, then the formatter will attempt to use as
      # little space as possible
      attr_accessor :compact
      # The width of a page. Used for formatting text
      attr_accessor :width

      # Create a new pretty printer.
      #
      # indentation::
      #   An integer greater than 0. The indentation of each level will be
      #   this number of spaces. If this is < 1, the behavior of this object
      #   is undefined. Defaults to 2.
      # ie_hack::
      #   If true, the printer will insert whitespace before closing empty
      #   tags, thereby allowing Internet Explorer's feeble XML parser to
      #   function. Defaults to false.
      def initialize( indentation=2, ie_hack=false )
        @indentation = indentation
        @level = 0
        @ie_hack = ie_hack
        @width = 80
        @compact = false
      end

      protected

      # Writes +node+ (an element) to +output+, indented by the current
      # level. Children are written one per line at the next indentation
      # level, except in compact mode when all children are text and their
      # formatted form is shorter than +width+: then they are written inline.
      def write_element(node, output)
        output << ' '*@level
        output << "<#{node.expanded_name}"

        unless node.attributes.empty?
          node.attributes.each_attribute do |attr|
            output << " "
            attr.write( output )
          end
        end

        if node.children.empty?
          output << " " if @ie_hack
          output << "/"
        else
          output << ">"
          # If compact and all children are text, and if the formatted output
          # is less than the specified width, then try to print everything on
          # one line
          skip = false
          if compact && node.children.all? { |child| child.kind_of?(Text) }
            string = ""
            # Render the children at level 0 so the trial string carries no
            # indentation, then restore the real level.
            old_level = @level
            @level = 0
            node.children.each { |child| write( child, string ) }
            @level = old_level
            if string.length < @width
              output << string
              skip = true
            end
          end
          unless skip
            output << "\n"
            @level += @indentation
            node.children.each do |child|
              # Whitespace-only text children are dropped entirely.
              next if child.kind_of?(Text) && child.to_s.strip.empty?
              write( child, output )
              output << "\n"
            end
            @level -= @indentation
            output << ' '*@level
          end
          output << "</#{node.expanded_name}"
        end
        output << ">"
      end

      # Writes the text of +node+ to +output+ with every whitespace character
      # turned into a space, runs of spaces collapsed, and the result wrapped
      # to the page width and indented to the current level.
      def write_text( node, output )
        # Use the non-destructive gsub/squeeze: node.to_s may hand back a
        # string the node still holds, and editing it in place would alter
        # the document being printed.
        s = node.to_s.gsub(/\s/, ' ').squeeze(' ')
        s = wrap(s, @width - @level)
        s = indent_text(s, @level, " ", true)
        output << (' '*@level + s)
      end

      # Writes a comment, preceded by the current indentation.
      def write_comment( node, output)
        output << ' ' * @level
        super
      end

      # Writes a CDATA section, preceded by the current indentation.
      def write_cdata( node, output)
        output << ' ' * @level
        super
      end

      # Writes the top-level children of the document +node+, separated by
      # newlines.
      def write_document( node, output )
        # Ok, this is a bit odd. All XML documents have an XML declaration,
        # but it may not write itself if the user didn't specifically add it,
        # either through the API or in the input document. If it doesn't write
        # itself, then we don't need a carriage return... which makes this
        # logic more complex.
        node.children.each do |child|
          # A Text child that compares equal to the last child is not written.
          next if child == node.children[-1] && child.instance_of?(Text)
          # No separating newline before the first child, before a Text
          # child, or before the second child when the first child's
          # +writethis+ is false.
          needs_newline = !(child == node.children[0] ||
                            child.instance_of?(Text) ||
                            (child == node.children[1] && !node.children[0].writethis))
          output << "\n" if needs_newline
          write( child, output )
        end
      end

      private

      # Returns +string+ with +style+ repeated +level+ times inserted after
      # every newline. The first line is never indented here; the
      # +indentfirstline+ argument is accepted but ignored. A negative
      # +level+ returns +string+ unchanged.
      def indent_text(string, level=1, style="\t", indentfirstline=true)
        return string if level < 0
        string.gsub(/\n/, "\n#{style*level}")
      end

      # Breaks +string+ into lines, splitting at the last space found at or
      # before index +width+ for as long as the remainder is longer than
      # +width+. A remainder with no such space is left as one long line.
      # Returns the lines joined with "\n".
      def wrap(string, width)
        parts = []
        while string.length > width && (place = string.rindex(' ', width))
          parts << string[0...place]
          string = string[place+1..-1]
        end
        parts << string
        parts.join("\n")
      end

    end
  end
end