class CGI
  # Default charset used by CGI::unescape when the caller does not supply one.
  # Left untouched if something else has already defined it.
  @@accept_charset = "UTF-8" unless defined?(@@accept_charset)

  # URL-encode a string.
  #
  # Every byte outside <tt>[a-zA-Z0-9_.-]</tt> and space is written as
  # <tt>%XX</tt> (upper-case hex); spaces become <tt>+</tt>. The result carries
  # the same encoding as +string+.
  #
  #   url_encoded_string = CGI::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  def CGI::escape(string)
    encoding = string.encoding
    # Work on a binary copy so multibyte characters are escaped byte by byte.
    string.dup.force_encoding('ASCII-8BIT').gsub(/([^ a-zA-Z0-9_.-]+)/) do
      '%' + $1.unpack('H2' * $1.bytesize).join('%').upcase
    end.tr(' ', '+').force_encoding(encoding)
  end

  # URL-decode a string with encoding (optional).
  #
  # +encoding+ is the encoding to tag the decoded bytes with. If the decoded
  # bytes are not valid in that encoding, the encoding of +string+ is used
  # instead.
  #
  #   string = CGI::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  def CGI::unescape(string, encoding = @@accept_charset)
    str = string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT).gsub(/((?:%[0-9a-fA-F]{2})+)/) do
      # A run of %XX escapes is decoded in one go: strip the '%' and unpack the hex.
      [$1.delete('%')].pack('H*')
    end.force_encoding(encoding)
    str.valid_encoding? ? str : str.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values
  TABLE_FOR_ESCAPE_HTML__ = {
    "'" => '&#39;',
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }

  # Escape special characters in HTML, namely '&"<>
  #
  #   CGI::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  def CGI::escapeHTML(string)
    string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped.
  #
  # Handles the named references apos, amp, quot, gt and lt, plus decimal
  # (<tt>&#NN;</tt>) and hexadecimal (<tt>&#xHH;</tt>) numeric references.
  # A numeric reference is left exactly as written when its code point cannot
  # be represented in the encoding of +string+, or is not a valid code point
  # at all.
  #
  #   CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  def CGI::unescapeHTML(string)
    enc = string.encoding

    # UTF-16/UTF-32 strings are not ASCII-compatible, so the pattern and every
    # replacement have to be transcoded into the string's own encoding.
    if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
      wide_pattern = Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))
      return string.gsub(wide_pattern) do
        entity = $& # remember the raw text; the `when` regexps below overwrite $~
        case $1.encode("US-ASCII")
        when 'apos' then "'".encode(enc)
        when 'amp'  then '&'.encode(enc)
        when 'quot' then '"'.encode(enc)
        when 'gt'   then '>'.encode(enc)
        when 'lt'   then '<'.encode(enc)
        when /\A#0*(\d+)\z/
          begin
            $1.to_i.chr(enc)
          rescue RangeError
            entity # invalid code point: keep the reference untouched
          end
        when /\A#x([0-9a-f]+)\z/i
          begin
            $1.hex.chr(enc)
          rescue RangeError
            entity
          end
        end
      end
    end

    asciicompat = Encoding.compatible?(string, "a")

    # Converts a numeric reference to a character when the target encoding can
    # hold it; otherwise hands back +fallback+ (the textual reference).
    to_char = lambda do |codepoint, fallback|
      representable = enc == Encoding::UTF_8 ||
                      (enc == Encoding::ISO_8859_1 && codepoint < 256) ||
                      (asciicompat && codepoint < 128)
      if representable
        begin
          codepoint.chr(enc)
        rescue RangeError
          # e.g. surrogates or values above U+10FFFF in UTF-8
          fallback
        end
      else
        fallback
      end
    end

    string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
      match = $1.dup
      case match
      when 'apos' then "'"
      when 'amp'  then '&'
      when 'quot' then '"'
      when 'gt'   then '>'
      when 'lt'   then '<'
      when /\A#0*(\d+)\z/
        to_char.call($1.to_i, "&##{$1};")
      when /\A#x([0-9a-f]+)\z/i
        to_char.call($1.hex, "&#x#{$1};")
      else
        "&#{match};"
      end
    end
  end

  # Synonym for CGI::escapeHTML(str)
  def CGI::escape_html(str)
    escapeHTML(str)
  end

  # Synonym for CGI::unescapeHTML(str)
  def CGI::unescape_html(str)
    unescapeHTML(str)
  end

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements.  Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element.
  # The attribute list of the open tag will also be escaped (for
  # instance, the double-quotes surrounding attribute values).
  #
  # Element names are interpolated into a regular expression as given.
  # When no elements are supplied, +string+ is returned unchanged.
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  def CGI::escapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    unless elements.empty?
      string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
        CGI::escapeHTML($&)
      end
    else
      string
    end
  end

  # Undo escaping such as that done by CGI::escapeElement()
  #
  # Only escaped tags (<tt>&lt;NAME ...&gt;</tt> and <tt>&lt;/NAME&gt;</tt>)
  # of the listed elements are unescaped; everything else is left alone.
  # When no elements are supplied, +string+ is returned unchanged.
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  def CGI::unescapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    unless elements.empty?
      # The tag delimiters are matched in their escaped form, since the input
      # is text that has already been HTML-escaped.
      string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
        CGI::unescapeHTML($&)
      end
    else
      string
    end
  end

  # Synonym for CGI::escapeElement(str, *elements)
  def CGI::escape_element(str, *elements)
    escapeElement(str, *elements)
  end

  # Synonym for CGI::unescapeElement(str, *elements)
  def CGI::unescape_element(str, *elements)
    unescapeElement(str, *elements)
  end

  # Abbreviated day-of-week names specified by RFC 822
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ]

  # Abbreviated month names specified by RFC 822
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ]

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  # The time is converted to GMT on a copy, so +time+ itself is not modified.
  #
  #   CGI::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def CGI::rfc1123_date(time)
    t = time.clone.gmtime
    return format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
                  RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month-1], t.year,
                  t.hour, t.min, t.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent.  +shift+ is the indentation
  # unit to use; it defaults to two spaces.
  #
  # A closing tag that has no matching opening tag before it is left where
  # it is and does not affect indentation.
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  def CGI::pretty(string, shift = "  ")
    # Put every tag on a line of its own.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    while (end_pos = lines.index(/^<\/(\w+)/, end_pos))
      element = $1.dup
      start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
      if start_pos.nil?
        # Unmatched closing tag: step past it instead of slicing with nil.
        end_pos += 1
        next
      end
      # Indent the element's content and fence the processed tags with "__"
      # so that the line-start patterns above do not match them again.
      lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    # Strip the "__" fences, keeping the indentation in front of them.
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end
end