class CGI
  # Default character encoding applied by CGI::unescape when the caller does
  # not pass one.  An already-assigned value is left untouched.
  @@accept_charset="UTF-8" unless defined?(@@accept_charset)

  # URL-encode a string.
  #
  # Every byte other than ASCII letters, digits, '_', '.', '-' and space is
  # replaced by an upper-case %XX escape; spaces become '+'.  The result
  # carries the same encoding as +string+.
  #
  #   url_encoded_string = CGI::escape("'Stop!' said Fred")
  #      # => "%27Stop%21%27+said+Fred"
  def CGI::escape(string)
    encoding = string.encoding
    # Work on raw bytes so multi-byte characters are escaped byte by byte.
    escaped = string.dup.force_encoding('ASCII-8BIT').gsub(/([^ a-zA-Z0-9_.-]+)/) do
      '%' + $1.unpack('H2' * $1.bytesize).join('%').upcase
    end
    escaped.tr(' ', '+').force_encoding(encoding)
  end

  # URL-decode a string with encoding(optional).
  #
  # '+' becomes a space and each %XX escape becomes the corresponding byte.
  # The result is tagged with +encoding+; if the decoded bytes are not valid
  # in that encoding, the encoding of the original +string+ is used instead.
  #
  #   string = CGI::unescape("%27Stop%21%27+said+Fred")
  #      # => "'Stop!' said Fred"
  def CGI::unescape(string,encoding=@@accept_charset)
    decoded = string.tr('+', ' ').force_encoding(Encoding::ASCII_8BIT).gsub(/((?:%[0-9a-fA-F]{2})+)/) do
      # Strip the '%' signs and pack the remaining hex digits back into bytes.
      [$1.delete('%')].pack('H*')
    end
    decoded.force_encoding(encoding)
    decoded.valid_encoding? ? decoded : decoded.force_encoding(string.encoding)
  end

  # The set of special characters and their escaped values
  TABLE_FOR_ESCAPE_HTML__ = {
    "'" => '&#39;',
    '&' => '&amp;',
    '"' => '&quot;',
    '<' => '&lt;',
    '>' => '&gt;',
  }.freeze

  # Escape special characters in HTML, namely '&\"<>
  #   CGI::escapeHTML('Usage: foo "bar" <baz>')
  #      # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"
  def CGI::escapeHTML(string)
    string.gsub(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
  end

  # Unescape a string that has been HTML-escaped
  #
  # Handles the named entities apos, amp, quot, gt and lt plus decimal
  # (&#NNN;) and hexadecimal (&#xHH; / &#XHH;) character references.
  # A numeric reference that cannot be represented in the encoding of
  # +string+ (or that is not a valid code point at all) is left in the text
  # instead of raising.
  #
  #   CGI::unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
  #      # => "Usage: foo \"bar\" <baz>"
  def CGI::unescapeHTML(string)
    enc = string.encoding

    # UTF-16/32 are not ASCII compatible, so the pattern and every
    # replacement have to be transcoded into the string's own encoding.
    if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc)
      wide_pattern = Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#[xX][0-9A-Fa-f]+);'.encode(enc))
      return string.gsub(wide_pattern) { |entity|
        case $1.encode("US-ASCII")
        when 'apos'                then "'".encode(enc)
        when 'amp'                 then '&'.encode(enc)
        when 'quot'                then '"'.encode(enc)
        when 'gt'                  then '>'.encode(enc)
        when 'lt'                  then '<'.encode(enc)
        when /\A#0*(\d+)\z/
          begin
            $1.to_i.chr(enc)
          rescue RangeError
            entity # not a valid code point: keep the reference as written
          end
        when /\A#x([0-9a-f]+)\z/i
          begin
            $1.hex.chr(enc)
          rescue RangeError
            entity
          end
        else
          entity
        end
      }
    end

    asciicompat = Encoding.compatible?(string, "a")

    # Returns the character for +codepoint+ in +enc+, or nil when the code
    # point cannot be expressed there (Integer#chr raises RangeError for
    # values beyond the encoding's range).
    to_char = lambda do |codepoint|
      representable = enc == Encoding::UTF_8 ||
                      (enc == Encoding::ISO_8859_1 && codepoint < 256) ||
                      (asciicompat && codepoint < 128)
      next nil unless representable
      begin
        codepoint.chr(enc)
      rescue RangeError
        nil
      end
    end

    string.gsub(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
      match = $1.dup
      case match
      when 'apos'                then "'"
      when 'amp'                 then '&'
      when 'quot'                then '"'
      when 'gt'                  then '>'
      when 'lt'                  then '<'
      when /\A#0*(\d+)\z/
        digits = $1
        to_char.call(digits.to_i) || "&##{digits};"
      when /\A#x([0-9a-f]+)\z/i
        digits = $1
        to_char.call(digits.hex) || "&#x#{digits};"
      else
        "&#{match};"
      end
    end
  end

  # Synonym for CGI::escapeHTML(str)
  def CGI::escape_html(str)
    escapeHTML(str)
  end

  # Synonym for CGI::unescapeHTML(str)
  def CGI::unescape_html(str)
    unescapeHTML(str)
  end

  # Escape only the tags of certain HTML elements in +string+.
  #
  # Takes an element or elements or array of elements.  Each element
  # is specified by the name of the element, without angle brackets.
  # This matches both the start and the end tag of that element.
  # The attribute list of the open tag will also be escaped (for
  # instance, the double-quotes surrounding attribute values).
  # With no element names, +string+ is returned unchanged.
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  #
  #   print CGI::escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  #     # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt;"
  def CGI::escapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    if elements.empty?
      string
    else
      # Element names are interpolated into the pattern as written.
      string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
        CGI::escapeHTML($&)
      end
    end
  end

  # Undo escaping such as that done by CGI::escapeElement()
  #
  # With no element names, +string+ is returned unchanged.
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  #
  #   print CGI::unescapeElement(
  #           CGI::escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  #     # "&lt;BR&gt;<A HREF="url"></A>"
  def CGI::unescapeElement(string, *elements)
    elements = elements[0] if elements[0].kind_of?(Array)
    if elements.empty?
      string
    else
      string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
        CGI::unescapeHTML($&)
      end
    end
  end

  # Synonym for CGI::escapeElement(str, *elements)
  #
  # The element names are forwarded; calling it with only +str+ returns
  # +str+ unchanged, exactly as CGI::escapeElement does.
  def CGI::escape_element(str, *elements)
    escapeElement(str, *elements)
  end

  # Synonym for CGI::unescapeElement(str, *elements)
  #
  # The element names are forwarded; calling it with only +str+ returns
  # +str+ unchanged, exactly as CGI::unescapeElement does.
  def CGI::unescape_element(str, *elements)
    unescapeElement(str, *elements)
  end

  # Abbreviated day-of-week names specified by RFC 822
  RFC822_DAYS = %w[ Sun Mon Tue Wed Thu Fri Sat ].freeze

  # Abbreviated month names specified by RFC 822
  RFC822_MONTHS = %w[ Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec ].freeze

  # Format a +Time+ object as a String using the format specified by RFC 1123.
  #
  # +time+ itself is not modified; a copy is converted to GMT first.
  #
  #   CGI::rfc1123_date(Time.now)
  #     # Sat, 01 Jan 2000 00:00:00 GMT
  def CGI::rfc1123_date(time)
    t = time.clone.gmtime
    format("%s, %.2d %s %.4d %.2d:%.2d:%.2d GMT",
           RFC822_DAYS[t.wday], t.day, RFC822_MONTHS[t.month-1], t.year,
           t.hour, t.min, t.sec)
  end

  # Prettify (indent) an HTML string.
  #
  # +string+ is the HTML string to indent.  +shift+ is the indentation
  # unit to use; it defaults to two spaces.  A closing tag for which no
  # opening tag can be found is left where it is and causes no indentation.
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>")
  #     # <HTML>
  #     #   <BODY>
  #     #   </BODY>
  #     # </HTML>
  #
  #   print CGI::pretty("<HTML><BODY></BODY></HTML>", "\t")
  #     # <HTML>
  #     #         <BODY>
  #     #         </BODY>
  #     # </HTML>
  #
  def CGI::pretty(string, shift = "  ")
    # Put every tag on a line of its own.
    lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
    end_pos = 0
    # For each closing tag still at the start of a line, indent everything
    # between it and its opening tag.  The "__" markers keep already
    # processed tags from matching /^<\// again; they are removed at the end.
    while end_pos = lines.index(/^<\/(\w+)/, end_pos)
      element = $1.dup
      start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
      if start_pos.nil?
        # Unmatched closing tag: step past it rather than wrapping
        # unrelated text in markers.
        end_pos += 1
        next
      end
      lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
    end
    lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
  end
end
199