1# The HTTPHeader module defines methods for reading and writing
2# HTTP headers.
3#
4# It is used as a mixin by other classes, to provide hash-like
5# access to HTTP header values. Unlike raw hash access, HTTPHeader
6# provides access via case-insensitive keys. It also provides
7# methods for accessing commonly-used HTTP header values in more
8# convenient formats.
9#
module Net::HTTPHeader

  # Replaces the header table with the fields given in +initheader+, an
  # object whose +each+ yields key/value pairs (e.g. a Hash), or +nil+
  # for no initial fields.
  #
  # Keys are converted with +to_s+ and downcased, so Symbol keys end up
  # under the same name the String-based readers look up.  Values are
  # stripped of surrounding whitespace.  A +nil+ value is skipped (with a
  # warning when $VERBOSE is set) instead of raising NoMethodError.
  # Raises ArgumentError if a value contains CR or LF.
  def initialize_http_header(initheader)
    @header = {}
    return unless initheader
    initheader.each do |key, value|
      name = key.to_s.downcase
      warn "net/http: warning: duplicated HTTP header: #{key}" if @header.key?(name) and $VERBOSE
      if value.nil?
        warn "net/http: warning: nil HTTP header: #{key}" if $VERBOSE
        next
      end
      value = value.strip
      reject_crlf_field_value(key, value)
      @header[name] = [value]
    end
  end

  # Number of distinct header field names currently stored.
  def size   #:nodoc: obsolete
    @header.size
  end

  alias length size   #:nodoc: obsolete

  # Returns the header field corresponding to the case-insensitive key,
  # or +nil+ if there is no such field.  Multiple values stored for the
  # same field are joined with ", ".
  # For example, a key of "Content-Type" might return "text/html"
  def [](key)
    a = @header[key.to_s.downcase] or return nil
    a.join(', ')
  end

  # Sets the header field corresponding to the case-insensitive key,
  # replacing any values already stored for it.  A +nil+ or +false+
  # +val+ removes the field instead.
  # Raises ArgumentError if +val+ contains CR or LF.
  def []=(key, val)
    unless val
      @header.delete key.to_s.downcase
      return val
    end
    reject_crlf_field_value(key, val)
    @header[key.to_s.downcase] = [val]
  end

  # [Ruby 1.8.3]
  # Adds a value to a named header field, instead of replacing its value.
  # Second argument +val+ must be a String.
  # Raises ArgumentError if +val+ contains CR or LF.
  # See also #[]=, #[] and #get_fields.
  #
  #   request.add_field 'X-My-Header', 'a'
  #   p request['X-My-Header']              #=> "a"
  #   p request.get_fields('X-My-Header')   #=> ["a"]
  #   request.add_field 'X-My-Header', 'b'
  #   p request['X-My-Header']              #=> "a, b"
  #   p request.get_fields('X-My-Header')   #=> ["a", "b"]
  #   request.add_field 'X-My-Header', 'c'
  #   p request['X-My-Header']              #=> "a, b, c"
  #   p request.get_fields('X-My-Header')   #=> ["a", "b", "c"]
  #
  def add_field(key, val)
    reject_crlf_field_value(key, val)
    name = key.to_s.downcase
    if @header.key?(name)
      @header[name].push val
    else
      @header[name] = [val]
    end
  end

  # Raises ArgumentError when +value+ (converted with +to_s+) contains a
  # CR or LF character.  A line break inside a field value would let the
  # value start a new header line when the header is written out.
  def reject_crlf_field_value(key, value)
    if value.to_s.count("\r\n") > 0
      raise ArgumentError, "header #{key} has field value #{value.inspect}, this cannot include CR/LF"
    end
  end
  private :reject_crlf_field_value

  # [Ruby 1.8.3]
  # Returns an array of header field strings corresponding to the
  # case-insensitive +key+, or +nil+ if there is no such field.  This
  # method allows you to get duplicated header fields without any
  # processing.  The returned array is a copy.  See also #[].
  #
  #   p response.get_fields('Set-Cookie')
  #     #=> ["session=al98axx; expires=Fri, 31-Dec-1999 23:58:23",
  #          "query=rubyscript; expires=Fri, 31-Dec-1999 23:58:23"]
  #   p response['Set-Cookie']
  #     #=> "session=al98axx; expires=Fri, 31-Dec-1999 23:58:23, query=rubyscript; expires=Fri, 31-Dec-1999 23:58:23"
  #
  def get_fields(key)
    name = key.to_s.downcase
    return nil unless @header[name]
    @header[name].dup
  end

  # Returns the header field corresponding to the case-insensitive key.
  # Returns the default value +args+, or the result of the block, or
  # raises an IndexError if there's no header field named +key+
  # See Hash#fetch
  def fetch(key, *args, &block)   #:yield: +key+
    a = @header.fetch(key.to_s.downcase, *args, &block)
    # A stored field is an Array of values; a caller-supplied default is
    # returned untouched.
    a.kind_of?(Array) ? a.join(', ') : a
  end

  # Iterates through the header names and values, passing in the name
  # and value to the code block supplied.  Names are yielded in their
  # stored (lowercase) form; multiple values are joined with ", ".
  # Returns an Enumerator when no block is given.
  #
  # Example:
  #
  #     response.header.each_header {|key,value| puts "#{key} = #{value}" }
  #
  def each_header   #:yield: +key+, +value+
    block_given? or return enum_for(__method__)
    @header.each do |k,va|
      yield k, va.join(', ')
    end
  end

  alias each each_header

  # Iterates through the header names in the header, passing
  # each header name to the code block.
  # Returns an Enumerator when no block is given.
  def each_name(&block)   #:yield: +key+
    block_given? or return enum_for(__method__)
    @header.each_key(&block)
  end

  alias each_key each_name

  # Iterates through the header names in the header, passing
  # capitalized header names to the code block.
  # Returns an Enumerator when no block is given.
  #
  # Note that header names are capitalized systematically;
  # capitalization may not match that used by the remote HTTP
  # server in its response.
  def each_capitalized_name  #:yield: +key+
    block_given? or return enum_for(__method__)
    @header.each_key do |k|
      yield capitalize(k)
    end
  end

  # Iterates through header values, passing each value to the
  # code block.  Multiple values of one field are joined with ", ".
  # Returns an Enumerator when no block is given.
  def each_value   #:yield: +value+
    block_given? or return enum_for(__method__)
    @header.each_value do |va|
      yield va.join(', ')
    end
  end

  # Removes a header field, specified by case-insensitive key.
  # Returns the removed Array of values, or +nil+ if there was none.
  def delete(key)
    @header.delete(key.to_s.downcase)
  end

  # true if +key+ header exists.
  def key?(key)
    @header.key?(key.to_s.downcase)
  end

  # Returns a Hash consisting of lowercase header names and Arrays of
  # their values.
  # e.g.
  # {"cache-control" => ["private"],
  #  "content-type" => ["text/html"],
  #  "date" => ["Wed, 22 Jun 2005 22:11:50 GMT"]}
  #
  # The Hash itself is a copy, but it is a shallow one: the value Arrays
  # are the same objects the header table holds.
  def to_hash
    @header.dup
  end

  # As for #each_header, except the keys are provided in capitalized form.
  #
  # Note that header names are capitalized systematically;
  # capitalization may not match that used by the remote HTTP
  # server in its response.
  def each_capitalized
    block_given? or return enum_for(__method__)
    @header.each do |k,v|
      yield capitalize(k), v.join(', ')
    end
  end

  alias canonical_each each_capitalized

  # Capitalizes each "-"-separated part of +name+,
  # e.g. "content-type" becomes "Content-Type".
  def capitalize(name)
    name.split(/-/).map {|s| s.capitalize }.join('-')
  end
  private :capitalize

  # Returns an Array of Range objects which represent the Range:
  # HTTP header field, or +nil+ if there is no such header.
  #
  # "first-last" becomes first..last, an open-ended "first-" becomes
  # first..-1 and a suffix "-length" becomes -length..-1.
  #
  # Raises Net::HTTPHeaderSyntaxError if the value is malformed, if a
  # last position is smaller than its first position, or if the only
  # spec is a suffix with zero length ("bytes=-0").
  def range
    return nil unless @header['range']

    value = self['Range']
    # byte-range-set = *( "," OWS ) ( byte-range-spec / suffix-byte-range-spec )
    #   *( OWS "," [ OWS ( byte-range-spec / suffix-byte-range-spec ) ] )
    # corrected collected ABNF
    # http://tools.ietf.org/html/draft-ietf-httpbis-p5-range-19#section-5.4.1
    # http://tools.ietf.org/html/draft-ietf-httpbis-p5-range-19#appendix-C
    # http://tools.ietf.org/html/draft-ietf-httpbis-p1-messaging-19#section-3.2.5
    unless /\Abytes=((?:,[ \t]*)*(?:\d+-\d*|-\d+)(?:[ \t]*,(?:[ \t]*\d+-\d*|-\d+)?)*)\z/ =~ value
      raise Net::HTTPHeaderSyntaxError, "invalid syntax for byte-ranges-specifier: '#{value}'"
    end

    # The grammar above permits empty list elements ("1-2,,3-4"); drop
    # them so they are not reported as malformed specs below.  The regexp
    # guarantees that at least one real spec remains.
    specs = $1.split(/,/).reject {|spec| spec.strip.empty? }
    specs.map {|spec|
      m = /(\d+)?\s*-\s*(\d+)?/i.match(spec) or
              raise Net::HTTPHeaderSyntaxError, "invalid byte-range-spec: '#{spec}'"
      d1 = m[1].to_i
      d2 = m[2].to_i
      if m[1] and m[2]
        if d1 > d2
          raise Net::HTTPHeaderSyntaxError, "last-byte-pos MUST greater than or equal to first-byte-pos but '#{spec}'"
        end
        d1..d2
      elsif m[1]
        d1..-1
      elsif m[2]
        # The zero-length check has to happen here, while it is still
        # known that this is a suffix spec: once converted, "-0" and the
        # perfectly valid "0-" are both the Range 0..-1.
        if d2 == 0 && specs.size == 1
          raise Net::HTTPHeaderSyntaxError, 'only one suffix-byte-range-spec with zero suffix-length'
        end
        -d2..-1
      else
        raise Net::HTTPHeaderSyntaxError, 'range is not specified'
      end
    }
  end

  # Sets the HTTP Range: header.
  # Accepts either a Range object as a single argument,
  # or a beginning index and a length from that index.
  # A single Integer +n+ requests the first +n+ bytes when positive and
  # the last +-n+ bytes otherwise.  A Range ending in -1 is open-ended
  # ("first-") when its start is not negative, and a suffix range
  # ("-length") when its start is negative.  A +nil+ or +false+ first
  # argument removes the header.
  #
  # Raises Net::HTTPHeaderSyntaxError for a negative or reversed Range
  # and TypeError for any other kind of argument.  Returns the range
  # argument (converted to a Range when a length was given).
  # Example:
  #
  #   req.range = (0..1023)
  #   req.set_range 0, 1023
  #
  def set_range(r, e = nil)
    unless r
      @header.delete 'range'
      return r
    end
    r = (r...r+e) if e
    case r
    when Numeric
      n = r.to_i
      rangestr = (n > 0 ? "0-#{n-1}" : "-#{-n}")
    when Range
      first = r.first
      last = r.last
      last -= 1 if r.exclude_end?
      if last == -1
        # 0..-1 means "from byte 0 to the end" and must be written "0-";
        # "-0" would be a suffix range of zero length.  The empty range
        # 0...0 (which also reaches this branch) keeps its "-0" form.
        if first > 0 || (first == 0 && !r.exclude_end?)
          rangestr = "#{first}-"
        else
          rangestr = "-#{-first}"
        end
      else
        raise Net::HTTPHeaderSyntaxError, 'range.first is negative' if first < 0
        raise Net::HTTPHeaderSyntaxError, 'range.last is negative' if last < 0
        raise Net::HTTPHeaderSyntaxError, 'must be .first < .last' if first > last
        rangestr = "#{first}-#{last}"
      end
    else
      raise TypeError, 'Range/Integer is required'
    end
    @header['range'] = ["bytes=#{rangestr}"]
    r
  end

  alias range= set_range

  # Returns an Integer object which represents the HTTP Content-Length:
  # header field, or +nil+ if that field was not provided.
  # Raises Net::HTTPHeaderSyntaxError if the field contains no digits.
  def content_length
    return nil unless key?('Content-Length')
    len = self['Content-Length'].slice(/\d+/) or
        raise Net::HTTPHeaderSyntaxError, 'wrong Content-Length format'
    len.to_i
  end

  # Sets the Content-Length: header field to +len+ converted to an
  # integer.  A +nil+ or +false+ +len+ removes the field.
  def content_length=(len)
    unless len
      @header.delete 'content-length'
      return nil
    end
    @header['content-length'] = [len.to_i.to_s]
  end

  # Returns +true+ if the "transfer-encoding" header is present and
  # contains the token "chunked".  This is an HTTP/1.1 feature, allowing
  # the content to be sent in "chunks" without at the outset
  # stating the entire content length.
  def chunked?
    return false unless @header['transfer-encoding']
    field = self['Transfer-Encoding']
    (/(?:\A|[^\-\w])chunked(?![\-\w])/i =~ field) ? true : false
  end

  # Returns a Range object which represents the value of the Content-Range:
  # header field, or +nil+ if there is no such field.
  # For a partial entity body, this indicates where this fragment
  # fits inside the full entity body, as range of byte offsets.
  # Raises Net::HTTPHeaderSyntaxError if the field cannot be parsed.
  def content_range
    return nil unless @header['content-range']
    m = %r<bytes\s+(\d+)-(\d+)/(\d+|\*)>i.match(self['Content-Range']) or
        raise Net::HTTPHeaderSyntaxError, 'wrong Content-Range format'
    m[1].to_i .. m[2].to_i
  end

  # The length of the range represented in Content-Range: header,
  # or +nil+ if there is no such header.
  def range_length
    r = content_range() or return nil
    r.end - r.begin + 1
  end

  # Returns a content type string such as "text/html".
  # This method returns nil if Content-Type: header field does not exist.
  # If no sub-type is given, only the main type is returned.
  def content_type
    return nil unless main_type()
    if sub_type()
    then "#{main_type()}/#{sub_type()}"
    else main_type()
    end
  end

  # Returns a content type string such as "text".
  # This method returns nil if Content-Type: header field does not exist.
  def main_type
    return nil unless @header['content-type']
    self['Content-Type'].split(';').first.to_s.split('/')[0].to_s.strip
  end

  # Returns a content type string such as "html".
  # This method returns nil if Content-Type: header field does not exist
  # or sub-type is not given (e.g. "Content-Type: text").
  def sub_type
    return nil unless @header['content-type']
    _, sub = *self['Content-Type'].split(';').first.to_s.split('/')
    return nil unless sub
    sub.strip
  end

  # Any parameters specified for the content type, returned as a Hash.
  # For example, a header of Content-Type: text/html; charset=EUC-JP
  # would result in type_params returning {'charset' => 'EUC-JP'}
  #
  # Returns an empty Hash if there is no Content-Type: header field.
  # Entries without a "=" (including blank ones left by a stray ";")
  # are ignored.
  def type_params
    result = {}
    list = self['Content-Type'].to_s.split(';')
    list.shift
    list.each do |param|
      k, v = *param.split('=', 2)
      # No "=" means there is no value to strip; skip the entry rather
      # than fail on nil.
      next unless v
      result[k.strip] = v.strip
    end
    result
  end

  # Sets the content type in an HTTP header.
  # The +type+ should be a full HTTP content type, e.g. "text/html".
  # The +params+ are an optional Hash of parameters to add after the
  # content type, e.g. {'charset' => 'iso-8859-1'}
  def set_content_type(type, params = {})
    @header['content-type'] = [type + params.map{|k,v|"; #{k}=#{v}"}.join('')]
  end

  alias content_type= set_content_type

  # Set header fields and a body from HTML form data.
  # +params+ should be an Array of Arrays or
  # a Hash containing HTML form data.
  # Optional argument +sep+ means data record separator.
  #
  # Values are URL encoded as necessary and the content-type is set to
  # application/x-www-form-urlencoded
  #
  # The body is assigned through +body=+, which the including object
  # must provide.
  #
  # Example:
  #    http.form_data = {"q" => "ruby", "lang" => "en"}
  #    http.form_data = {"q" => ["ruby", "perl"], "lang" => "en"}
  #    http.set_form_data({"q" => "ruby", "lang" => "en"}, ';')
  #
  def set_form_data(params, sep = '&')
    query = URI.encode_www_form(params)
    query.gsub!(/&/, sep) if sep != '&'
    self.body = query
    self.content_type = 'application/x-www-form-urlencoded'
  end

  alias form_data= set_form_data

  # Set a HTML form data set.
  # +params+ is the form data set; it is an Array of Arrays or a Hash
  # +enctype+ is the type to encode the form data set.
  # It is application/x-www-form-urlencoded or multipart/form-data;
  # any other value raises ArgumentError.
  # +formopt+ is an optional hash to specify the detail.
  #
  # boundary:: the boundary of the multipart message
  # charset::  the charset of the message. All names and the values of
  #            non-file fields are encoded as the charset.
  #
  # Each item of params is an array and contains following items:
  # +name+::  the name of the field
  # +value+:: the value of the field, it should be a String or a File
  # +opt+::   an optional hash to specify additional information
  #
  # Each item is a file field or a normal field.
  # If +value+ is a File object or the +opt+ have a filename key,
  # the item is treated as a file field.
  #
  # If Transfer-Encoding is set as chunked, this sends the request in
  # chunked encoding. Because chunked encoding is an HTTP/1.1 feature,
  # you must confirm that the server supports HTTP/1.1 before sending it.
  #
  # This method only records +params+ and +formopt+ and sets the content
  # type; any previously set body or body stream is cleared.
  #
  # Example:
  #    http.set_form([["q", "ruby"], ["lang", "en"]])
  #
  # See also RFC 2388, RFC 2616, HTML 4.01, and HTML5
  #
  def set_form(params, enctype='application/x-www-form-urlencoded', formopt={})
    @body_data = params
    @body = nil
    @body_stream = nil
    @form_option = formopt
    case enctype
    when /\Aapplication\/x-www-form-urlencoded\z/i,
      /\Amultipart\/form-data\z/i
      self.content_type = enctype
    else
      raise ArgumentError, "invalid enctype: #{enctype}"
    end
  end

  # Set the Authorization: header for "Basic" authorization.
  def basic_auth(account, password)
    @header['authorization'] = [basic_encode(account, password)]
  end

  # Set Proxy-Authorization: header for "Basic" authorization.
  def proxy_basic_auth(account, password)
    @header['proxy-authorization'] = [basic_encode(account, password)]
  end

  # Builds the "Basic ..." credential string: "account:password" encoded
  # as Base64, with the line breaks inserted by pack('m') removed.
  def basic_encode(account, password)
    'Basic ' + ["#{account}:#{password}"].pack('m').delete("\r\n")
  end
  private :basic_encode

  # true if the Connection: or Proxy-Connection: header field contains
  # the token "close" (compared case-insensitively).
  def connection_close?
    tokens(@header['connection']).include?('close') or
    tokens(@header['proxy-connection']).include?('close')
  end

  # true if the Connection: or Proxy-Connection: header field contains
  # the token "keep-alive" (compared case-insensitively).
  def connection_keep_alive?
    tokens(@header['connection']).include?('keep-alive') or
    tokens(@header['proxy-connection']).include?('keep-alive')
  end

  # Splits every value in +vals+ (an Array of field values, or +nil+) on
  # commas and returns the non-blank pieces, stripped and downcased.
  def tokens(vals)
    return [] unless vals
    vals.map {|v| v.split(',') }.flatten\
        .reject {|str| str.strip.empty? }\
        .map {|tok| tok.strip.downcase }
  end
  private :tokens

end
452
453