1#
2# filehandler.rb -- FileHandler Module
3#
4# Author: IPR -- Internet Programming with Ruby -- writers
5# Copyright (c) 2001 TAKAHASHI Masayoshi, GOTOU Yuuzou
6# Copyright (c) 2003 Internet Programming with Ruby writers. All rights
7# reserved.
8#
9# $IPR: filehandler.rb,v 1.44 2003/06/07 01:34:51 gotoyuzo Exp $
10
11require 'thread'
12require 'time'
13
14require 'webrick/htmlutils'
15require 'webrick/httputils'
16require 'webrick/httpstatus'
17
18module WEBrick
19  module HTTPServlet
20
21    ##
22    # Servlet for serving a single file.  You probably want to use the
23    # FileHandler servlet instead as it handles directories and fancy indexes.
24    #
25    # Example:
26    #
27    #   server.mount('/my_page.txt', WEBrick::HTTPServlet::DefaultFileHandler,
28    #                '/path/to/my_page.txt')
29    #
30    # This servlet handles If-Modified-Since and Range requests.
31
32    class DefaultFileHandler < AbstractServlet
33
34      ##
35      # Creates a DefaultFileHandler instance for the file at +local_path+.
36
def initialize(server, local_path)
  # A bare +super+ re-passes this method's own arguments (server and
  # local_path) to the superclass initializer unchanged.
  super
  # Path of the single file this servlet serves.
  @local_path = local_path
end
41
42      # :stopdoc:
43
# Serves the file at @local_path in response to a GET request.
#
# The +etag+ header is always set, built from the file's inode number,
# size and modification time.  After that exactly one of three things
# happens:
#
# * not_modified? accepts the request's conditional headers: the body is
#   emptied and HTTPStatus::NotModified is raised;
# * the request carries a +range+ header: make_partial_content fills
#   +res+ and HTTPStatus::PartialContent is raised;
# * otherwise content-type, content-length and last-modified are set and
#   the body becomes a File opened in binary mode.
#
# File::stat raises if the file cannot be stat'ed; nothing here rescues it.
def do_GET(req, res)
  st = File.stat(@local_path)
  mtime = st.mtime
  res['etag'] = sprintf("%x-%x-%x", st.ino, st.size, mtime.to_i)

  if not_modified?(req, res, mtime, res['etag'])
    res.body = ''
    raise HTTPStatus::NotModified
  elsif req['range']
    make_partial_content(req, res, @local_path, st.size)
    raise HTTPStatus::PartialContent
  else
    mtype = HTTPUtils::mime_type(@local_path, @config[:MimeTypes])
    res['content-type'] = mtype
    res['content-length'] = st.size
    res['last-modified'] = mtime.httpdate
    # File.open, not Kernel#open: Kernel#open treats a path beginning with
    # "|" as a command to run, which must never happen for a served path.
    # The handle is deliberately left open for the response to stream from.
    # NOTE(review): presumably the response object closes it after
    # sending -- confirm.
    res.body = File.open(@local_path, "rb")
  end
end
63
# Returns true when one of the request's conditional headers matches the
# file, false otherwise.  The caller answers a true result with 304.
#
# req::   request; only req['if-range'], req['if-modified-since'] and
#         req['if-none-match'] are read.
# res::   response; res['etag'] is the entity tag compared against.
# mtime:: modification Time of the file.
# etag::  accepted for interface compatibility; the value actually compared
#         is res['etag'].
def not_modified?(req, res, mtime, etag)
  # NOTE(review): a matching If-Range is reported as "not modified" here.
  # RFC 7233 describes If-Range as choosing between a partial and a full
  # response -- confirm this is intended before changing it.
  if ir = req['if-range']
    begin
      if Time.httpdate(ir) >= mtime
        return true
      end
    rescue
      # Not an HTTP-date, so treat the value as a list of entity tags.
      if HTTPUtils::split_header_value(ir).member?(res['etag'])
        return true
      end
    end
  end

  if ims = req['if-modified-since']
    begin
      # HTTP dates have one-second resolution while mtime may carry a
      # fractional part; compare whole seconds so that a client echoing
      # our own Last-Modified value is recognised as up to date.
      return true if Time.parse(ims).to_i >= mtime.to_i
    rescue ArgumentError, RangeError
      # An unparsable date is ignored rather than failing the request.
    end
  end

  if (inm = req['if-none-match']) &&
     HTTPUtils::split_header_value(inm).member?(res['etag'])
    return true
  end

  return false
end
88
# Fills +res+ with the part(s) of +filename+ selected by the request's
# +range+ header.
#
# req::      request; req['range'] is parsed with
#            HTTPUtils::parse_range_header.
# res::      response whose headers and body are set.
# filename:: path of the file to read.
# filesize:: size used for range arithmetic and for the Content-Range
#            header.
#
# Several ranges produce a multipart/byteranges String body.  A single
# range sets content-type, content-range and content-length; its body is
# a String, or -- when the range runs to the last byte of the file -- a
# duplicated file handle positioned at the first requested byte.
#
# Raises HTTPStatus::BadRequest when the header cannot be parsed or
# yields no range, and HTTPStatus::RequestRangeNotSatisfiable when no
# requested range can be served.
def make_partial_content(req, res, filename, filesize)
  mtype = HTTPUtils::mime_type(filename, @config[:MimeTypes])
  unless ranges = HTTPUtils::parse_range_header(req['range'])
    raise HTTPStatus::BadRequest,
      "Unrecognized range-spec: \"#{req['range']}\""
  end
  # File.open, not Kernel#open: Kernel#open treats a path beginning with
  # "|" as a command to run.
  File.open(filename, "rb"){|io|
    if ranges.size > 1
      time = Time.now
      boundary = "#{time.sec}_#{time.usec}_#{Process::pid}"
      body = ''
      ranges.each{|range|
        first, last = prepare_range(range, filesize)
        next if first < 0   # prepare_range marks unusable ranges with -1
        io.pos = first
        content = io.read(last-first+1)
        body << "--" << boundary << CRLF
        body << "Content-Type: #{mtype}" << CRLF
        body << "Content-Range: bytes #{first}-#{last}/#{filesize}" << CRLF
        body << CRLF
        body << content
        body << CRLF
      }
      # Every range was skipped, so there is nothing to send.
      raise HTTPStatus::RequestRangeNotSatisfiable if body.empty?
      body << "--" << boundary << "--" << CRLF
      res["content-type"] = "multipart/byteranges; boundary=#{boundary}"
      res.body = body
    elsif range = ranges[0]
      first, last = prepare_range(range, filesize)
      raise HTTPStatus::RequestRangeNotSatisfiable if first < 0
      if last == filesize - 1
        # The range reaches the end of the file: hand over a duplicate of
        # the handle instead of reading it all here.  The duplicate stays
        # open after this block closes +io+.
        content = io.dup
        content.pos = first
      else
        io.pos = first
        content = io.read(last-first+1)
      end
      res['content-type'] = mtype
      res['content-range'] = "bytes #{first}-#{last}/#{filesize}"
      res['content-length'] = last - first + 1
      res.body = content
    else
      raise HTTPStatus::BadRequest
    end
  }
end
135
# Converts a parsed byte range into absolute offsets within a file of
# +filesize+ bytes.
#
# range::    object answering +first+ and +last+ with Integers; negative
#            values count back from the end of the file.
# filesize:: size of the file in bytes.
#
# Returns the pair [first, last] of inclusive byte offsets, with +last+
# clamped to the final byte.  Returns [-1, -1] when the range cannot be
# served: it starts before the beginning or at/after the end of the file,
# or it ends before it starts.
def prepare_range(range, filesize)
  first = range.first < 0 ? filesize + range.first : range.first
  return -1, -1 if first < 0 || first >= filesize
  last = range.last < 0 ? filesize + range.last : range.last
  last = filesize - 1 if last >= filesize
  # An inverted range (e.g. "bytes=5-2") would otherwise make callers ask
  # for a negative number of bytes.
  return -1, -1 if last < first
  return first, last
end
143
144      # :startdoc:
145    end
146
147    ##
148    # Serves a directory including fancy indexing and a variety of other
149    # options.
150    #
151    # Example:
152    #
    #   server.mount '/assets', WEBrick::HTTPServlet::FileHandler, '/path/to/assets'
154
155    class FileHandler < AbstractServlet
156      HandlerTable = Hash.new # :nodoc:
157
158      ##
159      # Allow custom handling of requests for files with +suffix+ by class
160      # +handler+
161
def self.add_handler(suffix, handler)
  # Registers +handler+ in the class-wide table under +suffix+, replacing
  # any earlier registration; the handler itself is returned.
  HandlerTable.store(suffix, handler)
end
165
166      ##
167      # Remove custom handling of requests for files with +suffix+
168
def self.remove_handler(suffix)
  # Drops the class-wide registration for +suffix+.  The result is whatever
  # the table's +delete+ returns.
  HandlerTable.delete suffix
end
172
173      ##
174      # Creates a FileHandler servlet on +server+ that serves files starting
175      # at directory +root+
176      #
177      # +options+ may be a Hash containing keys from
178      # WEBrick::Config::FileHandler or +true+ or +false+.
179      #
180      # If +options+ is true or false then +:FancyIndexing+ is enabled or
181      # disabled respectively.
182
def initialize(server, root, options={}, default=Config::FileHandler)
  # A bare true/false is shorthand for switching :FancyIndexing on or off.
  options = { :FancyIndexing => options } if options == true || options == false

  @config = server.config
  @logger = @config[:Logger]
  @root = File.expand_path(root)
  # Caller options are laid over a copy of the defaults; +default+ itself
  # is not modified.
  @options = default.merge(options)
end
192
193      # :stopdoc:
194
# Entry point for every request: optionally re-roots the request at a
# user's directory, normalises path_info, then hands over to the
# superclass +service+.
def service(req, res)
  # If this servlet is mounted on "/" (empty script_name), the :UserDir
  # option is set and /~username is requested, the path information is
  # overridden before the request is dispatched.
  if defined?(Etc) && @options[:UserDir] && req.script_name.empty?
    # \A rather than ^: in Ruby ^ also matches after a newline, so a
    # path_info containing one could otherwise have its leading part
    # silently dropped by the rewrite below.
    if %r|\A(/~([^/]+))| =~ req.path_info
      script_name, user = $1, $2
      path_info = $'
      begin
        passwd = Etc::getpwnam(user)
        @root = File::join(passwd.dir, @options[:UserDir])
        req.script_name = script_name
        req.path_info = path_info
      rescue
        # Lookup failed: leave the request untouched so it is served from
        # the original root.
        @logger.debug "#{self.class}#do_GET: getpwnam(#{user}) failed"
      end
    end
  end
  prevent_directory_traversal(req, res)
  super(req, res)
end
215
# GET: let exec_handler serve the request; when it reports that nothing
# was served, fall back to set_dir_list.
def do_GET(req, res)
  return if exec_handler(req, res)
  set_dir_list(req, res)
end
221
# POST: only exec_handler can answer; if it serves nothing the request
# ends in HTTPStatus::NotFound.
def do_POST(req, res)
  handled = exec_handler(req, res)
  raise HTTPStatus::NotFound, "`#{req.path}' not found." unless handled
end
227
# OPTIONS: exec_handler gets the first chance; when it serves nothing the
# superclass implementation answers.
def do_OPTIONS(req, res)
  exec_handler(req, res) ? nil : super(req, res)
end
233
234      # ToDo
235      # RFC2518: HTTP Extensions for Distributed Authoring -- WEBDAV
236      #
237      # PROPFIND PROPPATCH MKCOL DELETE PUT COPY MOVE
238      # LOCK UNLOCK
239
240      # RFC3253: Versioning Extensions to WebDAV
241      #          (Web Distributed Authoring and Versioning)
242      #
243      # VERSION-CONTROL REPORT CHECKOUT CHECK_IN UNCHECKOUT
244      # MKWORKSPACE UPDATE LABEL MERGE ACTIVITY
245
246      private
247
# Reports whether +path+ ends with a path separator.
#
# Appending a character to a path that ends in a separator creates a new
# last component, which changes what File.dirname returns; without a
# trailing separator the character only lengthens the last component:
#
#   File.dirname("/aaaa/bbbb/")      #=> "/aaaa"
#   File.dirname("/aaaa/bbbb/x")     #=> "/aaaa/bbbb"
#   File.dirname("/aaaa/bbbb")       #=> "/aaaa"
#   File.dirname("/aaaa/bbbbx")      #=> "/aaaa"
def trailing_pathsep?(path)
  parent_as_given = File.dirname(path)
  parent_when_extended = File.dirname(path + "x")
  parent_as_given != parent_when_extended
end
256
# Replaces req.path_info with a normalised form (no "." or ".."
# components) before it is used to reach the filesystem.  A trailing path
# separator is preserved.  +res+ is not used.
def prevent_directory_traversal(req, res)
  # Preventing directory traversal on Windows platforms;
  # Backslashes (0x5c) in path_info are not interpreted as special
  # characters in URI notation. So the value of path_info should be
  # normalized before accessing the filesystem.

  # An empty path_info has nothing to normalise.  It must not reach
  # File.expand_path, which resolves "" to the process's current working
  # directory and would turn that directory into the requested path.
  return if req.path_info.empty?

  # dirty hack for filesystem encoding; in nature, File.expand_path
  # should not be used for path normalization.  [Bug #3345]
  path = req.path_info.dup.force_encoding(Encoding.find("filesystem"))
  if trailing_pathsep?(req.path_info)
    # File.expand_path removes the trailing path separator.
    # Adding a character is a workaround to save it.
    #  File.expand_path("/aaa/")        #=> "/aaa"
    #  File.expand_path("/aaa/" + "x")  #=> "/aaa/x"
    expanded = File.expand_path(path + "x")
    expanded.chop!  # remove trailing "x"
  else
    expanded = File.expand_path(path)
  end
  # Hand the result back in the encoding the request used.
  expanded.force_encoding(req.path_info.encoding)
  req.path_info = expanded
end
279
# Resolves the request to a file and lets the matching handler serve it.
#
# Returns true when a handler served the request and false when
# set_filename found no file to serve.  The :HandlerCallback is invoked in
# both cases.  Raises HTTPStatus::NotFound when @root is not set.
def exec_handler(req, res)
  raise HTTPStatus::NotFound, "`#{req.path}' not found" unless @root

  unless set_filename(req, res)
    call_callback(:HandlerCallback, req, res)
    return false
  end

  # The handler class is chosen before the callback runs; the instance is
  # created after it.
  handler_class = get_handler(req, res)
  call_callback(:HandlerCallback, req, res)
  handler_class.get_instance(@config, res.filename).service(req, res)
  true
end
292
# Chooses the servlet class for res.filename from its suffix.
#
# Two suffixes are considered: the final extension, and -- when the final
# extension is one of @options[:AcceptableLanguages] -- the extension in
# front of it (so "page.rhtml.ja" can be looked up as "rhtml").  The
# instance's :HandlerTable option is consulted before the class-wide
# HandlerTable; DefaultFileHandler is the fallback.  +req+ is not used.
def get_handler(req, res)
  filename = res.filename

  last_ext = /\.(\w+)\z/.match(filename)
  primary = last_ext && last_ext[1].downcase

  secondary = nil
  with_lang = /\.(\w+)\.([\w\-]+)\z/.match(filename)
  if with_lang && @options[:AcceptableLanguages].include?(with_lang[2].downcase)
    secondary = with_lang[1].downcase
  end

  [@options[:HandlerTable], HandlerTable].each do |table|
    handler = table[primary] || table[secondary]
    return handler if handler
  end
  DefaultFileHandler
end
305
# Maps req.path_info onto the filesystem below @root and stores the
# resulting path in res.filename.
#
# Leading path segments that name existing directories are consumed one
# by one (firing :DirectoryCallback each time).  What follows decides the
# outcome:
#
# * nothing left: returns false with res.filename naming the directory;
# * "/": an index file is looked up; if one exists it becomes the
#   filename (:FileCallback fires, returns true), otherwise the "/" is
#   consumed and false is returned;
# * anything else: the segment must resolve to a file via search_file
#   (:FileCallback fires, returns true), or HTTPStatus::NotFound is raised.
def set_filename(req, res)
  res.filename = @root.dup
  segments = req.path_info.scan(%r|/[^/]*|)
  segments.unshift("")  # placeholder so that @root itself is checked first

  until segments.empty? || segments.first == "/"
    candidate = File.expand_path(res.filename + segments.first)
    break unless File.directory?(candidate)
    shift_path_info(req, res, segments)
    call_callback(:DirectoryCallback, req, res)
  end

  segment = segments.first
  return false unless segment

  if segment == "/"
    found = search_index_file(req, res)
    unless found
      # No index file: consume the "/" and leave the directory to the caller.
      shift_path_info(req, res, segments)
      return false
    end
  else
    found = search_file(req, res, segment)
    raise HTTPStatus::NotFound, "`#{req.path}' not found." unless found
  end

  shift_path_info(req, res, segments, found)
  call_callback(:FileCallback, req, res)
  true
end
337
# Refuses names that must not be served: a name matching a nondisclosure
# pattern or one that is ambiguous on Windows is logged as a warning and
# answered with HTTPStatus::NotFound.  Returns nil for acceptable names.
# +res+ is not used.
def check_filename(req, res, name)
  return unless nondisclosure_name?(name) || windows_ambiguous_name?(name)

  @logger.warn("the request refers nondisclosure name `#{name}'.")
  raise HTTPStatus::NotFound, "`#{req.path}' not found."
end
344
# Consumes the first element of +path_info+ and moves it into the resolved
# part of the request.
#
# +base+, when given, is used in place of the consumed element.  It is
# appended to req.script_name and to res.filename (which is re-expanded),
# req.path_info becomes the remaining elements joined together, and the
# new last component of res.filename is passed to check_filename.
def shift_path_info(req, res, path_info, base=nil)
  consumed = path_info.shift
  base ||= consumed

  req.path_info = path_info.join
  req.script_name << base

  res.filename = File.expand_path(res.filename + base)
  check_filename(req, res, File.basename(res.filename))
end
353
# Tries each name in @config[:DirectoryIndex], in order, as a file directly
# below res.filename.  Returns the first hit as reported by search_file
# (a name with a leading "/"), or nil when none exists.
def search_index_file(req, res)
  @config[:DirectoryIndex].each do |index_name|
    found = search_file(req, res, "/" + index_name)
    return found if found
  end
  nil
end
362
# Looks for +basename+ (which starts with "/") directly below res.filename.
#
# Returns +basename+ itself when that file exists.  Otherwise, if
# @options[:AcceptableLanguages] is non-empty, language-suffixed variants
# ("<basename>.<lang>") are tried: first the languages the request accepts
# that are also acceptable, in the request's order, then the remaining
# acceptable languages.  The first existing variant is returned; nil when
# nothing exists.
def search_file(req, res, basename)
  langs = @options[:AcceptableLanguages]
  path = res.filename + basename
  return basename if File.file?(path)
  return nil unless langs.size > 0

  requested = req.accept_language
  candidates = requested.select { |lang| langs.member?(lang) } + (langs - requested)
  hit = candidates.find { |lang| File.file?(path + ".#{lang}") }
  hit ? basename + ".#{hit}" : nil
end
384
# Invokes the callable stored in @options under +callback_name+ with
# (req, res) and returns its result; returns nil when the option is unset.
def call_callback(callback_name, req, res)
  callback = @options[callback_name]
  callback.call(req, res) if callback
end
390
# Returns true for names that could refer to a different file on Windows
# than the characters suggest: names ending in dots or spaces, and names
# ending in the "::$DATA" stream suffix.
def windows_ambiguous_name?(name)
  return true if /[. ]+\z/ =~ name
  # Matched case-insensitively as a precaution, so "::$data" is refused
  # as well.
  # NOTE(review): assumes Windows resolves the suffix without regard to
  # case -- confirm.
  return true if /::\$DATA\z/i =~ name
  return false
end
396
# Returns true when +name+ matches any glob pattern listed in
# @options[:NondisclosureName]; matching ignores case.
def nondisclosure_name?(name)
  @options[:NondisclosureName].any? do |pattern|
    File.fnmatch(pattern, name, File::FNM_CASEFOLD)
  end
end
405
# Writes an HTML index of the directory at res.filename into +res+.
#
# redirect_to_directory_uri is called first.  When the :FancyIndexing
# option is off, HTTPStatus::Forbidden is raised.  Otherwise every entry
# except ".", "..", nondisclosure names and Windows-ambiguous names is
# listed with its modification time and size.  The query parameters N, M
# and S select sorting by name, time or size; their value "A" sorts
# ascending, anything else descending.  Without them the list is sorted
# by name, ascending.
def set_dir_list(req, res)
  redirect_to_directory_uri(req, res)
  unless @options[:FancyIndexing]
    raise HTTPStatus::Forbidden, "no access permission to `#{req.path}'"
  end
  local_path = res.filename
  # One [display name, mtime, size] triple per entry.  Entries that cannot
  # be stat'ed are still listed, with a nil time; directories and
  # unreadable entries get size -1.
  list = Dir::entries(local_path).collect{|name|
    next if name == "." || name == ".."
    next if nondisclosure_name?(name)
    next if windows_ambiguous_name?(name)
    st = (File::stat(File.join(local_path, name)) rescue nil)
    if st.nil?
      [ name, nil, -1 ]
    elsif st.directory?
      [ name + "/", st.mtime, -1 ]
    else
      [ name, st.mtime, st.size ]
    end
  }
  list.compact!

  if    d0 = req.query["N"]; idx = 0
  elsif d0 = req.query["M"]; idx = 1
  elsif d0 = req.query["S"]; idx = 2
  else  d0 = "A"           ; idx = 0
  end
  # Direction offered by the column header links: the opposite of the
  # one currently shown.
  d1 = (d0 == "A") ? "D" : "A"

  # The time column is nil for entries that could not be stat'ed; order
  # nil before every real value instead of letting <=> return nil, which
  # would make sort! raise.
  compare = lambda{|x, y|
    if x.nil? || y.nil?
      (x.nil? ? 0 : 1) <=> (y.nil? ? 0 : 1)
    else
      x <=> y
    end
  }
  if d0 == "A"
    list.sort!{|a,b| compare.call(a[idx], b[idx]) }
  else
    list.sort!{|a,b| compare.call(b[idx], a[idx]) }
  end

  res['content-type'] = "text/html"

  res.body = <<-_end_of_html_
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<HTML>
  <HEAD><TITLE>Index of #{HTMLUtils::escape(req.path)}</TITLE></HEAD>
  <BODY>
    <H1>Index of #{HTMLUtils::escape(req.path)}</H1>
  _end_of_html_

  res.body << "<PRE>\n"
  res.body << " <A HREF=\"?N=#{d1}\">Name</A>                          "
  res.body << "<A HREF=\"?M=#{d1}\">Last modified</A>         "
  res.body << "<A HREF=\"?S=#{d1}\">Size</A>\n"
  res.body << "<HR>\n"

  list.unshift [ "..", File::mtime(local_path+"/.."), -1 ]
  list.each{ |name, time, size|
    if name == ".."
      dname = "Parent Directory"
    elsif name.bytesize > 25
      dname = name.sub(/^(.{23})(?:.*)/, '\1..')
    else
      dname = name
    end
    s =  " <A HREF=\"#{HTTPUtils::escape(name)}\">#{HTMLUtils::escape(dname)}</A>"
    # dname can still exceed 30 bytes (multibyte names, or names the
    # truncation pattern does not match); never ask for negative padding.
    s << " " * [30 - dname.bytesize, 0].max
    s << (time ? time.strftime("%Y/%m/%d %H:%M      ") : " " * 22)
    s << (size >= 0 ? size.to_s : "-") << "\n"
    res.body << s
  }
  res.body << "</PRE><HR>"

  # req.host is written into the page, so it is escaped like every other
  # value placed in the markup.
  res.body << <<-_end_of_html_
    <ADDRESS>
     #{HTMLUtils::escape(@config[:ServerSoftware])}<BR>
     at #{HTMLUtils::escape(req.host)}:#{req.port}
    </ADDRESS>
  </BODY>
</HTML>
  _end_of_html_
end
482
483      # :startdoc:
484    end
485  end
486end
487