1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Author: mod_file_cache by Bill Stoddard <stoddard apache.org>
19 *         Based on mod_mmap_static by Dean Gaudet <dgaudet arctic.org>
20 *
21 * v0.01: initial implementation
22 */
23
24/*
25    Documentation:
26
27    Some sites have a set of static files that are really busy, and
28    change infrequently (or even on a regular schedule). Save time
29    by caching open handles to these files. This module, unlike
30    mod_mmap_static, caches open file handles, not file content.
31    On systems (like Windows) with heavy system call overhead and
32    that have an efficient sendfile implementation, caching file handles
33    offers several advantages over caching content. First, the file system
34    can manage the memory, allowing infrequently hit cached files to
35    be paged out. Second, since caching open handles does not consume
36    significant resources, it will be possible to enable an AutoLoadCache
37    feature where static files are dynamically loaded in the cache
38    as the server runs. On systems that have file change notification,
39    this module can be enhanced to automatically garbage collect
40    cached files that change on disk.
41
    This module should work on Unix systems that have sendfile. Place
    cachefile directives into your configuration to cache open file
    handles, or mmapfile directives to have the files mmap'ed at
    configuration time instead.

        cachefile /path/to/file1
        cachefile /path/to/file2
        mmapfile  /path/to/file3
        ...
49
50    These files are only cached when the server is restarted, so if you
51    change the list, or if the files are changed, then you'll need to
52    restart the server.
53
54    To reiterate that point:  if the files are modified *in place*
55    without restarting the server you may end up serving requests that
56    are completely bogus.  You should update files by unlinking the old
57    copy and putting a new copy in place.
58
59    There's no such thing as inheriting these files across vhosts or
60    whatever... place the directives in the main server only.
61
62    Known problems:
63
64    Don't use Alias or RewriteRule to move these files around...  unless
65    you feel like paying for an extra stat() on each request.  This is
66    a deficiency in the Apache API that will hopefully be solved some day.
67    The file will be served out of the file handle cache, but there will be
68    an extra stat() that's a waste.
69*/
70
71#include "apr.h"
72
73#if !(APR_HAS_SENDFILE || APR_HAS_MMAP)
74#error mod_file_cache only works on systems with APR_HAS_SENDFILE or APR_HAS_MMAP
75#endif
76
77#include "apr_mmap.h"
78#include "apr_strings.h"
79#include "apr_hash.h"
80#include "apr_buckets.h"
81
82#define APR_WANT_STRFUNC
83#include "apr_want.h"
84
85#if APR_HAVE_SYS_TYPES_H
86#include <sys/types.h>
87#endif
88
89#define CORE_PRIVATE
90
91#include "httpd.h"
92#include "http_config.h"
93#include "http_log.h"
94#include "http_protocol.h"
95#include "http_request.h"
96#include "http_core.h"
97
98module AP_MODULE_DECLARE_DATA file_cache_module;
99
/* One cached file, created by cache_the_file() and stored in the
 * per-server hash table keyed by its resolved filename.
 */
typedef struct {
#if APR_HAS_SENDFILE
    apr_file_t *file;       /* open handle; set only for non-mmapped entries */
#endif
    const char *filename;   /* server-root-relative path, also the hash key */
    apr_finfo_t finfo;      /* stat() result captured when the file was cached */
    int is_mmapped;         /* TRUE for mmapfile entries, FALSE for cachefile */
#if APR_HAS_MMAP
    apr_mmap_t *mm;         /* mapping of the file; set only when is_mmapped */
#endif
    char mtimestr[APR_RFC822_DATE_LEN]; /* finfo.mtime preformatted for Last-Modified */
    char sizestr[21];   /* big enough to hold any 64-bit file size + null */
} a_file;
113
/* Per-server configuration for this module. */
typedef struct {
    apr_hash_t *fileht;     /* resolved filename -> a_file * for every cached file */
} a_server_config;
117
118
119static void *create_server_config(apr_pool_t *p, server_rec *s)
120{
121    a_server_config *sconf = apr_palloc(p, sizeof(*sconf));
122
123    sconf->fileht = apr_hash_make(p);
124    return sconf;
125}
126
127static void cache_the_file(cmd_parms *cmd, const char *filename, int mmap)
128{
129    a_server_config *sconf;
130    a_file *new_file;
131    a_file tmp;
132    apr_file_t *fd = NULL;
133    apr_status_t rc;
134    const char *fspec;
135
136    fspec = ap_server_root_relative(cmd->pool, filename);
137    if (!fspec) {
138        ap_log_error(APLOG_MARK, APLOG_WARNING, APR_EBADPATH, cmd->server,
139                     "mod_file_cache: invalid file path "
140                     "%s, skipping", filename);
141        return;
142    }
143    if ((rc = apr_stat(&tmp.finfo, fspec, APR_FINFO_MIN,
144                                 cmd->temp_pool)) != APR_SUCCESS) {
145        ap_log_error(APLOG_MARK, APLOG_WARNING, rc, cmd->server,
146            "mod_file_cache: unable to stat(%s), skipping", fspec);
147        return;
148    }
149    if (tmp.finfo.filetype != APR_REG) {
150        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, cmd->server,
151            "mod_file_cache: %s isn't a regular file, skipping", fspec);
152        return;
153    }
154    if (tmp.finfo.size > AP_MAX_SENDFILE) {
155        ap_log_error(APLOG_MARK, APLOG_WARNING, 0, cmd->server,
156            "mod_file_cache: %s is too large to cache, skipping", fspec);
157        return;
158    }
159
160    rc = apr_file_open(&fd, fspec, APR_READ | APR_BINARY | APR_XTHREAD,
161                       APR_OS_DEFAULT, cmd->pool);
162    if (rc != APR_SUCCESS) {
163        ap_log_error(APLOG_MARK, APLOG_WARNING, rc, cmd->server,
164                     "mod_file_cache: unable to open(%s, O_RDONLY), skipping", fspec);
165        return;
166    }
167    apr_file_inherit_set(fd);
168
169    /* WooHoo, we have a file to put in the cache */
170    new_file = apr_pcalloc(cmd->pool, sizeof(a_file));
171    new_file->finfo = tmp.finfo;
172
173#if APR_HAS_MMAP
174    if (mmap) {
175        /* MMAPFile directive. MMAP'ing the file
176         * XXX: APR_HAS_LARGE_FILES issue; need to reject this request if
177         * size is greater than MAX(apr_size_t) (perhaps greater than 1M?).
178         */
179        if ((rc = apr_mmap_create(&new_file->mm, fd, 0,
180                                  (apr_size_t)new_file->finfo.size,
181                                  APR_MMAP_READ, cmd->pool)) != APR_SUCCESS) {
182            apr_file_close(fd);
183            ap_log_error(APLOG_MARK, APLOG_WARNING, rc, cmd->server,
184                         "mod_file_cache: unable to mmap %s, skipping", filename);
185            return;
186        }
187        apr_file_close(fd);
188        new_file->is_mmapped = TRUE;
189    }
190#endif
191#if APR_HAS_SENDFILE
192    if (!mmap) {
193        /* CacheFile directive. Caching the file handle */
194        new_file->is_mmapped = FALSE;
195        new_file->file = fd;
196    }
197#endif
198
199    new_file->filename = fspec;
200    apr_rfc822_date(new_file->mtimestr, new_file->finfo.mtime);
201    apr_snprintf(new_file->sizestr, sizeof new_file->sizestr, "%" APR_OFF_T_FMT, new_file->finfo.size);
202
203    sconf = ap_get_module_config(cmd->server->module_config, &file_cache_module);
204    apr_hash_set(sconf->fileht, new_file->filename, strlen(new_file->filename), new_file);
205
206}
207
208static const char *cachefilehandle(cmd_parms *cmd, void *dummy, const char *filename)
209{
210#if APR_HAS_SENDFILE
211    cache_the_file(cmd, filename, 0);
212#else
213    /* Sendfile not supported by this OS */
214    ap_log_error(APLOG_MARK, APLOG_WARNING, 0, cmd->server,
215                 "mod_file_cache: unable to cache file: %s. Sendfile is not supported on this OS", filename);
216#endif
217    return NULL;
218}
219static const char *cachefilemmap(cmd_parms *cmd, void *dummy, const char *filename)
220{
221#if APR_HAS_MMAP
222    cache_the_file(cmd, filename, 1);
223#else
224    /* MMAP not supported by this OS */
225    ap_log_error(APLOG_MARK, APLOG_WARNING, 0, cmd->server,
226                 "mod_file_cache: unable to cache file: %s. MMAP is not supported by this OS", filename);
227#endif
228    return NULL;
229}
230
231static int file_cache_post_config(apr_pool_t *p, apr_pool_t *plog,
232                                   apr_pool_t *ptemp, server_rec *s)
233{
234    /* Hummm, anything to do here? */
235    return OK;
236}
237
238/* If it's one of ours, fill in r->finfo now to avoid extra stat()... this is a
239 * bit of a kludge, because we really want to run after core_translate runs.
240 */
241static int file_cache_xlat(request_rec *r)
242{
243    a_server_config *sconf;
244    a_file *match;
245    int res;
246
247    sconf = ap_get_module_config(r->server->module_config, &file_cache_module);
248
249    /* we only operate when at least one cachefile directive was used */
250    if (!apr_hash_count(sconf->fileht)) {
251        return DECLINED;
252    }
253
254    res = ap_core_translate(r);
255    if (res != OK || !r->filename) {
256        return res;
257    }
258
259    /* search the cache */
260    match = (a_file *) apr_hash_get(sconf->fileht, r->filename, APR_HASH_KEY_STRING);
261    if (match == NULL)
262        return DECLINED;
263
264    /* pass search results to handler */
265    ap_set_module_config(r->request_config, &file_cache_module, match);
266
267    /* shortcircuit the get_path_info() stat() calls and stuff */
268    r->finfo = match->finfo;
269    return OK;
270}
271
272static int mmap_handler(request_rec *r, a_file *file)
273{
274#if APR_HAS_MMAP
275    conn_rec *c = r->connection;
276    apr_bucket *b;
277    apr_mmap_t *mm;
278    apr_bucket_brigade *bb = apr_brigade_create(r->pool, c->bucket_alloc);
279
280    apr_mmap_dup(&mm, file->mm, r->pool);
281    b = apr_bucket_mmap_create(mm, 0, (apr_size_t)file->finfo.size,
282                               c->bucket_alloc);
283    APR_BRIGADE_INSERT_TAIL(bb, b);
284    b = apr_bucket_eos_create(c->bucket_alloc);
285    APR_BRIGADE_INSERT_TAIL(bb, b);
286
287    if (ap_pass_brigade(r->output_filters, bb) != APR_SUCCESS)
288        return HTTP_INTERNAL_SERVER_ERROR;
289#endif
290    return OK;
291}
292
293static int sendfile_handler(request_rec *r, a_file *file)
294{
295#if APR_HAS_SENDFILE
296    conn_rec *c = r->connection;
297    apr_bucket *b;
298    apr_bucket_brigade *bb = apr_brigade_create(r->pool, c->bucket_alloc);
299
300    b = apr_bucket_file_create(file->file, 0, (apr_size_t)file->finfo.size,
301                               r->pool, c->bucket_alloc);
302    APR_BRIGADE_INSERT_TAIL(bb, b);
303    b = apr_bucket_eos_create(c->bucket_alloc);
304    APR_BRIGADE_INSERT_TAIL(bb, b);
305
306    if (ap_pass_brigade(r->output_filters, bb) != APR_SUCCESS)
307        return HTTP_INTERNAL_SERVER_ERROR;
308#endif
309    return OK;
310}
311
312static int file_cache_handler(request_rec *r)
313{
314    a_file *match;
315    int errstatus;
316    int rc = OK;
317
318    /* XXX: not sure if this is right yet
319     * see comment in http_core.c:default_handler
320     */
321    if (ap_strcmp_match(r->handler, "*/*")) {
322        return DECLINED;
323    }
324
325    /* we don't handle anything but GET */
326    if (r->method_number != M_GET) return DECLINED;
327
328    /* did xlat phase find the file? */
329    match = ap_get_module_config(r->request_config, &file_cache_module);
330
331    if (match == NULL) {
332        return DECLINED;
333    }
334
335    /* note that we would handle GET on this resource */
336    r->allowed |= (AP_METHOD_BIT << M_GET);
337
338    /* This handler has no use for a request body (yet), but we still
339     * need to read and discard it if the client sent one.
340     */
341    if ((errstatus = ap_discard_request_body(r)) != OK)
342        return errstatus;
343
344    ap_update_mtime(r, match->finfo.mtime);
345
346    /* ap_set_last_modified() always converts the file mtime to a string
347     * which is slow.  Accelerate the common case.
348     * ap_set_last_modified(r);
349     */
350    {
351        apr_time_t mod_time;
352        char *datestr;
353
354        mod_time = ap_rationalize_mtime(r, r->mtime);
355        if (mod_time == match->finfo.mtime)
356            datestr = match->mtimestr;
357        else {
358            datestr = apr_palloc(r->pool, APR_RFC822_DATE_LEN);
359            apr_rfc822_date(datestr, mod_time);
360        }
361        apr_table_setn(r->headers_out, "Last-Modified", datestr);
362    }
363
364    ap_set_etag(r);
365    if ((errstatus = ap_meets_conditions(r)) != OK) {
366       return errstatus;
367    }
368
369    /* ap_set_content_length() always converts the same number and never
370     * returns an error.  Accelerate it.
371     */
372    r->clength = match->finfo.size;
373    apr_table_setn(r->headers_out, "Content-Length", match->sizestr);
374
375    /* Call appropriate handler */
376    if (!r->header_only) {
377        if (match->is_mmapped == TRUE)
378            rc = mmap_handler(r, match);
379        else
380            rc = sendfile_handler(r, match);
381    }
382
383    return rc;
384}
385
386static command_rec file_cache_cmds[] =
387{
388AP_INIT_ITERATE("cachefile", cachefilehandle, NULL, RSRC_CONF,
389     "A space separated list of files to add to the file handle cache at config time"),
390AP_INIT_ITERATE("mmapfile", cachefilemmap, NULL, RSRC_CONF,
391     "A space separated list of files to mmap at config time"),
392    {NULL}
393};
394
395static void register_hooks(apr_pool_t *p)
396{
397    ap_hook_handler(file_cache_handler, NULL, NULL, APR_HOOK_LAST);
398    ap_hook_post_config(file_cache_post_config, NULL, NULL, APR_HOOK_MIDDLE);
399    ap_hook_translate_name(file_cache_xlat, NULL, NULL, APR_HOOK_MIDDLE);
400    /* This trick doesn't work apparently because the translate hooks
401       are single shot. If the core_hook returns OK, then our hook is
402       not called.
403    ap_hook_translate_name(file_cache_xlat, aszPre, NULL, APR_HOOK_MIDDLE);
404    */
405
406}
407
/* Module record exported to the server.  The module keeps per-server
 * configuration only (no per-directory config and no merge functions),
 * plus its directive table and hook registration function.
 */
module AP_MODULE_DECLARE_DATA file_cache_module =
{
    STANDARD20_MODULE_STUFF,
    NULL,                     /* create per-directory config structure */
    NULL,                     /* merge per-directory config structures */
    create_server_config,     /* create per-server config structure */
    NULL,                     /* merge per-server config structures */
    file_cache_cmds,          /* command handlers */
    register_hooks            /* register hooks */
};
418