1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "apr_lib.h"
18#include "apr_file_io.h"
19#include "apr_strings.h"
20#include "mod_cache.h"
21#include "mod_cache_disk.h"
22#include "http_config.h"
23#include "http_log.h"
24#include "http_core.h"
25#include "ap_provider.h"
26#include "util_filter.h"
27#include "util_script.h"
28#include "util_charset.h"
29
30/*
31 * mod_cache_disk: Disk Based HTTP 1.1 Cache.
32 *
33 * Flow to Find the .data file:
34 *   Incoming client requests URI /foo/bar/baz
35 *   Generate <hash> off of /foo/bar/baz
36 *   Open <hash>.header
37 *   Read in <hash>.header file (may contain Format #1 or Format #2)
38 *   If format #1 (Contains a list of Vary Headers):
39 *      Use each header name (from .header) with our request values (headers_in) to
40 *      regenerate <hash> using HeaderName+HeaderValue+.../foo/bar/baz
41 *      re-read in <hash>.header (must be format #2)
42 *   read in <hash>.data
43 *
44 * Format #1:
45 *   apr_uint32_t format;
46 *   apr_time_t expire;
47 *   apr_array_t vary_headers (delimited by CRLF)
48 *
49 * Format #2:
50 *   disk_cache_info_t (first sizeof(apr_uint32_t) bytes is the format)
51 *   entity name (dobj->name) [length is in disk_cache_info_t->name_len]
52 *   r->headers_out (delimited by CRLF)
53 *   CRLF
54 *   r->headers_in (delimited by CRLF)
55 *   CRLF
56 */
57
58module AP_MODULE_DECLARE_DATA cache_disk_module;
59
60/* Forward declarations */
61static int remove_entity(cache_handle_t *h);
62static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *i);
63static apr_status_t store_body(cache_handle_t *h, request_rec *r, apr_bucket_brigade *in,
64                               apr_bucket_brigade *out);
65static apr_status_t recall_headers(cache_handle_t *h, request_rec *r);
66static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb);
67static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
68                               apr_file_t *file);
69
70/*
71 * Local static functions
72 */
73
74static char *header_file(apr_pool_t *p, disk_cache_conf *conf,
75                         disk_cache_object_t *dobj, const char *name)
76{
77    if (!dobj->hashfile) {
78        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
79                                                conf->dirlength, name);
80    }
81
82    if (dobj->prefix) {
83        return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX "/",
84                           dobj->hashfile, CACHE_HEADER_SUFFIX, NULL);
85     }
86     else {
87        return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
88                           CACHE_HEADER_SUFFIX, NULL);
89     }
90}
91
92static char *data_file(apr_pool_t *p, disk_cache_conf *conf,
93                       disk_cache_object_t *dobj, const char *name)
94{
95    if (!dobj->hashfile) {
96        dobj->hashfile = ap_cache_generate_name(p, conf->dirlevels,
97                                                conf->dirlength, name);
98    }
99
100    if (dobj->prefix) {
101        return apr_pstrcat(p, dobj->prefix, CACHE_VDIR_SUFFIX "/",
102                           dobj->hashfile, CACHE_DATA_SUFFIX, NULL);
103     }
104     else {
105        return apr_pstrcat(p, conf->cache_root, "/", dobj->hashfile,
106                           CACHE_DATA_SUFFIX, NULL);
107     }
108}
109
110static apr_status_t mkdir_structure(disk_cache_conf *conf, const char *file, apr_pool_t *pool)
111{
112    apr_status_t rv;
113    char *p;
114
115    for (p = (char*)file + conf->cache_root_len + 1;;) {
116        p = strchr(p, '/');
117        if (!p)
118            break;
119        *p = '\0';
120
121        rv = apr_dir_make(file,
122                          APR_UREAD|APR_UWRITE|APR_UEXECUTE, pool);
123        if (rv != APR_SUCCESS && !APR_STATUS_IS_EEXIST(rv)) {
124            return rv;
125        }
126        *p = '/';
127        ++p;
128    }
129    return APR_SUCCESS;
130}
131
132/* htcacheclean may remove directories underneath us.
133 * So, we'll try renaming three times at a cost of 0.002 seconds.
134 */
135static apr_status_t safe_file_rename(disk_cache_conf *conf,
136                                     const char *src, const char *dest,
137                                     apr_pool_t *pool)
138{
139    apr_status_t rv;
140
141    rv = apr_file_rename(src, dest, pool);
142
143    if (rv != APR_SUCCESS) {
144        int i;
145
146        for (i = 0; i < 2 && rv != APR_SUCCESS; i++) {
147            /* 1000 micro-seconds aka 0.001 seconds. */
148            apr_sleep(1000);
149
150            rv = mkdir_structure(conf, dest, pool);
151            if (rv != APR_SUCCESS)
152                continue;
153
154            rv = apr_file_rename(src, dest, pool);
155        }
156    }
157
158    return rv;
159}
160
161static apr_status_t file_cache_el_final(disk_cache_conf *conf, disk_cache_file_t *file,
162                                        request_rec *r)
163{
164    apr_status_t rv = APR_SUCCESS;
165
166    /* This assumes that the tempfiles are on the same file system
167     * as the cache_root. If not, then we need a file copy/move
168     * rather than a rename.
169     */
170
171    /* move the file over */
172    if (file->tempfd) {
173
174        rv = safe_file_rename(conf, file->tempfile, file->file, file->pool);
175        if (rv != APR_SUCCESS) {
176            ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00699)
177                    "rename tempfile to file failed:"
178                    " %s -> %s", file->tempfile, file->file);
179            apr_file_remove(file->tempfile, file->pool);
180        }
181
182        file->tempfd = NULL;
183    }
184
185    return rv;
186}
187
188static apr_status_t file_cache_temp_cleanup(void *dummy) {
189    disk_cache_file_t *file = (disk_cache_file_t *)dummy;
190
191    /* clean up the temporary file */
192    if (file->tempfd) {
193        apr_file_remove(file->tempfile, file->pool);
194        file->tempfd = NULL;
195    }
196    file->tempfile = NULL;
197    file->pool = NULL;
198
199    return APR_SUCCESS;
200}
201
202static apr_status_t file_cache_create(disk_cache_conf *conf, disk_cache_file_t *file,
203                                      apr_pool_t *pool)
204{
205    file->pool = pool;
206    file->tempfile = apr_pstrcat(pool, conf->cache_root, AP_TEMPFILE, NULL);
207
208    apr_pool_cleanup_register(pool, file, file_cache_temp_cleanup, apr_pool_cleanup_null);
209
210    return APR_SUCCESS;
211}
212
213/* These two functions get and put state information into the data
214 * file for an ap_cache_el, this state information will be read
215 * and written transparent to clients of this module
216 */
217static int file_cache_recall_mydata(apr_file_t *fd, cache_info *info,
218                                    disk_cache_object_t *dobj, request_rec *r)
219{
220    apr_status_t rv;
221    char *urlbuff;
222    apr_size_t len;
223
224    /* read the data from the cache file */
225    len = sizeof(disk_cache_info_t);
226    rv = apr_file_read_full(fd, &dobj->disk_info, len, &len);
227    if (rv != APR_SUCCESS) {
228        return rv;
229    }
230
231    /* Store it away so we can get it later. */
232    info->status = dobj->disk_info.status;
233    info->date = dobj->disk_info.date;
234    info->expire = dobj->disk_info.expire;
235    info->request_time = dobj->disk_info.request_time;
236    info->response_time = dobj->disk_info.response_time;
237
238    memcpy(&info->control, &dobj->disk_info.control, sizeof(cache_control_t));
239
240    /* Note that we could optimize this by conditionally doing the palloc
241     * depending upon the size. */
242    urlbuff = apr_palloc(r->pool, dobj->disk_info.name_len + 1);
243    len = dobj->disk_info.name_len;
244    rv = apr_file_read_full(fd, urlbuff, len, &len);
245    if (rv != APR_SUCCESS) {
246        return rv;
247    }
248    urlbuff[dobj->disk_info.name_len] = '\0';
249
250    /* check that we have the same URL */
251    /* Would strncmp be correct? */
252    if (strcmp(urlbuff, dobj->name) != 0) {
253        return APR_EGENERAL;
254    }
255
256    return APR_SUCCESS;
257}
258
259static const char* regen_key(apr_pool_t *p, apr_table_t *headers,
260                             apr_array_header_t *varray, const char *oldkey)
261{
262    struct iovec *iov;
263    int i, k;
264    int nvec;
265    const char *header;
266    const char **elts;
267
268    nvec = (varray->nelts * 2) + 1;
269    iov = apr_palloc(p, sizeof(struct iovec) * nvec);
270    elts = (const char **) varray->elts;
271
272    /* TODO:
273     *    - Handle multiple-value headers better. (sort them?)
274     *    - Handle Case in-sensitive Values better.
275     *        This isn't the end of the world, since it just lowers the cache
276     *        hit rate, but it would be nice to fix.
277     *
278     * The majority are case insenstive if they are values (encoding etc).
279     * Most of rfc2616 is case insensitive on header contents.
280     *
281     * So the better solution may be to identify headers which should be
282     * treated case-sensitive?
283     *  HTTP URI's (3.2.3) [host and scheme are insensitive]
284     *  HTTP method (5.1.1)
285     *  HTTP-date values (3.3.1)
286     *  3.7 Media Types [exerpt]
287     *     The type, subtype, and parameter attribute names are case-
288     *     insensitive. Parameter values might or might not be case-sensitive,
289     *     depending on the semantics of the parameter name.
290     *  4.20 Except [exerpt]
291     *     Comparison of expectation values is case-insensitive for unquoted
292     *     tokens (including the 100-continue token), and is case-sensitive for
293     *     quoted-string expectation-extensions.
294     */
295
296    for(i=0, k=0; i < varray->nelts; i++) {
297        header = apr_table_get(headers, elts[i]);
298        if (!header) {
299            header = "";
300        }
301        iov[k].iov_base = (char*) elts[i];
302        iov[k].iov_len = strlen(elts[i]);
303        k++;
304        iov[k].iov_base = (char*) header;
305        iov[k].iov_len = strlen(header);
306        k++;
307    }
308    iov[k].iov_base = (char*) oldkey;
309    iov[k].iov_len = strlen(oldkey);
310    k++;
311
312    return apr_pstrcatv(p, iov, k, NULL);
313}
314
/*
 * qsort() comparator for an array of C strings: each argument points at a
 * char * element, and the pointed-to strings are ordered with strcmp().
 */
static int array_alphasort(const void *fn1, const void *fn2)
{
    const char *lhs = *(char**)fn1;
    const char *rhs = *(char**)fn2;

    return strcmp(lhs, rhs);
}
319
320static void tokens_to_array(apr_pool_t *p, const char *data,
321                            apr_array_header_t *arr)
322{
323    char *token;
324
325    while ((token = ap_get_list_item(p, &data)) != NULL) {
326        *((const char **) apr_array_push(arr)) = token;
327    }
328
329    /* Sort it so that "Vary: A, B" and "Vary: B, A" are stored the same. */
330    qsort((void *) arr->elts, arr->nelts,
331         sizeof(char *), array_alphasort);
332}
333
334/*
335 * Hook and mod_cache callback functions
336 */
337static int create_entity(cache_handle_t *h, request_rec *r, const char *key, apr_off_t len,
338                         apr_bucket_brigade *bb)
339{
340    disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &cache_disk_module);
341    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
342                                                 &cache_disk_module);
343    cache_object_t *obj;
344    disk_cache_object_t *dobj;
345    apr_pool_t *pool;
346
347    if (conf->cache_root == NULL) {
348        return DECLINED;
349    }
350
351    /* we don't support caching of range requests (yet) */
352    if (r->status == HTTP_PARTIAL_CONTENT) {
353        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00700)
354                "URL %s partial content response not cached",
355                key);
356        return DECLINED;
357    }
358
359    /* Note, len is -1 if unknown so don't trust it too hard */
360    if (len > dconf->maxfs) {
361        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00701)
362                "URL %s failed the size check "
363                "(%" APR_OFF_T_FMT " > %" APR_OFF_T_FMT ")",
364                key, len, dconf->maxfs);
365        return DECLINED;
366    }
367    if (len >= 0 && len < dconf->minfs) {
368        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00702)
369                "URL %s failed the size check "
370                "(%" APR_OFF_T_FMT " < %" APR_OFF_T_FMT ")",
371                key, len, dconf->minfs);
372        return DECLINED;
373    }
374
375    /* Allocate and initialize cache_object_t and disk_cache_object_t */
376    h->cache_obj = obj = apr_pcalloc(r->pool, sizeof(*obj));
377    obj->vobj = dobj = apr_pcalloc(r->pool, sizeof(*dobj));
378
379    obj->key = apr_pstrdup(r->pool, key);
380
381    dobj->name = obj->key;
382    dobj->prefix = NULL;
383    /* Save the cache root */
384    dobj->root = apr_pstrmemdup(r->pool, conf->cache_root, conf->cache_root_len);
385    dobj->root_len = conf->cache_root_len;
386
387    apr_pool_create(&pool, r->pool);
388    apr_pool_tag(pool, "mod_cache (create_entity)");
389
390    file_cache_create(conf, &dobj->hdrs, pool);
391    file_cache_create(conf, &dobj->vary, pool);
392    file_cache_create(conf, &dobj->data, pool);
393
394    dobj->data.file = data_file(r->pool, conf, dobj, key);
395    dobj->hdrs.file = header_file(r->pool, conf, dobj, key);
396    dobj->vary.file = header_file(r->pool, conf, dobj, key);
397
398    dobj->disk_info.header_only = r->header_only;
399
400    return OK;
401}
402
403static int open_entity(cache_handle_t *h, request_rec *r, const char *key)
404{
405    apr_uint32_t format;
406    apr_size_t len;
407    const char *nkey;
408    apr_status_t rc;
409    static int error_logged = 0;
410    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
411                                                 &cache_disk_module);
412#ifdef APR_SENDFILE_ENABLED
413    core_dir_config *coreconf = ap_get_core_module_config(r->per_dir_config);
414#endif
415    apr_finfo_t finfo;
416    cache_object_t *obj;
417    cache_info *info;
418    disk_cache_object_t *dobj;
419    int flags;
420    apr_pool_t *pool;
421
422    h->cache_obj = NULL;
423
424    /* Look up entity keyed to 'url' */
425    if (conf->cache_root == NULL) {
426        if (!error_logged) {
427            error_logged = 1;
428            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00703)
429                    "Cannot cache files to disk without a CacheRoot specified.");
430        }
431        return DECLINED;
432    }
433
434    /* Create and init the cache object */
435    obj = apr_pcalloc(r->pool, sizeof(cache_object_t));
436    dobj = apr_pcalloc(r->pool, sizeof(disk_cache_object_t));
437
438    info = &(obj->info);
439
440    /* Open the headers file */
441    dobj->prefix = NULL;
442
443    /* Save the cache root */
444    dobj->root = apr_pstrmemdup(r->pool, conf->cache_root, conf->cache_root_len);
445    dobj->root_len = conf->cache_root_len;
446
447    dobj->vary.file = header_file(r->pool, conf, dobj, key);
448    flags = APR_READ|APR_BINARY|APR_BUFFERED;
449    rc = apr_file_open(&dobj->vary.fd, dobj->vary.file, flags, 0, r->pool);
450    if (rc != APR_SUCCESS) {
451        return DECLINED;
452    }
453
454    /* read the format from the cache file */
455    len = sizeof(format);
456    apr_file_read_full(dobj->vary.fd, &format, len, &len);
457
458    if (format == VARY_FORMAT_VERSION) {
459        apr_array_header_t* varray;
460        apr_time_t expire;
461
462        len = sizeof(expire);
463        apr_file_read_full(dobj->vary.fd, &expire, len, &len);
464
465        varray = apr_array_make(r->pool, 5, sizeof(char*));
466        rc = read_array(r, varray, dobj->vary.fd);
467        if (rc != APR_SUCCESS) {
468            ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00704)
469                    "Cannot parse vary header file: %s",
470                    dobj->vary.file);
471            apr_file_close(dobj->vary.fd);
472            return DECLINED;
473        }
474        apr_file_close(dobj->vary.fd);
475
476        nkey = regen_key(r->pool, r->headers_in, varray, key);
477
478        dobj->hashfile = NULL;
479        dobj->prefix = dobj->vary.file;
480        dobj->hdrs.file = header_file(r->pool, conf, dobj, nkey);
481
482        flags = APR_READ|APR_BINARY|APR_BUFFERED;
483        rc = apr_file_open(&dobj->hdrs.fd, dobj->hdrs.file, flags, 0, r->pool);
484        if (rc != APR_SUCCESS) {
485            return DECLINED;
486        }
487    }
488    else if (format != DISK_FORMAT_VERSION) {
489        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00705)
490                "File '%s' has a version mismatch. File had version: %d.",
491                dobj->vary.file, format);
492        apr_file_close(dobj->vary.fd);
493        return DECLINED;
494    }
495    else {
496        apr_off_t offset = 0;
497
498        /* oops, not vary as it turns out */
499        dobj->hdrs.fd = dobj->vary.fd;
500        dobj->vary.fd = NULL;
501        dobj->hdrs.file = dobj->vary.file;
502
503        /* This wasn't a Vary Format file, so we must seek to the
504         * start of the file again, so that later reads work.
505         */
506        apr_file_seek(dobj->hdrs.fd, APR_SET, &offset);
507        nkey = key;
508    }
509
510    obj->key = nkey;
511    dobj->key = nkey;
512    dobj->name = key;
513
514    apr_pool_create(&pool, r->pool);
515    apr_pool_tag(pool, "mod_cache (open_entity)");
516
517    file_cache_create(conf, &dobj->hdrs, pool);
518    file_cache_create(conf, &dobj->vary, pool);
519    file_cache_create(conf, &dobj->data, pool);
520
521    dobj->data.file = data_file(r->pool, conf, dobj, nkey);
522
523    /* Read the bytes to setup the cache_info fields */
524    rc = file_cache_recall_mydata(dobj->hdrs.fd, info, dobj, r);
525    if (rc != APR_SUCCESS) {
526        ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00706)
527                "Cannot read header file %s", dobj->hdrs.file);
528        apr_file_close(dobj->hdrs.fd);
529        return DECLINED;
530    }
531
532
533    /* Is this a cached HEAD request? */
534    if (dobj->disk_info.header_only && !r->header_only) {
535        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, APR_SUCCESS, r, APLOGNO(00707)
536                "HEAD request cached, non-HEAD requested, ignoring: %s",
537                dobj->hdrs.file);
538        apr_file_close(dobj->hdrs.fd);
539        return DECLINED;
540    }
541
542    /* Open the data file */
543    if (dobj->disk_info.has_body) {
544        flags = APR_READ | APR_BINARY;
545#ifdef APR_SENDFILE_ENABLED
546        /* When we are in the quick handler we don't have the per-directory
547         * configuration, so this check only takes the global setting of
548         * the EnableSendFile directive into account.
549         */
550        flags |= AP_SENDFILE_ENABLED(coreconf->enable_sendfile);
551#endif
552        rc = apr_file_open(&dobj->data.fd, dobj->data.file, flags, 0, r->pool);
553        if (rc != APR_SUCCESS) {
554            ap_log_rerror(APLOG_MARK, APLOG_ERR, rc, r, APLOGNO(00708)
555                    "Cannot open data file %s", dobj->data.file);
556            apr_file_close(dobj->hdrs.fd);
557            return DECLINED;
558        }
559
560        rc = apr_file_info_get(&finfo, APR_FINFO_SIZE | APR_FINFO_IDENT,
561                dobj->data.fd);
562        if (rc == APR_SUCCESS) {
563            dobj->file_size = finfo.size;
564        }
565
566        /* Atomic check - does the body file belong to the header file? */
567        if (dobj->disk_info.inode == finfo.inode &&
568                dobj->disk_info.device == finfo.device) {
569
570            /* Initialize the cache_handle callback functions */
571            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00709)
572                    "Recalled cached URL info header %s", dobj->name);
573
574            /* make the configuration stick */
575            h->cache_obj = obj;
576            obj->vobj = dobj;
577
578            return OK;
579        }
580
581    }
582    else {
583
584        /* make the configuration stick */
585        h->cache_obj = obj;
586        obj->vobj = dobj;
587
588        return OK;
589    }
590
591    /* Oh dear, no luck matching header to the body */
592    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00710)
593            "Cached URL info header '%s' didn't match body, ignoring this entry",
594            dobj->name);
595
596    apr_file_close(dobj->hdrs.fd);
597    return DECLINED;
598}
599
600static void close_disk_cache_fd(disk_cache_file_t *file)
601{
602   if (file->fd != NULL) {
603       apr_file_close(file->fd);
604       file->fd = NULL;
605   }
606   if (file->tempfd != NULL) {
607       apr_file_close(file->tempfd);
608       file->tempfd = NULL;
609   }
610}
611
612static int remove_entity(cache_handle_t *h)
613{
614    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
615
616    close_disk_cache_fd(&(dobj->hdrs));
617    close_disk_cache_fd(&(dobj->vary));
618    close_disk_cache_fd(&(dobj->data));
619
620    /* Null out the cache object pointer so next time we start from scratch  */
621    h->cache_obj = NULL;
622    return OK;
623}
624
625static int remove_url(cache_handle_t *h, request_rec *r)
626{
627    apr_status_t rc;
628    disk_cache_object_t *dobj;
629
630    /* Get disk cache object from cache handle */
631    dobj = (disk_cache_object_t *) h->cache_obj->vobj;
632    if (!dobj) {
633        return DECLINED;
634    }
635
636    /* Delete headers file */
637    if (dobj->hdrs.file) {
638        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00711)
639                "Deleting %s from cache.", dobj->hdrs.file);
640
641        rc = apr_file_remove(dobj->hdrs.file, r->pool);
642        if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
643            /* Will only result in an output if httpd is started with -e debug.
644             * For reason see log_error_core for the case s == NULL.
645             */
646            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r, APLOGNO(00712)
647                    "Failed to delete headers file %s from cache.",
648                    dobj->hdrs.file);
649            return DECLINED;
650        }
651    }
652
653    /* Delete data file */
654    if (dobj->data.file) {
655        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00713)
656                "Deleting %s from cache.", dobj->data.file);
657
658        rc = apr_file_remove(dobj->data.file, r->pool);
659        if ((rc != APR_SUCCESS) && !APR_STATUS_IS_ENOENT(rc)) {
660            /* Will only result in an output if httpd is started with -e debug.
661             * For reason see log_error_core for the case s == NULL.
662             */
663            ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rc, r, APLOGNO(00714)
664                    "Failed to delete data file %s from cache.",
665                    dobj->data.file);
666            return DECLINED;
667        }
668    }
669
670    /* now delete directories as far as possible up to our cache root */
671    if (dobj->root) {
672        const char *str_to_copy;
673
674        str_to_copy = dobj->hdrs.file ? dobj->hdrs.file : dobj->data.file;
675        if (str_to_copy) {
676            char *dir, *slash, *q;
677
678            dir = apr_pstrdup(r->pool, str_to_copy);
679
680            /* remove filename */
681            slash = strrchr(dir, '/');
682            *slash = '\0';
683
684            /*
685             * now walk our way back to the cache root, delete everything
686             * in the way as far as possible
687             *
688             * Note: due to the way we constructed the file names in
689             * header_file and data_file, we are guaranteed that the
690             * cache_root is suffixed by at least one '/' which will be
691             * turned into a terminating null by this loop.  Therefore,
692             * we won't either delete or go above our cache root.
693             */
694            for (q = dir + dobj->root_len; *q ; ) {
695                 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00715)
696                        "Deleting directory %s from cache", dir);
697
698                 rc = apr_dir_remove(dir, r->pool);
699                 if (rc != APR_SUCCESS && !APR_STATUS_IS_ENOENT(rc)) {
700                    break;
701                 }
702                 slash = strrchr(q, '/');
703                 *slash = '\0';
704            }
705        }
706    }
707
708    return OK;
709}
710
711static apr_status_t read_array(request_rec *r, apr_array_header_t* arr,
712                               apr_file_t *file)
713{
714    char w[MAX_STRING_LEN];
715    int p;
716    apr_status_t rv;
717
718    while (1) {
719        rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
720        if (rv != APR_SUCCESS) {
721            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00716)
722                          "Premature end of vary array.");
723            return rv;
724        }
725
726        p = strlen(w);
727        if (p > 0 && w[p - 1] == '\n') {
728            if (p > 1 && w[p - 2] == CR) {
729                w[p - 2] = '\0';
730            }
731            else {
732                w[p - 1] = '\0';
733            }
734        }
735
736        /* If we've finished reading the array, break out of the loop. */
737        if (w[0] == '\0') {
738            break;
739        }
740
741       *((const char **) apr_array_push(arr)) = apr_pstrdup(r->pool, w);
742    }
743
744    return APR_SUCCESS;
745}
746
747static apr_status_t store_array(apr_file_t *fd, apr_array_header_t* arr)
748{
749    int i;
750    apr_status_t rv;
751    struct iovec iov[2];
752    apr_size_t amt;
753    const char **elts;
754
755    elts = (const char **) arr->elts;
756
757    for (i = 0; i < arr->nelts; i++) {
758        iov[0].iov_base = (char*) elts[i];
759        iov[0].iov_len = strlen(elts[i]);
760        iov[1].iov_base = CRLF;
761        iov[1].iov_len = sizeof(CRLF) - 1;
762
763        rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 2, &amt);
764        if (rv != APR_SUCCESS) {
765            return rv;
766        }
767    }
768
769    iov[0].iov_base = CRLF;
770    iov[0].iov_len = sizeof(CRLF) - 1;
771
772    return apr_file_writev_full(fd, (const struct iovec *) &iov, 1, &amt);
773}
774
775static apr_status_t read_table(cache_handle_t *handle, request_rec *r,
776                               apr_table_t *table, apr_file_t *file)
777{
778    char w[MAX_STRING_LEN];
779    char *l;
780    int p;
781    apr_status_t rv;
782
783    while (1) {
784
785        /* ### What about APR_EOF? */
786        rv = apr_file_gets(w, MAX_STRING_LEN - 1, file);
787        if (rv != APR_SUCCESS) {
788            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00717)
789                          "Premature end of cache headers.");
790            return rv;
791        }
792
793        /* Delete terminal (CR?)LF */
794
795        p = strlen(w);
796        /* Indeed, the host's '\n':
797           '\012' for UNIX; '\015' for MacOS; '\025' for OS/390
798           -- whatever the script generates.
799        */
800        if (p > 0 && w[p - 1] == '\n') {
801            if (p > 1 && w[p - 2] == CR) {
802                w[p - 2] = '\0';
803            }
804            else {
805                w[p - 1] = '\0';
806            }
807        }
808
809        /* If we've finished reading the headers, break out of the loop. */
810        if (w[0] == '\0') {
811            break;
812        }
813
814#if APR_CHARSET_EBCDIC
815        /* Chances are that we received an ASCII header text instead of
816         * the expected EBCDIC header lines. Try to auto-detect:
817         */
818        if (!(l = strchr(w, ':'))) {
819            int maybeASCII = 0, maybeEBCDIC = 0;
820            unsigned char *cp, native;
821            apr_size_t inbytes_left, outbytes_left;
822
823            for (cp = w; *cp != '\0'; ++cp) {
824                native = apr_xlate_conv_byte(ap_hdrs_from_ascii, *cp);
825                if (apr_isprint(*cp) && !apr_isprint(native))
826                    ++maybeEBCDIC;
827                if (!apr_isprint(*cp) && apr_isprint(native))
828                    ++maybeASCII;
829            }
830            if (maybeASCII > maybeEBCDIC) {
831                ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00718)
832                        "CGI Interface Error: Script headers apparently ASCII: (CGI = %s)",
833                        r->filename);
834                inbytes_left = outbytes_left = cp - w;
835                apr_xlate_conv_buffer(ap_hdrs_from_ascii,
836                                      w, &inbytes_left, w, &outbytes_left);
837            }
838        }
839#endif /*APR_CHARSET_EBCDIC*/
840
841        /* if we see a bogus header don't ignore it. Shout and scream */
842        if (!(l = strchr(w, ':'))) {
843            return APR_EGENERAL;
844        }
845
846        *l++ = '\0';
847        while (apr_isspace(*l)) {
848            ++l;
849        }
850
851        apr_table_add(table, w, l);
852    }
853
854    return APR_SUCCESS;
855}
856
857/*
858 * Reads headers from a buffer and returns an array of headers.
859 * Returns NULL on file error
860 * This routine tries to deal with too long lines and continuation lines.
861 * @@@: XXX: FIXME: currently the headers are passed thru un-merged.
862 * Is that okay, or should they be collapsed where possible?
863 */
864static apr_status_t recall_headers(cache_handle_t *h, request_rec *r)
865{
866    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
867
868    /* This case should not happen... */
869    if (!dobj->hdrs.fd) {
870        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00719)
871                "recalling headers; but no header fd for %s", dobj->name);
872        return APR_NOTFOUND;
873    }
874
875    h->req_hdrs = apr_table_make(r->pool, 20);
876    h->resp_hdrs = apr_table_make(r->pool, 20);
877
878    /* Call routine to read the header lines/status line */
879    read_table(h, r, h->resp_hdrs, dobj->hdrs.fd);
880    read_table(h, r, h->req_hdrs, dobj->hdrs.fd);
881
882    apr_file_close(dobj->hdrs.fd);
883
884    ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00720)
885            "Recalled headers for URL %s", dobj->name);
886    return APR_SUCCESS;
887}
888
889static apr_status_t recall_body(cache_handle_t *h, apr_pool_t *p, apr_bucket_brigade *bb)
890{
891    disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
892
893    if (dobj->data.fd) {
894        apr_brigade_insert_file(bb, dobj->data.fd, 0, dobj->file_size, p);
895    }
896
897    return APR_SUCCESS;
898}
899
900static apr_status_t store_table(apr_file_t *fd, apr_table_t *table)
901{
902    int i;
903    apr_status_t rv;
904    struct iovec iov[4];
905    apr_size_t amt;
906    apr_table_entry_t *elts;
907
908    elts = (apr_table_entry_t *) apr_table_elts(table)->elts;
909    for (i = 0; i < apr_table_elts(table)->nelts; ++i) {
910        if (elts[i].key != NULL) {
911            iov[0].iov_base = elts[i].key;
912            iov[0].iov_len = strlen(elts[i].key);
913            iov[1].iov_base = ": ";
914            iov[1].iov_len = sizeof(": ") - 1;
915            iov[2].iov_base = elts[i].val;
916            iov[2].iov_len = strlen(elts[i].val);
917            iov[3].iov_base = CRLF;
918            iov[3].iov_len = sizeof(CRLF) - 1;
919
920            rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 4, &amt);
921            if (rv != APR_SUCCESS) {
922                return rv;
923            }
924        }
925    }
926    iov[0].iov_base = CRLF;
927    iov[0].iov_len = sizeof(CRLF) - 1;
928    rv = apr_file_writev_full(fd, (const struct iovec *) &iov, 1, &amt);
929    return rv;
930}
931
932static apr_status_t store_headers(cache_handle_t *h, request_rec *r, cache_info *info)
933{
934    disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
935
936    memcpy(&h->cache_obj->info, info, sizeof(cache_info));
937
938    if (r->headers_out) {
939        dobj->headers_out = ap_cache_cacheable_headers_out(r);
940    }
941
942    if (r->headers_in) {
943        dobj->headers_in = ap_cache_cacheable_headers_in(r);
944    }
945
946    if (r->header_only && r->status != HTTP_NOT_MODIFIED) {
947        dobj->disk_info.header_only = 1;
948    }
949
950    return APR_SUCCESS;
951}
952
953static apr_status_t write_headers(cache_handle_t *h, request_rec *r)
954{
955    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
956                                                 &cache_disk_module);
957    apr_status_t rv;
958    apr_size_t amt;
959    disk_cache_object_t *dobj = (disk_cache_object_t*) h->cache_obj->vobj;
960
961    disk_cache_info_t disk_info;
962    struct iovec iov[2];
963
964    memset(&disk_info, 0, sizeof(disk_cache_info_t));
965
966    if (dobj->headers_out) {
967        const char *tmp;
968
969        tmp = apr_table_get(dobj->headers_out, "Vary");
970
971        if (tmp) {
972            apr_array_header_t* varray;
973            apr_uint32_t format = VARY_FORMAT_VERSION;
974
975            /* If we were initially opened as a vary format, rollback
976             * that internal state for the moment so we can recreate the
977             * vary format hints in the appropriate directory.
978             */
979            if (dobj->prefix) {
980                dobj->hdrs.file = dobj->prefix;
981                dobj->prefix = NULL;
982            }
983
984            rv = mkdir_structure(conf, dobj->hdrs.file, r->pool);
985
986            rv = apr_file_mktemp(&dobj->vary.tempfd, dobj->vary.tempfile,
987                                 APR_CREATE | APR_WRITE | APR_BINARY | APR_EXCL,
988                                 dobj->vary.pool);
989
990            if (rv != APR_SUCCESS) {
991                ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00721)
992                        "could not create vary file %s",
993                        dobj->vary.tempfile);
994                return rv;
995            }
996
997            amt = sizeof(format);
998            rv = apr_file_write_full(dobj->vary.tempfd, &format, amt, NULL);
999            if (rv != APR_SUCCESS) {
1000                ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00722)
1001                        "could not write to vary file %s",
1002                        dobj->vary.tempfile);
1003                apr_file_close(dobj->vary.tempfd);
1004                apr_pool_destroy(dobj->vary.pool);
1005                return rv;
1006            }
1007
1008            amt = sizeof(h->cache_obj->info.expire);
1009            rv = apr_file_write_full(dobj->vary.tempfd,
1010                                     &h->cache_obj->info.expire, amt, NULL);
1011            if (rv != APR_SUCCESS) {
1012                ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00723)
1013                        "could not write to vary file %s",
1014                        dobj->vary.tempfile);
1015                apr_file_close(dobj->vary.tempfd);
1016                apr_pool_destroy(dobj->vary.pool);
1017                return rv;
1018            }
1019
1020            varray = apr_array_make(r->pool, 6, sizeof(char*));
1021            tokens_to_array(r->pool, tmp, varray);
1022
1023            store_array(dobj->vary.tempfd, varray);
1024
1025            rv = apr_file_close(dobj->vary.tempfd);
1026            if (rv != APR_SUCCESS) {
1027                ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00724)
1028                        "could not close vary file %s",
1029                        dobj->vary.tempfile);
1030                apr_pool_destroy(dobj->vary.pool);
1031                return rv;
1032            }
1033
1034            tmp = regen_key(r->pool, dobj->headers_in, varray, dobj->name);
1035            dobj->prefix = dobj->hdrs.file;
1036            dobj->hashfile = NULL;
1037            dobj->data.file = data_file(r->pool, conf, dobj, tmp);
1038            dobj->hdrs.file = header_file(r->pool, conf, dobj, tmp);
1039        }
1040    }
1041
1042
1043    rv = apr_file_mktemp(&dobj->hdrs.tempfd, dobj->hdrs.tempfile,
1044                         APR_CREATE | APR_WRITE | APR_BINARY |
1045                         APR_BUFFERED | APR_EXCL, dobj->hdrs.pool);
1046
1047    if (rv != APR_SUCCESS) {
1048       ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00725)
1049                "could not create header file %s",
1050                dobj->hdrs.tempfile);
1051        return rv;
1052    }
1053
1054    disk_info.format = DISK_FORMAT_VERSION;
1055    disk_info.date = h->cache_obj->info.date;
1056    disk_info.expire = h->cache_obj->info.expire;
1057    disk_info.entity_version = dobj->disk_info.entity_version++;
1058    disk_info.request_time = h->cache_obj->info.request_time;
1059    disk_info.response_time = h->cache_obj->info.response_time;
1060    disk_info.status = h->cache_obj->info.status;
1061    disk_info.inode = dobj->disk_info.inode;
1062    disk_info.device = dobj->disk_info.device;
1063    disk_info.has_body = dobj->disk_info.has_body;
1064    disk_info.header_only = dobj->disk_info.header_only;
1065
1066    disk_info.name_len = strlen(dobj->name);
1067
1068    memcpy(&disk_info.control, &h->cache_obj->info.control, sizeof(cache_control_t));
1069
1070    iov[0].iov_base = (void*)&disk_info;
1071    iov[0].iov_len = sizeof(disk_cache_info_t);
1072    iov[1].iov_base = (void*)dobj->name;
1073    iov[1].iov_len = disk_info.name_len;
1074
1075    rv = apr_file_writev_full(dobj->hdrs.tempfd, (const struct iovec *) &iov,
1076                              2, &amt);
1077    if (rv != APR_SUCCESS) {
1078        ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00726)
1079                "could not write info to header file %s",
1080                dobj->hdrs.tempfile);
1081        apr_file_close(dobj->hdrs.tempfd);
1082        apr_pool_destroy(dobj->hdrs.pool);
1083        return rv;
1084    }
1085
1086    if (dobj->headers_out) {
1087        rv = store_table(dobj->hdrs.tempfd, dobj->headers_out);
1088        if (rv != APR_SUCCESS) {
1089            ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00727)
1090                    "could not write out-headers to header file %s",
1091                    dobj->hdrs.tempfile);
1092            apr_file_close(dobj->hdrs.tempfd);
1093            apr_pool_destroy(dobj->hdrs.pool);
1094            return rv;
1095        }
1096    }
1097
1098    /* Parse the vary header and dump those fields from the headers_in. */
1099    /* FIXME: Make call to the same thing cache_select calls to crack Vary. */
1100    if (dobj->headers_in) {
1101        rv = store_table(dobj->hdrs.tempfd, dobj->headers_in);
1102        if (rv != APR_SUCCESS) {
1103            ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00728)
1104                    "could not write in-headers to header file %s",
1105                    dobj->hdrs.tempfile);
1106            apr_file_close(dobj->hdrs.tempfd);
1107            apr_pool_destroy(dobj->hdrs.pool);
1108            return rv;
1109        }
1110    }
1111
1112    rv = apr_file_close(dobj->hdrs.tempfd); /* flush and close */
1113    if (rv != APR_SUCCESS) {
1114        ap_log_rerror(APLOG_MARK, APLOG_WARNING, rv, r, APLOGNO(00729)
1115                "could not close header file %s",
1116                dobj->hdrs.tempfile);
1117        apr_pool_destroy(dobj->hdrs.pool);
1118        return rv;
1119    }
1120
1121    return APR_SUCCESS;
1122}
1123
1124static apr_status_t store_body(cache_handle_t *h, request_rec *r,
1125                               apr_bucket_brigade *in, apr_bucket_brigade *out)
1126{
1127    apr_bucket *e;
1128    apr_status_t rv = APR_SUCCESS;
1129    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1130    disk_cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &cache_disk_module);
1131    int seen_eos = 0;
1132
1133    if (!dobj->offset) {
1134        dobj->offset = dconf->readsize;
1135    }
1136    if (!dobj->timeout && dconf->readtime) {
1137        dobj->timeout = apr_time_now() + dconf->readtime;
1138    }
1139
1140    if (dobj->offset) {
1141        apr_brigade_partition(in, dobj->offset, &e);
1142    }
1143
1144    while (APR_SUCCESS == rv && !APR_BRIGADE_EMPTY(in)) {
1145        const char *str;
1146        apr_size_t length, written;
1147
1148        e = APR_BRIGADE_FIRST(in);
1149
1150        /* are we done completely? if so, pass any trailing buckets right through */
1151        if (dobj->done || !dobj->data.pool) {
1152            APR_BUCKET_REMOVE(e);
1153            APR_BRIGADE_INSERT_TAIL(out, e);
1154            continue;
1155        }
1156
1157        /* have we seen eos yet? */
1158        if (APR_BUCKET_IS_EOS(e)) {
1159            seen_eos = 1;
1160            dobj->done = 1;
1161            APR_BUCKET_REMOVE(e);
1162            APR_BRIGADE_INSERT_TAIL(out, e);
1163            break;
1164        }
1165
1166        /* honour flush buckets, we'll get called again */
1167        if (APR_BUCKET_IS_FLUSH(e)) {
1168            APR_BUCKET_REMOVE(e);
1169            APR_BRIGADE_INSERT_TAIL(out, e);
1170            break;
1171        }
1172
1173        /* metadata buckets are preserved as is */
1174        if (APR_BUCKET_IS_METADATA(e)) {
1175            APR_BUCKET_REMOVE(e);
1176            APR_BRIGADE_INSERT_TAIL(out, e);
1177            continue;
1178        }
1179
1180        /* read the bucket, write to the cache */
1181        rv = apr_bucket_read(e, &str, &length, APR_BLOCK_READ);
1182        APR_BUCKET_REMOVE(e);
1183        APR_BRIGADE_INSERT_TAIL(out, e);
1184        if (rv != APR_SUCCESS) {
1185            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00730)
1186                    "Error when reading bucket for URL %s",
1187                    h->cache_obj->key);
1188            /* Remove the intermediate cache file and return non-APR_SUCCESS */
1189            apr_pool_destroy(dobj->data.pool);
1190            return rv;
1191        }
1192
1193        /* don't write empty buckets to the cache */
1194        if (!length) {
1195            continue;
1196        }
1197
1198        if (!dobj->disk_info.header_only) {
1199
1200            /* Attempt to create the data file at the last possible moment, if
1201             * the body is empty, we don't write a file at all, and save an inode.
1202             */
1203            if (!dobj->data.tempfd) {
1204                apr_finfo_t finfo;
1205                rv = apr_file_mktemp(&dobj->data.tempfd, dobj->data.tempfile,
1206                        APR_CREATE | APR_WRITE | APR_BINARY | APR_BUFFERED
1207                                | APR_EXCL, dobj->data.pool);
1208                if (rv != APR_SUCCESS) {
1209                    apr_pool_destroy(dobj->data.pool);
1210                    return rv;
1211                }
1212                dobj->file_size = 0;
1213                rv = apr_file_info_get(&finfo, APR_FINFO_IDENT,
1214                        dobj->data.tempfd);
1215                if (rv != APR_SUCCESS) {
1216                    apr_pool_destroy(dobj->data.pool);
1217                    return rv;
1218                }
1219                dobj->disk_info.device = finfo.device;
1220                dobj->disk_info.inode = finfo.inode;
1221                dobj->disk_info.has_body = 1;
1222            }
1223
1224            /* write to the cache, leave if we fail */
1225            rv = apr_file_write_full(dobj->data.tempfd, str, length, &written);
1226            if (rv != APR_SUCCESS) {
1227                ap_log_rerror(
1228                        APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00731) "Error when writing cache file for URL %s", h->cache_obj->key);
1229                /* Remove the intermediate cache file and return non-APR_SUCCESS */
1230                apr_pool_destroy(dobj->data.pool);
1231                return rv;
1232            }
1233            dobj->file_size += written;
1234            if (dobj->file_size > dconf->maxfs) {
1235                ap_log_rerror(
1236                        APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00732) "URL %s failed the size check "
1237                        "(%" APR_OFF_T_FMT ">%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->maxfs);
1238                /* Remove the intermediate cache file and return non-APR_SUCCESS */
1239                apr_pool_destroy(dobj->data.pool);
1240                return APR_EGENERAL;
1241            }
1242
1243        }
1244
1245        /* have we reached the limit of how much we're prepared to write in one
1246         * go? If so, leave, we'll get called again. This prevents us from trying
1247         * to swallow too much data at once, or taking so long to write the data
1248         * the client times out.
1249         */
1250        dobj->offset -= length;
1251        if (dobj->offset <= 0) {
1252            dobj->offset = 0;
1253            break;
1254        }
1255        if ((dconf->readtime && apr_time_now() > dobj->timeout)) {
1256            dobj->timeout = 0;
1257            break;
1258        }
1259
1260    }
1261
1262    /* Was this the final bucket? If yes, close the temp file and perform
1263     * sanity checks.
1264     */
1265    if (seen_eos) {
1266        const char *cl_header = apr_table_get(r->headers_out, "Content-Length");
1267
1268        if (!dobj->disk_info.header_only) {
1269
1270            if (dobj->data.tempfd) {
1271                rv = apr_file_close(dobj->data.tempfd);
1272                if (rv != APR_SUCCESS) {
1273                    /* Buffered write failed, abandon attempt to write */
1274                    apr_pool_destroy(dobj->data.pool);
1275                    return rv;
1276                }
1277            }
1278
1279            if (r->connection->aborted || r->no_cache) {
1280                ap_log_rerror(
1281                        APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00733) "Discarding body for URL %s "
1282                        "because connection has been aborted.", h->cache_obj->key);
1283                /* Remove the intermediate cache file and return non-APR_SUCCESS */
1284                apr_pool_destroy(dobj->data.pool);
1285                return APR_EGENERAL;
1286            }
1287            if (dobj->file_size < dconf->minfs) {
1288                ap_log_rerror(
1289                        APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00734) "URL %s failed the size check "
1290                        "(%" APR_OFF_T_FMT "<%" APR_OFF_T_FMT ")", h->cache_obj->key, dobj->file_size, dconf->minfs);
1291                /* Remove the intermediate cache file and return non-APR_SUCCESS */
1292                apr_pool_destroy(dobj->data.pool);
1293                return APR_EGENERAL;
1294            }
1295            if (cl_header) {
1296                apr_int64_t cl = apr_atoi64(cl_header);
1297                if ((errno == 0) && (dobj->file_size != cl)) {
1298                    ap_log_rerror(
1299                            APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00735) "URL %s didn't receive complete response, not caching", h->cache_obj->key);
1300                    /* Remove the intermediate cache file and return non-APR_SUCCESS */
1301                    apr_pool_destroy(dobj->data.pool);
1302                    return APR_EGENERAL;
1303                }
1304            }
1305
1306        }
1307
1308        /* All checks were fine, we're good to go when the commit comes */
1309    }
1310
1311    return APR_SUCCESS;
1312}
1313
1314static apr_status_t commit_entity(cache_handle_t *h, request_rec *r)
1315{
1316    disk_cache_conf *conf = ap_get_module_config(r->server->module_config,
1317                                                 &cache_disk_module);
1318    disk_cache_object_t *dobj = (disk_cache_object_t *) h->cache_obj->vobj;
1319    apr_status_t rv;
1320
1321    /* write the headers to disk at the last possible moment */
1322    rv = write_headers(h, r);
1323
1324    /* move header and data tempfiles to the final destination */
1325    if (APR_SUCCESS == rv) {
1326        rv = file_cache_el_final(conf, &dobj->hdrs, r);
1327    }
1328    if (APR_SUCCESS == rv) {
1329        rv = file_cache_el_final(conf, &dobj->vary, r);
1330    }
1331    if (APR_SUCCESS == rv) {
1332        if (!dobj->disk_info.header_only) {
1333            rv = file_cache_el_final(conf, &dobj->data, r);
1334        }
1335        else if (dobj->data.file){
1336            rv = apr_file_remove(dobj->data.file, dobj->data.pool);
1337        }
1338    }
1339
1340    /* remove the cached items completely on any failure */
1341    if (APR_SUCCESS != rv) {
1342        remove_url(h, r);
1343        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00736)
1344                "commit_entity: URL '%s' not cached due to earlier disk error.",
1345                dobj->name);
1346    }
1347    else {
1348        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00737)
1349                "commit_entity: Headers and body for URL %s cached.",
1350                dobj->name);
1351    }
1352
1353    apr_pool_destroy(dobj->data.pool);
1354
1355    return APR_SUCCESS;
1356}
1357
1358static apr_status_t invalidate_entity(cache_handle_t *h, request_rec *r)
1359{
1360    apr_status_t rv;
1361
1362    rv = recall_headers(h, r);
1363    if (rv != APR_SUCCESS) {
1364        return rv;
1365    }
1366
1367    /* mark the entity as invalidated */
1368    h->cache_obj->info.control.invalidated = 1;
1369
1370    return commit_entity(h, r);
1371}
1372
1373static void *create_dir_config(apr_pool_t *p, char *dummy)
1374{
1375    disk_cache_dir_conf *dconf = apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1376
1377    dconf->maxfs = DEFAULT_MAX_FILE_SIZE;
1378    dconf->minfs = DEFAULT_MIN_FILE_SIZE;
1379    dconf->readsize = DEFAULT_READSIZE;
1380    dconf->readtime = DEFAULT_READTIME;
1381
1382    return dconf;
1383}
1384
1385static void *merge_dir_config(apr_pool_t *p, void *basev, void *addv) {
1386    disk_cache_dir_conf *new = (disk_cache_dir_conf *) apr_pcalloc(p, sizeof(disk_cache_dir_conf));
1387    disk_cache_dir_conf *add = (disk_cache_dir_conf *) addv;
1388    disk_cache_dir_conf *base = (disk_cache_dir_conf *) basev;
1389
1390    new->maxfs = (add->maxfs_set == 0) ? base->maxfs : add->maxfs;
1391    new->maxfs_set = add->maxfs_set || base->maxfs_set;
1392    new->minfs = (add->minfs_set == 0) ? base->minfs : add->minfs;
1393    new->minfs_set = add->minfs_set || base->minfs_set;
1394    new->readsize = (add->readsize_set == 0) ? base->readsize : add->readsize;
1395    new->readsize_set = add->readsize_set || base->readsize_set;
1396    new->readtime = (add->readtime_set == 0) ? base->readtime : add->readtime;
1397    new->readtime_set = add->readtime_set || base->readtime_set;
1398
1399    return new;
1400}
1401
1402static void *create_config(apr_pool_t *p, server_rec *s)
1403{
1404    disk_cache_conf *conf = apr_pcalloc(p, sizeof(disk_cache_conf));
1405
1406    /* XXX: Set default values */
1407    conf->dirlevels = DEFAULT_DIRLEVELS;
1408    conf->dirlength = DEFAULT_DIRLENGTH;
1409
1410    conf->cache_root = NULL;
1411    conf->cache_root_len = 0;
1412
1413    return conf;
1414}
1415
/*
 * mod_cache_disk configuration directive handlers.
 */
1419static const char
1420*set_cache_root(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1421{
1422    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1423                                                 &cache_disk_module);
1424    conf->cache_root = arg;
1425    conf->cache_root_len = strlen(arg);
1426    /* TODO: canonicalize cache_root and strip off any trailing slashes */
1427
1428    return NULL;
1429}
1430
1431/*
1432 * Consider eliminating the next two directives in favor of
1433 * Ian's prime number hash...
1434 * key = hash_fn( r->uri)
1435 * filename = "/key % prime1 /key %prime2/key %prime3"
1436 */
1437static const char
1438*set_cache_dirlevels(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1439{
1440    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1441                                                 &cache_disk_module);
1442    int val = atoi(arg);
1443    if (val < 1)
1444        return "CacheDirLevels value must be an integer greater than 0";
1445    if (val * conf->dirlength > CACHEFILE_LEN)
1446        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1447    conf->dirlevels = val;
1448    return NULL;
1449}
1450static const char
1451*set_cache_dirlength(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1452{
1453    disk_cache_conf *conf = ap_get_module_config(parms->server->module_config,
1454                                                 &cache_disk_module);
1455    int val = atoi(arg);
1456    if (val < 1)
1457        return "CacheDirLength value must be an integer greater than 0";
1458    if (val * conf->dirlevels > CACHEFILE_LEN)
1459        return "CacheDirLevels*CacheDirLength value must not be higher than 20";
1460
1461    conf->dirlength = val;
1462    return NULL;
1463}
1464
1465static const char
1466*set_cache_minfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1467{
1468    disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1469
1470    if (apr_strtoff(&dconf->minfs, arg, NULL, 10) != APR_SUCCESS ||
1471            dconf->minfs < 0)
1472    {
1473        return "CacheMinFileSize argument must be a non-negative integer representing the min size of a file to cache in bytes.";
1474    }
1475    dconf->minfs_set = 1;
1476    return NULL;
1477}
1478
1479static const char
1480*set_cache_maxfs(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1481{
1482    disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1483
1484    if (apr_strtoff(&dconf->maxfs, arg, NULL, 10) != APR_SUCCESS ||
1485            dconf->maxfs < 0)
1486    {
1487        return "CacheMaxFileSize argument must be a non-negative integer representing the max size of a file to cache in bytes.";
1488    }
1489    dconf->maxfs_set = 1;
1490    return NULL;
1491}
1492
1493static const char
1494*set_cache_readsize(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1495{
1496    disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1497
1498    if (apr_strtoff(&dconf->readsize, arg, NULL, 10) != APR_SUCCESS ||
1499            dconf->readsize < 0)
1500    {
1501        return "CacheReadSize argument must be a non-negative integer representing the max amount of data to cache in go.";
1502    }
1503    dconf->readsize_set = 1;
1504    return NULL;
1505}
1506
1507static const char
1508*set_cache_readtime(cmd_parms *parms, void *in_struct_ptr, const char *arg)
1509{
1510    disk_cache_dir_conf *dconf = (disk_cache_dir_conf *)in_struct_ptr;
1511    apr_off_t milliseconds;
1512
1513    if (apr_strtoff(&milliseconds, arg, NULL, 10) != APR_SUCCESS ||
1514            milliseconds < 0)
1515    {
1516        return "CacheReadTime argument must be a non-negative integer representing the max amount of time taken to cache in go.";
1517    }
1518    dconf->readtime = apr_time_from_msec(milliseconds);
1519    dconf->readtime_set = 1;
1520    return NULL;
1521}
1522
1523static const command_rec disk_cache_cmds[] =
1524{
1525    AP_INIT_TAKE1("CacheRoot", set_cache_root, NULL, RSRC_CONF,
1526                 "The directory to store cache files"),
1527    AP_INIT_TAKE1("CacheDirLevels", set_cache_dirlevels, NULL, RSRC_CONF,
1528                  "The number of levels of subdirectories in the cache"),
1529    AP_INIT_TAKE1("CacheDirLength", set_cache_dirlength, NULL, RSRC_CONF,
1530                  "The number of characters in subdirectory names"),
1531    AP_INIT_TAKE1("CacheMinFileSize", set_cache_minfs, NULL, RSRC_CONF | ACCESS_CONF,
1532                  "The minimum file size to cache a document"),
1533    AP_INIT_TAKE1("CacheMaxFileSize", set_cache_maxfs, NULL, RSRC_CONF | ACCESS_CONF,
1534                  "The maximum file size to cache a document"),
1535    AP_INIT_TAKE1("CacheReadSize", set_cache_readsize, NULL, RSRC_CONF | ACCESS_CONF,
1536                  "The maximum quantity of data to attempt to read and cache in one go"),
1537    AP_INIT_TAKE1("CacheReadTime", set_cache_readtime, NULL, RSRC_CONF | ACCESS_CONF,
1538                  "The maximum time taken to attempt to read and cache in go"),
1539    {NULL}
1540};
1541
1542static const cache_provider cache_disk_provider =
1543{
1544    &remove_entity,
1545    &store_headers,
1546    &store_body,
1547    &recall_headers,
1548    &recall_body,
1549    &create_entity,
1550    &open_entity,
1551    &remove_url,
1552    &commit_entity,
1553    &invalidate_entity
1554};
1555
1556static void disk_cache_register_hook(apr_pool_t *p)
1557{
1558    /* cache initializer */
1559    ap_register_provider(p, CACHE_PROVIDER_GROUP, "disk", "0",
1560                         &cache_disk_provider);
1561}
1562
1563AP_DECLARE_MODULE(cache_disk) = {
1564    STANDARD20_MODULE_STUFF,
1565    create_dir_config,          /* create per-directory config structure */
1566    merge_dir_config,           /* merge per-directory config structures */
1567    create_config,              /* create per-server config structure */
1568    NULL,                       /* merge per-server config structures */
1569    disk_cache_cmds,            /* command apr_table_t */
1570    disk_cache_register_hook    /* register hooks */
1571};
1572