1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "mod_cache.h"
18
19#include "cache_util.h"
20#include <ap_provider.h>
21
22APLOG_USE_MODULE(cache);
23
24/* -------------------------------------------------------------- */
25
26extern APR_OPTIONAL_FN_TYPE(ap_cache_generate_key) *cache_generate_key;
27
28extern module AP_MODULE_DECLARE_DATA cache_module;
29
30/* Determine if "url" matches the hostname, scheme and port and path
31 * in "filter". All but the path comparisons are case-insensitive.
32 */
33static int uri_meets_conditions(const apr_uri_t *filter, const int pathlen,
34                                const apr_uri_t *url)
35{
36
37    /* Scheme, hostname port and local part. The filter URI and the
38     * URI we test may have the following shapes:
39     *   /<path>
40     *   <scheme>[:://<hostname>[:<port>][/<path>]]
41     * That is, if there is no scheme then there must be only the path,
42     * and we check only the path; if there is a scheme, we check the
43     * scheme for equality, and then if present we match the hostname,
44     * and then if present match the port, and finally the path if any.
45     *
46     * Note that this means that "/<path>" only matches local paths,
47     * and to match proxied paths one *must* specify the scheme.
48     */
49
50    /* Is the filter is just for a local path or a proxy URI? */
51    if (!filter->scheme) {
52        if (url->scheme || url->hostname) {
53            return 0;
54        }
55    }
56    else {
57        /* The URI scheme must be present and identical except for case. */
58        if (!url->scheme || strcasecmp(filter->scheme, url->scheme)) {
59            return 0;
60        }
61
62        /* If the filter hostname is null or empty it matches any hostname,
63         * if it begins with a "*" it matches the _end_ of the URI hostname
64         * excluding the "*", if it begins with a "." it matches the _end_
65         * of the URI * hostname including the ".", otherwise it must match
66         * the URI hostname exactly. */
67
68        if (filter->hostname && filter->hostname[0]) {
69            if (filter->hostname[0] == '.') {
70                const size_t fhostlen = strlen(filter->hostname);
71                const size_t uhostlen = url->hostname ? strlen(url->hostname) : 0;
72
73                if (fhostlen > uhostlen
74                    || (url->hostname
75                        && strcasecmp(filter->hostname,
76                                      url->hostname + uhostlen - fhostlen))) {
77                    return 0;
78                }
79            }
80            else if (filter->hostname[0] == '*') {
81                const size_t fhostlen = strlen(filter->hostname + 1);
82                const size_t uhostlen = url->hostname ? strlen(url->hostname) : 0;
83
84                if (fhostlen > uhostlen
85                    || (url->hostname
86                        && strcasecmp(filter->hostname + 1,
87                                      url->hostname + uhostlen - fhostlen))) {
88                    return 0;
89                }
90            }
91            else if (!url->hostname || strcasecmp(filter->hostname, url->hostname)) {
92                return 0;
93            }
94        }
95
96        /* If the filter port is empty it matches any URL port.
97         * If the filter or URL port are missing, or the URL port is
98         * empty, they default to the port for their scheme. */
99
100        if (!(filter->port_str && !filter->port_str[0])) {
101            /* NOTE:  ap_port_of_scheme will return 0 if given NULL input */
102            const unsigned fport = filter->port_str ? filter->port
103                    : apr_uri_port_of_scheme(filter->scheme);
104            const unsigned uport = (url->port_str && url->port_str[0])
105                    ? url->port : apr_uri_port_of_scheme(url->scheme);
106
107            if (fport != uport) {
108                return 0;
109            }
110        }
111    }
112
113    /* For HTTP caching purposes, an empty (NULL) path is equivalent to
114     * a single "/" path. RFCs 3986/2396
115     */
116    if (!url->path) {
117        if (*filter->path == '/' && pathlen == 1) {
118            return 1;
119        }
120        else {
121            return 0;
122        }
123    }
124
125    /* Url has met all of the filter conditions so far, determine
126     * if the paths match.
127     */
128    return !strncmp(filter->path, url->path, pathlen);
129}
130
131static cache_provider_list *get_provider(request_rec *r, struct cache_enable *ent,
132        cache_provider_list *providers)
133{
134    /* Fetch from global config and add to the list. */
135    cache_provider *provider;
136    provider = ap_lookup_provider(CACHE_PROVIDER_GROUP, ent->type,
137                                  "0");
138    if (!provider) {
139        /* Log an error! */
140    }
141    else {
142        cache_provider_list *newp;
143        newp = apr_pcalloc(r->pool, sizeof(cache_provider_list));
144        newp->provider_name = ent->type;
145        newp->provider = provider;
146
147        if (!providers) {
148            providers = newp;
149        }
150        else {
151            cache_provider_list *last = providers;
152
153            while (last->next) {
154                if (last->provider == provider) {
155                    return providers;
156                }
157                last = last->next;
158            }
159            if (last->provider == provider) {
160                return providers;
161            }
162            last->next = newp;
163        }
164    }
165
166    return providers;
167}
168
169cache_provider_list *cache_get_providers(request_rec *r,
170        cache_server_conf *conf,
171        apr_uri_t uri)
172{
173    cache_dir_conf *dconf = ap_get_module_config(r->per_dir_config, &cache_module);
174    cache_provider_list *providers = NULL;
175    int i;
176
177    /* per directory cache disable */
178    if (dconf->disable) {
179        return NULL;
180    }
181
182    /* global cache disable */
183    for (i = 0; i < conf->cachedisable->nelts; i++) {
184        struct cache_disable *ent =
185                               (struct cache_disable *)conf->cachedisable->elts;
186        if (uri_meets_conditions(&ent[i].url, ent[i].pathlen, &uri)) {
187            /* Stop searching now. */
188            return NULL;
189        }
190    }
191
192    /* loop through all the per directory cacheenable entries */
193    for (i = 0; i < dconf->cacheenable->nelts; i++) {
194        struct cache_enable *ent =
195                                (struct cache_enable *)dconf->cacheenable->elts;
196        providers = get_provider(r, &ent[i], providers);
197    }
198
199    /* loop through all the global cacheenable entries */
200    for (i = 0; i < conf->cacheenable->nelts; i++) {
201        struct cache_enable *ent =
202                                (struct cache_enable *)conf->cacheenable->elts;
203        if (uri_meets_conditions(&ent[i].url, ent[i].pathlen, &uri)) {
204            providers = get_provider(r, &ent[i], providers);
205        }
206    }
207
208    return providers;
209}
210
211
212/* do a HTTP/1.1 age calculation */
213CACHE_DECLARE(apr_int64_t) ap_cache_current_age(cache_info *info,
214                                                const apr_time_t age_value,
215                                                apr_time_t now)
216{
217    apr_time_t apparent_age, corrected_received_age, response_delay,
218               corrected_initial_age, resident_time, current_age,
219               age_value_usec;
220
221    age_value_usec = apr_time_from_sec(age_value);
222
223    /* Perform an HTTP/1.1 age calculation. (RFC2616 13.2.3) */
224
225    apparent_age = MAX(0, info->response_time - info->date);
226    corrected_received_age = MAX(apparent_age, age_value_usec);
227    response_delay = info->response_time - info->request_time;
228    corrected_initial_age = corrected_received_age + response_delay;
229    resident_time = now - info->response_time;
230    current_age = corrected_initial_age + resident_time;
231
232    if (current_age < 0) {
233        current_age = 0;
234    }
235
236    return apr_time_sec(current_age);
237}
238
239/**
240 * Try obtain a cache wide lock on the given cache key.
241 *
242 * If we return APR_SUCCESS, we obtained the lock, and we are clear to
243 * proceed to the backend. If we return APR_EEXISTS, then the lock is
244 * already locked, someone else has gone to refresh the backend data
245 * already, so we must return stale data with a warning in the mean
246 * time. If we return anything else, then something has gone pear
247 * shaped, and we allow the request through to the backend regardless.
248 *
249 * This lock is created from the request pool, meaning that should
250 * something go wrong and the lock isn't deleted on return of the
251 * request headers from the backend for whatever reason, at worst the
252 * lock will be cleaned up when the request dies or finishes.
253 *
254 * If something goes truly bananas and the lock isn't deleted when the
255 * request dies, the lock will be trashed when its max-age is reached,
256 * or when a request arrives containing a Cache-Control: no-cache. At
257 * no point is it possible for this lock to permanently deny access to
258 * the backend.
259 */
260apr_status_t cache_try_lock(cache_server_conf *conf, cache_request_rec *cache,
261        request_rec *r)
262{
263    apr_status_t status;
264    const char *lockname;
265    const char *path;
266    char dir[5];
267    apr_time_t now = apr_time_now();
268    apr_finfo_t finfo;
269    apr_file_t *lockfile;
270    void *dummy;
271
272    finfo.mtime = 0;
273
274    if (!conf || !conf->lock || !conf->lockpath) {
275        /* no locks configured, leave */
276        return APR_SUCCESS;
277    }
278
279    /* lock already obtained earlier? if so, success */
280    apr_pool_userdata_get(&dummy, CACHE_LOCKFILE_KEY, r->pool);
281    if (dummy) {
282        return APR_SUCCESS;
283    }
284
285    /* create the key if it doesn't exist */
286    if (!cache->key) {
287        cache_generate_key(r, r->pool, &cache->key);
288    }
289
290    /* create a hashed filename from the key, and save it for later */
291    lockname = ap_cache_generate_name(r->pool, 0, 0, cache->key);
292
293    /* lock files represent discrete just-went-stale URLs "in flight", so
294     * we support a simple two level directory structure, more is overkill.
295     */
296    dir[0] = '/';
297    dir[1] = lockname[0];
298    dir[2] = '/';
299    dir[3] = lockname[1];
300    dir[4] = 0;
301
302    /* make the directories */
303    path = apr_pstrcat(r->pool, conf->lockpath, dir, NULL);
304    if (APR_SUCCESS != (status = apr_dir_make_recursive(path,
305            APR_UREAD|APR_UWRITE|APR_UEXECUTE, r->pool))) {
306        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(00778)
307                "Could not create a cache lock directory: %s",
308                path);
309        return status;
310    }
311    lockname = apr_pstrcat(r->pool, path, "/", lockname, NULL);
312    apr_pool_userdata_set(lockname, CACHE_LOCKNAME_KEY, NULL, r->pool);
313
314    /* is an existing lock file too old? */
315    status = apr_stat(&finfo, lockname,
316                APR_FINFO_MTIME | APR_FINFO_NLINK, r->pool);
317    if (!(APR_STATUS_IS_ENOENT(status)) && APR_SUCCESS != status) {
318        ap_log_rerror(APLOG_MARK, APLOG_ERR, APR_EEXIST, r, APLOGNO(00779)
319                "Could not stat a cache lock file: %s",
320                lockname);
321        return status;
322    }
323    if ((status == APR_SUCCESS) && (((now - finfo.mtime) > conf->lockmaxage)
324                                  || (now < finfo.mtime))) {
325        ap_log_rerror(APLOG_MARK, APLOG_INFO, status, r, APLOGNO(00780)
326                "Cache lock file for '%s' too old, removing: %s",
327                r->uri, lockname);
328        apr_file_remove(lockname, r->pool);
329    }
330
331    /* try obtain a lock on the file */
332    if (APR_SUCCESS == (status = apr_file_open(&lockfile, lockname,
333            APR_WRITE | APR_CREATE | APR_EXCL | APR_DELONCLOSE,
334            APR_UREAD | APR_UWRITE, r->pool))) {
335        apr_pool_userdata_set(lockfile, CACHE_LOCKFILE_KEY, NULL, r->pool);
336    }
337    return status;
338
339}
340
341/**
342 * Remove the cache lock, if present.
343 *
344 * First, try to close the file handle, whose delete-on-close should
345 * kill the file. Otherwise, just delete the file by name.
346 *
347 * If no lock name has yet been calculated, do the calculation of the
348 * lock name first before trying to delete the file.
349 *
350 * If an optional bucket brigade is passed, the lock will only be
351 * removed if the bucket brigade contains an EOS bucket.
352 */
353apr_status_t cache_remove_lock(cache_server_conf *conf,
354        cache_request_rec *cache, request_rec *r, apr_bucket_brigade *bb)
355{
356    void *dummy;
357    const char *lockname;
358
359    if (!conf || !conf->lock || !conf->lockpath) {
360        /* no locks configured, leave */
361        return APR_SUCCESS;
362    }
363    if (bb) {
364        apr_bucket *e;
365        int eos_found = 0;
366        for (e = APR_BRIGADE_FIRST(bb);
367             e != APR_BRIGADE_SENTINEL(bb);
368             e = APR_BUCKET_NEXT(e))
369        {
370            if (APR_BUCKET_IS_EOS(e)) {
371                eos_found = 1;
372                break;
373            }
374        }
375        if (!eos_found) {
376            /* no eos found in brigade, don't delete anything just yet,
377             * we are not done.
378             */
379            return APR_SUCCESS;
380        }
381    }
382    apr_pool_userdata_get(&dummy, CACHE_LOCKFILE_KEY, r->pool);
383    if (dummy) {
384        return apr_file_close((apr_file_t *)dummy);
385    }
386    apr_pool_userdata_get(&dummy, CACHE_LOCKNAME_KEY, r->pool);
387    lockname = (const char *)dummy;
388    if (!lockname) {
389        char dir[5];
390
391        /* create the key if it doesn't exist */
392        if (!cache->key) {
393            cache_generate_key(r, r->pool, &cache->key);
394        }
395
396        /* create a hashed filename from the key, and save it for later */
397        lockname = ap_cache_generate_name(r->pool, 0, 0, cache->key);
398
399        /* lock files represent discrete just-went-stale URLs "in flight", so
400         * we support a simple two level directory structure, more is overkill.
401         */
402        dir[0] = '/';
403        dir[1] = lockname[0];
404        dir[2] = '/';
405        dir[3] = lockname[1];
406        dir[4] = 0;
407
408        lockname = apr_pstrcat(r->pool, conf->lockpath, dir, "/", lockname, NULL);
409    }
410    return apr_file_remove(lockname, r->pool);
411}
412
413int ap_cache_check_no_cache(cache_request_rec *cache, request_rec *r)
414{
415
416    cache_server_conf *conf =
417      (cache_server_conf *)ap_get_module_config(r->server->module_config,
418                                                &cache_module);
419
420    /*
421     * At this point, we may have data cached, but the request may have
422     * specified that cached data may not be used in a response.
423     *
424     * This is covered under RFC2616 section 14.9.4 (Cache Revalidation and
425     * Reload Controls).
426     *
427     * - RFC2616 14.9.4 End to end reload, Cache-Control: no-cache, or Pragma:
428     * no-cache. The server MUST NOT use a cached copy when responding to such
429     * a request.
430     */
431
432    /* This value comes from the client's initial request. */
433    if (!cache->control_in.parsed) {
434        const char *cc_req = cache_table_getm(r->pool, r->headers_in,
435                "Cache-Control");
436        const char *pragma = cache_table_getm(r->pool, r->headers_in, "Pragma");
437        ap_cache_control(r, &cache->control_in, cc_req, pragma, r->headers_in);
438    }
439
440    if (cache->control_in.no_cache) {
441
442        if (!conf->ignorecachecontrol) {
443            return 0;
444        }
445        else {
446            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
447                    "Incoming request is asking for an uncached version of "
448                    "%s, but we have been configured to ignore it and serve "
449                    "cached content anyway", r->unparsed_uri);
450        }
451    }
452
453    return 1;
454}
455
456int ap_cache_check_no_store(cache_request_rec *cache, request_rec *r)
457{
458
459    cache_server_conf *conf =
460      (cache_server_conf *)ap_get_module_config(r->server->module_config,
461                                                &cache_module);
462
463    /*
464     * At this point, we may have data cached, but the request may have
465     * specified that cached data may not be used in a response.
466     *
467     * - RFC2616 14.9.2 What May be Stored by Caches. If Cache-Control:
468     * no-store arrives, do not serve from or store to the cache.
469     */
470
471    /* This value comes from the client's initial request. */
472    if (!cache->control_in.parsed) {
473        const char *cc_req = cache_table_getm(r->pool, r->headers_in,
474                "Cache-Control");
475        const char *pragma = cache_table_getm(r->pool, r->headers_in, "Pragma");
476        ap_cache_control(r, &cache->control_in, cc_req, pragma, r->headers_in);
477    }
478
479    if (cache->control_in.no_store) {
480
481        if (!conf->ignorecachecontrol) {
482            /* We're not allowed to serve a cached copy */
483            return 0;
484        }
485        else {
486            ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r,
487                    "Incoming request is asking for a no-store version of "
488                    "%s, but we have been configured to ignore it and serve "
489                    "cached content anyway", r->unparsed_uri);
490        }
491    }
492
493    return 1;
494}
495
496int cache_check_freshness(cache_handle_t *h, cache_request_rec *cache,
497        request_rec *r)
498{
499    apr_status_t status;
500    apr_int64_t age, maxage_req, maxage_cresp, maxage, smaxage, maxstale;
501    apr_int64_t minfresh;
502    const char *cc_req;
503    const char *pragma;
504    const char *agestr = NULL;
505    apr_time_t age_c = 0;
506    cache_info *info = &(h->cache_obj->info);
507    const char *warn_head;
508    cache_server_conf *conf =
509      (cache_server_conf *)ap_get_module_config(r->server->module_config,
510                                                &cache_module);
511
512    /*
513     * We now want to check if our cached data is still fresh. This depends
514     * on a few things, in this order:
515     *
516     * - RFC2616 14.9.4 End to end reload, Cache-Control: no-cache. no-cache
517     * in either the request or the cached response means that we must
518     * perform the request unconditionally, and ignore cached content. We
519     * should never reach here, but if we do, mark the content as stale,
520     * as this is the best we can do.
521     *
522     * - RFC2616 14.32 Pragma: no-cache This is treated the same as
523     * Cache-Control: no-cache.
524     *
525     * - RFC2616 14.9.3 Cache-Control: max-stale, must-revalidate,
526     * proxy-revalidate if the max-stale request header exists, modify the
527     * stale calculations below so that an object can be at most <max-stale>
528     * seconds stale before we request a revalidation, _UNLESS_ a
529     * must-revalidate or proxy-revalidate cached response header exists to
530     * stop us doing this.
531     *
532     * - RFC2616 14.9.3 Cache-Control: s-maxage the origin server specifies the
533     * maximum age an object can be before it is considered stale. This
534     * directive has the effect of proxy|must revalidate, which in turn means
535     * simple ignore any max-stale setting.
536     *
537     * - RFC2616 14.9.4 Cache-Control: max-age this header can appear in both
538     * requests and responses. If both are specified, the smaller of the two
539     * takes priority.
540     *
541     * - RFC2616 14.21 Expires: if this request header exists in the cached
542     * entity, and it's value is in the past, it has expired.
543     *
544     */
545
546    /* This value comes from the client's initial request. */
547    cc_req = apr_table_get(r->headers_in, "Cache-Control");
548    pragma = apr_table_get(r->headers_in, "Pragma");
549
550    ap_cache_control(r, &cache->control_in, cc_req, pragma, r->headers_in);
551
552    if (cache->control_in.no_cache) {
553
554        if (!conf->ignorecachecontrol) {
555            /* Treat as stale, causing revalidation */
556            return 0;
557        }
558
559        ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(00781)
560                "Incoming request is asking for a uncached version of "
561                "%s, but we have been configured to ignore it and "
562                "serve a cached response anyway",
563                r->unparsed_uri);
564    }
565
566    /* These come from the cached entity. */
567    if (h->cache_obj->info.control.no_cache
568            || h->cache_obj->info.control.invalidated) {
569        /*
570         * The cached entity contained Cache-Control: no-cache, or a
571         * no-cache with a header present, or a private with a header
572         * present, or the cached entity has been invalidated in the
573         * past, so treat as stale causing revalidation.
574         */
575        return 0;
576    }
577
578    if ((agestr = apr_table_get(h->resp_hdrs, "Age"))) {
579        age_c = apr_atoi64(agestr);
580    }
581
582    /* calculate age of object */
583    age = ap_cache_current_age(info, age_c, r->request_time);
584
585    /* extract s-maxage */
586    smaxage = h->cache_obj->info.control.s_maxage_value;
587
588    /* extract max-age from request */
589    maxage_req = -1;
590    if (!conf->ignorecachecontrol) {
591        maxage_req = cache->control_in.max_age_value;
592    }
593
594    /*
595     * extract max-age from response, if both s-maxage and max-age, s-maxage
596     * takes priority
597     */
598    if (smaxage != -1) {
599        maxage_cresp = smaxage;
600    }
601    else {
602        maxage_cresp = h->cache_obj->info.control.max_age_value;
603    }
604
605    /*
606     * if both maxage request and response, the smaller one takes priority
607     */
608    if (maxage_req == -1) {
609        maxage = maxage_cresp;
610    }
611    else if (maxage_cresp == -1) {
612        maxage = maxage_req;
613    }
614    else {
615        maxage = MIN(maxage_req, maxage_cresp);
616    }
617
618    /* extract max-stale */
619    if (cache->control_in.max_stale) {
620        if(cache->control_in.max_stale_value != -1) {
621            maxstale = cache->control_in.max_stale_value;
622        }
623        else {
624            /*
625             * If no value is assigned to max-stale, then the client is willing
626             * to accept a stale response of any age (RFC2616 14.9.3). We will
627             * set it to one year in this case as this situation is somewhat
628             * similar to a "never expires" Expires header (RFC2616 14.21)
629             * which is set to a date one year from the time the response is
630             * sent in this case.
631             */
632            maxstale = APR_INT64_C(86400*365);
633        }
634    }
635    else {
636        maxstale = 0;
637    }
638
639    /* extract min-fresh */
640    if (!conf->ignorecachecontrol && cache->control_in.min_fresh) {
641        minfresh = cache->control_in.min_fresh_value;
642    }
643    else {
644        minfresh = 0;
645    }
646
647    /* override maxstale if must-revalidate, proxy-revalidate or s-maxage */
648    if (maxstale && (h->cache_obj->info.control.must_revalidate
649            || h->cache_obj->info.control.proxy_revalidate || smaxage != -1)) {
650        maxstale = 0;
651    }
652
653    /* handle expiration */
654    if (((maxage != -1) && (age < (maxage + maxstale - minfresh))) ||
655        ((smaxage == -1) && (maxage == -1) &&
656         (info->expire != APR_DATE_BAD) &&
657         (age < (apr_time_sec(info->expire - info->date) + maxstale - minfresh)))) {
658
659        warn_head = apr_table_get(h->resp_hdrs, "Warning");
660
661        /* it's fresh darlings... */
662        /* set age header on response */
663        apr_table_set(h->resp_hdrs, "Age",
664                      apr_psprintf(r->pool, "%lu", (unsigned long)age));
665
666        /* add warning if maxstale overrode freshness calculation */
667        if (!(((maxage != -1) && age < maxage) ||
668              (info->expire != APR_DATE_BAD &&
669               (apr_time_sec(info->expire - info->date)) > age))) {
670            /* make sure we don't stomp on a previous warning */
671            if ((warn_head == NULL) ||
672                ((warn_head != NULL) && (ap_strstr_c(warn_head, "110") == NULL))) {
673                apr_table_mergen(h->resp_hdrs, "Warning",
674                                 "110 Response is stale");
675            }
676        }
677
678        /*
679         * If none of Expires, Cache-Control: max-age, or Cache-Control:
680         * s-maxage appears in the response, and the response header age
681         * calculated is more than 24 hours add the warning 113
682         */
683        if ((maxage_cresp == -1) && (smaxage == -1) && (apr_table_get(
684                h->resp_hdrs, "Expires") == NULL) && (age > 86400)) {
685
686            /* Make sure we don't stomp on a previous warning, and don't dup
687             * a 113 marning that is already present. Also, make sure to add
688             * the new warning to the correct *headers_out location.
689             */
690            if ((warn_head == NULL) ||
691                ((warn_head != NULL) && (ap_strstr_c(warn_head, "113") == NULL))) {
692                apr_table_mergen(h->resp_hdrs, "Warning",
693                                 "113 Heuristic expiration");
694            }
695        }
696        return 1;    /* Cache object is fresh (enough) */
697    }
698
699    /*
700     * At this point we are stale, but: if we are under load, we may let
701     * a significant number of stale requests through before the first
702     * stale request successfully revalidates itself, causing a sudden
703     * unexpected thundering herd which in turn brings angst and drama.
704     *
705     * So.
706     *
707     * We want the first stale request to go through as normal. But the
708     * second and subsequent request, we must pretend to be fresh until
709     * the first request comes back with either new content or confirmation
710     * that the stale content is still fresh.
711     *
712     * To achieve this, we create a very simple file based lock based on
713     * the key of the cached object. We attempt to open the lock file with
714     * exclusive write access. If we succeed, woohoo! we're first, and we
715     * follow the stale path to the backend server. If we fail, oh well,
716     * we follow the fresh path, and avoid being a thundering herd.
717     *
718     * The lock lives only as long as the stale request that went on ahead.
719     * If the request succeeds, the lock is deleted. If the request fails,
720     * the lock is deleted, and another request gets to make a new lock
721     * and try again.
722     *
723     * At any time, a request marked "no-cache" will force a refresh,
724     * ignoring the lock, ensuring an extended lockout is impossible.
725     *
726     * A lock that exceeds a maximum age will be deleted, and another
727     * request gets to make a new lock and try again.
728     */
729    status = cache_try_lock(conf, cache, r);
730    if (APR_SUCCESS == status) {
731        /* we obtained a lock, follow the stale path */
732        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00782)
733                "Cache lock obtained for stale cached URL, "
734                "revalidating entry: %s",
735                r->unparsed_uri);
736        return 0;
737    }
738    else if (APR_EEXIST == status) {
739        /* lock already exists, return stale data anyway, with a warning */
740        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(00783)
741                "Cache already locked for stale cached URL, "
742                "pretend it is fresh: %s",
743                r->unparsed_uri);
744
745        /* make sure we don't stomp on a previous warning */
746        warn_head = apr_table_get(h->resp_hdrs, "Warning");
747        if ((warn_head == NULL) ||
748            ((warn_head != NULL) && (ap_strstr_c(warn_head, "110") == NULL))) {
749            apr_table_mergen(h->resp_hdrs, "Warning",
750                             "110 Response is stale");
751        }
752
753        return 1;
754    }
755    else {
756        /* some other error occurred, just treat the object as stale */
757        ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r, APLOGNO(00784)
758                "Attempt to obtain a cache lock for stale "
759                "cached URL failed, revalidating entry anyway: %s",
760                r->unparsed_uri);
761        return 0;
762    }
763
764}
765
766/* return each comma separated token, one at a time */
767CACHE_DECLARE(const char *)ap_cache_tokstr(apr_pool_t *p, const char *list,
768                                           const char **str)
769{
770    apr_size_t i;
771    const char *s;
772
773    s = ap_strchr_c(list, ',');
774    if (s != NULL) {
775        i = s - list;
776        do
777            s++;
778        while (apr_isspace(*s))
779            ; /* noop */
780    }
781    else
782        i = strlen(list);
783
784    while (i > 0 && apr_isspace(list[i - 1]))
785        i--;
786
787    *str = s;
788    if (i)
789        return apr_pstrndup(p, list, i);
790    else
791        return NULL;
792}
793
794/*
795 * Converts apr_time_t expressed as hex digits to
796 * a true apr_time_t.
797 */
798CACHE_DECLARE(apr_time_t) ap_cache_hex2usec(const char *x)
799{
800    int i, ch;
801    apr_time_t j;
802    for (i = 0, j = 0; i < sizeof(j) * 2; i++) {
803        ch = x[i];
804        j <<= 4;
805        if (apr_isdigit(ch))
806            j |= ch - '0';
807        else if (apr_isupper(ch))
808            j |= ch - ('A' - 10);
809        else
810            j |= ch - ('a' - 10);
811    }
812    return j;
813}
814
815/*
816 * Converts apr_time_t to apr_time_t expressed as hex digits.
817 */
818CACHE_DECLARE(void) ap_cache_usec2hex(apr_time_t j, char *y)
819{
820    int i, ch;
821
822    for (i = (sizeof(j) * 2)-1; i >= 0; i--) {
823        ch = (int)(j & 0xF);
824        j >>= 4;
825        if (ch >= 10)
826            y[i] = ch + ('A' - 10);
827        else
828            y[i] = ch + '0';
829    }
830    y[sizeof(j) * 2] = '\0';
831}
832
833static void cache_hash(const char *it, char *val, int ndepth, int nlength)
834{
835    apr_md5_ctx_t context;
836    unsigned char digest[16];
837    char tmp[22];
838    int i, k, d;
839    unsigned int x;
840    static const char enc_table[64] =
841    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789_@";
842
843    apr_md5_init(&context);
844    apr_md5_update(&context, (const unsigned char *) it, strlen(it));
845    apr_md5_final(digest, &context);
846
847    /* encode 128 bits as 22 characters, using a modified uuencoding
848     * the encoding is 3 bytes -> 4 characters* i.e. 128 bits is
849     * 5 x 3 bytes + 1 byte -> 5 * 4 characters + 2 characters
850     */
851    for (i = 0, k = 0; i < 15; i += 3) {
852        x = (digest[i] << 16) | (digest[i + 1] << 8) | digest[i + 2];
853        tmp[k++] = enc_table[x >> 18];
854        tmp[k++] = enc_table[(x >> 12) & 0x3f];
855        tmp[k++] = enc_table[(x >> 6) & 0x3f];
856        tmp[k++] = enc_table[x & 0x3f];
857    }
858
859    /* one byte left */
860    x = digest[15];
861    tmp[k++] = enc_table[x >> 2];    /* use up 6 bits */
862    tmp[k++] = enc_table[(x << 4) & 0x3f];
863
864    /* now split into directory levels */
865    for (i = k = d = 0; d < ndepth; ++d) {
866        memcpy(&val[i], &tmp[k], nlength);
867        k += nlength;
868        val[i + nlength] = '/';
869        i += nlength + 1;
870    }
871    memcpy(&val[i], &tmp[k], 22 - k);
872    val[i + 22 - k] = '\0';
873}
874
875CACHE_DECLARE(char *)ap_cache_generate_name(apr_pool_t *p, int dirlevels,
876                                            int dirlength, const char *name)
877{
878    char hashfile[66];
879    cache_hash(name, hashfile, dirlevels, dirlength);
880    return apr_pstrdup(p, hashfile);
881}
882
/**
 * String tokenizer that ignores separator characters within quoted strings
 * and escaped characters, as per RFC2616 section 2.2.
 *
 * Used like apr_strtok(): pass the string on the first call and NULL on
 * every later call, with the same "last" each time. The string is
 * modified in place; the separator ending each token is overwritten with
 * a NUL. Returns the next token, or NULL when there are none left, in
 * which case *last is not written.
 *
 * If the double quote character is itself listed in "sep", quoting is
 * switched off and a quote acts as an ordinary separator.
 */
char *cache_strqtok(char *str, const char *sep, char **last)
{
    char *token;
    char *cur;
    int in_quotes = 0;

    /* a NULL string asks us to resume after the previous token */
    if (str == NULL) {
        str = *last;
        if (str == NULL) {
            return NULL;
        }
    }

    /* discard leading separators, stopping at the first other character */
    for (; *str != '\0'; ++str) {
        if (!ap_strchr_c(sep, *str)) {
            break;
        }
    }

    if (*str == '\0') {
        /* nothing but separators remained */
        return NULL;
    }

    token = str;

    /* Walk to the end of the token. Outside a quoted string the first
     * separator ends it; inside one, only the closing quote matters and
     * a backslash protects the character that follows it.
     */
    for (cur = token; *cur != '\0'; ) {
        const char c = *cur;

        if (in_quotes) {
            if (c == '\"') {
                in_quotes = 0;
            }
            else if (c == '\\' && cur[1] != '\0') {
                /* step over the backslash; the escaped character
                 * itself is consumed by the increment below
                 */
                ++cur;
            }
            ++cur;
        }
        else {
            if (c == '\"' && !ap_strchr_c(sep, '\"')) {
                in_quotes = 1;
            }
            else if (ap_strchr_c(sep, c)) {
                break;
            }
            ++cur;
        }
    }

    /* terminate the token and remember where the next call starts */
    if (*cur != '\0') {
        *cur++ = '\0';
    }
    *last = cur;

    return token;
}
954
955/**
956 * Parse the Cache-Control and Pragma headers in one go, marking
957 * which tokens appear within the header. Populate the structure
958 * passed in.
959 */
960int ap_cache_control(request_rec *r, cache_control_t *cc,
961        const char *cc_header, const char *pragma_header, apr_table_t *headers)
962{
963    char *last;
964
965    if (cc->parsed) {
966        return cc->cache_control || cc->pragma;
967    }
968
969    cc->parsed = 1;
970    cc->max_age_value = -1;
971    cc->max_stale_value = -1;
972    cc->min_fresh_value = -1;
973    cc->s_maxage_value = -1;
974
975    if (pragma_header) {
976        char *header = apr_pstrdup(r->pool, pragma_header);
977        const char *token = cache_strqtok(header, CACHE_SEPARATOR, &last);
978        while (token) {
979            /* handle most common quickest case... */
980            if (!strcmp(token, "no-cache")) {
981                cc->no_cache = 1;
982            }
983            /* ...then try slowest case */
984            else if (!strcasecmp(token, "no-cache")) {
985                cc->no_cache = 1;
986            }
987            token = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
988        }
989        cc->pragma = 1;
990    }
991
992    if (cc_header) {
993        char *header = apr_pstrdup(r->pool, cc_header);
994        const char *token = cache_strqtok(header, CACHE_SEPARATOR, &last);
995        while (token) {
996            switch (token[0]) {
997            case 'n':
998            case 'N': {
999                /* handle most common quickest cases... */
1000                if (!strcmp(token, "no-cache")) {
1001                    cc->no_cache = 1;
1002                }
1003                else if (!strcmp(token, "no-store")) {
1004                    cc->no_store = 1;
1005                }
1006                /* ...then try slowest cases */
1007                else if (!strncasecmp(token, "no-cache", 8)) {
1008                    if (token[8] == '=') {
1009                        cc->no_cache_header = 1;
1010                    }
1011                    else if (!token[8]) {
1012                        cc->no_cache = 1;
1013                    }
1014                    break;
1015                }
1016                else if (!strcasecmp(token, "no-store")) {
1017                    cc->no_store = 1;
1018                }
1019                else if (!strcasecmp(token, "no-transform")) {
1020                    cc->no_transform = 1;
1021                }
1022                break;
1023            }
1024            case 'm':
1025            case 'M': {
1026                /* handle most common quickest cases... */
1027                if (!strcmp(token, "max-age=0")) {
1028                    cc->max_age = 1;
1029                    cc->max_age_value = 0;
1030                }
1031                else if (!strcmp(token, "must-revalidate")) {
1032                    cc->must_revalidate = 1;
1033                }
1034                /* ...then try slowest cases */
1035                else if (!strncasecmp(token, "max-age", 7)) {
1036                    if (token[7] == '=') {
1037                        cc->max_age = 1;
1038                        cc->max_age_value = apr_atoi64(token + 8);
1039                    }
1040                    break;
1041                }
1042                else if (!strncasecmp(token, "max-stale", 9)) {
1043                    if (token[9] == '=') {
1044                        cc->max_stale = 1;
1045                        cc->max_stale_value = apr_atoi64(token + 10);
1046                    }
1047                    else if (!token[10]) {
1048                        cc->max_stale = 1;
1049                        cc->max_stale_value = -1;
1050                    }
1051                    break;
1052                }
1053                else if (!strncasecmp(token, "min-fresh", 9)) {
1054                    if (token[9] == '=') {
1055                        cc->min_fresh = 1;
1056                        cc->min_fresh_value = apr_atoi64(token + 10);
1057                    }
1058                    break;
1059                }
1060                else if (!strcasecmp(token, "must-revalidate")) {
1061                    cc->must_revalidate = 1;
1062                }
1063                break;
1064            }
1065            case 'o':
1066            case 'O': {
1067                if (!strcasecmp(token, "only-if-cached")) {
1068                    cc->only_if_cached = 1;
1069                }
1070                break;
1071            }
1072            case 'p':
1073            case 'P': {
1074                /* handle most common quickest cases... */
1075                if (!strcmp(token, "private")) {
1076                    cc->private = 1;
1077                }
1078                /* ...then try slowest cases */
1079                else if (!strcasecmp(token, "public")) {
1080                    cc->public = 1;
1081                }
1082                else if (!strncasecmp(token, "private", 7)) {
1083                    if (token[7] == '=') {
1084                        cc->private_header = 1;
1085                    }
1086                    else if (!token[7]) {
1087                        cc->private = 1;
1088                    }
1089                    break;
1090                }
1091                else if (!strcasecmp(token, "proxy-revalidate")) {
1092                    cc->proxy_revalidate = 1;
1093                }
1094                break;
1095            }
1096            case 's':
1097            case 'S': {
1098                if (!strncasecmp(token, "s-maxage", 8)) {
1099                    if (token[8] == '=') {
1100                        cc->s_maxage = 1;
1101                        cc->s_maxage_value = apr_atoi64(token + 9);
1102                    }
1103                    break;
1104                }
1105                break;
1106            }
1107            }
1108            token = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
1109        }
1110        cc->cache_control = 1;
1111    }
1112
1113    return (cc_header != NULL || pragma_header != NULL);
1114}
1115
1116/**
1117 * Parse the Cache-Control, identifying and removing headers that
1118 * exist as tokens after the no-cache and private tokens.
1119 */
1120static int cache_control_remove(request_rec *r, const char *cc_header,
1121        apr_table_t *headers)
1122{
1123    char *last, *slast;
1124    int found = 0;
1125
1126    if (cc_header) {
1127        char *header = apr_pstrdup(r->pool, cc_header);
1128        char *token = cache_strqtok(header, CACHE_SEPARATOR, &last);
1129        while (token) {
1130            switch (token[0]) {
1131            case 'n':
1132            case 'N': {
1133                if (!strncmp(token, "no-cache", 8)
1134                        || !strncasecmp(token, "no-cache", 8)) {
1135                    if (token[8] == '=') {
1136                        const char *header = cache_strqtok(token + 9,
1137                                CACHE_SEPARATOR "\"", &slast);
1138                        while (header) {
1139                            apr_table_unset(headers, header);
1140                            header = cache_strqtok(NULL, CACHE_SEPARATOR "\"",
1141                                    &slast);
1142                        }
1143                        found = 1;
1144                    }
1145                    break;
1146                }
1147                break;
1148            }
1149            case 'p':
1150            case 'P': {
1151                if (!strncmp(token, "private", 7)
1152                        || !strncasecmp(token, "private", 7)) {
1153                    if (token[7] == '=') {
1154                        const char *header = cache_strqtok(token + 8,
1155                                CACHE_SEPARATOR "\"", &slast);
1156                        while (header) {
1157                            apr_table_unset(headers, header);
1158                            header = cache_strqtok(NULL, CACHE_SEPARATOR "\"",
1159                                    &slast);
1160                        }
1161                        found = 1;
1162                    }
1163                }
1164                break;
1165            }
1166            }
1167            token = cache_strqtok(NULL, CACHE_SEPARATOR, &last);
1168        }
1169    }
1170
1171    return found;
1172}
1173
1174/*
1175 * Create a new table consisting of those elements from an
1176 * headers table that are allowed to be stored in a cache.
1177 */
1178CACHE_DECLARE(apr_table_t *)ap_cache_cacheable_headers(apr_pool_t *pool,
1179                                                        apr_table_t *t,
1180                                                        server_rec *s)
1181{
1182    cache_server_conf *conf;
1183    char **header;
1184    int i;
1185    apr_table_t *headers_out;
1186
1187    /* Short circuit the common case that there are not
1188     * (yet) any headers populated.
1189     */
1190    if (t == NULL) {
1191        return apr_table_make(pool, 10);
1192    };
1193
1194    /* Make a copy of the headers, and remove from
1195     * the copy any hop-by-hop headers, as defined in Section
1196     * 13.5.1 of RFC 2616
1197     */
1198    headers_out = apr_table_copy(pool, t);
1199
1200    apr_table_unset(headers_out, "Connection");
1201    apr_table_unset(headers_out, "Keep-Alive");
1202    apr_table_unset(headers_out, "Proxy-Authenticate");
1203    apr_table_unset(headers_out, "Proxy-Authorization");
1204    apr_table_unset(headers_out, "TE");
1205    apr_table_unset(headers_out, "Trailers");
1206    apr_table_unset(headers_out, "Transfer-Encoding");
1207    apr_table_unset(headers_out, "Upgrade");
1208
1209    conf = (cache_server_conf *)ap_get_module_config(s->module_config,
1210                                                     &cache_module);
1211
1212    /* Remove the user defined headers set with CacheIgnoreHeaders.
1213     * This may break RFC 2616 compliance on behalf of the administrator.
1214     */
1215    header = (char **)conf->ignore_headers->elts;
1216    for (i = 0; i < conf->ignore_headers->nelts; i++) {
1217        apr_table_unset(headers_out, header[i]);
1218    }
1219    return headers_out;
1220}
1221
1222/*
1223 * Create a new table consisting of those elements from an input
1224 * headers table that are allowed to be stored in a cache.
1225 */
1226CACHE_DECLARE(apr_table_t *)ap_cache_cacheable_headers_in(request_rec *r)
1227{
1228    return ap_cache_cacheable_headers(r->pool, r->headers_in, r->server);
1229}
1230
1231/*
1232 * Create a new table consisting of those elements from an output
1233 * headers table that are allowed to be stored in a cache;
1234 * ensure there is a content type and capture any errors.
1235 */
1236CACHE_DECLARE(apr_table_t *)ap_cache_cacheable_headers_out(request_rec *r)
1237{
1238    apr_table_t *headers_out;
1239
1240    headers_out = apr_table_overlay(r->pool, r->headers_out,
1241                                        r->err_headers_out);
1242
1243    apr_table_clear(r->err_headers_out);
1244
1245    headers_out = ap_cache_cacheable_headers(r->pool, headers_out,
1246                                                  r->server);
1247
1248    cache_control_remove(r,
1249            cache_table_getm(r->pool, headers_out, "Cache-Control"),
1250            headers_out);
1251
1252    if (!apr_table_get(headers_out, "Content-Type")
1253        && r->content_type) {
1254        apr_table_setn(headers_out, "Content-Type",
1255                       ap_make_content_type(r, r->content_type));
1256    }
1257
1258    if (!apr_table_get(headers_out, "Content-Encoding")
1259        && r->content_encoding) {
1260        apr_table_setn(headers_out, "Content-Encoding",
1261                       r->content_encoding);
1262    }
1263
1264    return headers_out;
1265}
1266
/*
 * State carried through apr_table_do() by cache_table_getm() while the
 * values of a repeated header are collected.
 */
typedef struct
{
    /* pool the array of values is allocated from */
    apr_pool_t *p;
    /* first value seen, or NULL while none has been seen */
    const char *first;
    /* every value seen so far; only created once a second value turns up */
    apr_array_header_t *merged;
} cache_table_getm_t;
1273
1274static int cache_table_getm_do(void *v, const char *key, const char *val)
1275{
1276    cache_table_getm_t *state = (cache_table_getm_t *) v;
1277
1278    if (!state->first) {
1279        /**
1280         * The most common case is a single header, and this is covered by
1281         * a fast path that doesn't allocate any memory. On the second and
1282         * subsequent header, an array is created and the array concatenated
1283         * together to form the final value.
1284         */
1285        state->first = val;
1286    }
1287    else {
1288        const char **elt;
1289        if (!state->merged) {
1290            state->merged = apr_array_make(state->p, 10, sizeof(const char *));
1291            elt = apr_array_push(state->merged);
1292            *elt = state->first;
1293        }
1294        elt = apr_array_push(state->merged);
1295        *elt = val;
1296    }
1297    return 1;
1298}
1299
1300const char *cache_table_getm(apr_pool_t *p, const apr_table_t *t,
1301        const char *key)
1302{
1303    cache_table_getm_t state;
1304
1305    state.p = p;
1306    state.first = NULL;
1307    state.merged = NULL;
1308
1309    apr_table_do(cache_table_getm_do, &state, t, key, NULL);
1310
1311    if (!state.first) {
1312        return NULL;
1313    }
1314    else if (!state.merged) {
1315        return state.first;
1316    }
1317    else {
1318        return apr_array_pstrcat(p, state.merged, ',');
1319    }
1320}
1321