1/* Licensed to the Apache Software Foundation (ASF) under one or more
2 * contributor license agreements.  See the NOTICE file distributed with
3 * this work for additional information regarding copyright ownership.
4 * The ASF licenses this file to You under the Apache License, Version 2.0
5 * (the "License"); you may not use this file except in compliance with
6 * the License.  You may obtain a copy of the License at
7 *
8 *     http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * mod_negotiation.c: keeps track of MIME types the client is willing to
19 * accept, and contains code to handle type arbitration.
20 *
21 * rst
22 */
23
24#include "apr.h"
25#include "apr_strings.h"
26#include "apr_file_io.h"
27#include "apr_lib.h"
28
29#define APR_WANT_STRFUNC
30#include "apr_want.h"
31
32#include "ap_config.h"
33#include "httpd.h"
34#include "http_config.h"
35#include "http_request.h"
36#include "http_protocol.h"
37#include "http_core.h"
38#include "http_log.h"
39#include "util_script.h"
40
41
42#define MAP_FILE_MAGIC_TYPE "application/x-type-map"
43
44/* Commands --- configuring document caching on a per (virtual?)
45 * server basis...
46 */
47
/* Per-directory configuration of this module. */
typedef struct {
    /* Bitmask of FLP_* flags; FLP_UNDEF until ForceLanguagePriority is set */
    int forcelangpriority;
    /* Array of (char *) LanguagePriority entries; NULL until one is given */
    apr_array_header_t *language_priority;
} neg_dir_config;
52
/* forcelangpriority flags (stored in neg_dir_config.forcelangpriority).
 *
 * FLP_UNDEF is the value a freshly created config holds; when two configs
 * are merged, an FLP_UNDEF value in the more specific config lets the
 * base config's value show through.  FLP_PREFER and FLP_FALLBACK may be
 * OR'ed together, but the directive handler refuses to combine either of
 * them with FLP_NONE.
 */
#define FLP_UNDEF    0    /* Same as FLP_DEFAULT, but base overrides */
#define FLP_NONE     1    /* Return 406, HTTP_NOT_ACCEPTABLE */
#define FLP_PREFER   2    /* Use language_priority rather than MC */
#define FLP_FALLBACK 4    /* Use language_priority rather than NA */

#define FLP_DEFAULT  FLP_PREFER
61
62/* env evaluation
63 */
64#define DISCARD_ALL_ENCODINGS 1  /* no-gzip */
65#define DISCARD_ALL_BUT_HTML  2  /* gzip-only-text/html */
66
67module AP_MODULE_DECLARE_DATA negotiation_module;
68
69static void *create_neg_dir_config(apr_pool_t *p, char *dummy)
70{
71    neg_dir_config *new = (neg_dir_config *) apr_palloc(p,
72                                                        sizeof(neg_dir_config));
73
74    new->forcelangpriority = FLP_UNDEF;
75    new->language_priority = NULL;
76    return new;
77}
78
79static void *merge_neg_dir_configs(apr_pool_t *p, void *basev, void *addv)
80{
81    neg_dir_config *base = (neg_dir_config *) basev;
82    neg_dir_config *add = (neg_dir_config *) addv;
83    neg_dir_config *new = (neg_dir_config *) apr_palloc(p,
84                                                        sizeof(neg_dir_config));
85
86    /* give priority to the config in the subdirectory */
87    new->forcelangpriority = (add->forcelangpriority != FLP_UNDEF)
88                                ? add->forcelangpriority
89                                : base->forcelangpriority;
90    new->language_priority = add->language_priority
91                                ? add->language_priority
92                                : base->language_priority;
93    return new;
94}
95
96static const char *set_language_priority(cmd_parms *cmd, void *n_,
97                                         const char *lang)
98{
99    neg_dir_config *n = n_;
100    const char **langp;
101
102    if (!n->language_priority)
103        n->language_priority = apr_array_make(cmd->pool, 4, sizeof(char *));
104
105    langp = (const char **) apr_array_push(n->language_priority);
106    *langp = lang;
107    return NULL;
108}
109
110static const char *set_force_priority(cmd_parms *cmd, void *n_, const char *w)
111{
112    neg_dir_config *n = n_;
113
114    if (!strcasecmp(w, "None")) {
115        if (n->forcelangpriority & ~FLP_NONE) {
116            return "Cannot combine ForceLanguagePriority options with None";
117        }
118        n->forcelangpriority = FLP_NONE;
119    }
120    else if (!strcasecmp(w, "Prefer")) {
121        if (n->forcelangpriority & FLP_NONE) {
122            return "Cannot combine ForceLanguagePriority options None and "
123                   "Prefer";
124        }
125        n->forcelangpriority |= FLP_PREFER;
126    }
127    else if (!strcasecmp(w, "Fallback")) {
128        if (n->forcelangpriority & FLP_NONE) {
129            return "Cannot combine ForceLanguagePriority options None and "
130                   "Fallback";
131        }
132        n->forcelangpriority |= FLP_FALLBACK;
133    }
134    else {
135        return apr_pstrcat(cmd->pool, "Invalid ForceLanguagePriority option ",
136                           w, NULL);
137    }
138
139    return NULL;
140}
141
142static const char *cache_negotiated_docs(cmd_parms *cmd, void *dummy,
143                                         int arg)
144{
145    ap_set_module_config(cmd->server->module_config, &negotiation_module,
146                         (arg ? "Cache" : NULL));
147    return NULL;
148}
149
150static int do_cache_negotiated_docs(server_rec *s)
151{
152    return (ap_get_module_config(s->module_config,
153                                 &negotiation_module) != NULL);
154}
155
/* Configuration directives provided by this module.  CacheNegotiatedDocs
 * is an on/off flag registered with RSRC_CONF; LanguagePriority and
 * ForceLanguagePriority are registered with OR_FILEINFO and their
 * handlers are invoked once per word on the directive line.
 */
static const command_rec negotiation_cmds[] =
{
    AP_INIT_FLAG("CacheNegotiatedDocs", cache_negotiated_docs, NULL, RSRC_CONF,
                 "Either 'on' or 'off' (default)"),
    AP_INIT_ITERATE("LanguagePriority", set_language_priority, NULL,
                    OR_FILEINFO,
                    "space-delimited list of MIME language abbreviations"),
    AP_INIT_ITERATE("ForceLanguagePriority", set_force_priority, NULL,
                    OR_FILEINFO,
                    "Force LanguagePriority elections, either None, or "
                    "Fallback and/or Prefer"),
    {NULL}      /* table terminator */
};
169
170/*
171 * Record of available info on a media type specified by the client
172 * (we also use 'em for encodings and languages)
173 */
174
typedef struct accept_rec {
    char *name;                 /* MUST be lowercase */
    float quality;              /* from the 'q' or 'qs' parameter; 1.0 if absent */
    float level;                /* from the 'level' parameter (defaulted for HTML types) */
    char *charset;              /* for content-type only */
} accept_rec;
181
182/*
183 * Record of available info on a particular variant
184 *
185 * Note that a few of these fields are updated by the actual negotiation
186 * code.  These are:
187 *
188 * level_matched --- initialized to zero.  Set to the value of level
189 *             if the client actually accepts this media type at that
190 *             level (and *not* if it got in on a wildcard).  See level_cmp
191 *             below.
192 * mime_stars -- initialized to zero.  Set to the number of stars
193 *               present in the best matching Accept header element.
194 *               1 for star/star, 2 for type/star and 3 for
195 *               type/subtype.
196 *
197 * definite -- initialized to 1.  Set to 0 if there is a match which
198 *             makes the variant non-definite according to the rules
199 *             in rfc2296.
200 */
201
typedef struct var_rec {
    request_rec *sub_req;       /* May be NULL (is, for map files) */
    const char *mime_type;      /* MUST be lowercase */
    const char *file_name;      /* Set to 'this' (for map file body content) */
    apr_off_t body;             /* Only for map file body content */
    const char *content_encoding;   /* NULL when no encoding is recorded */
    apr_array_header_t *content_languages; /* list of lang. for this variant */
    const char *content_charset;    /* "" when no charset is recorded */
    const char *description;        /* "" when no description is recorded */

    /* The next five items give the quality values for the dimensions
     * of negotiation for this variant. They are obtained from the
     * appropriate header lines, except for source_quality, which
     * is obtained from the variant itself (the 'qs' parameter value
     * from the variant's mime-type). Apart from source_quality,
     * these values are set when we find the quality for each variant
     * (see best_match()). source_quality is set from the 'qs' parameter
     * of the variant description or mime type: see set_mime_fields().
     */
    float lang_quality;         /* quality of this variant's language */
    float encoding_quality;     /* ditto encoding */
    float charset_quality;      /* ditto charset */
    float mime_type_quality;    /* ditto media type */
    float source_quality;       /* source quality for this variant */

    /* Now some special values */
    float level;                /* Auxiliary to content-type... */
    apr_off_t bytes;            /* content length, if known (-1 otherwise) */
    int lang_index;             /* Index into LanguagePriority list */
    int is_pseudo_html;         /* text/html, *or* the INCLUDES_MAGIC_TYPEs */

    /* Above are all written-once properties of the variant.  The
     * three fields below are changed during negotiation:
     */

    float level_matched;
    int mime_stars;
    int definite;
} var_rec;
241
242/* Something to carry around the state of negotiation (and to keep
243 * all of this thread-safe)...
244 */
245
typedef struct {
    apr_pool_t *pool;           /* pool for negotiation allocations (the request pool) */
    request_rec *r;             /* the request being negotiated */
    neg_dir_config *conf;       /* this module's per-directory config for r */
    char *dir_name;             /* ap_make_dirstr_parent() of r->filename */
    int accept_q;               /* 1 if an Accept item has a quality below 1 */
    float default_lang_quality; /* fiddle lang q for variants with no lang */

    /* the array pointers below are NULL if the corresponding accept
     * headers are not present
     */
    apr_array_header_t *accepts;            /* accept_recs */
    apr_array_header_t *accept_encodings;   /* accept_recs */
    apr_array_header_t *accept_charsets;    /* accept_recs */
    apr_array_header_t *accept_langs;       /* accept_recs */

    apr_array_header_t *avail_vars;         /* available variants */

    int count_multiviews_variants;    /* number of variants found on disk */

    int is_transparent;       /* 1 if this resource is trans. negotiable */

    int dont_fiddle_headers;  /* 1 if we may not fiddle with accept hdrs */
    int ua_supports_trans;    /* 1 if ua supports trans negotiation */
    int send_alternates;      /* 1 if we want to send an Alternates header */
    int may_choose;           /* 1 if we may choose a variant for the client */
    int use_rvsa;             /* 1 if we must use RVSA/1.0 negotiation algo */
} negotiation_state;
274
275/* A few functions to manipulate var_recs.
276 * Cleaning out the fields...
277 */
278
279static void clean_var_rec(var_rec *mime_info)
280{
281    mime_info->sub_req = NULL;
282    mime_info->mime_type = "";
283    mime_info->file_name = "";
284    mime_info->body = 0;
285    mime_info->content_encoding = NULL;
286    mime_info->content_languages = NULL;
287    mime_info->content_charset = "";
288    mime_info->description = "";
289
290    mime_info->is_pseudo_html = 0;
291    mime_info->level = 0.0f;
292    mime_info->level_matched = 0.0f;
293    mime_info->bytes = -1;
294    mime_info->lang_index = -1;
295    mime_info->mime_stars = 0;
296    mime_info->definite = 1;
297
298    mime_info->charset_quality = 1.0f;
299    mime_info->encoding_quality = 1.0f;
300    mime_info->lang_quality = 1.0f;
301    mime_info->mime_type_quality = 1.0f;
302    mime_info->source_quality = 0.0f;
303}
304
305/* Initializing the relevant fields of a variant record from the
306 * accept_info read out of its content-type, one way or another.
307 */
308
309static void set_mime_fields(var_rec *var, accept_rec *mime_info)
310{
311    var->mime_type = mime_info->name;
312    var->source_quality = mime_info->quality;
313    var->level = mime_info->level;
314    var->content_charset = mime_info->charset;
315
316    var->is_pseudo_html = (!strcmp(var->mime_type, "text/html")
317                           || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE)
318                           || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE3));
319}
320
321/* Create a variant list validator in r using info from vlistr. */
322
323static void set_vlist_validator(request_rec *r, request_rec *vlistr)
324{
325    /* Calculating the variant list validator is similar to
326     * calculating an etag for the source of the variant list
327     * information, so we use ap_make_etag().  Note that this
328     * validator can be 'weak' in extreme case.
329     */
330    ap_update_mtime(vlistr, vlistr->finfo.mtime);
331    r->vlist_validator = ap_make_etag(vlistr, 0);
332
333    /* ap_set_etag will later take r->vlist_validator into account
334     * when creating the etag header
335     */
336}
337
338
339/*****************************************************************
340 *
 * Parsing (lists of) media types and their parameters, as seen in
 * HTTP header lines and elsewhere.
343 */
344
345/*
346 * parse quality value. atof(3) is not well-usable here, because it
347 * depends on the locale (argh).
348 *
349 * However, RFC 2616 states:
350 * 3.9 Quality Values
351 *
352 * [...] HTTP/1.1 applications MUST NOT generate more than three digits
353 * after the decimal point. User configuration of these values SHOULD also
354 * be limited in this fashion.
355 *
356 *     qvalue         = ( "0" [ "." 0*3DIGIT ] )
357 *                    | ( "1" [ "." 0*3("0") ] )
358 *
359 * This is quite easy. If the supplied string doesn't match the above
360 * definition (loosely), we simply return 1 (same as if there's no qvalue)
361 */
362
static float atoq(const char *string)
{
    const char *p = string;
    int thousandths = 0;
    int weight;

    /* no value at all counts as q=1 */
    if (p == NULL || *p == '\0') {
        return 1.0f;
    }

    while (apr_isspace(*p)) {
        ++p;
    }

    /* The integer part must be "0", or may be missing altogether
     * (".5" is tolerated, as it was when atof() was used here).
     * Anything else is treated as q=1.
     */
    if (*p != '.') {
        if (*p != '0') {
            return 1.0f;
        }
        ++p;
    }

    /* a bare "0" with no fraction */
    if (*p != '.') {
        return 0.0f;
    }

    /* Accumulate up to three fraction digits as an integer number of
     * thousandths, so that only a single floating point division is
     * needed at the end.
     */
    ++p;
    for (weight = 100; weight > 0; weight /= 10, ++p) {
        if (*p < '0' || *p > '9') {
            break;
        }
        thousandths += (*p - '0') * weight;
    }

    return (float)thousandths / 1000.0f;
}
403
/*
 * Get a single mime type entry --- one media type and parameters;
 * enter the values we recognize into the argument accept_rec
 *
 * p           pool from which the tokens are allocated
 * result      filled in: name (lowercased), quality (1.0 unless a 'q' or
 *             'qs' parameter is given), level (0 unless defaulted for the
 *             HTML types or given by a 'level' parameter) and charset
 *             ("" unless a 'charset' parameter is given)
 * accept_line text to parse, positioned at the start of an entry
 *
 * Returns the position just after the entry; a single ',' following the
 * entry is skipped as well.
 */

static const char *get_entry(apr_pool_t *p, accept_rec *result,
                             const char *accept_line)
{
    result->quality = 1.0f;
    result->level = 0.0f;
    result->charset = "";

    /*
     * Note that this handles what I gather is the "old format",
     *
     *    Accept: text/html text/plain moo/zot
     *
     * without any compatibility kludges --- if the token after the
     * MIME type begins with a semicolon, we know we're looking at parms,
     * otherwise, we know we aren't.  (So why all the pissing and moaning
     * in the CERN server code?  I must be missing something).
     */

    result->name = ap_get_token(p, &accept_line, 0);
    ap_str_tolower(result->name);     /* You want case insensitive,
                                       * you'll *get* case insensitive.
                                       */

    /* KLUDGE!!! Default HTML to level 2.0 unless the browser
     * *explicitly* says something else.
     * (An explicit 'level' parameter is applied by the loop below and
     * therefore overrides the default chosen here.)
     */

    if (!strcmp(result->name, "text/html") && (result->level == 0.0)) {
        result->level = 2.0f;
    }
    else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE)) {
        result->level = 2.0f;
    }
    else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE3)) {
        result->level = 3.0f;
    }

    while (*accept_line == ';') {
        /* Parameters ... */

        char *parm;     /* the parameter token; becomes just the name */
        char *cp;       /* scans the name, then marks the value start */
        char *end;      /* one past the last character of the value */

        ++accept_line;
        parm = ap_get_token(p, &accept_line, 1);

        /* Look for 'var = value' --- and make sure the var is in lcase. */

        for (cp = parm; (*cp && !apr_isspace(*cp) && *cp != '='); ++cp) {
            *cp = apr_tolower(*cp);
        }

        if (!*cp) {
            continue;           /* No '='; just ignore it. */
        }

        *cp++ = '\0';           /* Delimit var */
        while (apr_isspace(*cp) || *cp == '=') {
            ++cp;
        }

        /* A quoted value runs to the closing quote (or a line break);
         * an unquoted one runs to the first whitespace.
         */
        if (*cp == '"') {
            ++cp;
            for (end = cp;
                 (*end && *end != '\n' && *end != '\r' && *end != '\"');
                 end++);
        }
        else {
            for (end = cp; (*end && !apr_isspace(*end)); end++);
        }
        if (*end) {
            *end = '\0';        /* strip ending quote or return */
        }
        ap_str_tolower(cp);     /* values are compared in lowercase too */

        /* 'q' and 'qs' both set the quality */
        if (parm[0] == 'q'
            && (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0'))) {
            result->quality = atoq(cp);
        }
        else if (parm[0] == 'l' && !strcmp(&parm[1], "evel")) {
            result->level = (float)atoi(cp);
        }
        else if (!strcmp(parm, "charset")) {
            /* points into the token allocated from p above */
            result->charset = cp;
        }
    }

    if (*accept_line == ',') {
        ++accept_line;
    }

    return accept_line;
}
503
504/*****************************************************************
505 *
506 * Dealing with header lines ...
507 *
508 * Accept, Accept-Charset, Accept-Language and Accept-Encoding
509 * are handled by do_header_line() - they all have the same
510 * basic structure of a list of items of the format
511 *    name; q=N; charset=TEXT
512 *
513 * where charset is only valid in Accept.
514 */
515
516static apr_array_header_t *do_header_line(apr_pool_t *p,
517                                          const char *accept_line)
518{
519    apr_array_header_t *accept_recs;
520
521    if (!accept_line) {
522        return NULL;
523    }
524
525    accept_recs = apr_array_make(p, 40, sizeof(accept_rec));
526
527    while (*accept_line) {
528        accept_rec *new = (accept_rec *) apr_array_push(accept_recs);
529        accept_line = get_entry(p, new, accept_line);
530    }
531
532    return accept_recs;
533}
534
535/* Given the text of the Content-Languages: line from the var map file,
536 * return an array containing the languages of this variant
537 */
538
539static apr_array_header_t *do_languages_line(apr_pool_t *p,
540                                             const char **lang_line)
541{
542    apr_array_header_t *lang_recs = apr_array_make(p, 2, sizeof(char *));
543
544    if (!lang_line) {
545        return lang_recs;
546    }
547
548    while (**lang_line) {
549        char **new = (char **) apr_array_push(lang_recs);
550        *new = ap_get_token(p, lang_line, 0);
551        ap_str_tolower(*new);
552        if (**lang_line == ',' || **lang_line == ';') {
553            ++(*lang_line);
554        }
555    }
556
557    return lang_recs;
558}
559
560/*****************************************************************
561 *
562 * Handling header lines from clients...
563 */
564
565static negotiation_state *parse_accept_headers(request_rec *r)
566{
567    negotiation_state *new =
568        (negotiation_state *) apr_pcalloc(r->pool, sizeof(negotiation_state));
569    accept_rec *elts;
570    apr_table_t *hdrs = r->headers_in;
571    int i;
572
573    new->pool = r->pool;
574    new->r = r;
575    new->conf = (neg_dir_config *)ap_get_module_config(r->per_dir_config,
576                                                       &negotiation_module);
577
578    new->dir_name = ap_make_dirstr_parent(r->pool, r->filename);
579
580    new->accepts = do_header_line(r->pool, apr_table_get(hdrs, "Accept"));
581
582    /* calculate new->accept_q value */
583    if (new->accepts) {
584        elts = (accept_rec *) new->accepts->elts;
585
586        for (i = 0; i < new->accepts->nelts; ++i) {
587            if (elts[i].quality < 1.0) {
588                new->accept_q = 1;
589            }
590        }
591    }
592
593    new->accept_encodings =
594        do_header_line(r->pool, apr_table_get(hdrs, "Accept-Encoding"));
595    new->accept_langs =
596        do_header_line(r->pool, apr_table_get(hdrs, "Accept-Language"));
597    new->accept_charsets =
598        do_header_line(r->pool, apr_table_get(hdrs, "Accept-Charset"));
599
600    /* This is possibly overkill for some servers, heck, we have
601     * only 33 index.html variants in docs/docroot (today).
602     * Make this configurable?
603     */
604    new->avail_vars = apr_array_make(r->pool, 40, sizeof(var_rec));
605
606    return new;
607}
608
609
610static void parse_negotiate_header(request_rec *r, negotiation_state *neg)
611{
612    const char *negotiate = apr_table_get(r->headers_in, "Negotiate");
613    char *tok;
614
615    /* First, default to no TCN, no Alternates, and the original Apache
616     * negotiation algorithm with fiddles for broken browser configs.
617     *
618     * To save network bandwidth, we do not configure to send an
619     * Alternates header to the user agent by default.  User
620     * agents that want an Alternates header for agent-driven
621     * negotiation will have to request it by sending an
622     * appropriate Negotiate header.
623     */
624    neg->ua_supports_trans   = 0;
625    neg->send_alternates     = 0;
626    neg->may_choose          = 1;
627    neg->use_rvsa            = 0;
628    neg->dont_fiddle_headers = 0;
629
630    if (!negotiate)
631        return;
632
633    if (strcmp(negotiate, "trans") == 0) {
634        /* Lynx 2.7 and 2.8 send 'negotiate: trans' even though they
635         * do not support transparent content negotiation, so for Lynx we
636         * ignore the negotiate header when its contents are exactly "trans".
637         * If future versions of Lynx ever need to say 'negotiate: trans',
638         * they can send the equivalent 'negotiate: trans, trans' instead
639         * to avoid triggering the workaround below.
640         */
641        const char *ua = apr_table_get(r->headers_in, "User-Agent");
642
643        if (ua && (strncmp(ua, "Lynx", 4) == 0))
644            return;
645    }
646
647    neg->may_choose = 0;  /* An empty Negotiate would require 300 response */
648
649    while ((tok = ap_get_list_item(neg->pool, &negotiate)) != NULL) {
650
651        if (strcmp(tok, "trans") == 0 ||
652            strcmp(tok, "vlist") == 0 ||
653            strcmp(tok, "guess-small") == 0 ||
654            apr_isdigit(tok[0]) ||
655            strcmp(tok, "*") == 0) {
656
657            /* The user agent supports transparent negotiation */
658            neg->ua_supports_trans = 1;
659
660            /* Send-alternates could be configurable, but note
661             * that it must be 1 if we have 'vlist' in the
662             * negotiate header.
663             */
664            neg->send_alternates = 1;
665
666            if (strcmp(tok, "1.0") == 0) {
667                /* we may use the RVSA/1.0 algorithm, configure for it */
668                neg->may_choose = 1;
669                neg->use_rvsa = 1;
670                neg->dont_fiddle_headers = 1;
671            }
672            else if (tok[0] == '*') {
673                /* we may use any variant selection algorithm, configure
674                 * to use the Apache algorithm
675                 */
676                neg->may_choose = 1;
677
678                /* We disable header fiddles on the assumption that a
679                 * client sending Negotiate knows how to send correct
680                 * headers which don't need fiddling.
681                 */
682                neg->dont_fiddle_headers = 1;
683            }
684        }
685    }
686
687#ifdef NEG_DEBUG
688    ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00680)
689            "dont_fiddle_headers=%d use_rvsa=%d ua_supports_trans=%d "
690            "send_alternates=%d, may_choose=%d",
691            neg->dont_fiddle_headers, neg->use_rvsa,
692            neg->ua_supports_trans, neg->send_alternates, neg->may_choose);
693#endif
694
695}
696
697/* Sometimes clients will give us no Accept info at all; this routine sets
698 * up the standard default for that case, and also arranges for us to be
699 * willing to run a CGI script if we find one.  (In fact, we set up to
700 * dramatically prefer CGI scripts in cases where that's appropriate,
701 * e.g., POST or when URI includes query args or extra path info).
702 */
703static void maybe_add_default_accepts(negotiation_state *neg,
704                                      int prefer_scripts)
705{
706    accept_rec *new_accept;
707
708    if (!neg->accepts) {
709        neg->accepts = apr_array_make(neg->pool, 4, sizeof(accept_rec));
710
711        new_accept = (accept_rec *) apr_array_push(neg->accepts);
712
713        new_accept->name = "*/*";
714        new_accept->quality = 1.0f;
715        new_accept->level = 0.0f;
716    }
717
718    new_accept = (accept_rec *) apr_array_push(neg->accepts);
719
720    new_accept->name = CGI_MAGIC_TYPE;
721    if (neg->use_rvsa) {
722        new_accept->quality = 0;
723    }
724    else {
725        new_accept->quality = prefer_scripts ? 2.0f : 0.001f;
726    }
727    new_accept->level = 0.0f;
728}
729
730/*****************************************************************
731 *
732 * Parsing type-map files, in Roy's meta/http format augmented with
733 * #-comments.
734 */
735
736/* Reading RFC822-style header lines, ignoring #-comments and
737 * handling continuations.
738 */
739
/* Result of reading one logical line from a type-map file:
 *   header_eof  - nothing more could be read
 *   header_seen - a header line (with any continuations) is in the buffer
 *   header_sep  - a blank line, which ends the current variant's headers
 */
enum header_state {
    header_eof, header_seen, header_sep
};
743
744static enum header_state get_header_line(char *buffer, int len, apr_file_t *map)
745{
746    char *buf_end = buffer + len;
747    char *cp;
748    char c;
749
750    /* Get a noncommented line */
751
752    do {
753        if (apr_file_gets(buffer, MAX_STRING_LEN, map) != APR_SUCCESS) {
754            return header_eof;
755        }
756    } while (buffer[0] == '#');
757
758    /* If blank, just return it --- this ends information on this variant */
759
760    for (cp = buffer; apr_isspace(*cp); ++cp) {
761        continue;
762    }
763
764    if (*cp == '\0') {
765        return header_sep;
766    }
767
768    /* If non-blank, go looking for header lines, but note that we still
769     * have to treat comments specially...
770     */
771
772    cp += strlen(cp);
773
774    /* We need to shortcut the rest of this block following the Body:
775     * tag - we will not look for continutation after this line.
776     */
777    if (!strncasecmp(buffer, "Body:", 5))
778        return header_seen;
779
780    while (apr_file_getc(&c, map) != APR_EOF) {
781        if (c == '#') {
782            /* Comment line */
783            while (apr_file_getc(&c, map) != APR_EOF && c != '\n') {
784                continue;
785            }
786        }
787        else if (apr_isspace(c)) {
788            /* Leading whitespace.  POSSIBLE continuation line
789             * Also, possibly blank --- if so, we ungetc() the final newline
790             * so that we will pick up the blank line the next time 'round.
791             */
792
793            while (c != '\n' && apr_isspace(c)) {
794                if(apr_file_getc(&c, map) != APR_SUCCESS)
795                    break;
796            }
797
798            apr_file_ungetc(c, map);
799
800            if (c == '\n') {
801                return header_seen;     /* Blank line */
802            }
803
804            /* Continuation */
805
806            while (   cp < buf_end - 2
807                   && (apr_file_getc(&c, map)) != APR_EOF
808                   && c != '\n') {
809                *cp++ = c;
810            }
811
812            *cp++ = '\n';
813            *cp = '\0';
814        }
815        else {
816
817            /* Line beginning with something other than whitespace */
818
819            apr_file_ungetc(c, map);
820            return header_seen;
821        }
822    }
823
824    return header_seen;
825}
826
827static apr_off_t get_body(char *buffer, apr_size_t *len, const char *tag,
828                          apr_file_t *map)
829{
830    char *endbody;
831    int bodylen;
832    int taglen;
833    apr_off_t pos;
834
835    taglen = strlen(tag);
836    *len -= taglen;
837
838    /* We are at the first character following a body:tag\n entry
839     * Suck in the body, then backspace to the first char after the
840     * closing tag entry.  If we fail to read, find the tag or back
841     * up then we have a hosed file, so give up already
842     */
843    if (apr_file_read(map, buffer, len) != APR_SUCCESS) {
844        return -1;
845    }
846
847    /* put a copy of the tag *after* the data read from the file
848     * so that strstr() will find something with no reliance on
849     * terminating '\0'
850     */
851    memcpy(buffer + *len, tag, taglen);
852    endbody = strstr(buffer, tag);
853    if (endbody == buffer + *len) {
854        return -1;
855    }
856    bodylen = endbody - buffer;
857    endbody += taglen;
858    /* Skip all the trailing cruft after the end tag to the next line */
859    while (*endbody) {
860        if (*endbody == '\n') {
861            ++endbody;
862            break;
863        }
864        ++endbody;
865    }
866
867    pos = -(apr_off_t)(*len - (endbody - buffer));
868    if (apr_file_seek(map, APR_CUR, &pos) != APR_SUCCESS) {
869        return -1;
870    }
871
872    /* Give the caller back the actual body's file offset and length */
873    *len = bodylen;
874    return pos - (endbody - buffer);
875}
876
877
878/* Stripping out RFC822 comments */
879
/* Blank out RFC822-style (parenthesized) comments in hdr, in place, by
 * overwriting them with spaces.  Text inside double quotes is left
 * untouched; if a quoted string is never closed, the rest of hdr is left
 * as it is.
 *
 * Known limitations: nested comments are not understood (a comment ends
 * at the first ')'), and backslash escapes are not handled.
 */
static void strip_paren_comments(char *hdr)
{
    /* Hmmm... is this correct?  In Roy's latest draft, (comments) can nest! */
    /* Nope, it isn't correct.  Fails to handle backslash escape as well.    */

    while (*hdr) {
        if (*hdr == '"') {
            /* Search for the CLOSING quote, starting after the opening
             * one; searching from hdr itself would just find the
             * opening quote again and the quoted text would not be
             * skipped.
             */
            hdr = strchr(hdr + 1, '"');
            if (hdr == NULL) {
                return;
            }
            ++hdr;
        }
        else if (*hdr == '(') {
            while (*hdr && *hdr != ')') {
                *hdr++ = ' ';
            }

            /* also blank the closing parenthesis, if there is one */
            if (*hdr) {
                *hdr++ = ' ';
            }
        }
        else {
            ++hdr;
        }
    }
}
907
908/* Getting to a header body from the header */
909
910static char *lcase_header_name_return_body(char *header, request_rec *r)
911{
912    char *cp = header;
913
914    for ( ; *cp && *cp != ':' ; ++cp) {
915        *cp = apr_tolower(*cp);
916    }
917
918    if (!*cp) {
919        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00681)
920                      "Syntax error in type map, no ':' in %s for header %s",
921                      r->filename, header);
922        return NULL;
923    }
924
925    do {
926        ++cp;
927    } while (apr_isspace(*cp));
928
929    if (!*cp) {
930        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00682)
931                      "Syntax error in type map --- no header body: %s for %s",
932                      r->filename, header);
933        return NULL;
934    }
935
936    return cp;
937}
938
939static int read_type_map(apr_file_t **map, negotiation_state *neg,
940                         request_rec *rr)
941{
942    request_rec *r = neg->r;
943    apr_file_t *map_ = NULL;
944    apr_status_t status;
945    char buffer[MAX_STRING_LEN];
946    enum header_state hstate;
947    struct var_rec mime_info;
948    int has_content;
949
950    if (!map)
951        map = &map_;
952
953    /* We are not using multiviews */
954    neg->count_multiviews_variants = 0;
955
956    if ((status = apr_file_open(map, rr->filename, APR_READ | APR_BUFFERED,
957                APR_OS_DEFAULT, neg->pool)) != APR_SUCCESS) {
958        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(00683)
959                      "cannot access type map file: %s", rr->filename);
960        if (APR_STATUS_IS_ENOTDIR(status) || APR_STATUS_IS_ENOENT(status)) {
961            return HTTP_NOT_FOUND;
962        }
963        else {
964            return HTTP_FORBIDDEN;
965        }
966    }
967
968    clean_var_rec(&mime_info);
969    has_content = 0;
970
971    do {
972        hstate = get_header_line(buffer, MAX_STRING_LEN, *map);
973
974        if (hstate == header_seen) {
975            char *body1 = lcase_header_name_return_body(buffer, neg->r);
976            const char *body;
977
978            if (body1 == NULL) {
979                return HTTP_INTERNAL_SERVER_ERROR;
980            }
981
982            strip_paren_comments(body1);
983            body = body1;
984
985            if (!strncmp(buffer, "uri:", 4)) {
986                mime_info.file_name = ap_get_token(neg->pool, &body, 0);
987            }
988            else if (!strncmp(buffer, "content-type:", 13)) {
989                struct accept_rec accept_info;
990
991                get_entry(neg->pool, &accept_info, body);
992                set_mime_fields(&mime_info, &accept_info);
993                has_content = 1;
994            }
995            else if (!strncmp(buffer, "content-length:", 15)) {
996                char *errp;
997                apr_off_t number;
998
999                body1 = ap_get_token(neg->pool, &body, 0);
1000                if (apr_strtoff(&number, body1, &errp, 10) != APR_SUCCESS
1001                    || *errp || number < 0) {
1002                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00684)
1003                                  "Parse error in type map, Content-Length: "
1004                                  "'%s' in %s is invalid.",
1005                                  body1, r->filename);
1006                    break;
1007                }
1008                mime_info.bytes = number;
1009                has_content = 1;
1010            }
1011            else if (!strncmp(buffer, "content-language:", 17)) {
1012                mime_info.content_languages = do_languages_line(neg->pool,
1013                                                                &body);
1014                has_content = 1;
1015            }
1016            else if (!strncmp(buffer, "content-encoding:", 17)) {
1017                mime_info.content_encoding = ap_get_token(neg->pool, &body, 0);
1018                has_content = 1;
1019            }
1020            else if (!strncmp(buffer, "description:", 12)) {
1021                char *desc = apr_pstrdup(neg->pool, body);
1022                char *cp;
1023
1024                for (cp = desc; *cp; ++cp) {
1025                    if (*cp=='\n') *cp=' ';
1026                }
1027                if (cp>desc) *(cp-1)=0;
1028                mime_info.description = desc;
1029            }
1030            else if (!strncmp(buffer, "body:", 5)) {
1031                char *tag = apr_pstrdup(neg->pool, body);
1032                char *eol = strchr(tag, '\0');
1033                apr_size_t len = MAX_STRING_LEN;
1034                while (--eol >= tag && apr_isspace(*eol))
1035                    *eol = '\0';
1036                if ((mime_info.body = get_body(buffer, &len, tag, *map)) < 0) {
1037                    ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00685)
1038                                  "Syntax error in type map, no end tag '%s'"
1039                                  "found in %s for Body: content.",
1040                                  tag, r->filename);
1041                     break;
1042                }
1043                mime_info.bytes = len;
1044                mime_info.file_name = apr_filepath_name_get(rr->filename);
1045            }
1046        }
1047        else {
1048            if (*mime_info.file_name && has_content) {
1049                void *new_var = apr_array_push(neg->avail_vars);
1050
1051                memcpy(new_var, (void *) &mime_info, sizeof(var_rec));
1052            }
1053
1054            clean_var_rec(&mime_info);
1055            has_content = 0;
1056        }
1057    } while (hstate != header_eof);
1058
1059    if (map_)
1060        apr_file_close(map_);
1061
1062    set_vlist_validator(r, rr);
1063
1064    return OK;
1065}
1066
1067
1068/* Sort function used by read_types_multi. */
1069static int variantsortf(var_rec *a, var_rec *b) {
1070
1071    /* First key is the source quality, sort in descending order. */
1072
1073    /* XXX: note that we currently implement no method of setting the
1074     * source quality for multiviews variants, so we are always comparing
1075     * 1.0 to 1.0 for now
1076     */
1077    if (a->source_quality < b->source_quality)
1078        return 1;
1079    if (a->source_quality > b->source_quality)
1080        return -1;
1081
1082    /* Second key is the variant name */
1083    return strcmp(a->file_name, b->file_name);
1084}
1085
1086/*****************************************************************
1087 *
1088 * Same as read_type_map, except we use a filtered directory listing
1089 * as the map...
1090 */
1091
1092static int read_types_multi(negotiation_state *neg)
1093{
1094    request_rec *r = neg->r;
1095
1096    char *filp;
1097    int prefix_len;
1098    apr_dir_t *dirp;
1099    apr_finfo_t dirent;
1100    apr_status_t status;
1101    struct var_rec mime_info;
1102    struct accept_rec accept_info;
1103    void *new_var;
1104    int anymatch = 0;
1105
1106    clean_var_rec(&mime_info);
1107
1108    if (r->proxyreq || !r->filename
1109                    || !ap_os_is_path_absolute(neg->pool, r->filename)) {
1110        return DECLINED;
1111    }
1112
1113    /* Only absolute paths here */
1114    if (!(filp = strrchr(r->filename, '/'))) {
1115        return DECLINED;
1116    }
1117    ++filp;
1118    prefix_len = strlen(filp);
1119
1120    if ((status = apr_dir_open(&dirp, neg->dir_name,
1121                               neg->pool)) != APR_SUCCESS) {
1122        ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(00686)
1123                    "cannot read directory for multi: %s", neg->dir_name);
1124        return HTTP_FORBIDDEN;
1125    }
1126
1127    while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dirp) == APR_SUCCESS) {
1128        apr_array_header_t *exception_list;
1129        request_rec *sub_req;
1130
1131        /* Do we have a match? */
1132#ifdef CASE_BLIND_FILESYSTEM
1133        if (strncasecmp(dirent.name, filp, prefix_len)) {
1134#else
1135        if (strncmp(dirent.name, filp, prefix_len)) {
1136#endif
1137            continue;
1138        }
1139        if (dirent.name[prefix_len] != '.') {
1140            continue;
1141        }
1142
1143        /* Don't negotiate directories and other unusual files
1144         * Really shouldn't see anything but DIR/LNK/REG here,
1145         * and we aught to discover if the LNK was interesting.
1146         *
1147         * Of course, this only helps platforms that capture the
1148         * the filetype in apr_dir_read(), which most can once
1149         * they are optimized with some magic [it's known to the
1150         * dirent, not associated to the inode, on most FS's.]
1151         */
1152        if ((dirent.valid & APR_FINFO_TYPE) && (dirent.filetype == APR_DIR))
1153            continue;
1154
1155        /* Ok, something's here.  Maybe nothing useful.  Remember that
1156         * we tried, if we completely fail, so we can reject the request!
1157         */
1158        anymatch = 1;
1159
1160        /* See if it's something which we have access to, and which
1161         * has a known type and encoding.
1162         */
1163        sub_req = ap_sub_req_lookup_dirent(&dirent, r, AP_SUBREQ_MERGE_ARGS,
1164                                           NULL);
1165
1166        /* Double check, we still don't multi-resolve non-ordinary files
1167         */
1168        if (sub_req->finfo.filetype != APR_REG) {
1169            /* XXX sub req not destroyed -- may be a bug/unintentional ? */
1170            continue;
1171        }
1172
1173        /* If it has a handler, we'll pretend it's a CGI script,
1174         * since that's a good indication of the sort of thing it
1175         * might be doing.
1176         */
1177        if (sub_req->handler && !sub_req->content_type) {
1178            ap_set_content_type(sub_req, CGI_MAGIC_TYPE);
1179        }
1180
1181        /*
1182         * mod_mime will _always_ provide us the base name in the
1183         * ap-mime-exception-list, if it processed anything.  If
1184         * this list is empty, give up immediately, there was
1185         * nothing interesting.  For example, looking at the files
1186         * readme.txt and readme.foo, we will throw away .foo if
1187         * it's an insignificant file (e.g. did not identify a
1188         * language, charset, encoding, content type or handler,)
1189         */
1190        exception_list =
1191            (apr_array_header_t *)apr_table_get(sub_req->notes,
1192                                                "ap-mime-exceptions-list");
1193
1194        if (!exception_list) {
1195            ap_destroy_sub_req(sub_req);
1196            continue;
1197        }
1198
1199        /* Each unregonized bit better match our base name, in sequence.
1200         * A test of index.html.foo will match index.foo or index.html.foo,
1201         * but it will never transpose the segments and allow index.foo.html
1202         * because that would introduce too much CPU consumption.  Better that
1203         * we don't attempt a many-to-many match here.
1204         */
1205        {
1206            int nexcept = exception_list->nelts;
1207            char **cur_except = (char**)exception_list->elts;
1208            char *segstart = filp, *segend, saveend;
1209
1210            while (*segstart && nexcept) {
1211                if (!(segend = strchr(segstart, '.')))
1212                    segend = strchr(segstart, '\0');
1213                saveend = *segend;
1214                *segend = '\0';
1215
1216#ifdef CASE_BLIND_FILESYSTEM
1217                if (strcasecmp(segstart, *cur_except) == 0) {
1218#else
1219                if (strcmp(segstart, *cur_except) == 0) {
1220#endif
1221                    --nexcept;
1222                    ++cur_except;
1223                }
1224
1225                if (!saveend)
1226                    break;
1227
1228                *segend = saveend;
1229                segstart = segend + 1;
1230            }
1231
1232            if (nexcept) {
1233                /* Something you don't know is, something you don't know...
1234                 */
1235                ap_destroy_sub_req(sub_req);
1236                continue;
1237            }
1238        }
1239
1240        /*
1241         * If we failed the subrequest, or don't
1242         * know what we are serving, then continue.
1243         */
1244        if (sub_req->status != HTTP_OK || (!sub_req->content_type)) {
1245            ap_destroy_sub_req(sub_req);
1246            continue;
1247        }
1248
1249        /* If it's a map file, we use that instead of the map
1250         * we're building...
1251         */
1252        if (((sub_req->content_type) &&
1253             !strcmp(sub_req->content_type, MAP_FILE_MAGIC_TYPE)) ||
1254            ((sub_req->handler) &&
1255             !strcmp(sub_req->handler, "type-map"))) {
1256
1257            apr_dir_close(dirp);
1258            neg->avail_vars->nelts = 0;
1259            if (sub_req->status != HTTP_OK) {
1260                return sub_req->status;
1261            }
1262            return read_type_map(NULL, neg, sub_req);
1263        }
1264
1265        /* Have reasonable variant --- gather notes. */
1266
1267        mime_info.sub_req = sub_req;
1268        mime_info.file_name = apr_pstrdup(neg->pool, dirent.name);
1269        if (sub_req->content_encoding) {
1270            mime_info.content_encoding = sub_req->content_encoding;
1271        }
1272        if (sub_req->content_languages) {
1273            mime_info.content_languages = sub_req->content_languages;
1274        }
1275
1276        get_entry(neg->pool, &accept_info, sub_req->content_type);
1277        set_mime_fields(&mime_info, &accept_info);
1278
1279        new_var = apr_array_push(neg->avail_vars);
1280        memcpy(new_var, (void *) &mime_info, sizeof(var_rec));
1281
1282        neg->count_multiviews_variants++;
1283
1284        clean_var_rec(&mime_info);
1285    }
1286
1287    apr_dir_close(dirp);
1288
1289    /* We found some file names that matched.  None could be served.
1290     * Rather than fall out to autoindex or some other mapper, this
1291     * request must die.
1292     */
1293    if (anymatch && !neg->avail_vars->nelts) {
1294        ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00687)
1295                      "Negotiation: discovered file(s) matching request: %s"
1296                      " (None could be negotiated).",
1297                      r->filename);
1298        return HTTP_NOT_FOUND;
1299    }
1300
1301    set_vlist_validator(r, r);
1302
1303    /* Sort the variants into a canonical order.  The negotiation
1304     * result sometimes depends on the order of the variants.  By
1305     * sorting the variants into a canonical order, rather than using
1306     * the order in which readdir() happens to return them, we ensure
1307     * that the negotiation result will be consistent over filesystem
1308     * backup/restores and over all mirror sites.
1309     */
1310
1311    qsort((void *) neg->avail_vars->elts, neg->avail_vars->nelts,
1312          sizeof(var_rec), (int (*)(const void *, const void *)) variantsortf);
1313
1314    return OK;
1315}
1316
1317
1318/*****************************************************************
1319 * And now for the code you've been waiting for... actually
1320 * finding a match to the client's requirements.
1321 */
1322
1323/* Matching MIME types ... the star/star and foo/star commenting conventions
1324 * are implemented here.  (You know what I mean by star/star, but just
1325 * try mentioning those three characters in a C comment).  Using strcmp()
1326 * is legit, because everything has already been smashed to lowercase.
1327 *
1328 * Note also that if we get an exact match on the media type, we update
1329 * level_matched for use in level_cmp below...
1330 *
1331 * We also give a value for mime_stars, which is used later. It should
1332 * be 1 for star/star, 2 for type/star and 3 for type/subtype.
1333 */
1334
1335static int mime_match(accept_rec *accept_r, var_rec *avail)
1336{
1337    const char *accept_type = accept_r->name;
1338    const char *avail_type = avail->mime_type;
1339    int len = strlen(accept_type);
1340
1341    if (accept_type[0] == '*') {        /* Anything matches star/star */
1342        if (avail->mime_stars < 1) {
1343            avail->mime_stars = 1;
1344        }
1345        return 1;
1346    }
1347    else if ((accept_type[len - 1] == '*') &&
1348             !strncmp(accept_type, avail_type, len - 2)) {
1349        if (avail->mime_stars < 2) {
1350            avail->mime_stars = 2;
1351        }
1352        return 1;
1353    }
1354    else if (!strcmp(accept_type, avail_type)
1355             || (!strcmp(accept_type, "text/html")
1356                 && (!strcmp(avail_type, INCLUDES_MAGIC_TYPE)
1357                     || !strcmp(avail_type, INCLUDES_MAGIC_TYPE3)))) {
1358        if (accept_r->level >= avail->level) {
1359            avail->level_matched = avail->level;
1360            avail->mime_stars = 3;
1361            return 1;
1362        }
1363    }
1364
1365    return OK;
1366}
1367
1368/* This code implements a piece of the tie-breaking algorithm between
1369 * variants of equal quality.  This piece is the treatment of variants
1370 * of the same base media type, but different levels.  What we want to
1371 * return is the variant at the highest level that the client explicitly
1372 * claimed to accept.
1373 *
1374 * If all the variants available are at a higher level than that, or if
1375 * the client didn't say anything specific about this media type at all
1376 * and these variants just got in on a wildcard, we prefer the lowest
1377 * level, on grounds that that's the one that the client is least likely
1378 * to choke on.
1379 *
1380 * (This is all motivated by treatment of levels in HTML --- we only
1381 * want to give level 3 to browsers that explicitly ask for it; browsers
1382 * that don't, including HTTP/0.9 browsers that only get the implicit
1383 * "Accept: * / *" [space added to avoid confusing cpp --- no, that
1384 * syntax doesn't really work] should get HTML2 if available).
1385 *
1386 * (Note that this code only comes into play when we are choosing among
1387 * variants of equal quality, where the draft standard gives us a fair
1388 * bit of leeway about what to do.  It ain't specified by the standard;
1389 * rather, it is a choice made by this server about what to do in cases
1390 * where the standard does not specify a unique course of action).
1391 */
1392
1393static int level_cmp(var_rec *var1, var_rec *var2)
1394{
1395    /* Levels are only comparable between matching media types */
1396
1397    if (var1->is_pseudo_html && !var2->is_pseudo_html) {
1398        return 0;
1399    }
1400
1401    if (!var1->is_pseudo_html && strcmp(var1->mime_type, var2->mime_type)) {
1402        return 0;
1403    }
1404    /* The result of the above if statements is that, if we get to
1405     * here, both variants have the same mime_type or both are
1406     * pseudo-html.
1407     */
1408
1409    /* Take highest level that matched, if either did match. */
1410
1411    if (var1->level_matched > var2->level_matched) {
1412        return 1;
1413    }
1414    if (var1->level_matched < var2->level_matched) {
1415        return -1;
1416    }
1417
1418    /* Neither matched.  Take lowest level, if there's a difference. */
1419
1420    if (var1->level < var2->level) {
1421        return 1;
1422    }
1423    if (var1->level > var2->level) {
1424        return -1;
1425    }
1426
1427    /* Tied */
1428
1429    return 0;
1430}
1431
1432/* Finding languages.  The main entry point is set_language_quality()
1433 * which is called for each variant. It sets two elements in the
1434 * variant record:
1435 *    language_quality  - the 'q' value of the 'best' matching language
1436 *                        from Accept-Language: header (HTTP/1.1)
1437 *    lang_index    -     Non-negotiated language priority, using
1438 *                        position of language on the Accept-Language:
1439 *                        header, if present, else LanguagePriority
1440 *                        directive order.
1441 *
1442 * When we do the variant checking for best variant, we use language
1443 * quality first, and if a tie, language_index next (this only applies
1444 * when _not_ using the RVSA/1.0 algorithm). If using the RVSA/1.0
1445 * algorithm, lang_index is never used.
1446 *
1447 * set_language_quality() calls find_lang_index() and find_default_index()
1448 * to set lang_index.
1449 */
1450
1451static int find_lang_index(apr_array_header_t *accept_langs, char *lang)
1452{
1453    const char **alang;
1454    int i;
1455
1456    if (!lang || !accept_langs) {
1457        return -1;
1458    }
1459
1460    alang = (const char **) accept_langs->elts;
1461
1462    for (i = 0; i < accept_langs->nelts; ++i) {
1463        if (!strncmp(lang, *alang, strlen(*alang))) {
1464            return i;
1465        }
1466        alang += (accept_langs->elt_size / sizeof(char*));
1467    }
1468
1469    return -1;
1470}
1471
1472/* set_default_lang_quality() sets the quality we apply to variants
1473 * which have no language assigned to them. If none of the variants
1474 * have a language, we are not negotiating on language, so all are
1475 * acceptable, and we set the default q value to 1.0. However if
1476 * some of the variants have languages, we set this default to 0.0001.
1477 * The value of this default will be applied to all variants with
1478 * no explicit language -- which will have the effect of making them
1479 * acceptable, but only if no variants with an explicit language
1480 * are acceptable. The default q value set here is assigned to variants
1481 * with no language type in set_language_quality().
1482 *
1483 * Note that if using the RVSA/1.0 algorithm, we don't use this
1484 * fiddle.
1485 */
1486
1487static void set_default_lang_quality(negotiation_state *neg)
1488{
1489    var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
1490    int j;
1491
1492    if (!neg->dont_fiddle_headers) {
1493        for (j = 0; j < neg->avail_vars->nelts; ++j) {
1494            var_rec *variant = &avail_recs[j];
1495            if (variant->content_languages &&
1496                variant->content_languages->nelts) {
1497                neg->default_lang_quality = 0.0001f;
1498                return;
1499            }
1500        }
1501    }
1502
1503    neg->default_lang_quality = 1.0f;
1504}
1505
/* Set the lang_quality value in the variant record. Also
 * assigns lang_index for ForceLanguagePriority.
 *
 * To find the language quality value, we look for the 'q' value
 * of the 'best' matching language on the Accept-Language
 * header. The 'best' match is the language on Accept-Language
 * header which matches the language of this variant either fully,
 * or as far as the prefix marker (-). If two or more languages
 * match, use the longest string from the Accept-Language header
 * (see HTTP/1.1 [14.4])
 *
 * When a variant has multiple languages, we find the 'best'
 * match for each variant language tag as above, then select the
 * one with the highest q value. Because both the accept-header
 * and variant can have multiple languages, we now have a hairy
 * loop-within-a-loop here.
 *
 * If the variant has no language, we use the default as set by
 * set_default_lang_quality() (1.0 if we are not negotiating on
 * language, 0.0001 if we are), unless we may not fiddle with the
 * headers, in which case the quality is left untouched.  Either
 * way the function returns right away for such a variant.
 *
 * If the variant has a language but there is no Accept-Language
 * header, the quality is left untouched and the variant is marked
 * as not definite.
 *
 * Following the setting of the language quality, we drop through to
 * set 'lang_index' from the position of the variant's languages in
 * the LanguagePriority list (neg->conf->language_priority), subject
 * to the ForceLanguagePriority flags.
 */

static void set_language_quality(negotiation_state *neg, var_rec *variant)
{
    int forcepriority = neg->conf->forcelangpriority;
    if (forcepriority == FLP_UNDEF) {
        forcepriority = FLP_DEFAULT;
    }

    if (!variant->content_languages || !variant->content_languages->nelts) {
        /* This variant has no content-language, so use the default
         * quality factor for variants with no content-language
         * (previously set by set_default_lang_quality()).
         * Leave the factor alone (it remains at 1.0) when we may not fiddle
         * with the headers.
         */
        if (!neg->dont_fiddle_headers) {
            variant->lang_quality = neg->default_lang_quality;
        }
        /* Both paths below return; the Accept-Language test makes no
         * difference to the outcome.
         */
        if (!neg->accept_langs) {
            return;             /* no accept-language header */
        }
        return;
    }
    else {
        /* Variant has one (or more) languages.  Look for the best
         * match. We do this by going through each language on the
         * variant description looking for a match on the
         * Accept-Language header. The best match is the longest
         * matching language on the header. The final result is the
         * best q value from all the languages on the variant
         * description.
         */

        if (!neg->accept_langs) {
            /* no accept-language header makes the variant indefinite */
            variant->definite = 0;
        }
        else {    /* There is an accept-language with 0 or more items */
            accept_rec *accs = (accept_rec *) neg->accept_langs->elts;
            accept_rec *best = NULL, *star = NULL;
            accept_rec *bestthistag;
            char *lang, *p;
            float fiddle_q = 0.0f;
            int any_match_on_star = 0;
            int i, j;
            apr_size_t alen, longest_lang_range_len;

            for (j = 0; j < variant->content_languages->nelts; ++j) {
                p = NULL;
                bestthistag = NULL;
                longest_lang_range_len = 0;

                /* lang is the variant's language-tag, which is the one
                 * we are allowed to use the prefix of in HTTP/1.1
                 */
                lang = ((char **) (variant->content_languages->elts))[j];

                /* now find the best (i.e. longest) matching
                 * Accept-Language header language. We put the best match
                 * for this tag in bestthistag. We cannot update the
                 * overall best (based on q value) because the best match
                 * for this tag is the longest language item on the accept
                 * header, not necessarily the highest q.
                 */
                for (i = 0; i < neg->accept_langs->nelts; ++i) {
                    /* Remember the first "*" range; it is dealt with
                     * after all tags have been looked at.
                     */
                    if (!strcmp(accs[i].name, "*")) {
                        if (!star) {
                            star = &accs[i];
                        }
                        continue;
                    }
                    /* Find language. We match if either the variant
                     * language tag exactly matches the language range
                     * from the accept header, or a prefix of the variant
                     * language tag up to a '-' character matches the
                     * whole of the language range in the Accept-Language
                     * header.  Note that HTTP/1.x allows any number of
                     * '-' characters in a tag or range, currently only
                     * tags with zero or one '-' characters are defined
                     * for general use (see rfc1766).
                     *
                     * We only use a language range in the Accept-Language
                     * header as the best match for the variant language
                     * tag if it is longer than the previous best match.
                     */

                    alen = strlen(accs[i].name);

                    if ((strlen(lang) >= alen) &&
                        !strncmp(lang, accs[i].name, alen) &&
                        ((lang[alen] == 0) || (lang[alen] == '-')) ) {

                        if (alen > longest_lang_range_len) {
                            longest_lang_range_len = alen;
                            bestthistag = &accs[i];
                        }
                    }

                    if (!bestthistag && !neg->dont_fiddle_headers) {
                        /* The next bit is a fiddle. Some browsers might
                         * be configured to send more specific language
                         * ranges than desirable. For example, an
                         * Accept-Language of en-US should never match
                         * variants with languages en or en-GB. But US
                         * English speakers might pick en-US as their
                         * language choice.  So this fiddle checks if the
                         * language range has a prefix, and if so, it
                         * matches variants which match that prefix with a
                         * priority of 0.001. So a request for en-US would
                         * match variants of types en and en-GB, but at
                         * much lower priority than matches of en-US
                         * directly, or of any other language listed on
                         * the Accept-Language header. Note that this
                         * fiddle does not handle multi-level prefixes.
                         */
                        if ((p = strchr(accs[i].name, '-'))) {
                            int plen = p - accs[i].name;

                            if (!strncmp(lang, accs[i].name, plen)) {
                                fiddle_q = 0.001f;
                            }
                        }
                    }
                }
                /* Finished looking at Accept-Language headers, the best
                 * (longest) match is in bestthistag, or NULL if no match
                 */
                if (!best ||
                    (bestthistag && bestthistag->quality > best->quality)) {
                    best = bestthistag;
                }

                /* See if the tag matches on a * in the Accept-Language
                 * header. If so, record this fact for later use
                 */
                if (!bestthistag && star) {
                    any_match_on_star = 1;
                }
            }

            /* If one of the language tags of the variant matched on *, we
             * need to see if its q is better than that of any non-* match
             * on any other tag of the variant.  If so the * match takes
             * precedence and the overall match is not definite.
             */
            if ( any_match_on_star &&
                ((best && star->quality > best->quality) ||
                 (!best)) ) {
                best = star;
                variant->definite = 0;
            }

            /* With no match at all, fall back to the fiddle value, which
             * is 0 unless a prefix fiddle fired above.
             */
            variant->lang_quality = best ? best->quality : fiddle_q;
        }
    }

    /* Handle the ForceLanguagePriority overrides, based on the best match
     * to LanguagePriority order.  The best match is the lowest index of
     * any LanguagePriority match.  Only variants that have at least one
     * language get here; the no-language case returned above.
     */
    if (((forcepriority & FLP_PREFER)
             && (variant->lang_index < 0))
     || ((forcepriority & FLP_FALLBACK)
             && !variant->lang_quality))
    {
        int bestidx = -1;
        int j;

        for (j = 0; j < variant->content_languages->nelts; ++j)
        {
            /* lang is the variant's language-tag, which is the one
             * we are allowed to use the prefix of in HTTP/1.1
             */
            char *lang = ((char **) (variant->content_languages->elts))[j];
            int idx = -1;

            /* Whether we prefer or fall back, the index comes from
             * our own LanguagePriority list.
             */
            idx = find_lang_index(neg->conf->language_priority, lang);
            if ((idx >= 0) && ((bestidx == -1) || (idx < bestidx))) {
                bestidx = idx;
            }
        }

        if (bestidx >= 0) {
            if (variant->lang_quality) {
                /* Acceptable variant: the index only breaks ties */
                if (forcepriority & FLP_PREFER) {
                    variant->lang_index = bestidx;
                }
            }
            else {
                /* Quality is zero: with FLP_FALLBACK give the variant a
                 * small non-zero quality and mark it as not definite.
                 */
                if (forcepriority & FLP_FALLBACK) {
                    variant->lang_index = bestidx;
                    variant->lang_quality = .0001f;
                    variant->definite = 0;
                }
            }
        }
    }
    return;
}
1738
1739/* Determining the content length --- if the map didn't tell us,
1740 * we have to do a stat() and remember for next time.
1741 */
1742
1743static apr_off_t find_content_length(negotiation_state *neg, var_rec *variant)
1744{
1745    apr_finfo_t statb;
1746
1747    if (variant->bytes < 0) {
1748        if (   variant->sub_req
1749            && (variant->sub_req->finfo.valid & APR_FINFO_SIZE)) {
1750            variant->bytes = variant->sub_req->finfo.size;
1751        }
1752        else {
1753            char *fullname = ap_make_full_path(neg->pool, neg->dir_name,
1754                                               variant->file_name);
1755
1756            if (apr_stat(&statb, fullname,
1757                         APR_FINFO_SIZE, neg->pool) == APR_SUCCESS) {
1758                variant->bytes = statb.size;
1759            }
1760        }
1761    }
1762
1763    return variant->bytes;
1764}
1765
1766/* For a given variant, find the best matching Accept: header
1767 * and assign the Accept: header's quality value to the
1768 * mime_type_quality field of the variant, for later use in
1769 * determining the best matching variant.
1770 */
1771
1772static void set_accept_quality(negotiation_state *neg, var_rec *variant)
1773{
1774    int i;
1775    accept_rec *accept_recs;
1776    float q = 0.0f;
1777    int q_definite = 1;
1778
1779    /* if no Accept: header, leave quality alone (will
1780     * remain at the default value of 1)
1781     *
1782     * XXX: This if is currently never true because of the effect of
1783     * maybe_add_default_accepts().
1784     */
1785    if (!neg->accepts) {
1786        if (variant->mime_type && *variant->mime_type)
1787            variant->definite = 0;
1788        return;
1789    }
1790
1791    accept_recs = (accept_rec *) neg->accepts->elts;
1792
1793    /*
1794     * Go through each of the ranges on the Accept: header,
1795     * looking for the 'best' match with this variant's
1796     * content-type. We use the best match's quality
1797     * value (from the Accept: header) for this variant's
1798     * mime_type_quality field.
1799     *
1800     * The best match is determined like this:
1801     *    type/type is better than type/ * is better than * / *
1802     *    if match is type/type, use the level mime param if available
1803     */
1804    for (i = 0; i < neg->accepts->nelts; ++i) {
1805
1806        accept_rec *type = &accept_recs[i];
1807        int prev_mime_stars;
1808
1809        prev_mime_stars = variant->mime_stars;
1810
1811        if (!mime_match(type, variant)) {
1812            continue;           /* didn't match the content type at all */
1813        }
1814        else {
1815            /* did match - see if there were less or more stars than
1816             * in previous match
1817             */
1818            if (prev_mime_stars == variant->mime_stars) {
1819                continue;       /* more stars => not as good a match */
1820            }
1821        }
1822
1823        /* If we are allowed to mess with the q-values
1824         * and have no explicit q= parameters in the accept header,
1825         * make wildcards very low, so we have a low chance
1826         * of ending up with them if there's something better.
1827         */
1828
1829        if (!neg->dont_fiddle_headers && !neg->accept_q &&
1830            variant->mime_stars == 1) {
1831            q = 0.01f;
1832        }
1833        else if (!neg->dont_fiddle_headers && !neg->accept_q &&
1834                 variant->mime_stars == 2) {
1835            q = 0.02f;
1836        }
1837        else {
1838            q = type->quality;
1839        }
1840
1841        q_definite = (variant->mime_stars == 3);
1842    }
1843    variant->mime_type_quality = q;
1844    variant->definite = variant->definite && q_definite;
1845
1846}
1847
1848/* For a given variant, find the 'q' value of the charset given
1849 * on the Accept-Charset line. If no charsets are listed,
1850 * assume value of '1'.
1851 */
1852static void set_charset_quality(negotiation_state *neg, var_rec *variant)
1853{
1854    int i;
1855    accept_rec *accept_recs;
1856    const char *charset = variant->content_charset;
1857    accept_rec *star = NULL;
1858
1859    /* if no Accept-Charset: header, leave quality alone (will
1860     * remain at the default value of 1)
1861     */
1862    if (!neg->accept_charsets) {
1863        if (charset && *charset)
1864            variant->definite = 0;
1865        return;
1866    }
1867
1868    accept_recs = (accept_rec *) neg->accept_charsets->elts;
1869
1870    if (charset == NULL || !*charset) {
1871        /* Charset of variant not known */
1872
1873        /* if not a text / * type, leave quality alone */
1874        if (!(!strncmp(variant->mime_type, "text/", 5)
1875              || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE)
1876              || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE3)
1877              ))
1878            return;
1879
1880        /* Don't go guessing if we are in strict header mode,
1881         * e.g. when running the rvsa, as any guess won't be reflected
1882         * in the variant list or content-location headers.
1883         */
1884        if (neg->dont_fiddle_headers)
1885            return;
1886
1887        charset = "iso-8859-1"; /* The default charset for HTTP text types */
1888    }
1889
1890    /*
1891     * Go through each of the items on the Accept-Charset header,
1892     * looking for a match with this variant's charset. If none
1893     * match, charset is unacceptable, so set quality to 0.
1894     */
1895    for (i = 0; i < neg->accept_charsets->nelts; ++i) {
1896
1897        accept_rec *type = &accept_recs[i];
1898
1899        if (!strcmp(type->name, charset)) {
1900            variant->charset_quality = type->quality;
1901            return;
1902        }
1903        else if (strcmp(type->name, "*") == 0) {
1904            star = type;
1905        }
1906    }
1907    /* No explicit match */
1908    if (star) {
1909        variant->charset_quality = star->quality;
1910        variant->definite = 0;
1911        return;
1912    }
1913    /* If this variant is in charset iso-8859-1, the default is 1.0 */
1914    if (strcmp(charset, "iso-8859-1") == 0) {
1915        variant->charset_quality = 1.0f;
1916    }
1917    else {
1918        variant->charset_quality = 0.0f;
1919    }
1920}
1921
1922
/* is_identity_encoding is kept for backwards compatibility, but does
 * anyone use 7bit, 8bit or binary in their var files??
 *
 * Returns 1 when enc is NULL, empty, or one of "7bit", "8bit" or
 * "binary" (exact, case-sensitive match), and 0 otherwise.
 */

static int is_identity_encoding(const char *enc)
{
    static const char *const legacy_names[] = { "7bit", "8bit", "binary" };
    size_t k;

    if (enc == NULL || enc[0] == '\0') {
        return 1;
    }

    for (k = 0; k < sizeof(legacy_names) / sizeof(legacy_names[0]); ++k) {
        if (strcmp(enc, legacy_names[k]) == 0) {
            return 1;
        }
    }

    return 0;
}
1932
1933/*
1934 * set_encoding_quality determines whether the encoding for a particular
1935 * variant is acceptable for the user-agent.
1936 *
1937 * The rules for encoding are that if the user-agent does not supply
1938 * any Accept-Encoding header, then all encodings are allowed but a
1939 * variant with no encoding should be preferred.
1940 * If there is an empty Accept-Encoding header, then no encodings are
1941 * acceptable. If there is a non-empty Accept-Encoding header, then
1942 * any of the listed encodings are acceptable, as well as no encoding
1943 * unless the "identity" encoding is specifically excluded.
1944 */
1945static void set_encoding_quality(negotiation_state *neg, var_rec *variant)
1946{
1947    accept_rec *accept_recs;
1948    const char *enc = variant->content_encoding;
1949    accept_rec *star = NULL;
1950    float value_if_not_found = 0.0f;
1951    int i;
1952
1953    if (!neg->accept_encodings) {
1954        /* We had no Accept-Encoding header, assume that all
1955         * encodings are acceptable with a low quality,
1956         * but we prefer no encoding if available.
1957         */
1958        if (!enc || is_identity_encoding(enc))
1959            variant->encoding_quality = 1.0f;
1960        else
1961            variant->encoding_quality = 0.5f;
1962
1963        return;
1964    }
1965
1966    if (!enc || is_identity_encoding(enc)) {
1967        enc = "identity";
1968        value_if_not_found = 0.0001f;
1969    }
1970
1971    accept_recs = (accept_rec *) neg->accept_encodings->elts;
1972
1973    /* Go through each of the encodings on the Accept-Encoding: header,
1974     * looking for a match with our encoding. x- prefixes are ignored.
1975     */
1976    if (enc[0] == 'x' && enc[1] == '-') {
1977        enc += 2;
1978    }
1979    for (i = 0; i < neg->accept_encodings->nelts; ++i) {
1980
1981        char *name = accept_recs[i].name;
1982
1983        if (name[0] == 'x' && name[1] == '-') {
1984            name += 2;
1985        }
1986
1987        if (!strcmp(name, enc)) {
1988            variant->encoding_quality = accept_recs[i].quality;
1989            return;
1990        }
1991
1992        if (strcmp(name, "*") == 0) {
1993            star = &accept_recs[i];
1994        }
1995
1996    }
1997    /* No explicit match */
1998    if (star) {
1999        variant->encoding_quality = star->quality;
2000        return;
2001    }
2002
2003    /* Encoding not found on Accept-Encoding: header, so it is
2004     * _not_ acceptable unless it is the identity (no encoding)
2005     */
2006    variant->encoding_quality = value_if_not_found;
2007}
2008
/*************************************************************
 * Outcomes the variant selection algorithm can report
 */
enum algorithm_results {
    alg_choice = 1,     /* a variant was chosen */
    alg_list   = 2      /* no variant chosen: list the variants */
};
2016
2017/* Below is the 'best_match' function. It returns an int, which has
2018 * one of the two values alg_choice or alg_list, which give the result
2019 * of the variant selection algorithm.  alg_list means that no best
2020 * variant was found by the algorithm, alg_choice means that a best
2021 * variant was found and should be returned.  The list/choice
2022 * terminology comes from TCN (rfc2295), but is used in a more generic
2023 * way here.  The best variant is returned in *pbest. best_match has
2024 * two possible algorithms for determining the best variant: the
2025 * RVSA/1.0 algorithm (from RFC2296), and the standard Apache
2026 * algorithm. These are split out into separate functions
2027 * (is_variant_better_rvsa() and is_variant_better()).  Selection of
2028 * one is through the neg->use_rvsa flag.
2029 *
2030 * The call to best_match also creates full information, including
2031 * language, charset, etc quality for _every_ variant. This is needed
2032 * for generating a correct Vary header, and can be used for the
2033 * Alternates header, the human-readable list responses and 406 errors.
2034 */
2035
2036/* Firstly, the RVSA/1.0 (HTTP Remote Variant Selection Algorithm
2037 * v1.0) from rfc2296.  This is the algorithm that goes together with
2038 * transparent content negotiation (TCN).
2039 */
2040static int is_variant_better_rvsa(negotiation_state *neg, var_rec *variant,
2041                                  var_rec *best, float *p_bestq)
2042{
2043    float bestq = *p_bestq, q;
2044
2045    /* TCN does not cover negotiation on content-encoding.  For now,
2046     * we ignore the encoding unless it was explicitly excluded.
2047     */
2048    if (variant->encoding_quality == 0.0f)
2049        return 0;
2050
2051    q = variant->mime_type_quality *
2052        variant->source_quality *
2053        variant->charset_quality *
2054        variant->lang_quality;
2055
2056   /* RFC 2296 calls for the result to be rounded to 5 decimal places,
2057    * but we don't do that because it serves no useful purpose other
2058    * than to ensure that a remote algorithm operates on the same
2059    * precision as ours.  That is silly, since what we obviously want
2060    * is for the algorithm to operate on the best available precision
2061    * regardless of who runs it.  Since the above calculation may
2062    * result in significant variance at 1e-12, rounding would be bogus.
2063    */
2064
2065#ifdef NEG_DEBUG
2066    ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00688)
2067           "Variant: file=%s type=%s lang=%s sourceq=%1.3f "
2068           "mimeq=%1.3f langq=%1.3f charq=%1.3f encq=%1.3f "
2069           "q=%1.5f definite=%d",
2070            (variant->file_name ? variant->file_name : ""),
2071            (variant->mime_type ? variant->mime_type : ""),
2072            (variant->content_languages
2073             ? apr_array_pstrcat(neg->pool, variant->content_languages, ',')
2074             : ""),
2075            variant->source_quality,
2076            variant->mime_type_quality,
2077            variant->lang_quality,
2078            variant->charset_quality,
2079            variant->encoding_quality,
2080            q,
2081            variant->definite);
2082#endif
2083
2084    if (q <= 0.0f) {
2085        return 0;
2086    }
2087    if (q > bestq) {
2088        *p_bestq = q;
2089        return 1;
2090    }
2091    if (q == bestq) {
2092        /* If the best variant's encoding is of lesser quality than
2093         * this variant, then we prefer this variant
2094         */
2095        if (variant->encoding_quality > best->encoding_quality) {
2096            *p_bestq = q;
2097            return 1;
2098        }
2099    }
2100    return 0;
2101}
2102
2103/* Negotiation algorithm as used by previous versions of Apache
2104 * (just about).
2105 */
2106
2107static int is_variant_better(negotiation_state *neg, var_rec *variant,
2108                             var_rec *best, float *p_bestq)
2109{
2110    float bestq = *p_bestq, q;
2111    int levcmp;
2112
2113    /* For non-transparent negotiation, server can choose how
2114     * to handle the negotiation. We'll use the following in
2115     * order: content-type, language, content-type level, charset,
2116     * content encoding, content length.
2117     *
2118     * For each check, we have three possible outcomes:
2119     *   This variant is worse than current best: return 0
2120     *   This variant is better than the current best:
2121     *          assign this variant's q to *p_bestq, and return 1
2122     *   This variant is just as desirable as the current best:
2123     *          drop through to the next test.
2124     *
2125     * This code is written in this long-winded way to allow future
2126     * customisation, either by the addition of additional
2127     * checks, or to allow the order of the checks to be determined
2128     * by configuration options (e.g. we might prefer to check
2129     * language quality _before_ content type).
2130     */
2131
2132    /* First though, eliminate this variant if it is not
2133     * acceptable by type, charset, encoding or language.
2134     */
2135
2136#ifdef NEG_DEBUG
2137    ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00689)
2138           "Variant: file=%s type=%s lang=%s sourceq=%1.3f "
2139           "mimeq=%1.3f langq=%1.3f langidx=%d charq=%1.3f encq=%1.3f ",
2140            (variant->file_name ? variant->file_name : ""),
2141            (variant->mime_type ? variant->mime_type : ""),
2142            (variant->content_languages
2143             ? apr_array_pstrcat(neg->pool, variant->content_languages, ',')
2144             : ""),
2145            variant->source_quality,
2146            variant->mime_type_quality,
2147            variant->lang_quality,
2148            variant->lang_index,
2149            variant->charset_quality,
2150            variant->encoding_quality);
2151#endif
2152
2153    if (variant->encoding_quality == 0.0f ||
2154        variant->lang_quality == 0.0f ||
2155        variant->source_quality == 0.0f ||
2156        variant->charset_quality == 0.0f ||
2157        variant->mime_type_quality == 0.0f) {
2158        return 0;               /* don't consider unacceptables */
2159    }
2160
2161    q = variant->mime_type_quality * variant->source_quality;
2162    if (q == 0.0 || q < bestq) {
2163        return 0;
2164    }
2165    if (q > bestq || !best) {
2166        *p_bestq = q;
2167        return 1;
2168    }
2169
2170    /* language */
2171    if (variant->lang_quality < best->lang_quality) {
2172        return 0;
2173    }
2174    if (variant->lang_quality > best->lang_quality) {
2175        *p_bestq = q;
2176        return 1;
2177    }
2178
2179    /* if language qualities were equal, try the LanguagePriority stuff */
2180    if (best->lang_index != -1 &&
2181        (variant->lang_index == -1 || variant->lang_index > best->lang_index)) {
2182        return 0;
2183    }
2184    if (variant->lang_index != -1 &&
2185        (best->lang_index == -1 || variant->lang_index < best->lang_index)) {
2186        *p_bestq = q;
2187        return 1;
2188    }
2189
2190    /* content-type level (sometimes used with text/html, though we
2191     * support it on other types too)
2192     */
2193    levcmp = level_cmp(variant, best);
2194    if (levcmp == -1) {
2195        return 0;
2196    }
2197    if (levcmp == 1) {
2198        *p_bestq = q;
2199        return 1;
2200    }
2201
2202    /* charset */
2203    if (variant->charset_quality < best->charset_quality) {
2204        return 0;
2205    }
2206    /* If the best variant's charset is ISO-8859-1 and this variant has
2207     * the same charset quality, then we prefer this variant
2208     */
2209
2210    if (variant->charset_quality > best->charset_quality ||
2211        ((variant->content_charset != NULL &&
2212          *variant->content_charset != '\0' &&
2213          strcmp(variant->content_charset, "iso-8859-1") != 0) &&
2214         (best->content_charset == NULL ||
2215          *best->content_charset == '\0' ||
2216          strcmp(best->content_charset, "iso-8859-1") == 0))) {
2217        *p_bestq = q;
2218        return 1;
2219    }
2220
2221    /* Prefer the highest value for encoding_quality.
2222     */
2223    if (variant->encoding_quality < best->encoding_quality) {
2224       return 0;
2225    }
2226    if (variant->encoding_quality > best->encoding_quality) {
2227       *p_bestq = q;
2228       return 1;
2229    }
2230
2231    /* content length if all else equal */
2232    if (find_content_length(neg, variant) >= find_content_length(neg, best)) {
2233        return 0;
2234    }
2235
2236    /* ok, to get here means every thing turned out equal, except
2237     * we have a shorter content length, so use this variant
2238     */
2239    *p_bestq = q;
2240    return 1;
2241}
2242
2243/* figure out, whether a variant is in a specific language
2244 * it returns also false, if the variant has no language.
2245 */
2246static int variant_has_language(var_rec *variant, const char *lang)
2247{
2248    int j, max;
2249
2250    /* fast exit */
2251    if (   !lang
2252        || !variant->content_languages
2253        || !(max = variant->content_languages->nelts)) {
2254        return 0;
2255    }
2256
2257    for (j = 0; j < max; ++j) {
2258        if (!strcmp(lang,
2259                    ((char **) (variant->content_languages->elts))[j])) {
2260            return 1;
2261        }
2262    }
2263
2264    return 0;
2265}
2266
2267/* check for environment variables 'no-gzip' and
2268 * 'gzip-only-text/html' to get a behaviour similiar
2269 * to mod_deflate
2270 */
2271static int discard_variant_by_env(var_rec *variant, int discard)
2272{
2273    if (   is_identity_encoding(variant->content_encoding)
2274        || !strcmp(variant->content_encoding, "identity")) {
2275        return 0;
2276    }
2277
2278    return (   (discard == DISCARD_ALL_ENCODINGS)
2279            || (discard == DISCARD_ALL_BUT_HTML
2280                && (!variant->mime_type
2281                    || strncmp(variant->mime_type, "text/html", 9))));
2282}
2283
2284static int best_match(negotiation_state *neg, var_rec **pbest)
2285{
2286    int j;
2287    var_rec *best;
2288    float bestq = 0.0f;
2289    enum algorithm_results algorithm_result;
2290    int may_discard = 0;
2291
2292    var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
2293
2294    /* fetch request dependent variables
2295     * prefer-language: prefer a certain language.
2296     */
2297    const char *preferred_language = apr_table_get(neg->r->subprocess_env,
2298                                                   "prefer-language");
2299
2300    /* no-gzip: do not send encoded documents */
2301    if (apr_table_get(neg->r->subprocess_env, "no-gzip")) {
2302        may_discard = DISCARD_ALL_ENCODINGS;
2303    }
2304
2305    /* gzip-only-text/html: send encoded documents only
2306     * if they are text/html. (no-gzip has a higher priority).
2307     */
2308    else {
2309        const char *env_value = apr_table_get(neg->r->subprocess_env,
2310                                              "gzip-only-text/html");
2311
2312        if (env_value && !strcmp(env_value, "1")) {
2313            may_discard = DISCARD_ALL_BUT_HTML;
2314        }
2315    }
2316
2317    set_default_lang_quality(neg);
2318
2319    /*
2320     * Find the 'best' variant
2321     * We run the loop possibly twice: if "prefer-language"
2322     * environment variable is set but we did not find an appropriate
2323     * best variant. In that case forget the preferred language and
2324     * negotiate over all variants.
2325     */
2326
2327    do {
2328        best = NULL;
2329
2330        for (j = 0; j < neg->avail_vars->nelts; ++j) {
2331            var_rec *variant = &avail_recs[j];
2332
2333            /* if this variant is encoded somehow and there are special
2334             * variables set, we do not negotiate it. see above.
2335             */
2336            if (   may_discard
2337                && discard_variant_by_env(variant, may_discard)) {
2338                continue;
2339            }
2340
2341            /* if a language is preferred, but the current variant
2342             * is not in that language, then drop it for now
2343             */
2344            if (   preferred_language
2345                && !variant_has_language(variant, preferred_language)) {
2346                continue;
2347            }
2348
2349            /* Find all the relevant 'quality' values from the
2350             * Accept... headers, and store in the variant.  This also
2351             * prepares for sending an Alternates header etc so we need to
2352             * do it even if we do not actually plan to find a best
2353             * variant.
2354             */
2355            set_accept_quality(neg, variant);
2356            /* accept the preferred language, even when it's not listed within
2357             * the Accept-Language header
2358             */
2359            if (preferred_language) {
2360                variant->lang_quality = 1.0f;
2361                variant->definite = 1;
2362            }
2363            else {
2364                set_language_quality(neg, variant);
2365            }
2366            set_encoding_quality(neg, variant);
2367            set_charset_quality(neg, variant);
2368
2369            /* Only do variant selection if we may actually choose a
2370             * variant for the client
2371             */
2372            if (neg->may_choose) {
2373
2374                /* Now find out if this variant is better than the current
2375                 * best, either using the RVSA/1.0 algorithm, or Apache's
2376                 * internal server-driven algorithm. Presumably other
2377                 * server-driven algorithms are possible, and could be
2378                 * implemented here.
2379                 */
2380
2381                if (neg->use_rvsa) {
2382                    if (is_variant_better_rvsa(neg, variant, best, &bestq)) {
2383                        best = variant;
2384                    }
2385                }
2386                else {
2387                    if (is_variant_better(neg, variant, best, &bestq)) {
2388                        best = variant;
2389                    }
2390                }
2391            }
2392        }
2393
2394        /* We now either have a best variant, or no best variant */
2395
2396        if (neg->use_rvsa)    {
2397            /* calculate result for RVSA/1.0 algorithm:
2398             * only a choice response if the best variant has q>0
2399             * and is definite
2400             */
2401            algorithm_result = (best && best->definite) && (bestq > 0) ?
2402                                alg_choice : alg_list;
2403        }
2404        else {
2405            /* calculate result for Apache negotiation algorithm */
2406            algorithm_result = bestq > 0 ? alg_choice : alg_list;
2407        }
2408
2409        /* run the loop again, if the "prefer-language" got no clear result */
2410        if (preferred_language && (!best || algorithm_result != alg_choice)) {
2411            preferred_language = NULL;
2412            continue;
2413        }
2414
2415        break;
2416    } while (1);
2417
2418    /* Returning a choice response with a non-neighboring variant is a
2419     * protocol security error in TCN (see rfc2295).  We do *not*
2420     * verify here that the variant and URI are neighbors, even though
2421     * we may return alg_choice.  We depend on the environment (the
2422     * caller) to only declare the resource transparently negotiable if
2423     * all variants are neighbors.
2424     */
2425    *pbest = best;
2426    return algorithm_result;
2427}
2428
2429/* Sets response headers for a negotiated response.
2430 * neg->is_transparent determines whether a transparently negotiated
2431 * response or a plain `server driven negotiation' response is
2432 * created.   Applicable headers are Alternates, Vary, and TCN.
2433 *
2434 * The Vary header we create is sometimes longer than is required for
2435 * the correct caching of negotiated results by HTTP/1.1 caches.  For
2436 * example if we have 3 variants x.html, x.ps.en and x.ps.nl, and if
2437 * the Accept: header assigns a 0 quality to .ps, then the results of
2438 * the two server-side negotiation algorithms we currently implement
2439 * will never depend on Accept-Language so we could return `Vary:
2440 * negotiate, accept' instead of the longer 'Vary: negotiate, accept,
2441 * accept-language' which the code below will return.  A routine for
2442 * computing the exact minimal Vary header would be a huge pain to code
2443 * and maintain though, especially because we need to take all possible
2444 * twiddles in the server-side negotiation algorithms into account.
2445 */
2446static void set_neg_headers(request_rec *r, negotiation_state *neg,
2447                            int alg_result)
2448{
2449    apr_table_t *hdrs;
2450    var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
2451    const char *sample_type = NULL;
2452    const char *sample_language = NULL;
2453    const char *sample_encoding = NULL;
2454    const char *sample_charset = NULL;
2455    char *lang;
2456    char *qstr;
2457    apr_off_t len;
2458    apr_array_header_t *arr;
2459    int max_vlist_array = (neg->avail_vars->nelts * 21);
2460    int first_variant = 1;
2461    int vary_by_type = 0;
2462    int vary_by_language = 0;
2463    int vary_by_charset = 0;
2464    int vary_by_encoding = 0;
2465    int j;
2466
2467    /* In order to avoid O(n^2) memory copies in building Alternates,
2468     * we preallocate a apr_table_t with the maximum substrings possible,
2469     * fill it with the variant list, and then concatenate the entire array.
2470     * Note that if you change the number of substrings pushed, you also
2471     * need to change the calculation of max_vlist_array above.
2472     */
2473    if (neg->send_alternates && neg->avail_vars->nelts)
2474        arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *));
2475    else
2476        arr = NULL;
2477
2478    /* Put headers into err_headers_out, since send_http_header()
2479     * outputs both headers_out and err_headers_out.
2480     */
2481    hdrs = r->err_headers_out;
2482
2483    for (j = 0; j < neg->avail_vars->nelts; ++j) {
2484        var_rec *variant = &avail_recs[j];
2485
2486        if (variant->content_languages && variant->content_languages->nelts) {
2487            lang = apr_array_pstrcat(r->pool, variant->content_languages, ',');
2488        }
2489        else {
2490            lang = NULL;
2491        }
2492
2493        /* Calculate Vary by looking for any difference between variants */
2494
2495        if (first_variant) {
2496            sample_type     = variant->mime_type;
2497            sample_charset  = variant->content_charset;
2498            sample_language = lang;
2499            sample_encoding = variant->content_encoding;
2500        }
2501        else {
2502            if (!vary_by_type &&
2503                strcmp(sample_type ? sample_type : "",
2504                       variant->mime_type ? variant->mime_type : "")) {
2505                vary_by_type = 1;
2506            }
2507            if (!vary_by_charset &&
2508                strcmp(sample_charset ? sample_charset : "",
2509                       variant->content_charset ?
2510                       variant->content_charset : "")) {
2511                vary_by_charset = 1;
2512            }
2513            if (!vary_by_language &&
2514                strcmp(sample_language ? sample_language : "",
2515                       lang ? lang : "")) {
2516                vary_by_language = 1;
2517            }
2518            if (!vary_by_encoding &&
2519                strcmp(sample_encoding ? sample_encoding : "",
2520                       variant->content_encoding ?
2521                       variant->content_encoding : "")) {
2522                vary_by_encoding = 1;
2523            }
2524        }
2525        first_variant = 0;
2526
2527        if (!neg->send_alternates)
2528            continue;
2529
2530        /* Generate the string components for this Alternates entry */
2531
2532        *((const char **) apr_array_push(arr)) = "{\"";
2533        *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, variant->file_name);
2534        *((const char **) apr_array_push(arr)) = "\" ";
2535
2536        qstr = (char *) apr_palloc(r->pool, 6);
2537        apr_snprintf(qstr, 6, "%1.3f", variant->source_quality);
2538
2539        /* Strip trailing zeros (saves those valuable network bytes) */
2540        if (qstr[4] == '0') {
2541            qstr[4] = '\0';
2542            if (qstr[3] == '0') {
2543                qstr[3] = '\0';
2544                if (qstr[2] == '0') {
2545                    qstr[1] = '\0';
2546                }
2547            }
2548        }
2549        *((const char **) apr_array_push(arr)) = qstr;
2550
2551        if (variant->mime_type && *variant->mime_type) {
2552            *((const char **) apr_array_push(arr)) = " {type ";
2553            *((const char **) apr_array_push(arr)) = variant->mime_type;
2554            *((const char **) apr_array_push(arr)) = "}";
2555        }
2556        if (variant->content_charset && *variant->content_charset) {
2557            *((const char **) apr_array_push(arr)) = " {charset ";
2558            *((const char **) apr_array_push(arr)) = variant->content_charset;
2559            *((const char **) apr_array_push(arr)) = "}";
2560        }
2561        if (lang) {
2562            *((const char **) apr_array_push(arr)) = " {language ";
2563            *((const char **) apr_array_push(arr)) = lang;
2564            *((const char **) apr_array_push(arr)) = "}";
2565        }
2566        if (variant->content_encoding && *variant->content_encoding) {
2567            /* Strictly speaking, this is non-standard, but so is TCN */
2568
2569            *((const char **) apr_array_push(arr)) = " {encoding ";
2570            *((const char **) apr_array_push(arr)) = variant->content_encoding;
2571            *((const char **) apr_array_push(arr)) = "}";
2572        }
2573
2574        /* Note that the Alternates specification (in rfc2295) does
2575         * not require that we include {length x}, so we could omit it
2576         * if determining the length is too expensive.  We currently
2577         * always include it though.
2578         *
2579         * If the variant is a CGI script, find_content_length would
2580         * return the length of the script, not the output it
2581         * produces, so we check for the presence of a handler and if
2582         * there is one we don't add a length.
2583         *
2584         * XXX: TODO: This check does not detect a CGI script if we
2585         * get the variant from a type map.  This needs to be fixed
2586         * (without breaking things if the type map specifies a
2587         * content-length, which currently leads to the correct result).
2588         */
2589        if (!(variant->sub_req && variant->sub_req->handler)
2590            && (len = find_content_length(neg, variant)) >= 0) {
2591
2592            *((const char **) apr_array_push(arr)) = " {length ";
2593            *((const char **) apr_array_push(arr)) = apr_off_t_toa(r->pool,
2594                                                                   len);
2595            *((const char **) apr_array_push(arr)) = "}";
2596        }
2597
2598        *((const char **) apr_array_push(arr)) = "}";
2599        *((const char **) apr_array_push(arr)) = ", "; /* trimmed below */
2600    }
2601
2602    if (neg->send_alternates && neg->avail_vars->nelts) {
2603        arr->nelts--;                                 /* remove last comma */
2604        apr_table_mergen(hdrs, "Alternates",
2605                        apr_array_pstrcat(r->pool, arr, '\0'));
2606    }
2607
2608    if (neg->is_transparent || vary_by_type || vary_by_language ||
2609        vary_by_charset || vary_by_encoding) {
2610
2611        apr_table_mergen(hdrs, "Vary", 2 + apr_pstrcat(r->pool,
2612            neg->is_transparent ? ", negotiate"       : "",
2613            vary_by_type        ? ", accept"          : "",
2614            vary_by_language    ? ", accept-language" : "",
2615            vary_by_charset     ? ", accept-charset"  : "",
2616            vary_by_encoding    ? ", accept-encoding" : "", NULL));
2617    }
2618
2619    if (neg->is_transparent) { /* Create TCN response header */
2620        apr_table_setn(hdrs, "TCN",
2621                      alg_result == alg_list ? "list" : "choice");
2622    }
2623}
2624
2625/**********************************************************************
2626 *
2627 * Return an HTML list of variants. This is output as part of the
2628 * choice response or 406 status body.
2629 */
2630
2631static char *make_variant_list(request_rec *r, negotiation_state *neg)
2632{
2633    apr_array_header_t *arr;
2634    int i;
2635    int max_vlist_array = (neg->avail_vars->nelts * 15) + 2;
2636
2637    /* In order to avoid O(n^2) memory copies in building the list,
2638     * we preallocate a apr_table_t with the maximum substrings possible,
2639     * fill it with the variant list, and then concatenate the entire array.
2640     */
2641    arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *));
2642
2643    *((const char **) apr_array_push(arr)) = "Available variants:\n<ul>\n";
2644
2645    for (i = 0; i < neg->avail_vars->nelts; ++i) {
2646        var_rec *variant = &((var_rec *) neg->avail_vars->elts)[i];
2647        const char *filename = variant->file_name ? variant->file_name : "";
2648        apr_array_header_t *languages = variant->content_languages;
2649        const char *description = variant->description
2650                                    ? variant->description
2651                                    : "";
2652
2653        /* The format isn't very neat, and it would be nice to make
2654         * the tags human readable (eg replace 'language en' with 'English').
2655         * Note that if you change the number of substrings pushed, you also
2656         * need to change the calculation of max_vlist_array above.
2657         */
2658        *((const char **) apr_array_push(arr)) = "<li><a href=\"";
2659        *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, filename);
2660        *((const char **) apr_array_push(arr)) = "\">";
2661        *((const char **) apr_array_push(arr)) = ap_escape_html(r->pool, filename);
2662        *((const char **) apr_array_push(arr)) = "</a> ";
2663        *((const char **) apr_array_push(arr)) = description;
2664
2665        if (variant->mime_type && *variant->mime_type) {
2666            *((const char **) apr_array_push(arr)) = ", type ";
2667            *((const char **) apr_array_push(arr)) = variant->mime_type;
2668        }
2669        if (languages && languages->nelts) {
2670            *((const char **) apr_array_push(arr)) = ", language ";
2671            *((const char **) apr_array_push(arr)) = apr_array_pstrcat(r->pool,
2672                                                       languages, ',');
2673        }
2674        if (variant->content_charset && *variant->content_charset) {
2675            *((const char **) apr_array_push(arr)) = ", charset ";
2676            *((const char **) apr_array_push(arr)) = variant->content_charset;
2677        }
2678        if (variant->content_encoding) {
2679            *((const char **) apr_array_push(arr)) = ", encoding ";
2680            *((const char **) apr_array_push(arr)) = variant->content_encoding;
2681        }
2682        *((const char **) apr_array_push(arr)) = "</li>\n";
2683    }
2684    *((const char **) apr_array_push(arr)) = "</ul>\n";
2685
2686    return apr_array_pstrcat(r->pool, arr, '\0');
2687}
2688
2689static void store_variant_list(request_rec *r, negotiation_state *neg)
2690{
2691    if (r->main == NULL) {
2692        apr_table_setn(r->notes, "variant-list", make_variant_list(r, neg));
2693    }
2694    else {
2695        apr_table_setn(r->main->notes, "variant-list",
2696                      make_variant_list(r->main, neg));
2697    }
2698}
2699
2700/* Called if we got a "Choice" response from the variant selection algorithm.
2701 * It checks the result of the chosen variant to see if it
2702 * is itself negotiated (if so, return error HTTP_VARIANT_ALSO_VARIES).
2703 * Otherwise, add the appropriate headers to the current response.
2704 */
2705
2706static int setup_choice_response(request_rec *r, negotiation_state *neg,
2707                                 var_rec *variant)
2708{
2709    request_rec *sub_req;
2710    const char *sub_vary;
2711
2712    if (!variant->sub_req) {
2713        int status;
2714
2715        sub_req = ap_sub_req_lookup_file(variant->file_name, r, r->output_filters);
2716        status = sub_req->status;
2717
2718        if (status != HTTP_OK &&
2719            !apr_table_get(sub_req->err_headers_out, "TCN")) {
2720            ap_destroy_sub_req(sub_req);
2721            return status;
2722        }
2723        variant->sub_req = sub_req;
2724    }
2725    else {
2726        sub_req = variant->sub_req;
2727    }
2728
2729    /* The variant selection algorithm told us to return a "Choice"
2730     * response. This is the normal variant response, with
2731     * some extra headers. First, ensure that the chosen
2732     * variant did or will not itself engage in transparent negotiation.
2733     * If not, set the appropriate headers, and fall through to
2734     * the normal variant handling
2735     */
2736
2737    /* This catches the error that a transparent type map selects a
2738     * transparent multiviews resource as the best variant.
2739     *
2740     * XXX: We do not signal an error if a transparent type map
2741     * selects a _non_transparent multiviews resource as the best
2742     * variant, because we can generate a legal negotiation response
2743     * in this case.  In this case, the vlist_validator of the
2744     * nontransparent subrequest will be lost however.  This could
2745     * lead to cases in which a change in the set of variants or the
2746     * negotiation algorithm of the nontransparent resource is never
2747     * propagated up to a HTTP/1.1 cache which interprets Vary.  To be
2748     * completely on the safe side we should return HTTP_VARIANT_ALSO_VARIES
2749     * for this type of recursive negotiation too.
2750     */
2751    if (neg->is_transparent &&
2752        apr_table_get(sub_req->err_headers_out, "TCN")) {
2753        return HTTP_VARIANT_ALSO_VARIES;
2754    }
2755
2756    /* This catches the error that a transparent type map recursively
2757     * selects, as the best variant, another type map which itself
2758     * causes transparent negotiation to be done.
2759     *
2760     * XXX: Actually, we catch this error by catching all cases of
2761     * type map recursion.  There are some borderline recursive type
2762     * map arrangements which would not produce transparent
2763     * negotiation protocol errors or lack of cache propagation
2764     * problems, but such arrangements are very hard to detect at this
2765     * point in the control flow, so we do not bother to single them
2766     * out.
2767     *
2768     * Recursive type maps imply a recursive arrangement of negotiated
2769     * resources which is visible to outside clients, and this is not
2770     * supported by the transparent negotiation caching protocols, so
2771     * if we are to have generic support for recursive type maps, we
2772     * have to create some configuration setting which makes all type
2773     * maps non-transparent when recursion is enabled.  Also, if we
2774     * want recursive type map support which ensures propagation of
2775     * type map changes into HTTP/1.1 caches that handle Vary, we
2776     * would have to extend the current mechanism for generating
2777     * variant list validators.
2778     */
2779    if (sub_req->handler && strcmp(sub_req->handler, "type-map") == 0) {
2780        return HTTP_VARIANT_ALSO_VARIES;
2781    }
2782
2783    /* This adds an appropriate Variant-Vary header if the subrequest
2784     * is a multiviews resource.
2785     *
2786     * XXX: TODO: Note that this does _not_ handle any Vary header
2787     * returned by a CGI if sub_req is a CGI script, because we don't
2788     * see that Vary header yet at this point in the control flow.
2789     * This won't cause any cache consistency problems _unless_ the
2790     * CGI script also returns a Cache-Control header marking the
2791     * response as cachable.  This needs to be fixed, also there are
2792     * problems if a CGI returns an Etag header which also need to be
2793     * fixed.
2794     */
2795    if ((sub_vary = apr_table_get(sub_req->err_headers_out, "Vary")) != NULL) {
2796        apr_table_setn(r->err_headers_out, "Variant-Vary", sub_vary);
2797
2798        /* Move the subreq Vary header into the main request to
2799         * prevent having two Vary headers in the response, which
2800         * would be legal but strange.
2801         */
2802        apr_table_setn(r->err_headers_out, "Vary", sub_vary);
2803        apr_table_unset(sub_req->err_headers_out, "Vary");
2804    }
2805
2806    apr_table_setn(r->err_headers_out, "Content-Location",
2807                  ap_escape_path_segment(r->pool, variant->file_name));
2808
2809    set_neg_headers(r, neg, alg_choice);         /* add Alternates and Vary */
2810
2811    /* Still to do by caller: add Expires */
2812
2813    return 0;
2814}
2815
2816/****************************************************************
2817 *
2818 * Executive...
2819 */
2820
2821static int do_negotiation(request_rec *r, negotiation_state *neg,
2822                          var_rec **bestp, int prefer_scripts)
2823{
2824    var_rec *avail_recs = (var_rec *) neg->avail_vars->elts;
2825    int alg_result;              /* result of variant selection algorithm */
2826    int res;
2827    int j;
2828
2829    /* Decide if resource is transparently negotiable */
2830
2831    /* GET or HEAD? (HEAD has same method number as GET) */
2832    if (r->method_number == M_GET) {
2833
2834        /* maybe this should be configurable, see also the comment
2835         * about recursive type maps in setup_choice_response()
2836         */
2837        neg->is_transparent = 1;
2838
2839        /* We can't be transparent if we are a map file in the middle
2840         * of the request URI.
2841         */
2842        if (r->path_info && *r->path_info)
2843            neg->is_transparent = 0;
2844
2845        for (j = 0; j < neg->avail_vars->nelts; ++j) {
2846            var_rec *variant = &avail_recs[j];
2847
2848            /* We can't be transparent, because of internal
2849             * assumptions in best_match(), if there is a
2850             * non-neighboring variant.  We can have a non-neighboring
2851             * variant when processing a type map.
2852             */
2853            if (ap_strchr_c(variant->file_name, '/'))
2854                neg->is_transparent = 0;
2855
2856            /* We can't be transparent, because of the behavior
2857             * of variant typemap bodies.
2858             */
2859            if (variant->body) {
2860                neg->is_transparent = 0;
2861            }
2862        }
2863    }
2864
2865    if (neg->is_transparent)  {
2866        parse_negotiate_header(r, neg);
2867    }
2868    else { /* configure negotiation on non-transparent resource */
2869        neg->may_choose = 1;
2870    }
2871
2872    maybe_add_default_accepts(neg, prefer_scripts);
2873
2874    alg_result = best_match(neg, bestp);
2875
2876    /* alg_result is one of
2877     *   alg_choice: a best variant is chosen
2878     *   alg_list: no best variant is chosen
2879     */
2880
2881    if (alg_result == alg_list) {
2882        /* send a list response or HTTP_NOT_ACCEPTABLE error response  */
2883
2884        neg->send_alternates = 1; /* always include Alternates header */
2885        set_neg_headers(r, neg, alg_result);
2886        store_variant_list(r, neg);
2887
2888        if (neg->is_transparent && neg->ua_supports_trans) {
2889            /* XXX todo: expires? cachability? */
2890
2891            /* Some HTTP/1.0 clients are known to choke when they get
2892             * a 300 (multiple choices) response without a Location
2893             * header.  However the 300 code response we are are about
2894             * to generate will only reach 1.0 clients which support
2895             * transparent negotiation, and they should be OK. The
2896             * response should never reach older 1.0 clients, even if
2897             * we have CacheNegotiatedDocs enabled, because no 1.0
2898             * proxy cache (we know of) will cache and return 300
2899             * responses (they certainly won't if they conform to the
2900             * HTTP/1.0 specification).
2901             */
2902            return HTTP_MULTIPLE_CHOICES;
2903        }
2904
2905        if (!*bestp) {
2906            ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00690)
2907                          "no acceptable variant: %s", r->filename);
2908            return HTTP_NOT_ACCEPTABLE;
2909        }
2910    }
2911
2912    /* Variant selection chose a variant */
2913
2914    /* XXX todo: merge the two cases in the if statement below */
2915    if (neg->is_transparent) {
2916
2917        if ((res = setup_choice_response(r, neg, *bestp)) != 0) {
2918            return res; /* return if error */
2919        }
2920    }
2921    else {
2922        set_neg_headers(r, neg, alg_result);
2923    }
2924
2925    /* Make sure caching works - Vary should handle HTTP/1.1, but for
2926     * HTTP/1.0, we can't allow caching at all.
2927     */
2928
2929    /* XXX: Note that we only set r->no_cache to 1, which causes
2930     * Expires: <now> to be added, when responding to a HTTP/1.0
2931     * client.  If we return the response to a 1.1 client, we do not
2932     * add Expires <now>, because doing so would degrade 1.1 cache
2933     * performance by preventing re-use of the response without prior
2934     * revalidation.  On the other hand, if the 1.1 client is a proxy
2935     * which was itself contacted by a 1.0 client, or a proxy cache
2936     * which can be contacted later by 1.0 clients, then we currently
2937     * rely on this 1.1 proxy to add the Expires: <now> when it
2938     * forwards the response.
2939     *
2940     * XXX: TODO: Find out if the 1.1 spec requires proxies and
2941     * tunnels to add Expires: <now> when forwarding the response to
2942     * 1.0 clients.  I (kh) recall it is rather vague on this point.
2943     * Testing actual 1.1 proxy implementations would also be nice. If
2944     * Expires: <now> is not added by proxies then we need to always
2945     * include Expires: <now> ourselves to ensure correct caching, but
2946     * this would degrade HTTP/1.1 cache efficiency unless we also add
2947     * Cache-Control: max-age=N, which we currently don't.
2948     *
2949     * Roy: No, we are not going to screw over HTTP future just to
2950     *      ensure that people who can't be bothered to upgrade their
2951     *      clients will always receive perfect server-side negotiation.
2952     *      Hell, those clients are sending bogus accept headers anyway.
2953     *
2954     *      Manual setting of cache-control/expires always overrides this
2955     *      automated kluge, on purpose.
2956     */
2957
2958    if ((!do_cache_negotiated_docs(r->server)
2959         && (r->proto_num < HTTP_VERSION(1,1)))
2960         && neg->count_multiviews_variants != 1) {
2961        r->no_cache = 1;
2962    }
2963
2964    return OK;
2965}
2966
2967static int handle_map_file(request_rec *r)
2968{
2969    negotiation_state *neg;
2970    apr_file_t *map;
2971    var_rec *best;
2972    int res;
2973    char *udir;
2974    const char *new_req;
2975
2976    if(strcmp(r->handler,MAP_FILE_MAGIC_TYPE) && strcmp(r->handler,"type-map"))
2977        return DECLINED;
2978
2979    neg = parse_accept_headers(r);
2980    if ((res = read_type_map(&map, neg, r))) {
2981        return res;
2982    }
2983
2984    res = do_negotiation(r, neg, &best, 0);
2985    if (res != 0) return res;
2986
2987    if (best->body)
2988    {
2989        conn_rec *c = r->connection;
2990        apr_bucket_brigade *bb;
2991        apr_bucket *e;
2992
2993        ap_allow_standard_methods(r, REPLACE_ALLOW, M_GET, M_OPTIONS,
2994                                  M_POST, -1);
2995        /* XXX: ?
2996         * if (r->method_number == M_OPTIONS) {
2997         *    return ap_send_http_options(r);
2998         *}
2999         */
3000        if (r->method_number != M_GET && r->method_number != M_POST) {
3001            return HTTP_METHOD_NOT_ALLOWED;
3002        }
3003
3004        /* ### These may be implemented by adding some 'extra' info
3005         *     of the file offset onto the etag
3006         * ap_update_mtime(r, r->finfo.mtime);
3007         * ap_set_last_modified(r);
3008         * ap_set_etag(r);
3009         */
3010        ap_set_accept_ranges(r);
3011        ap_set_content_length(r, best->bytes);
3012
3013        /* set MIME type and charset as negotiated */
3014        if (best->mime_type && *best->mime_type) {
3015            if (best->content_charset && *best->content_charset) {
3016                ap_set_content_type(r, apr_pstrcat(r->pool,
3017                                                   best->mime_type,
3018                                                   "; charset=",
3019                                                   best->content_charset,
3020                                                   NULL));
3021            }
3022            else {
3023                ap_set_content_type(r, apr_pstrdup(r->pool, best->mime_type));
3024            }
3025        }
3026
3027        /* set Content-language(s) as negotiated */
3028        if (best->content_languages && best->content_languages->nelts) {
3029            r->content_languages = apr_array_copy(r->pool,
3030                                                  best->content_languages);
3031        }
3032
3033        /* set Content-Encoding as negotiated */
3034        if (best->content_encoding && *best->content_encoding) {
3035            r->content_encoding = apr_pstrdup(r->pool,
3036                                              best->content_encoding);
3037        }
3038
3039        if ((res = ap_meets_conditions(r)) != OK) {
3040            return res;
3041        }
3042
3043        if ((res = ap_discard_request_body(r)) != OK) {
3044            return res;
3045        }
3046        bb = apr_brigade_create(r->pool, c->bucket_alloc);
3047
3048        apr_brigade_insert_file(bb, map, best->body, best->bytes, r->pool);
3049
3050        e = apr_bucket_eos_create(c->bucket_alloc);
3051        APR_BRIGADE_INSERT_TAIL(bb, e);
3052
3053        return ap_pass_brigade_fchk(r, bb, NULL);
3054    }
3055
3056    if (r->path_info && *r->path_info) {
3057        /* remove any path_info from the end of the uri before trying
3058         * to change the filename.  r->path_info from the original
3059         * request is passed along on the redirect.
3060         */
3061        r->uri[ap_find_path_info(r->uri, r->path_info)] = '\0';
3062    }
3063    udir = ap_make_dirstr_parent(r->pool, r->uri);
3064    udir = ap_escape_uri(r->pool, udir);
3065    if (r->args) {
3066        if (r->path_info) {
3067            new_req = apr_pstrcat(r->pool, udir, best->file_name,
3068                                  r->path_info, "?", r->args, NULL);
3069        }
3070        else {
3071            new_req = apr_pstrcat(r->pool, udir, best->file_name,
3072                                  "?", r->args, NULL);
3073        }
3074    }
3075    else {
3076        new_req = apr_pstrcat(r->pool, udir, best->file_name,
3077                              r->path_info, NULL);
3078    }
3079    ap_internal_redirect(new_req, r);
3080    return OK;
3081}
3082
3083static int handle_multi(request_rec *r)
3084{
3085    negotiation_state *neg;
3086    var_rec *best, *avail_recs;
3087    request_rec *sub_req;
3088    int res;
3089    int j;
3090
3091    if (r->finfo.filetype != APR_NOFILE
3092        || !(ap_allow_options(r) & OPT_MULTI)) {
3093        return DECLINED;
3094    }
3095
3096    neg = parse_accept_headers(r);
3097
3098    if ((res = read_types_multi(neg))) {
3099      return_from_multi:
3100        /* free all allocated memory from subrequests */
3101        avail_recs = (var_rec *) neg->avail_vars->elts;
3102        for (j = 0; j < neg->avail_vars->nelts; ++j) {
3103            var_rec *variant = &avail_recs[j];
3104            if (variant->sub_req) {
3105                ap_destroy_sub_req(variant->sub_req);
3106            }
3107        }
3108        return res;
3109    }
3110    if (neg->avail_vars->nelts == 0) {
3111        return DECLINED;
3112    }
3113
3114    res = do_negotiation(r, neg, &best,
3115                         (r->method_number != M_GET) || r->args ||
3116                         (r->path_info && *r->path_info));
3117    if (res != 0)
3118        goto return_from_multi;
3119
3120    if (!(sub_req = best->sub_req)) {
3121        /* We got this out of a map file, so we don't actually have
3122         * a sub_req structure yet.  Get one now.
3123         */
3124
3125        sub_req = ap_sub_req_lookup_file(best->file_name, r, r->output_filters);
3126        if (sub_req->status != HTTP_OK) {
3127            res = sub_req->status;
3128            ap_destroy_sub_req(sub_req);
3129            goto return_from_multi;
3130        }
3131    }
3132    if (sub_req->args == NULL) {
3133        sub_req->args = r->args;
3134    }
3135
3136    /* now do a "fast redirect" ... promotes the sub_req into the main req */
3137    ap_internal_fast_redirect(sub_req, r);
3138
3139    /* give no advise for time on this subrequest.  Perhaps we
3140     * should tally the last mtime amoung all variants, and date
3141     * the most recent, but that could confuse the proxies.
3142     */
3143    r->mtime = 0;
3144
3145    /* clean up all but our favorite variant, since that sub_req
3146     * is now merged into the main request!
3147     */
3148    avail_recs = (var_rec *) neg->avail_vars->elts;
3149    for (j = 0; j < neg->avail_vars->nelts; ++j) {
3150        var_rec *variant = &avail_recs[j];
3151        if (variant != best && variant->sub_req) {
3152            ap_destroy_sub_req(variant->sub_req);
3153        }
3154    }
3155    return OK;
3156}
3157
3158/**********************************************************************
3159 * There is a problem with content-encoding, as some clients send and
3160 * expect an x- token (e.g. x-gzip) while others expect the plain token
3161 * (i.e. gzip). To try and deal with this as best as possible we do
3162 * the following: if the client sent an Accept-Encoding header and it
3163 * contains a plain token corresponding to the content encoding of the
3164 * response, then set content encoding using the plain token. Else if
3165 * the A-E header contains the x- token use the x- token in the C-E
3166 * header. Else don't do anything.
3167 *
3168 * Note that if no A-E header was sent, or it does not contain a token
3169 * compatible with the final content encoding, then the token in the
3170 * C-E header will be whatever was specified in the AddEncoding
3171 * directive.
3172 */
3173static int fix_encoding(request_rec *r)
3174{
3175    const char *enc = r->content_encoding;
3176    char *x_enc = NULL;
3177    apr_array_header_t *accept_encodings;
3178    accept_rec *accept_recs;
3179    int i;
3180
3181    if (!enc || !*enc) {
3182        return DECLINED;
3183    }
3184
3185    if (enc[0] == 'x' && enc[1] == '-') {
3186        enc += 2;
3187    }
3188
3189    if ((accept_encodings = do_header_line(r->pool,
3190             apr_table_get(r->headers_in, "Accept-Encoding"))) == NULL) {
3191        return DECLINED;
3192    }
3193
3194    accept_recs = (accept_rec *) accept_encodings->elts;
3195
3196    for (i = 0; i < accept_encodings->nelts; ++i) {
3197        char *name = accept_recs[i].name;
3198
3199        if (!strcmp(name, enc)) {
3200            r->content_encoding = name;
3201            return OK;
3202        }
3203
3204        if (name[0] == 'x' && name[1] == '-' && !strcmp(name+2, enc)) {
3205            x_enc = name;
3206        }
3207    }
3208
3209    if (x_enc) {
3210        r->content_encoding = x_enc;
3211        return OK;
3212    }
3213
3214    return DECLINED;
3215}
3216
3217static void register_hooks(apr_pool_t *p)
3218{
3219    ap_hook_fixups(fix_encoding,NULL,NULL,APR_HOOK_MIDDLE);
3220    ap_hook_type_checker(handle_multi,NULL,NULL,APR_HOOK_FIRST);
3221    ap_hook_handler(handle_map_file,NULL,NULL,APR_HOOK_MIDDLE);
3222}
3223
3224AP_DECLARE_MODULE(negotiation) =
3225{
3226    STANDARD20_MODULE_STUFF,
3227    create_neg_dir_config,      /* dir config creator */
3228    merge_neg_dir_configs,      /* dir merger --- default is to override */
3229    NULL,                       /* server config */
3230    NULL,                       /* merge server config */
3231    negotiation_cmds,           /* command apr_table_t */
3232    register_hooks              /* register hooks */
3233};
3234