1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_negotiation.c: keeps track of MIME types the client is willing to 19 * accept, and contains code to handle type arbitration. 20 * 21 * rst 22 */ 23 24#include "apr.h" 25#include "apr_strings.h" 26#include "apr_file_io.h" 27#include "apr_lib.h" 28 29#define APR_WANT_STRFUNC 30#include "apr_want.h" 31 32#include "ap_config.h" 33#include "httpd.h" 34#include "http_config.h" 35#include "http_request.h" 36#include "http_protocol.h" 37#include "http_core.h" 38#include "http_log.h" 39#include "util_script.h" 40 41 42#define MAP_FILE_MAGIC_TYPE "application/x-type-map" 43 44/* Commands --- configuring document caching on a per (virtual?) 45 * server basis... 46 */ 47 48typedef struct { 49 int forcelangpriority; 50 apr_array_header_t *language_priority; 51} neg_dir_config; 52 53/* forcelangpriority flags 54 */ 55#define FLP_UNDEF 0 /* Same as FLP_DEFAULT, but base overrides */ 56#define FLP_NONE 1 /* Return 406, HTTP_NOT_ACCEPTABLE */ 57#define FLP_PREFER 2 /* Use language_priority rather than MC */ 58#define FLP_FALLBACK 4 /* Use language_priority rather than NA */ 59 60#define FLP_DEFAULT FLP_PREFER 61 62/* env evaluation 63 */ 64#define DISCARD_ALL_ENCODINGS 1 /* no-gzip */ 65#define DISCARD_ALL_BUT_HTML 2 /* gzip-only-text/html */ 66 67module AP_MODULE_DECLARE_DATA negotiation_module; 68 69static void *create_neg_dir_config(apr_pool_t *p, char *dummy) 70{ 71 neg_dir_config *new = (neg_dir_config *) apr_palloc(p, 72 sizeof(neg_dir_config)); 73 74 new->forcelangpriority = FLP_UNDEF; 75 new->language_priority = NULL; 76 return new; 77} 78 79static void *merge_neg_dir_configs(apr_pool_t *p, void *basev, void *addv) 80{ 81 neg_dir_config *base = (neg_dir_config *) basev; 82 neg_dir_config *add = (neg_dir_config *) addv; 83 neg_dir_config *new = (neg_dir_config *) apr_palloc(p, 84 sizeof(neg_dir_config)); 85 86 /* give priority to the config in the subdirectory */ 87 new->forcelangpriority = (add->forcelangpriority != FLP_UNDEF) 88 ? add->forcelangpriority 89 : base->forcelangpriority; 90 new->language_priority = add->language_priority 91 ? add->language_priority 92 : base->language_priority; 93 return new; 94} 95 96static const char *set_language_priority(cmd_parms *cmd, void *n_, 97 const char *lang) 98{ 99 neg_dir_config *n = n_; 100 const char **langp; 101 102 if (!n->language_priority) 103 n->language_priority = apr_array_make(cmd->pool, 4, sizeof(char *)); 104 105 langp = (const char **) apr_array_push(n->language_priority); 106 *langp = lang; 107 return NULL; 108} 109 110static const char *set_force_priority(cmd_parms *cmd, void *n_, const char *w) 111{ 112 neg_dir_config *n = n_; 113 114 if (!strcasecmp(w, "None")) { 115 if (n->forcelangpriority & ~FLP_NONE) { 116 return "Cannot combine ForceLanguagePriority options with None"; 117 } 118 n->forcelangpriority = FLP_NONE; 119 } 120 else if (!strcasecmp(w, "Prefer")) { 121 if (n->forcelangpriority & FLP_NONE) { 122 return "Cannot combine ForceLanguagePriority options None and " 123 "Prefer"; 124 } 125 n->forcelangpriority |= FLP_PREFER; 126 } 127 else if (!strcasecmp(w, "Fallback")) { 128 if (n->forcelangpriority & FLP_NONE) { 129 return "Cannot combine ForceLanguagePriority options None and " 130 "Fallback"; 131 } 132 n->forcelangpriority |= FLP_FALLBACK; 133 } 134 else { 135 return apr_pstrcat(cmd->pool, "Invalid ForceLanguagePriority option ", 136 w, NULL); 137 } 138 139 return NULL; 140} 141 142static const char *cache_negotiated_docs(cmd_parms *cmd, void *dummy, 143 int arg) 144{ 145 ap_set_module_config(cmd->server->module_config, &negotiation_module, 146 (arg ? "Cache" : NULL)); 147 return NULL; 148} 149 150static int do_cache_negotiated_docs(server_rec *s) 151{ 152 return (ap_get_module_config(s->module_config, 153 &negotiation_module) != NULL); 154} 155 156static const command_rec negotiation_cmds[] = 157{ 158 AP_INIT_FLAG("CacheNegotiatedDocs", cache_negotiated_docs, NULL, RSRC_CONF, 159 "Either 'on' or 'off' (default)"), 160 AP_INIT_ITERATE("LanguagePriority", set_language_priority, NULL, 161 OR_FILEINFO, 162 "space-delimited list of MIME language abbreviations"), 163 AP_INIT_ITERATE("ForceLanguagePriority", set_force_priority, NULL, 164 OR_FILEINFO, 165 "Force LanguagePriority elections, either None, or " 166 "Fallback and/or Prefer"), 167 {NULL} 168}; 169 170/* 171 * Record of available info on a media type specified by the client 172 * (we also use 'em for encodings and languages) 173 */ 174 175typedef struct accept_rec { 176 char *name; /* MUST be lowercase */ 177 float quality; 178 float level; 179 char *charset; /* for content-type only */ 180} accept_rec; 181 182/* 183 * Record of available info on a particular variant 184 * 185 * Note that a few of these fields are updated by the actual negotiation 186 * code. These are: 187 * 188 * level_matched --- initialized to zero. Set to the value of level 189 * if the client actually accepts this media type at that 190 * level (and *not* if it got in on a wildcard). See level_cmp 191 * below. 192 * mime_stars -- initialized to zero. Set to the number of stars 193 * present in the best matching Accept header element. 194 * 1 for star/star, 2 for type/star and 3 for 195 * type/subtype. 196 * 197 * definite -- initialized to 1. Set to 0 if there is a match which 198 * makes the variant non-definite according to the rules 199 * in rfc2296. 200 */ 201 202typedef struct var_rec { 203 request_rec *sub_req; /* May be NULL (is, for map files) */ 204 const char *mime_type; /* MUST be lowercase */ 205 const char *file_name; /* Set to 'this' (for map file body content) */ 206 apr_off_t body; /* Only for map file body content */ 207 const char *content_encoding; 208 apr_array_header_t *content_languages; /* list of lang. for this variant */ 209 const char *content_charset; 210 const char *description; 211 212 /* The next five items give the quality values for the dimensions 213 * of negotiation for this variant. They are obtained from the 214 * appropriate header lines, except for source_quality, which 215 * is obtained from the variant itself (the 'qs' parameter value 216 * from the variant's mime-type). Apart from source_quality, 217 * these values are set when we find the quality for each variant 218 * (see best_match()). source_quality is set from the 'qs' parameter 219 * of the variant description or mime type: see set_mime_fields(). 220 */ 221 float lang_quality; /* quality of this variant's language */ 222 float encoding_quality; /* ditto encoding */ 223 float charset_quality; /* ditto charset */ 224 float mime_type_quality; /* ditto media type */ 225 float source_quality; /* source quality for this variant */ 226 227 /* Now some special values */ 228 float level; /* Auxiliary to content-type... */ 229 apr_off_t bytes; /* content length, if known */ 230 int lang_index; /* Index into LanguagePriority list */ 231 int is_pseudo_html; /* text/html, *or* the INCLUDES_MAGIC_TYPEs */ 232 233 /* Above are all written-once properties of the variant. The 234 * three fields below are changed during negotiation: 235 */ 236 237 float level_matched; 238 int mime_stars; 239 int definite; 240} var_rec; 241 242/* Something to carry around the state of negotiation (and to keep 243 * all of this thread-safe)... 244 */ 245 246typedef struct { 247 apr_pool_t *pool; 248 request_rec *r; 249 neg_dir_config *conf; 250 char *dir_name; 251 int accept_q; /* 1 if an Accept item has a q= param */ 252 float default_lang_quality; /* fiddle lang q for variants with no lang */ 253 254 /* the array pointers below are NULL if the corresponding accept 255 * headers are not present 256 */ 257 apr_array_header_t *accepts; /* accept_recs */ 258 apr_array_header_t *accept_encodings; /* accept_recs */ 259 apr_array_header_t *accept_charsets; /* accept_recs */ 260 apr_array_header_t *accept_langs; /* accept_recs */ 261 262 apr_array_header_t *avail_vars; /* available variants */ 263 264 int count_multiviews_variants; /* number of variants found on disk */ 265 266 int is_transparent; /* 1 if this resource is trans. negotiable */ 267 268 int dont_fiddle_headers; /* 1 if we may not fiddle with accept hdrs */ 269 int ua_supports_trans; /* 1 if ua supports trans negotiation */ 270 int send_alternates; /* 1 if we want to send an Alternates header */ 271 int may_choose; /* 1 if we may choose a variant for the client */ 272 int use_rvsa; /* 1 if we must use RVSA/1.0 negotiation algo */ 273} negotiation_state; 274 275/* A few functions to manipulate var_recs. 276 * Cleaning out the fields... 277 */ 278 279static void clean_var_rec(var_rec *mime_info) 280{ 281 mime_info->sub_req = NULL; 282 mime_info->mime_type = ""; 283 mime_info->file_name = ""; 284 mime_info->body = 0; 285 mime_info->content_encoding = NULL; 286 mime_info->content_languages = NULL; 287 mime_info->content_charset = ""; 288 mime_info->description = ""; 289 290 mime_info->is_pseudo_html = 0; 291 mime_info->level = 0.0f; 292 mime_info->level_matched = 0.0f; 293 mime_info->bytes = -1; 294 mime_info->lang_index = -1; 295 mime_info->mime_stars = 0; 296 mime_info->definite = 1; 297 298 mime_info->charset_quality = 1.0f; 299 mime_info->encoding_quality = 1.0f; 300 mime_info->lang_quality = 1.0f; 301 mime_info->mime_type_quality = 1.0f; 302 mime_info->source_quality = 0.0f; 303} 304 305/* Initializing the relevant fields of a variant record from the 306 * accept_info read out of its content-type, one way or another. 307 */ 308 309static void set_mime_fields(var_rec *var, accept_rec *mime_info) 310{ 311 var->mime_type = mime_info->name; 312 var->source_quality = mime_info->quality; 313 var->level = mime_info->level; 314 var->content_charset = mime_info->charset; 315 316 var->is_pseudo_html = (!strcmp(var->mime_type, "text/html") 317 || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE) 318 || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE3)); 319} 320 321/* Create a variant list validator in r using info from vlistr. */ 322 323static void set_vlist_validator(request_rec *r, request_rec *vlistr) 324{ 325 /* Calculating the variant list validator is similar to 326 * calculating an etag for the source of the variant list 327 * information, so we use ap_make_etag(). Note that this 328 * validator can be 'weak' in extreme case. 329 */ 330 ap_update_mtime(vlistr, vlistr->finfo.mtime); 331 r->vlist_validator = ap_make_etag(vlistr, 0); 332 333 /* ap_set_etag will later take r->vlist_validator into account 334 * when creating the etag header 335 */ 336} 337 338 339/***************************************************************** 340 * 341 * Parsing (lists of) media types and their parameters, as seen in 342 * HTTPD header lines and elsewhere. 343 */ 344 345/* 346 * parse quality value. atof(3) is not well-usable here, because it 347 * depends on the locale (argh). 348 * 349 * However, RFC 2616 states: 350 * 3.9 Quality Values 351 * 352 * [...] HTTP/1.1 applications MUST NOT generate more than three digits 353 * after the decimal point. User configuration of these values SHOULD also 354 * be limited in this fashion. 355 * 356 * qvalue = ( "0" [ "." 0*3DIGIT ] ) 357 * | ( "1" [ "." 0*3("0") ] ) 358 * 359 * This is quite easy. If the supplied string doesn't match the above 360 * definition (loosely), we simply return 1 (same as if there's no qvalue) 361 */ 362 363static float atoq(const char *string) 364{ 365 if (!string || !*string) { 366 return 1.0f; 367 } 368 369 while (apr_isspace(*string)) { 370 ++string; 371 } 372 373 /* be tolerant and accept qvalues without leading zero 374 * (also for backwards compat, where atof() was in use) 375 */ 376 if (*string != '.' && *string++ != '0') { 377 return 1.0f; 378 } 379 380 if (*string == '.') { 381 /* better only one division later, than dealing with fscking 382 * IEEE format 0.1 factors ... 383 */ 384 int i = 0; 385 386 if (*++string >= '0' && *string <= '9') { 387 i += (*string - '0') * 100; 388 389 if (*++string >= '0' && *string <= '9') { 390 i += (*string - '0') * 10; 391 392 if (*++string > '0' && *string <= '9') { 393 i += (*string - '0'); 394 } 395 } 396 } 397 398 return (float)i / 1000.0f; 399 } 400 401 return 0.0f; 402} 403 404/* 405 * Get a single mime type entry --- one media type and parameters; 406 * enter the values we recognize into the argument accept_rec 407 */ 408 409static const char *get_entry(apr_pool_t *p, accept_rec *result, 410 const char *accept_line) 411{ 412 result->quality = 1.0f; 413 result->level = 0.0f; 414 result->charset = ""; 415 416 /* 417 * Note that this handles what I gather is the "old format", 418 * 419 * Accept: text/html text/plain moo/zot 420 * 421 * without any compatibility kludges --- if the token after the 422 * MIME type begins with a semicolon, we know we're looking at parms, 423 * otherwise, we know we aren't. (So why all the pissing and moaning 424 * in the CERN server code? I must be missing something). 425 */ 426 427 result->name = ap_get_token(p, &accept_line, 0); 428 ap_str_tolower(result->name); /* You want case insensitive, 429 * you'll *get* case insensitive. 430 */ 431 432 /* KLUDGE!!! Default HTML to level 2.0 unless the browser 433 * *explicitly* says something else. 434 */ 435 436 if (!strcmp(result->name, "text/html") && (result->level == 0.0)) { 437 result->level = 2.0f; 438 } 439 else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE)) { 440 result->level = 2.0f; 441 } 442 else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE3)) { 443 result->level = 3.0f; 444 } 445 446 while (*accept_line == ';') { 447 /* Parameters ... */ 448 449 char *parm; 450 char *cp; 451 char *end; 452 453 ++accept_line; 454 parm = ap_get_token(p, &accept_line, 1); 455 456 /* Look for 'var = value' --- and make sure the var is in lcase. */ 457 458 for (cp = parm; (*cp && !apr_isspace(*cp) && *cp != '='); ++cp) { 459 *cp = apr_tolower(*cp); 460 } 461 462 if (!*cp) { 463 continue; /* No '='; just ignore it. */ 464 } 465 466 *cp++ = '\0'; /* Delimit var */ 467 while (apr_isspace(*cp) || *cp == '=') { 468 ++cp; 469 } 470 471 if (*cp == '"') { 472 ++cp; 473 for (end = cp; 474 (*end && *end != '\n' && *end != '\r' && *end != '\"'); 475 end++); 476 } 477 else { 478 for (end = cp; (*end && !apr_isspace(*end)); end++); 479 } 480 if (*end) { 481 *end = '\0'; /* strip ending quote or return */ 482 } 483 ap_str_tolower(cp); 484 485 if (parm[0] == 'q' 486 && (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0'))) { 487 result->quality = atoq(cp); 488 } 489 else if (parm[0] == 'l' && !strcmp(&parm[1], "evel")) { 490 result->level = (float)atoi(cp); 491 } 492 else if (!strcmp(parm, "charset")) { 493 result->charset = cp; 494 } 495 } 496 497 if (*accept_line == ',') { 498 ++accept_line; 499 } 500 501 return accept_line; 502} 503 504/***************************************************************** 505 * 506 * Dealing with header lines ... 507 * 508 * Accept, Accept-Charset, Accept-Language and Accept-Encoding 509 * are handled by do_header_line() - they all have the same 510 * basic structure of a list of items of the format 511 * name; q=N; charset=TEXT 512 * 513 * where charset is only valid in Accept. 514 */ 515 516static apr_array_header_t *do_header_line(apr_pool_t *p, 517 const char *accept_line) 518{ 519 apr_array_header_t *accept_recs; 520 521 if (!accept_line) { 522 return NULL; 523 } 524 525 accept_recs = apr_array_make(p, 40, sizeof(accept_rec)); 526 527 while (*accept_line) { 528 accept_rec *new = (accept_rec *) apr_array_push(accept_recs); 529 accept_line = get_entry(p, new, accept_line); 530 } 531 532 return accept_recs; 533} 534 535/* Given the text of the Content-Languages: line from the var map file, 536 * return an array containing the languages of this variant 537 */ 538 539static apr_array_header_t *do_languages_line(apr_pool_t *p, 540 const char **lang_line) 541{ 542 apr_array_header_t *lang_recs = apr_array_make(p, 2, sizeof(char *)); 543 544 if (!lang_line) { 545 return lang_recs; 546 } 547 548 while (**lang_line) { 549 char **new = (char **) apr_array_push(lang_recs); 550 *new = ap_get_token(p, lang_line, 0); 551 ap_str_tolower(*new); 552 if (**lang_line == ',' || **lang_line == ';') { 553 ++(*lang_line); 554 } 555 } 556 557 return lang_recs; 558} 559 560/***************************************************************** 561 * 562 * Handling header lines from clients... 563 */ 564 565static negotiation_state *parse_accept_headers(request_rec *r) 566{ 567 negotiation_state *new = 568 (negotiation_state *) apr_pcalloc(r->pool, sizeof(negotiation_state)); 569 accept_rec *elts; 570 apr_table_t *hdrs = r->headers_in; 571 int i; 572 573 new->pool = r->pool; 574 new->r = r; 575 new->conf = (neg_dir_config *)ap_get_module_config(r->per_dir_config, 576 &negotiation_module); 577 578 new->dir_name = ap_make_dirstr_parent(r->pool, r->filename); 579 580 new->accepts = do_header_line(r->pool, apr_table_get(hdrs, "Accept")); 581 582 /* calculate new->accept_q value */ 583 if (new->accepts) { 584 elts = (accept_rec *) new->accepts->elts; 585 586 for (i = 0; i < new->accepts->nelts; ++i) { 587 if (elts[i].quality < 1.0) { 588 new->accept_q = 1; 589 } 590 } 591 } 592 593 new->accept_encodings = 594 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Encoding")); 595 new->accept_langs = 596 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Language")); 597 new->accept_charsets = 598 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Charset")); 599 600 /* This is possibly overkill for some servers, heck, we have 601 * only 33 index.html variants in docs/docroot (today). 602 * Make this configurable? 603 */ 604 new->avail_vars = apr_array_make(r->pool, 40, sizeof(var_rec)); 605 606 return new; 607} 608 609 610static void parse_negotiate_header(request_rec *r, negotiation_state *neg) 611{ 612 const char *negotiate = apr_table_get(r->headers_in, "Negotiate"); 613 char *tok; 614 615 /* First, default to no TCN, no Alternates, and the original Apache 616 * negotiation algorithm with fiddles for broken browser configs. 617 * 618 * To save network bandwidth, we do not configure to send an 619 * Alternates header to the user agent by default. User 620 * agents that want an Alternates header for agent-driven 621 * negotiation will have to request it by sending an 622 * appropriate Negotiate header. 623 */ 624 neg->ua_supports_trans = 0; 625 neg->send_alternates = 0; 626 neg->may_choose = 1; 627 neg->use_rvsa = 0; 628 neg->dont_fiddle_headers = 0; 629 630 if (!negotiate) 631 return; 632 633 if (strcmp(negotiate, "trans") == 0) { 634 /* Lynx 2.7 and 2.8 send 'negotiate: trans' even though they 635 * do not support transparent content negotiation, so for Lynx we 636 * ignore the negotiate header when its contents are exactly "trans". 637 * If future versions of Lynx ever need to say 'negotiate: trans', 638 * they can send the equivalent 'negotiate: trans, trans' instead 639 * to avoid triggering the workaround below. 640 */ 641 const char *ua = apr_table_get(r->headers_in, "User-Agent"); 642 643 if (ua && (strncmp(ua, "Lynx", 4) == 0)) 644 return; 645 } 646 647 neg->may_choose = 0; /* An empty Negotiate would require 300 response */ 648 649 while ((tok = ap_get_list_item(neg->pool, &negotiate)) != NULL) { 650 651 if (strcmp(tok, "trans") == 0 || 652 strcmp(tok, "vlist") == 0 || 653 strcmp(tok, "guess-small") == 0 || 654 apr_isdigit(tok[0]) || 655 strcmp(tok, "*") == 0) { 656 657 /* The user agent supports transparent negotiation */ 658 neg->ua_supports_trans = 1; 659 660 /* Send-alternates could be configurable, but note 661 * that it must be 1 if we have 'vlist' in the 662 * negotiate header. 663 */ 664 neg->send_alternates = 1; 665 666 if (strcmp(tok, "1.0") == 0) { 667 /* we may use the RVSA/1.0 algorithm, configure for it */ 668 neg->may_choose = 1; 669 neg->use_rvsa = 1; 670 neg->dont_fiddle_headers = 1; 671 } 672 else if (tok[0] == '*') { 673 /* we may use any variant selection algorithm, configure 674 * to use the Apache algorithm 675 */ 676 neg->may_choose = 1; 677 678 /* We disable header fiddles on the assumption that a 679 * client sending Negotiate knows how to send correct 680 * headers which don't need fiddling. 681 */ 682 neg->dont_fiddle_headers = 1; 683 } 684 } 685 } 686 687#ifdef NEG_DEBUG 688 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00680) 689 "dont_fiddle_headers=%d use_rvsa=%d ua_supports_trans=%d " 690 "send_alternates=%d, may_choose=%d", 691 neg->dont_fiddle_headers, neg->use_rvsa, 692 neg->ua_supports_trans, neg->send_alternates, neg->may_choose); 693#endif 694 695} 696 697/* Sometimes clients will give us no Accept info at all; this routine sets 698 * up the standard default for that case, and also arranges for us to be 699 * willing to run a CGI script if we find one. (In fact, we set up to 700 * dramatically prefer CGI scripts in cases where that's appropriate, 701 * e.g., POST or when URI includes query args or extra path info). 702 */ 703static void maybe_add_default_accepts(negotiation_state *neg, 704 int prefer_scripts) 705{ 706 accept_rec *new_accept; 707 708 if (!neg->accepts) { 709 neg->accepts = apr_array_make(neg->pool, 4, sizeof(accept_rec)); 710 711 new_accept = (accept_rec *) apr_array_push(neg->accepts); 712 713 new_accept->name = "*/*"; 714 new_accept->quality = 1.0f; 715 new_accept->level = 0.0f; 716 } 717 718 new_accept = (accept_rec *) apr_array_push(neg->accepts); 719 720 new_accept->name = CGI_MAGIC_TYPE; 721 if (neg->use_rvsa) { 722 new_accept->quality = 0; 723 } 724 else { 725 new_accept->quality = prefer_scripts ? 2.0f : 0.001f; 726 } 727 new_accept->level = 0.0f; 728} 729 730/***************************************************************** 731 * 732 * Parsing type-map files, in Roy's meta/http format augmented with 733 * #-comments. 734 */ 735 736/* Reading RFC822-style header lines, ignoring #-comments and 737 * handling continuations. 738 */ 739 740enum header_state { 741 header_eof, header_seen, header_sep 742}; 743 744static enum header_state get_header_line(char *buffer, int len, apr_file_t *map) 745{ 746 char *buf_end = buffer + len; 747 char *cp; 748 char c; 749 750 /* Get a noncommented line */ 751 752 do { 753 if (apr_file_gets(buffer, MAX_STRING_LEN, map) != APR_SUCCESS) { 754 return header_eof; 755 } 756 } while (buffer[0] == '#'); 757 758 /* If blank, just return it --- this ends information on this variant */ 759 760 for (cp = buffer; apr_isspace(*cp); ++cp) { 761 continue; 762 } 763 764 if (*cp == '\0') { 765 return header_sep; 766 } 767 768 /* If non-blank, go looking for header lines, but note that we still 769 * have to treat comments specially... 770 */ 771 772 cp += strlen(cp); 773 774 /* We need to shortcut the rest of this block following the Body: 775 * tag - we will not look for continutation after this line. 776 */ 777 if (!strncasecmp(buffer, "Body:", 5)) 778 return header_seen; 779 780 while (apr_file_getc(&c, map) != APR_EOF) { 781 if (c == '#') { 782 /* Comment line */ 783 while (apr_file_getc(&c, map) != APR_EOF && c != '\n') { 784 continue; 785 } 786 } 787 else if (apr_isspace(c)) { 788 /* Leading whitespace. POSSIBLE continuation line 789 * Also, possibly blank --- if so, we ungetc() the final newline 790 * so that we will pick up the blank line the next time 'round. 791 */ 792 793 while (c != '\n' && apr_isspace(c)) { 794 if(apr_file_getc(&c, map) != APR_SUCCESS) 795 break; 796 } 797 798 apr_file_ungetc(c, map); 799 800 if (c == '\n') { 801 return header_seen; /* Blank line */ 802 } 803 804 /* Continuation */ 805 806 while ( cp < buf_end - 2 807 && (apr_file_getc(&c, map)) != APR_EOF 808 && c != '\n') { 809 *cp++ = c; 810 } 811 812 *cp++ = '\n'; 813 *cp = '\0'; 814 } 815 else { 816 817 /* Line beginning with something other than whitespace */ 818 819 apr_file_ungetc(c, map); 820 return header_seen; 821 } 822 } 823 824 return header_seen; 825} 826 827static apr_off_t get_body(char *buffer, apr_size_t *len, const char *tag, 828 apr_file_t *map) 829{ 830 char *endbody; 831 int bodylen; 832 int taglen; 833 apr_off_t pos; 834 835 taglen = strlen(tag); 836 *len -= taglen; 837 838 /* We are at the first character following a body:tag\n entry 839 * Suck in the body, then backspace to the first char after the 840 * closing tag entry. If we fail to read, find the tag or back 841 * up then we have a hosed file, so give up already 842 */ 843 if (apr_file_read(map, buffer, len) != APR_SUCCESS) { 844 return -1; 845 } 846 847 /* put a copy of the tag *after* the data read from the file 848 * so that strstr() will find something with no reliance on 849 * terminating '\0' 850 */ 851 memcpy(buffer + *len, tag, taglen); 852 endbody = strstr(buffer, tag); 853 if (endbody == buffer + *len) { 854 return -1; 855 } 856 bodylen = endbody - buffer; 857 endbody += taglen; 858 /* Skip all the trailing cruft after the end tag to the next line */ 859 while (*endbody) { 860 if (*endbody == '\n') { 861 ++endbody; 862 break; 863 } 864 ++endbody; 865 } 866 867 pos = -(apr_off_t)(*len - (endbody - buffer)); 868 if (apr_file_seek(map, APR_CUR, &pos) != APR_SUCCESS) { 869 return -1; 870 } 871 872 /* Give the caller back the actual body's file offset and length */ 873 *len = bodylen; 874 return pos - (endbody - buffer); 875} 876 877 878/* Stripping out RFC822 comments */ 879 880static void strip_paren_comments(char *hdr) 881{ 882 /* Hmmm... is this correct? In Roy's latest draft, (comments) can nest! */ 883 /* Nope, it isn't correct. Fails to handle backslash escape as well. */ 884 885 while (*hdr) { 886 if (*hdr == '"') { 887 hdr = strchr(hdr, '"'); 888 if (hdr == NULL) { 889 return; 890 } 891 ++hdr; 892 } 893 else if (*hdr == '(') { 894 while (*hdr && *hdr != ')') { 895 *hdr++ = ' '; 896 } 897 898 if (*hdr) { 899 *hdr++ = ' '; 900 } 901 } 902 else { 903 ++hdr; 904 } 905 } 906} 907 908/* Getting to a header body from the header */ 909 910static char *lcase_header_name_return_body(char *header, request_rec *r) 911{ 912 char *cp = header; 913 914 for ( ; *cp && *cp != ':' ; ++cp) { 915 *cp = apr_tolower(*cp); 916 } 917 918 if (!*cp) { 919 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00681) 920 "Syntax error in type map, no ':' in %s for header %s", 921 r->filename, header); 922 return NULL; 923 } 924 925 do { 926 ++cp; 927 } while (apr_isspace(*cp)); 928 929 if (!*cp) { 930 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00682) 931 "Syntax error in type map --- no header body: %s for %s", 932 r->filename, header); 933 return NULL; 934 } 935 936 return cp; 937} 938 939static int read_type_map(apr_file_t **map, negotiation_state *neg, 940 request_rec *rr) 941{ 942 request_rec *r = neg->r; 943 apr_file_t *map_ = NULL; 944 apr_status_t status; 945 char buffer[MAX_STRING_LEN]; 946 enum header_state hstate; 947 struct var_rec mime_info; 948 int has_content; 949 950 if (!map) 951 map = &map_; 952 953 /* We are not using multiviews */ 954 neg->count_multiviews_variants = 0; 955 956 if ((status = apr_file_open(map, rr->filename, APR_READ | APR_BUFFERED, 957 APR_OS_DEFAULT, neg->pool)) != APR_SUCCESS) { 958 ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(00683) 959 "cannot access type map file: %s", rr->filename); 960 if (APR_STATUS_IS_ENOTDIR(status) || APR_STATUS_IS_ENOENT(status)) { 961 return HTTP_NOT_FOUND; 962 } 963 else { 964 return HTTP_FORBIDDEN; 965 } 966 } 967 968 clean_var_rec(&mime_info); 969 has_content = 0; 970 971 do { 972 hstate = get_header_line(buffer, MAX_STRING_LEN, *map); 973 974 if (hstate == header_seen) { 975 char *body1 = lcase_header_name_return_body(buffer, neg->r); 976 const char *body; 977 978 if (body1 == NULL) { 979 return HTTP_INTERNAL_SERVER_ERROR; 980 } 981 982 strip_paren_comments(body1); 983 body = body1; 984 985 if (!strncmp(buffer, "uri:", 4)) { 986 mime_info.file_name = ap_get_token(neg->pool, &body, 0); 987 } 988 else if (!strncmp(buffer, "content-type:", 13)) { 989 struct accept_rec accept_info; 990 991 get_entry(neg->pool, &accept_info, body); 992 set_mime_fields(&mime_info, &accept_info); 993 has_content = 1; 994 } 995 else if (!strncmp(buffer, "content-length:", 15)) { 996 char *errp; 997 apr_off_t number; 998 999 body1 = ap_get_token(neg->pool, &body, 0); 1000 if (apr_strtoff(&number, body1, &errp, 10) != APR_SUCCESS 1001 || *errp || number < 0) { 1002 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00684) 1003 "Parse error in type map, Content-Length: " 1004 "'%s' in %s is invalid.", 1005 body1, r->filename); 1006 break; 1007 } 1008 mime_info.bytes = number; 1009 has_content = 1; 1010 } 1011 else if (!strncmp(buffer, "content-language:", 17)) { 1012 mime_info.content_languages = do_languages_line(neg->pool, 1013 &body); 1014 has_content = 1; 1015 } 1016 else if (!strncmp(buffer, "content-encoding:", 17)) { 1017 mime_info.content_encoding = ap_get_token(neg->pool, &body, 0); 1018 has_content = 1; 1019 } 1020 else if (!strncmp(buffer, "description:", 12)) { 1021 char *desc = apr_pstrdup(neg->pool, body); 1022 char *cp; 1023 1024 for (cp = desc; *cp; ++cp) { 1025 if (*cp=='\n') *cp=' '; 1026 } 1027 if (cp>desc) *(cp-1)=0; 1028 mime_info.description = desc; 1029 } 1030 else if (!strncmp(buffer, "body:", 5)) { 1031 char *tag = apr_pstrdup(neg->pool, body); 1032 char *eol = strchr(tag, '\0'); 1033 apr_size_t len = MAX_STRING_LEN; 1034 while (--eol >= tag && apr_isspace(*eol)) 1035 *eol = '\0'; 1036 if ((mime_info.body = get_body(buffer, &len, tag, *map)) < 0) { 1037 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00685) 1038 "Syntax error in type map, no end tag '%s'" 1039 "found in %s for Body: content.", 1040 tag, r->filename); 1041 break; 1042 } 1043 mime_info.bytes = len; 1044 mime_info.file_name = apr_filepath_name_get(rr->filename); 1045 } 1046 } 1047 else { 1048 if (*mime_info.file_name && has_content) { 1049 void *new_var = apr_array_push(neg->avail_vars); 1050 1051 memcpy(new_var, (void *) &mime_info, sizeof(var_rec)); 1052 } 1053 1054 clean_var_rec(&mime_info); 1055 has_content = 0; 1056 } 1057 } while (hstate != header_eof); 1058 1059 if (map_) 1060 apr_file_close(map_); 1061 1062 set_vlist_validator(r, rr); 1063 1064 return OK; 1065} 1066 1067 1068/* Sort function used by read_types_multi. */ 1069static int variantsortf(var_rec *a, var_rec *b) { 1070 1071 /* First key is the source quality, sort in descending order. */ 1072 1073 /* XXX: note that we currently implement no method of setting the 1074 * source quality for multiviews variants, so we are always comparing 1075 * 1.0 to 1.0 for now 1076 */ 1077 if (a->source_quality < b->source_quality) 1078 return 1; 1079 if (a->source_quality > b->source_quality) 1080 return -1; 1081 1082 /* Second key is the variant name */ 1083 return strcmp(a->file_name, b->file_name); 1084} 1085 1086/***************************************************************** 1087 * 1088 * Same as read_type_map, except we use a filtered directory listing 1089 * as the map... 1090 */ 1091 1092static int read_types_multi(negotiation_state *neg) 1093{ 1094 request_rec *r = neg->r; 1095 1096 char *filp; 1097 int prefix_len; 1098 apr_dir_t *dirp; 1099 apr_finfo_t dirent; 1100 apr_status_t status; 1101 struct var_rec mime_info; 1102 struct accept_rec accept_info; 1103 void *new_var; 1104 int anymatch = 0; 1105 1106 clean_var_rec(&mime_info); 1107 1108 if (r->proxyreq || !r->filename 1109 || !ap_os_is_path_absolute(neg->pool, r->filename)) { 1110 return DECLINED; 1111 } 1112 1113 /* Only absolute paths here */ 1114 if (!(filp = strrchr(r->filename, '/'))) { 1115 return DECLINED; 1116 } 1117 ++filp; 1118 prefix_len = strlen(filp); 1119 1120 if ((status = apr_dir_open(&dirp, neg->dir_name, 1121 neg->pool)) != APR_SUCCESS) { 1122 ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, APLOGNO(00686) 1123 "cannot read directory for multi: %s", neg->dir_name); 1124 return HTTP_FORBIDDEN; 1125 } 1126 1127 while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dirp) == APR_SUCCESS) { 1128 apr_array_header_t *exception_list; 1129 request_rec *sub_req; 1130 1131 /* Do we have a match? */ 1132#ifdef CASE_BLIND_FILESYSTEM 1133 if (strncasecmp(dirent.name, filp, prefix_len)) { 1134#else 1135 if (strncmp(dirent.name, filp, prefix_len)) { 1136#endif 1137 continue; 1138 } 1139 if (dirent.name[prefix_len] != '.') { 1140 continue; 1141 } 1142 1143 /* Don't negotiate directories and other unusual files 1144 * Really shouldn't see anything but DIR/LNK/REG here, 1145 * and we aught to discover if the LNK was interesting. 1146 * 1147 * Of course, this only helps platforms that capture the 1148 * the filetype in apr_dir_read(), which most can once 1149 * they are optimized with some magic [it's known to the 1150 * dirent, not associated to the inode, on most FS's.] 1151 */ 1152 if ((dirent.valid & APR_FINFO_TYPE) && (dirent.filetype == APR_DIR)) 1153 continue; 1154 1155 /* Ok, something's here. Maybe nothing useful. Remember that 1156 * we tried, if we completely fail, so we can reject the request! 1157 */ 1158 anymatch = 1; 1159 1160 /* See if it's something which we have access to, and which 1161 * has a known type and encoding. 1162 */ 1163 sub_req = ap_sub_req_lookup_dirent(&dirent, r, AP_SUBREQ_MERGE_ARGS, 1164 NULL); 1165 1166 /* Double check, we still don't multi-resolve non-ordinary files 1167 */ 1168 if (sub_req->finfo.filetype != APR_REG) { 1169 /* XXX sub req not destroyed -- may be a bug/unintentional ? */ 1170 continue; 1171 } 1172 1173 /* If it has a handler, we'll pretend it's a CGI script, 1174 * since that's a good indication of the sort of thing it 1175 * might be doing. 1176 */ 1177 if (sub_req->handler && !sub_req->content_type) { 1178 ap_set_content_type(sub_req, CGI_MAGIC_TYPE); 1179 } 1180 1181 /* 1182 * mod_mime will _always_ provide us the base name in the 1183 * ap-mime-exception-list, if it processed anything. If 1184 * this list is empty, give up immediately, there was 1185 * nothing interesting. For example, looking at the files 1186 * readme.txt and readme.foo, we will throw away .foo if 1187 * it's an insignificant file (e.g. did not identify a 1188 * language, charset, encoding, content type or handler,) 1189 */ 1190 exception_list = 1191 (apr_array_header_t *)apr_table_get(sub_req->notes, 1192 "ap-mime-exceptions-list"); 1193 1194 if (!exception_list) { 1195 ap_destroy_sub_req(sub_req); 1196 continue; 1197 } 1198 1199 /* Each unregonized bit better match our base name, in sequence. 1200 * A test of index.html.foo will match index.foo or index.html.foo, 1201 * but it will never transpose the segments and allow index.foo.html 1202 * because that would introduce too much CPU consumption. Better that 1203 * we don't attempt a many-to-many match here. 1204 */ 1205 { 1206 int nexcept = exception_list->nelts; 1207 char **cur_except = (char**)exception_list->elts; 1208 char *segstart = filp, *segend, saveend; 1209 1210 while (*segstart && nexcept) { 1211 if (!(segend = strchr(segstart, '.'))) 1212 segend = strchr(segstart, '\0'); 1213 saveend = *segend; 1214 *segend = '\0'; 1215 1216#ifdef CASE_BLIND_FILESYSTEM 1217 if (strcasecmp(segstart, *cur_except) == 0) { 1218#else 1219 if (strcmp(segstart, *cur_except) == 0) { 1220#endif 1221 --nexcept; 1222 ++cur_except; 1223 } 1224 1225 if (!saveend) 1226 break; 1227 1228 *segend = saveend; 1229 segstart = segend + 1; 1230 } 1231 1232 if (nexcept) { 1233 /* Something you don't know is, something you don't know... 1234 */ 1235 ap_destroy_sub_req(sub_req); 1236 continue; 1237 } 1238 } 1239 1240 /* 1241 * If we failed the subrequest, or don't 1242 * know what we are serving, then continue. 1243 */ 1244 if (sub_req->status != HTTP_OK || (!sub_req->content_type)) { 1245 ap_destroy_sub_req(sub_req); 1246 continue; 1247 } 1248 1249 /* If it's a map file, we use that instead of the map 1250 * we're building... 1251 */ 1252 if (((sub_req->content_type) && 1253 !strcmp(sub_req->content_type, MAP_FILE_MAGIC_TYPE)) || 1254 ((sub_req->handler) && 1255 !strcmp(sub_req->handler, "type-map"))) { 1256 1257 apr_dir_close(dirp); 1258 neg->avail_vars->nelts = 0; 1259 if (sub_req->status != HTTP_OK) { 1260 return sub_req->status; 1261 } 1262 return read_type_map(NULL, neg, sub_req); 1263 } 1264 1265 /* Have reasonable variant --- gather notes. */ 1266 1267 mime_info.sub_req = sub_req; 1268 mime_info.file_name = apr_pstrdup(neg->pool, dirent.name); 1269 if (sub_req->content_encoding) { 1270 mime_info.content_encoding = sub_req->content_encoding; 1271 } 1272 if (sub_req->content_languages) { 1273 mime_info.content_languages = sub_req->content_languages; 1274 } 1275 1276 get_entry(neg->pool, &accept_info, sub_req->content_type); 1277 set_mime_fields(&mime_info, &accept_info); 1278 1279 new_var = apr_array_push(neg->avail_vars); 1280 memcpy(new_var, (void *) &mime_info, sizeof(var_rec)); 1281 1282 neg->count_multiviews_variants++; 1283 1284 clean_var_rec(&mime_info); 1285 } 1286 1287 apr_dir_close(dirp); 1288 1289 /* We found some file names that matched. None could be served. 1290 * Rather than fall out to autoindex or some other mapper, this 1291 * request must die. 1292 */ 1293 if (anymatch && !neg->avail_vars->nelts) { 1294 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00687) 1295 "Negotiation: discovered file(s) matching request: %s" 1296 " (None could be negotiated).", 1297 r->filename); 1298 return HTTP_NOT_FOUND; 1299 } 1300 1301 set_vlist_validator(r, r); 1302 1303 /* Sort the variants into a canonical order. The negotiation 1304 * result sometimes depends on the order of the variants. By 1305 * sorting the variants into a canonical order, rather than using 1306 * the order in which readdir() happens to return them, we ensure 1307 * that the negotiation result will be consistent over filesystem 1308 * backup/restores and over all mirror sites. 1309 */ 1310 1311 qsort((void *) neg->avail_vars->elts, neg->avail_vars->nelts, 1312 sizeof(var_rec), (int (*)(const void *, const void *)) variantsortf); 1313 1314 return OK; 1315} 1316 1317 1318/***************************************************************** 1319 * And now for the code you've been waiting for... actually 1320 * finding a match to the client's requirements. 1321 */ 1322 1323/* Matching MIME types ... the star/star and foo/star commenting conventions 1324 * are implemented here. (You know what I mean by star/star, but just 1325 * try mentioning those three characters in a C comment). Using strcmp() 1326 * is legit, because everything has already been smashed to lowercase. 1327 * 1328 * Note also that if we get an exact match on the media type, we update 1329 * level_matched for use in level_cmp below... 1330 * 1331 * We also give a value for mime_stars, which is used later. It should 1332 * be 1 for star/star, 2 for type/star and 3 for type/subtype. 1333 */ 1334 1335static int mime_match(accept_rec *accept_r, var_rec *avail) 1336{ 1337 const char *accept_type = accept_r->name; 1338 const char *avail_type = avail->mime_type; 1339 int len = strlen(accept_type); 1340 1341 if (accept_type[0] == '*') { /* Anything matches star/star */ 1342 if (avail->mime_stars < 1) { 1343 avail->mime_stars = 1; 1344 } 1345 return 1; 1346 } 1347 else if ((accept_type[len - 1] == '*') && 1348 !strncmp(accept_type, avail_type, len - 2)) { 1349 if (avail->mime_stars < 2) { 1350 avail->mime_stars = 2; 1351 } 1352 return 1; 1353 } 1354 else if (!strcmp(accept_type, avail_type) 1355 || (!strcmp(accept_type, "text/html") 1356 && (!strcmp(avail_type, INCLUDES_MAGIC_TYPE) 1357 || !strcmp(avail_type, INCLUDES_MAGIC_TYPE3)))) { 1358 if (accept_r->level >= avail->level) { 1359 avail->level_matched = avail->level; 1360 avail->mime_stars = 3; 1361 return 1; 1362 } 1363 } 1364 1365 return OK; 1366} 1367 1368/* This code implements a piece of the tie-breaking algorithm between 1369 * variants of equal quality. This piece is the treatment of variants 1370 * of the same base media type, but different levels. What we want to 1371 * return is the variant at the highest level that the client explicitly 1372 * claimed to accept. 1373 * 1374 * If all the variants available are at a higher level than that, or if 1375 * the client didn't say anything specific about this media type at all 1376 * and these variants just got in on a wildcard, we prefer the lowest 1377 * level, on grounds that that's the one that the client is least likely 1378 * to choke on. 1379 * 1380 * (This is all motivated by treatment of levels in HTML --- we only 1381 * want to give level 3 to browsers that explicitly ask for it; browsers 1382 * that don't, including HTTP/0.9 browsers that only get the implicit 1383 * "Accept: * / *" [space added to avoid confusing cpp --- no, that 1384 * syntax doesn't really work] should get HTML2 if available). 1385 * 1386 * (Note that this code only comes into play when we are choosing among 1387 * variants of equal quality, where the draft standard gives us a fair 1388 * bit of leeway about what to do. It ain't specified by the standard; 1389 * rather, it is a choice made by this server about what to do in cases 1390 * where the standard does not specify a unique course of action). 1391 */ 1392 1393static int level_cmp(var_rec *var1, var_rec *var2) 1394{ 1395 /* Levels are only comparable between matching media types */ 1396 1397 if (var1->is_pseudo_html && !var2->is_pseudo_html) { 1398 return 0; 1399 } 1400 1401 if (!var1->is_pseudo_html && strcmp(var1->mime_type, var2->mime_type)) { 1402 return 0; 1403 } 1404 /* The result of the above if statements is that, if we get to 1405 * here, both variants have the same mime_type or both are 1406 * pseudo-html. 1407 */ 1408 1409 /* Take highest level that matched, if either did match. */ 1410 1411 if (var1->level_matched > var2->level_matched) { 1412 return 1; 1413 } 1414 if (var1->level_matched < var2->level_matched) { 1415 return -1; 1416 } 1417 1418 /* Neither matched. Take lowest level, if there's a difference. */ 1419 1420 if (var1->level < var2->level) { 1421 return 1; 1422 } 1423 if (var1->level > var2->level) { 1424 return -1; 1425 } 1426 1427 /* Tied */ 1428 1429 return 0; 1430} 1431 1432/* Finding languages. The main entry point is set_language_quality() 1433 * which is called for each variant. It sets two elements in the 1434 * variant record: 1435 * language_quality - the 'q' value of the 'best' matching language 1436 * from Accept-Language: header (HTTP/1.1) 1437 * lang_index - Non-negotiated language priority, using 1438 * position of language on the Accept-Language: 1439 * header, if present, else LanguagePriority 1440 * directive order. 1441 * 1442 * When we do the variant checking for best variant, we use language 1443 * quality first, and if a tie, language_index next (this only applies 1444 * when _not_ using the RVSA/1.0 algorithm). If using the RVSA/1.0 1445 * algorithm, lang_index is never used. 1446 * 1447 * set_language_quality() calls find_lang_index() and find_default_index() 1448 * to set lang_index. 1449 */ 1450 1451static int find_lang_index(apr_array_header_t *accept_langs, char *lang) 1452{ 1453 const char **alang; 1454 int i; 1455 1456 if (!lang || !accept_langs) { 1457 return -1; 1458 } 1459 1460 alang = (const char **) accept_langs->elts; 1461 1462 for (i = 0; i < accept_langs->nelts; ++i) { 1463 if (!strncmp(lang, *alang, strlen(*alang))) { 1464 return i; 1465 } 1466 alang += (accept_langs->elt_size / sizeof(char*)); 1467 } 1468 1469 return -1; 1470} 1471 1472/* set_default_lang_quality() sets the quality we apply to variants 1473 * which have no language assigned to them. If none of the variants 1474 * have a language, we are not negotiating on language, so all are 1475 * acceptable, and we set the default q value to 1.0. However if 1476 * some of the variants have languages, we set this default to 0.0001. 1477 * The value of this default will be applied to all variants with 1478 * no explicit language -- which will have the effect of making them 1479 * acceptable, but only if no variants with an explicit language 1480 * are acceptable. The default q value set here is assigned to variants 1481 * with no language type in set_language_quality(). 1482 * 1483 * Note that if using the RVSA/1.0 algorithm, we don't use this 1484 * fiddle. 1485 */ 1486 1487static void set_default_lang_quality(negotiation_state *neg) 1488{ 1489 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 1490 int j; 1491 1492 if (!neg->dont_fiddle_headers) { 1493 for (j = 0; j < neg->avail_vars->nelts; ++j) { 1494 var_rec *variant = &avail_recs[j]; 1495 if (variant->content_languages && 1496 variant->content_languages->nelts) { 1497 neg->default_lang_quality = 0.0001f; 1498 return; 1499 } 1500 } 1501 } 1502 1503 neg->default_lang_quality = 1.0f; 1504} 1505 1506/* Set the language_quality value in the variant record. Also 1507 * assigns lang_index for ForceLanguagePriority. 1508 * 1509 * To find the language_quality value, we look for the 'q' value 1510 * of the 'best' matching language on the Accept-Language 1511 * header. The 'best' match is the language on Accept-Language 1512 * header which matches the language of this variant either fully, 1513 * or as far as the prefix marker (-). If two or more languages 1514 * match, use the longest string from the Accept-Language header 1515 * (see HTTP/1.1 [14.4]) 1516 * 1517 * When a variant has multiple languages, we find the 'best' 1518 * match for each variant language tag as above, then select the 1519 * one with the highest q value. Because both the accept-header 1520 * and variant can have multiple languages, we now have a hairy 1521 * loop-within-a-loop here. 1522 * 1523 * If the variant has no language and we have no Accept-Language 1524 * items, leave the quality at 1.0 and return. 1525 * 1526 * If the variant has no language, we use the default as set by 1527 * set_default_lang_quality() (1.0 if we are not negotiating on 1528 * language, 0.001 if we are). 1529 * 1530 * Following the setting of the language quality, we drop through to 1531 * set the old 'lang_index'. This is set based on either the order 1532 * of the languages on the Accept-Language header, or the 1533 * order on the LanguagePriority directive. This is only used 1534 * in the negotiation if the language qualities tie. 1535 */ 1536 1537static void set_language_quality(negotiation_state *neg, var_rec *variant) 1538{ 1539 int forcepriority = neg->conf->forcelangpriority; 1540 if (forcepriority == FLP_UNDEF) { 1541 forcepriority = FLP_DEFAULT; 1542 } 1543 1544 if (!variant->content_languages || !variant->content_languages->nelts) { 1545 /* This variant has no content-language, so use the default 1546 * quality factor for variants with no content-language 1547 * (previously set by set_default_lang_quality()). 1548 * Leave the factor alone (it remains at 1.0) when we may not fiddle 1549 * with the headers. 1550 */ 1551 if (!neg->dont_fiddle_headers) { 1552 variant->lang_quality = neg->default_lang_quality; 1553 } 1554 if (!neg->accept_langs) { 1555 return; /* no accept-language header */ 1556 } 1557 return; 1558 } 1559 else { 1560 /* Variant has one (or more) languages. Look for the best 1561 * match. We do this by going through each language on the 1562 * variant description looking for a match on the 1563 * Accept-Language header. The best match is the longest 1564 * matching language on the header. The final result is the 1565 * best q value from all the languages on the variant 1566 * description. 1567 */ 1568 1569 if (!neg->accept_langs) { 1570 /* no accept-language header makes the variant indefinite */ 1571 variant->definite = 0; 1572 } 1573 else { /* There is an accept-language with 0 or more items */ 1574 accept_rec *accs = (accept_rec *) neg->accept_langs->elts; 1575 accept_rec *best = NULL, *star = NULL; 1576 accept_rec *bestthistag; 1577 char *lang, *p; 1578 float fiddle_q = 0.0f; 1579 int any_match_on_star = 0; 1580 int i, j; 1581 apr_size_t alen, longest_lang_range_len; 1582 1583 for (j = 0; j < variant->content_languages->nelts; ++j) { 1584 p = NULL; 1585 bestthistag = NULL; 1586 longest_lang_range_len = 0; 1587 1588 /* lang is the variant's language-tag, which is the one 1589 * we are allowed to use the prefix of in HTTP/1.1 1590 */ 1591 lang = ((char **) (variant->content_languages->elts))[j]; 1592 1593 /* now find the best (i.e. longest) matching 1594 * Accept-Language header language. We put the best match 1595 * for this tag in bestthistag. We cannot update the 1596 * overall best (based on q value) because the best match 1597 * for this tag is the longest language item on the accept 1598 * header, not necessarily the highest q. 1599 */ 1600 for (i = 0; i < neg->accept_langs->nelts; ++i) { 1601 if (!strcmp(accs[i].name, "*")) { 1602 if (!star) { 1603 star = &accs[i]; 1604 } 1605 continue; 1606 } 1607 /* Find language. We match if either the variant 1608 * language tag exactly matches the language range 1609 * from the accept header, or a prefix of the variant 1610 * language tag up to a '-' character matches the 1611 * whole of the language range in the Accept-Language 1612 * header. Note that HTTP/1.x allows any number of 1613 * '-' characters in a tag or range, currently only 1614 * tags with zero or one '-' characters are defined 1615 * for general use (see rfc1766). 1616 * 1617 * We only use language range in the Accept-Language 1618 * header the best match for the variant language tag 1619 * if it is longer than the previous best match. 1620 */ 1621 1622 alen = strlen(accs[i].name); 1623 1624 if ((strlen(lang) >= alen) && 1625 !strncmp(lang, accs[i].name, alen) && 1626 ((lang[alen] == 0) || (lang[alen] == '-')) ) { 1627 1628 if (alen > longest_lang_range_len) { 1629 longest_lang_range_len = alen; 1630 bestthistag = &accs[i]; 1631 } 1632 } 1633 1634 if (!bestthistag && !neg->dont_fiddle_headers) { 1635 /* The next bit is a fiddle. Some browsers might 1636 * be configured to send more specific language 1637 * ranges than desirable. For example, an 1638 * Accept-Language of en-US should never match 1639 * variants with languages en or en-GB. But US 1640 * English speakers might pick en-US as their 1641 * language choice. So this fiddle checks if the 1642 * language range has a prefix, and if so, it 1643 * matches variants which match that prefix with a 1644 * priority of 0.001. So a request for en-US would 1645 * match variants of types en and en-GB, but at 1646 * much lower priority than matches of en-US 1647 * directly, or of any other language listed on 1648 * the Accept-Language header. Note that this 1649 * fiddle does not handle multi-level prefixes. 1650 */ 1651 if ((p = strchr(accs[i].name, '-'))) { 1652 int plen = p - accs[i].name; 1653 1654 if (!strncmp(lang, accs[i].name, plen)) { 1655 fiddle_q = 0.001f; 1656 } 1657 } 1658 } 1659 } 1660 /* Finished looking at Accept-Language headers, the best 1661 * (longest) match is in bestthistag, or NULL if no match 1662 */ 1663 if (!best || 1664 (bestthistag && bestthistag->quality > best->quality)) { 1665 best = bestthistag; 1666 } 1667 1668 /* See if the tag matches on a * in the Accept-Language 1669 * header. If so, record this fact for later use 1670 */ 1671 if (!bestthistag && star) { 1672 any_match_on_star = 1; 1673 } 1674 } 1675 1676 /* If one of the language tags of the variant matched on *, we 1677 * need to see if its q is better than that of any non-* match 1678 * on any other tag of the variant. If so the * match takes 1679 * precedence and the overall match is not definite. 1680 */ 1681 if ( any_match_on_star && 1682 ((best && star->quality > best->quality) || 1683 (!best)) ) { 1684 best = star; 1685 variant->definite = 0; 1686 } 1687 1688 variant->lang_quality = best ? best->quality : fiddle_q; 1689 } 1690 } 1691 1692 /* Handle the ForceDefaultLanguage overrides, based on the best match 1693 * to LanguagePriority order. The best match is the lowest index of 1694 * any LanguagePriority match. 1695 */ 1696 if (((forcepriority & FLP_PREFER) 1697 && (variant->lang_index < 0)) 1698 || ((forcepriority & FLP_FALLBACK) 1699 && !variant->lang_quality)) 1700 { 1701 int bestidx = -1; 1702 int j; 1703 1704 for (j = 0; j < variant->content_languages->nelts; ++j) 1705 { 1706 /* lang is the variant's language-tag, which is the one 1707 * we are allowed to use the prefix of in HTTP/1.1 1708 */ 1709 char *lang = ((char **) (variant->content_languages->elts))[j]; 1710 int idx = -1; 1711 1712 /* If we wish to fallback or 1713 * we use our own LanguagePriority index. 1714 */ 1715 idx = find_lang_index(neg->conf->language_priority, lang); 1716 if ((idx >= 0) && ((bestidx == -1) || (idx < bestidx))) { 1717 bestidx = idx; 1718 } 1719 } 1720 1721 if (bestidx >= 0) { 1722 if (variant->lang_quality) { 1723 if (forcepriority & FLP_PREFER) { 1724 variant->lang_index = bestidx; 1725 } 1726 } 1727 else { 1728 if (forcepriority & FLP_FALLBACK) { 1729 variant->lang_index = bestidx; 1730 variant->lang_quality = .0001f; 1731 variant->definite = 0; 1732 } 1733 } 1734 } 1735 } 1736 return; 1737} 1738 1739/* Determining the content length --- if the map didn't tell us, 1740 * we have to do a stat() and remember for next time. 1741 */ 1742 1743static apr_off_t find_content_length(negotiation_state *neg, var_rec *variant) 1744{ 1745 apr_finfo_t statb; 1746 1747 if (variant->bytes < 0) { 1748 if ( variant->sub_req 1749 && (variant->sub_req->finfo.valid & APR_FINFO_SIZE)) { 1750 variant->bytes = variant->sub_req->finfo.size; 1751 } 1752 else { 1753 char *fullname = ap_make_full_path(neg->pool, neg->dir_name, 1754 variant->file_name); 1755 1756 if (apr_stat(&statb, fullname, 1757 APR_FINFO_SIZE, neg->pool) == APR_SUCCESS) { 1758 variant->bytes = statb.size; 1759 } 1760 } 1761 } 1762 1763 return variant->bytes; 1764} 1765 1766/* For a given variant, find the best matching Accept: header 1767 * and assign the Accept: header's quality value to the 1768 * mime_type_quality field of the variant, for later use in 1769 * determining the best matching variant. 1770 */ 1771 1772static void set_accept_quality(negotiation_state *neg, var_rec *variant) 1773{ 1774 int i; 1775 accept_rec *accept_recs; 1776 float q = 0.0f; 1777 int q_definite = 1; 1778 1779 /* if no Accept: header, leave quality alone (will 1780 * remain at the default value of 1) 1781 * 1782 * XXX: This if is currently never true because of the effect of 1783 * maybe_add_default_accepts(). 1784 */ 1785 if (!neg->accepts) { 1786 if (variant->mime_type && *variant->mime_type) 1787 variant->definite = 0; 1788 return; 1789 } 1790 1791 accept_recs = (accept_rec *) neg->accepts->elts; 1792 1793 /* 1794 * Go through each of the ranges on the Accept: header, 1795 * looking for the 'best' match with this variant's 1796 * content-type. We use the best match's quality 1797 * value (from the Accept: header) for this variant's 1798 * mime_type_quality field. 1799 * 1800 * The best match is determined like this: 1801 * type/type is better than type/ * is better than * / * 1802 * if match is type/type, use the level mime param if available 1803 */ 1804 for (i = 0; i < neg->accepts->nelts; ++i) { 1805 1806 accept_rec *type = &accept_recs[i]; 1807 int prev_mime_stars; 1808 1809 prev_mime_stars = variant->mime_stars; 1810 1811 if (!mime_match(type, variant)) { 1812 continue; /* didn't match the content type at all */ 1813 } 1814 else { 1815 /* did match - see if there were less or more stars than 1816 * in previous match 1817 */ 1818 if (prev_mime_stars == variant->mime_stars) { 1819 continue; /* more stars => not as good a match */ 1820 } 1821 } 1822 1823 /* If we are allowed to mess with the q-values 1824 * and have no explicit q= parameters in the accept header, 1825 * make wildcards very low, so we have a low chance 1826 * of ending up with them if there's something better. 1827 */ 1828 1829 if (!neg->dont_fiddle_headers && !neg->accept_q && 1830 variant->mime_stars == 1) { 1831 q = 0.01f; 1832 } 1833 else if (!neg->dont_fiddle_headers && !neg->accept_q && 1834 variant->mime_stars == 2) { 1835 q = 0.02f; 1836 } 1837 else { 1838 q = type->quality; 1839 } 1840 1841 q_definite = (variant->mime_stars == 3); 1842 } 1843 variant->mime_type_quality = q; 1844 variant->definite = variant->definite && q_definite; 1845 1846} 1847 1848/* For a given variant, find the 'q' value of the charset given 1849 * on the Accept-Charset line. If no charsets are listed, 1850 * assume value of '1'. 1851 */ 1852static void set_charset_quality(negotiation_state *neg, var_rec *variant) 1853{ 1854 int i; 1855 accept_rec *accept_recs; 1856 const char *charset = variant->content_charset; 1857 accept_rec *star = NULL; 1858 1859 /* if no Accept-Charset: header, leave quality alone (will 1860 * remain at the default value of 1) 1861 */ 1862 if (!neg->accept_charsets) { 1863 if (charset && *charset) 1864 variant->definite = 0; 1865 return; 1866 } 1867 1868 accept_recs = (accept_rec *) neg->accept_charsets->elts; 1869 1870 if (charset == NULL || !*charset) { 1871 /* Charset of variant not known */ 1872 1873 /* if not a text / * type, leave quality alone */ 1874 if (!(!strncmp(variant->mime_type, "text/", 5) 1875 || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE) 1876 || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE3) 1877 )) 1878 return; 1879 1880 /* Don't go guessing if we are in strict header mode, 1881 * e.g. when running the rvsa, as any guess won't be reflected 1882 * in the variant list or content-location headers. 1883 */ 1884 if (neg->dont_fiddle_headers) 1885 return; 1886 1887 charset = "iso-8859-1"; /* The default charset for HTTP text types */ 1888 } 1889 1890 /* 1891 * Go through each of the items on the Accept-Charset header, 1892 * looking for a match with this variant's charset. If none 1893 * match, charset is unacceptable, so set quality to 0. 1894 */ 1895 for (i = 0; i < neg->accept_charsets->nelts; ++i) { 1896 1897 accept_rec *type = &accept_recs[i]; 1898 1899 if (!strcmp(type->name, charset)) { 1900 variant->charset_quality = type->quality; 1901 return; 1902 } 1903 else if (strcmp(type->name, "*") == 0) { 1904 star = type; 1905 } 1906 } 1907 /* No explicit match */ 1908 if (star) { 1909 variant->charset_quality = star->quality; 1910 variant->definite = 0; 1911 return; 1912 } 1913 /* If this variant is in charset iso-8859-1, the default is 1.0 */ 1914 if (strcmp(charset, "iso-8859-1") == 0) { 1915 variant->charset_quality = 1.0f; 1916 } 1917 else { 1918 variant->charset_quality = 0.0f; 1919 } 1920} 1921 1922 1923/* is_identity_encoding is included for back-compat, but does anyone 1924 * use 7bit, 8bin or binary in their var files?? 1925 */ 1926 1927static int is_identity_encoding(const char *enc) 1928{ 1929 return (!enc || !enc[0] || !strcmp(enc, "7bit") || !strcmp(enc, "8bit") 1930 || !strcmp(enc, "binary")); 1931} 1932 1933/* 1934 * set_encoding_quality determines whether the encoding for a particular 1935 * variant is acceptable for the user-agent. 1936 * 1937 * The rules for encoding are that if the user-agent does not supply 1938 * any Accept-Encoding header, then all encodings are allowed but a 1939 * variant with no encoding should be preferred. 1940 * If there is an empty Accept-Encoding header, then no encodings are 1941 * acceptable. If there is a non-empty Accept-Encoding header, then 1942 * any of the listed encodings are acceptable, as well as no encoding 1943 * unless the "identity" encoding is specifically excluded. 1944 */ 1945static void set_encoding_quality(negotiation_state *neg, var_rec *variant) 1946{ 1947 accept_rec *accept_recs; 1948 const char *enc = variant->content_encoding; 1949 accept_rec *star = NULL; 1950 float value_if_not_found = 0.0f; 1951 int i; 1952 1953 if (!neg->accept_encodings) { 1954 /* We had no Accept-Encoding header, assume that all 1955 * encodings are acceptable with a low quality, 1956 * but we prefer no encoding if available. 1957 */ 1958 if (!enc || is_identity_encoding(enc)) 1959 variant->encoding_quality = 1.0f; 1960 else 1961 variant->encoding_quality = 0.5f; 1962 1963 return; 1964 } 1965 1966 if (!enc || is_identity_encoding(enc)) { 1967 enc = "identity"; 1968 value_if_not_found = 0.0001f; 1969 } 1970 1971 accept_recs = (accept_rec *) neg->accept_encodings->elts; 1972 1973 /* Go through each of the encodings on the Accept-Encoding: header, 1974 * looking for a match with our encoding. x- prefixes are ignored. 1975 */ 1976 if (enc[0] == 'x' && enc[1] == '-') { 1977 enc += 2; 1978 } 1979 for (i = 0; i < neg->accept_encodings->nelts; ++i) { 1980 1981 char *name = accept_recs[i].name; 1982 1983 if (name[0] == 'x' && name[1] == '-') { 1984 name += 2; 1985 } 1986 1987 if (!strcmp(name, enc)) { 1988 variant->encoding_quality = accept_recs[i].quality; 1989 return; 1990 } 1991 1992 if (strcmp(name, "*") == 0) { 1993 star = &accept_recs[i]; 1994 } 1995 1996 } 1997 /* No explicit match */ 1998 if (star) { 1999 variant->encoding_quality = star->quality; 2000 return; 2001 } 2002 2003 /* Encoding not found on Accept-Encoding: header, so it is 2004 * _not_ acceptable unless it is the identity (no encoding) 2005 */ 2006 variant->encoding_quality = value_if_not_found; 2007} 2008 2009/************************************************************* 2010 * Possible results of the variant selection algorithm 2011 */ 2012enum algorithm_results { 2013 alg_choice = 1, /* choose variant */ 2014 alg_list /* list variants */ 2015}; 2016 2017/* Below is the 'best_match' function. It returns an int, which has 2018 * one of the two values alg_choice or alg_list, which give the result 2019 * of the variant selection algorithm. alg_list means that no best 2020 * variant was found by the algorithm, alg_choice means that a best 2021 * variant was found and should be returned. The list/choice 2022 * terminology comes from TCN (rfc2295), but is used in a more generic 2023 * way here. The best variant is returned in *pbest. best_match has 2024 * two possible algorithms for determining the best variant: the 2025 * RVSA/1.0 algorithm (from RFC2296), and the standard Apache 2026 * algorithm. These are split out into separate functions 2027 * (is_variant_better_rvsa() and is_variant_better()). Selection of 2028 * one is through the neg->use_rvsa flag. 2029 * 2030 * The call to best_match also creates full information, including 2031 * language, charset, etc quality for _every_ variant. This is needed 2032 * for generating a correct Vary header, and can be used for the 2033 * Alternates header, the human-readable list responses and 406 errors. 2034 */ 2035 2036/* Firstly, the RVSA/1.0 (HTTP Remote Variant Selection Algorithm 2037 * v1.0) from rfc2296. This is the algorithm that goes together with 2038 * transparent content negotiation (TCN). 2039 */ 2040static int is_variant_better_rvsa(negotiation_state *neg, var_rec *variant, 2041 var_rec *best, float *p_bestq) 2042{ 2043 float bestq = *p_bestq, q; 2044 2045 /* TCN does not cover negotiation on content-encoding. For now, 2046 * we ignore the encoding unless it was explicitly excluded. 2047 */ 2048 if (variant->encoding_quality == 0.0f) 2049 return 0; 2050 2051 q = variant->mime_type_quality * 2052 variant->source_quality * 2053 variant->charset_quality * 2054 variant->lang_quality; 2055 2056 /* RFC 2296 calls for the result to be rounded to 5 decimal places, 2057 * but we don't do that because it serves no useful purpose other 2058 * than to ensure that a remote algorithm operates on the same 2059 * precision as ours. That is silly, since what we obviously want 2060 * is for the algorithm to operate on the best available precision 2061 * regardless of who runs it. Since the above calculation may 2062 * result in significant variance at 1e-12, rounding would be bogus. 2063 */ 2064 2065#ifdef NEG_DEBUG 2066 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00688) 2067 "Variant: file=%s type=%s lang=%s sourceq=%1.3f " 2068 "mimeq=%1.3f langq=%1.3f charq=%1.3f encq=%1.3f " 2069 "q=%1.5f definite=%d", 2070 (variant->file_name ? variant->file_name : ""), 2071 (variant->mime_type ? variant->mime_type : ""), 2072 (variant->content_languages 2073 ? apr_array_pstrcat(neg->pool, variant->content_languages, ',') 2074 : ""), 2075 variant->source_quality, 2076 variant->mime_type_quality, 2077 variant->lang_quality, 2078 variant->charset_quality, 2079 variant->encoding_quality, 2080 q, 2081 variant->definite); 2082#endif 2083 2084 if (q <= 0.0f) { 2085 return 0; 2086 } 2087 if (q > bestq) { 2088 *p_bestq = q; 2089 return 1; 2090 } 2091 if (q == bestq) { 2092 /* If the best variant's encoding is of lesser quality than 2093 * this variant, then we prefer this variant 2094 */ 2095 if (variant->encoding_quality > best->encoding_quality) { 2096 *p_bestq = q; 2097 return 1; 2098 } 2099 } 2100 return 0; 2101} 2102 2103/* Negotiation algorithm as used by previous versions of Apache 2104 * (just about). 2105 */ 2106 2107static int is_variant_better(negotiation_state *neg, var_rec *variant, 2108 var_rec *best, float *p_bestq) 2109{ 2110 float bestq = *p_bestq, q; 2111 int levcmp; 2112 2113 /* For non-transparent negotiation, server can choose how 2114 * to handle the negotiation. We'll use the following in 2115 * order: content-type, language, content-type level, charset, 2116 * content encoding, content length. 2117 * 2118 * For each check, we have three possible outcomes: 2119 * This variant is worse than current best: return 0 2120 * This variant is better than the current best: 2121 * assign this variant's q to *p_bestq, and return 1 2122 * This variant is just as desirable as the current best: 2123 * drop through to the next test. 2124 * 2125 * This code is written in this long-winded way to allow future 2126 * customisation, either by the addition of additional 2127 * checks, or to allow the order of the checks to be determined 2128 * by configuration options (e.g. we might prefer to check 2129 * language quality _before_ content type). 2130 */ 2131 2132 /* First though, eliminate this variant if it is not 2133 * acceptable by type, charset, encoding or language. 2134 */ 2135 2136#ifdef NEG_DEBUG 2137 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, APLOGNO(00689) 2138 "Variant: file=%s type=%s lang=%s sourceq=%1.3f " 2139 "mimeq=%1.3f langq=%1.3f langidx=%d charq=%1.3f encq=%1.3f ", 2140 (variant->file_name ? variant->file_name : ""), 2141 (variant->mime_type ? variant->mime_type : ""), 2142 (variant->content_languages 2143 ? apr_array_pstrcat(neg->pool, variant->content_languages, ',') 2144 : ""), 2145 variant->source_quality, 2146 variant->mime_type_quality, 2147 variant->lang_quality, 2148 variant->lang_index, 2149 variant->charset_quality, 2150 variant->encoding_quality); 2151#endif 2152 2153 if (variant->encoding_quality == 0.0f || 2154 variant->lang_quality == 0.0f || 2155 variant->source_quality == 0.0f || 2156 variant->charset_quality == 0.0f || 2157 variant->mime_type_quality == 0.0f) { 2158 return 0; /* don't consider unacceptables */ 2159 } 2160 2161 q = variant->mime_type_quality * variant->source_quality; 2162 if (q == 0.0 || q < bestq) { 2163 return 0; 2164 } 2165 if (q > bestq || !best) { 2166 *p_bestq = q; 2167 return 1; 2168 } 2169 2170 /* language */ 2171 if (variant->lang_quality < best->lang_quality) { 2172 return 0; 2173 } 2174 if (variant->lang_quality > best->lang_quality) { 2175 *p_bestq = q; 2176 return 1; 2177 } 2178 2179 /* if language qualities were equal, try the LanguagePriority stuff */ 2180 if (best->lang_index != -1 && 2181 (variant->lang_index == -1 || variant->lang_index > best->lang_index)) { 2182 return 0; 2183 } 2184 if (variant->lang_index != -1 && 2185 (best->lang_index == -1 || variant->lang_index < best->lang_index)) { 2186 *p_bestq = q; 2187 return 1; 2188 } 2189 2190 /* content-type level (sometimes used with text/html, though we 2191 * support it on other types too) 2192 */ 2193 levcmp = level_cmp(variant, best); 2194 if (levcmp == -1) { 2195 return 0; 2196 } 2197 if (levcmp == 1) { 2198 *p_bestq = q; 2199 return 1; 2200 } 2201 2202 /* charset */ 2203 if (variant->charset_quality < best->charset_quality) { 2204 return 0; 2205 } 2206 /* If the best variant's charset is ISO-8859-1 and this variant has 2207 * the same charset quality, then we prefer this variant 2208 */ 2209 2210 if (variant->charset_quality > best->charset_quality || 2211 ((variant->content_charset != NULL && 2212 *variant->content_charset != '\0' && 2213 strcmp(variant->content_charset, "iso-8859-1") != 0) && 2214 (best->content_charset == NULL || 2215 *best->content_charset == '\0' || 2216 strcmp(best->content_charset, "iso-8859-1") == 0))) { 2217 *p_bestq = q; 2218 return 1; 2219 } 2220 2221 /* Prefer the highest value for encoding_quality. 2222 */ 2223 if (variant->encoding_quality < best->encoding_quality) { 2224 return 0; 2225 } 2226 if (variant->encoding_quality > best->encoding_quality) { 2227 *p_bestq = q; 2228 return 1; 2229 } 2230 2231 /* content length if all else equal */ 2232 if (find_content_length(neg, variant) >= find_content_length(neg, best)) { 2233 return 0; 2234 } 2235 2236 /* ok, to get here means every thing turned out equal, except 2237 * we have a shorter content length, so use this variant 2238 */ 2239 *p_bestq = q; 2240 return 1; 2241} 2242 2243/* figure out, whether a variant is in a specific language 2244 * it returns also false, if the variant has no language. 2245 */ 2246static int variant_has_language(var_rec *variant, const char *lang) 2247{ 2248 int j, max; 2249 2250 /* fast exit */ 2251 if ( !lang 2252 || !variant->content_languages 2253 || !(max = variant->content_languages->nelts)) { 2254 return 0; 2255 } 2256 2257 for (j = 0; j < max; ++j) { 2258 if (!strcmp(lang, 2259 ((char **) (variant->content_languages->elts))[j])) { 2260 return 1; 2261 } 2262 } 2263 2264 return 0; 2265} 2266 2267/* check for environment variables 'no-gzip' and 2268 * 'gzip-only-text/html' to get a behaviour similiar 2269 * to mod_deflate 2270 */ 2271static int discard_variant_by_env(var_rec *variant, int discard) 2272{ 2273 if ( is_identity_encoding(variant->content_encoding) 2274 || !strcmp(variant->content_encoding, "identity")) { 2275 return 0; 2276 } 2277 2278 return ( (discard == DISCARD_ALL_ENCODINGS) 2279 || (discard == DISCARD_ALL_BUT_HTML 2280 && (!variant->mime_type 2281 || strncmp(variant->mime_type, "text/html", 9)))); 2282} 2283 2284static int best_match(negotiation_state *neg, var_rec **pbest) 2285{ 2286 int j; 2287 var_rec *best; 2288 float bestq = 0.0f; 2289 enum algorithm_results algorithm_result; 2290 int may_discard = 0; 2291 2292 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2293 2294 /* fetch request dependent variables 2295 * prefer-language: prefer a certain language. 2296 */ 2297 const char *preferred_language = apr_table_get(neg->r->subprocess_env, 2298 "prefer-language"); 2299 2300 /* no-gzip: do not send encoded documents */ 2301 if (apr_table_get(neg->r->subprocess_env, "no-gzip")) { 2302 may_discard = DISCARD_ALL_ENCODINGS; 2303 } 2304 2305 /* gzip-only-text/html: send encoded documents only 2306 * if they are text/html. (no-gzip has a higher priority). 2307 */ 2308 else { 2309 const char *env_value = apr_table_get(neg->r->subprocess_env, 2310 "gzip-only-text/html"); 2311 2312 if (env_value && !strcmp(env_value, "1")) { 2313 may_discard = DISCARD_ALL_BUT_HTML; 2314 } 2315 } 2316 2317 set_default_lang_quality(neg); 2318 2319 /* 2320 * Find the 'best' variant 2321 * We run the loop possibly twice: if "prefer-language" 2322 * environment variable is set but we did not find an appropriate 2323 * best variant. In that case forget the preferred language and 2324 * negotiate over all variants. 2325 */ 2326 2327 do { 2328 best = NULL; 2329 2330 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2331 var_rec *variant = &avail_recs[j]; 2332 2333 /* if this variant is encoded somehow and there are special 2334 * variables set, we do not negotiate it. see above. 2335 */ 2336 if ( may_discard 2337 && discard_variant_by_env(variant, may_discard)) { 2338 continue; 2339 } 2340 2341 /* if a language is preferred, but the current variant 2342 * is not in that language, then drop it for now 2343 */ 2344 if ( preferred_language 2345 && !variant_has_language(variant, preferred_language)) { 2346 continue; 2347 } 2348 2349 /* Find all the relevant 'quality' values from the 2350 * Accept... headers, and store in the variant. This also 2351 * prepares for sending an Alternates header etc so we need to 2352 * do it even if we do not actually plan to find a best 2353 * variant. 2354 */ 2355 set_accept_quality(neg, variant); 2356 /* accept the preferred language, even when it's not listed within 2357 * the Accept-Language header 2358 */ 2359 if (preferred_language) { 2360 variant->lang_quality = 1.0f; 2361 variant->definite = 1; 2362 } 2363 else { 2364 set_language_quality(neg, variant); 2365 } 2366 set_encoding_quality(neg, variant); 2367 set_charset_quality(neg, variant); 2368 2369 /* Only do variant selection if we may actually choose a 2370 * variant for the client 2371 */ 2372 if (neg->may_choose) { 2373 2374 /* Now find out if this variant is better than the current 2375 * best, either using the RVSA/1.0 algorithm, or Apache's 2376 * internal server-driven algorithm. Presumably other 2377 * server-driven algorithms are possible, and could be 2378 * implemented here. 2379 */ 2380 2381 if (neg->use_rvsa) { 2382 if (is_variant_better_rvsa(neg, variant, best, &bestq)) { 2383 best = variant; 2384 } 2385 } 2386 else { 2387 if (is_variant_better(neg, variant, best, &bestq)) { 2388 best = variant; 2389 } 2390 } 2391 } 2392 } 2393 2394 /* We now either have a best variant, or no best variant */ 2395 2396 if (neg->use_rvsa) { 2397 /* calculate result for RVSA/1.0 algorithm: 2398 * only a choice response if the best variant has q>0 2399 * and is definite 2400 */ 2401 algorithm_result = (best && best->definite) && (bestq > 0) ? 2402 alg_choice : alg_list; 2403 } 2404 else { 2405 /* calculate result for Apache negotiation algorithm */ 2406 algorithm_result = bestq > 0 ? alg_choice : alg_list; 2407 } 2408 2409 /* run the loop again, if the "prefer-language" got no clear result */ 2410 if (preferred_language && (!best || algorithm_result != alg_choice)) { 2411 preferred_language = NULL; 2412 continue; 2413 } 2414 2415 break; 2416 } while (1); 2417 2418 /* Returning a choice response with a non-neighboring variant is a 2419 * protocol security error in TCN (see rfc2295). We do *not* 2420 * verify here that the variant and URI are neighbors, even though 2421 * we may return alg_choice. We depend on the environment (the 2422 * caller) to only declare the resource transparently negotiable if 2423 * all variants are neighbors. 2424 */ 2425 *pbest = best; 2426 return algorithm_result; 2427} 2428 2429/* Sets response headers for a negotiated response. 2430 * neg->is_transparent determines whether a transparently negotiated 2431 * response or a plain `server driven negotiation' response is 2432 * created. Applicable headers are Alternates, Vary, and TCN. 2433 * 2434 * The Vary header we create is sometimes longer than is required for 2435 * the correct caching of negotiated results by HTTP/1.1 caches. For 2436 * example if we have 3 variants x.html, x.ps.en and x.ps.nl, and if 2437 * the Accept: header assigns a 0 quality to .ps, then the results of 2438 * the two server-side negotiation algorithms we currently implement 2439 * will never depend on Accept-Language so we could return `Vary: 2440 * negotiate, accept' instead of the longer 'Vary: negotiate, accept, 2441 * accept-language' which the code below will return. A routine for 2442 * computing the exact minimal Vary header would be a huge pain to code 2443 * and maintain though, especially because we need to take all possible 2444 * twiddles in the server-side negotiation algorithms into account. 2445 */ 2446static void set_neg_headers(request_rec *r, negotiation_state *neg, 2447 int alg_result) 2448{ 2449 apr_table_t *hdrs; 2450 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2451 const char *sample_type = NULL; 2452 const char *sample_language = NULL; 2453 const char *sample_encoding = NULL; 2454 const char *sample_charset = NULL; 2455 char *lang; 2456 char *qstr; 2457 apr_off_t len; 2458 apr_array_header_t *arr; 2459 int max_vlist_array = (neg->avail_vars->nelts * 21); 2460 int first_variant = 1; 2461 int vary_by_type = 0; 2462 int vary_by_language = 0; 2463 int vary_by_charset = 0; 2464 int vary_by_encoding = 0; 2465 int j; 2466 2467 /* In order to avoid O(n^2) memory copies in building Alternates, 2468 * we preallocate a apr_table_t with the maximum substrings possible, 2469 * fill it with the variant list, and then concatenate the entire array. 2470 * Note that if you change the number of substrings pushed, you also 2471 * need to change the calculation of max_vlist_array above. 2472 */ 2473 if (neg->send_alternates && neg->avail_vars->nelts) 2474 arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *)); 2475 else 2476 arr = NULL; 2477 2478 /* Put headers into err_headers_out, since send_http_header() 2479 * outputs both headers_out and err_headers_out. 2480 */ 2481 hdrs = r->err_headers_out; 2482 2483 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2484 var_rec *variant = &avail_recs[j]; 2485 2486 if (variant->content_languages && variant->content_languages->nelts) { 2487 lang = apr_array_pstrcat(r->pool, variant->content_languages, ','); 2488 } 2489 else { 2490 lang = NULL; 2491 } 2492 2493 /* Calculate Vary by looking for any difference between variants */ 2494 2495 if (first_variant) { 2496 sample_type = variant->mime_type; 2497 sample_charset = variant->content_charset; 2498 sample_language = lang; 2499 sample_encoding = variant->content_encoding; 2500 } 2501 else { 2502 if (!vary_by_type && 2503 strcmp(sample_type ? sample_type : "", 2504 variant->mime_type ? variant->mime_type : "")) { 2505 vary_by_type = 1; 2506 } 2507 if (!vary_by_charset && 2508 strcmp(sample_charset ? sample_charset : "", 2509 variant->content_charset ? 2510 variant->content_charset : "")) { 2511 vary_by_charset = 1; 2512 } 2513 if (!vary_by_language && 2514 strcmp(sample_language ? sample_language : "", 2515 lang ? lang : "")) { 2516 vary_by_language = 1; 2517 } 2518 if (!vary_by_encoding && 2519 strcmp(sample_encoding ? sample_encoding : "", 2520 variant->content_encoding ? 2521 variant->content_encoding : "")) { 2522 vary_by_encoding = 1; 2523 } 2524 } 2525 first_variant = 0; 2526 2527 if (!neg->send_alternates) 2528 continue; 2529 2530 /* Generate the string components for this Alternates entry */ 2531 2532 *((const char **) apr_array_push(arr)) = "{\""; 2533 *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, variant->file_name); 2534 *((const char **) apr_array_push(arr)) = "\" "; 2535 2536 qstr = (char *) apr_palloc(r->pool, 6); 2537 apr_snprintf(qstr, 6, "%1.3f", variant->source_quality); 2538 2539 /* Strip trailing zeros (saves those valuable network bytes) */ 2540 if (qstr[4] == '0') { 2541 qstr[4] = '\0'; 2542 if (qstr[3] == '0') { 2543 qstr[3] = '\0'; 2544 if (qstr[2] == '0') { 2545 qstr[1] = '\0'; 2546 } 2547 } 2548 } 2549 *((const char **) apr_array_push(arr)) = qstr; 2550 2551 if (variant->mime_type && *variant->mime_type) { 2552 *((const char **) apr_array_push(arr)) = " {type "; 2553 *((const char **) apr_array_push(arr)) = variant->mime_type; 2554 *((const char **) apr_array_push(arr)) = "}"; 2555 } 2556 if (variant->content_charset && *variant->content_charset) { 2557 *((const char **) apr_array_push(arr)) = " {charset "; 2558 *((const char **) apr_array_push(arr)) = variant->content_charset; 2559 *((const char **) apr_array_push(arr)) = "}"; 2560 } 2561 if (lang) { 2562 *((const char **) apr_array_push(arr)) = " {language "; 2563 *((const char **) apr_array_push(arr)) = lang; 2564 *((const char **) apr_array_push(arr)) = "}"; 2565 } 2566 if (variant->content_encoding && *variant->content_encoding) { 2567 /* Strictly speaking, this is non-standard, but so is TCN */ 2568 2569 *((const char **) apr_array_push(arr)) = " {encoding "; 2570 *((const char **) apr_array_push(arr)) = variant->content_encoding; 2571 *((const char **) apr_array_push(arr)) = "}"; 2572 } 2573 2574 /* Note that the Alternates specification (in rfc2295) does 2575 * not require that we include {length x}, so we could omit it 2576 * if determining the length is too expensive. We currently 2577 * always include it though. 2578 * 2579 * If the variant is a CGI script, find_content_length would 2580 * return the length of the script, not the output it 2581 * produces, so we check for the presence of a handler and if 2582 * there is one we don't add a length. 2583 * 2584 * XXX: TODO: This check does not detect a CGI script if we 2585 * get the variant from a type map. This needs to be fixed 2586 * (without breaking things if the type map specifies a 2587 * content-length, which currently leads to the correct result). 2588 */ 2589 if (!(variant->sub_req && variant->sub_req->handler) 2590 && (len = find_content_length(neg, variant)) >= 0) { 2591 2592 *((const char **) apr_array_push(arr)) = " {length "; 2593 *((const char **) apr_array_push(arr)) = apr_off_t_toa(r->pool, 2594 len); 2595 *((const char **) apr_array_push(arr)) = "}"; 2596 } 2597 2598 *((const char **) apr_array_push(arr)) = "}"; 2599 *((const char **) apr_array_push(arr)) = ", "; /* trimmed below */ 2600 } 2601 2602 if (neg->send_alternates && neg->avail_vars->nelts) { 2603 arr->nelts--; /* remove last comma */ 2604 apr_table_mergen(hdrs, "Alternates", 2605 apr_array_pstrcat(r->pool, arr, '\0')); 2606 } 2607 2608 if (neg->is_transparent || vary_by_type || vary_by_language || 2609 vary_by_charset || vary_by_encoding) { 2610 2611 apr_table_mergen(hdrs, "Vary", 2 + apr_pstrcat(r->pool, 2612 neg->is_transparent ? ", negotiate" : "", 2613 vary_by_type ? ", accept" : "", 2614 vary_by_language ? ", accept-language" : "", 2615 vary_by_charset ? ", accept-charset" : "", 2616 vary_by_encoding ? ", accept-encoding" : "", NULL)); 2617 } 2618 2619 if (neg->is_transparent) { /* Create TCN response header */ 2620 apr_table_setn(hdrs, "TCN", 2621 alg_result == alg_list ? "list" : "choice"); 2622 } 2623} 2624 2625/********************************************************************** 2626 * 2627 * Return an HTML list of variants. This is output as part of the 2628 * choice response or 406 status body. 2629 */ 2630 2631static char *make_variant_list(request_rec *r, negotiation_state *neg) 2632{ 2633 apr_array_header_t *arr; 2634 int i; 2635 int max_vlist_array = (neg->avail_vars->nelts * 15) + 2; 2636 2637 /* In order to avoid O(n^2) memory copies in building the list, 2638 * we preallocate a apr_table_t with the maximum substrings possible, 2639 * fill it with the variant list, and then concatenate the entire array. 2640 */ 2641 arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *)); 2642 2643 *((const char **) apr_array_push(arr)) = "Available variants:\n<ul>\n"; 2644 2645 for (i = 0; i < neg->avail_vars->nelts; ++i) { 2646 var_rec *variant = &((var_rec *) neg->avail_vars->elts)[i]; 2647 const char *filename = variant->file_name ? variant->file_name : ""; 2648 apr_array_header_t *languages = variant->content_languages; 2649 const char *description = variant->description 2650 ? variant->description 2651 : ""; 2652 2653 /* The format isn't very neat, and it would be nice to make 2654 * the tags human readable (eg replace 'language en' with 'English'). 2655 * Note that if you change the number of substrings pushed, you also 2656 * need to change the calculation of max_vlist_array above. 2657 */ 2658 *((const char **) apr_array_push(arr)) = "<li><a href=\""; 2659 *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, filename); 2660 *((const char **) apr_array_push(arr)) = "\">"; 2661 *((const char **) apr_array_push(arr)) = ap_escape_html(r->pool, filename); 2662 *((const char **) apr_array_push(arr)) = "</a> "; 2663 *((const char **) apr_array_push(arr)) = description; 2664 2665 if (variant->mime_type && *variant->mime_type) { 2666 *((const char **) apr_array_push(arr)) = ", type "; 2667 *((const char **) apr_array_push(arr)) = variant->mime_type; 2668 } 2669 if (languages && languages->nelts) { 2670 *((const char **) apr_array_push(arr)) = ", language "; 2671 *((const char **) apr_array_push(arr)) = apr_array_pstrcat(r->pool, 2672 languages, ','); 2673 } 2674 if (variant->content_charset && *variant->content_charset) { 2675 *((const char **) apr_array_push(arr)) = ", charset "; 2676 *((const char **) apr_array_push(arr)) = variant->content_charset; 2677 } 2678 if (variant->content_encoding) { 2679 *((const char **) apr_array_push(arr)) = ", encoding "; 2680 *((const char **) apr_array_push(arr)) = variant->content_encoding; 2681 } 2682 *((const char **) apr_array_push(arr)) = "</li>\n"; 2683 } 2684 *((const char **) apr_array_push(arr)) = "</ul>\n"; 2685 2686 return apr_array_pstrcat(r->pool, arr, '\0'); 2687} 2688 2689static void store_variant_list(request_rec *r, negotiation_state *neg) 2690{ 2691 if (r->main == NULL) { 2692 apr_table_setn(r->notes, "variant-list", make_variant_list(r, neg)); 2693 } 2694 else { 2695 apr_table_setn(r->main->notes, "variant-list", 2696 make_variant_list(r->main, neg)); 2697 } 2698} 2699 2700/* Called if we got a "Choice" response from the variant selection algorithm. 2701 * It checks the result of the chosen variant to see if it 2702 * is itself negotiated (if so, return error HTTP_VARIANT_ALSO_VARIES). 2703 * Otherwise, add the appropriate headers to the current response. 2704 */ 2705 2706static int setup_choice_response(request_rec *r, negotiation_state *neg, 2707 var_rec *variant) 2708{ 2709 request_rec *sub_req; 2710 const char *sub_vary; 2711 2712 if (!variant->sub_req) { 2713 int status; 2714 2715 sub_req = ap_sub_req_lookup_file(variant->file_name, r, r->output_filters); 2716 status = sub_req->status; 2717 2718 if (status != HTTP_OK && 2719 !apr_table_get(sub_req->err_headers_out, "TCN")) { 2720 ap_destroy_sub_req(sub_req); 2721 return status; 2722 } 2723 variant->sub_req = sub_req; 2724 } 2725 else { 2726 sub_req = variant->sub_req; 2727 } 2728 2729 /* The variant selection algorithm told us to return a "Choice" 2730 * response. This is the normal variant response, with 2731 * some extra headers. First, ensure that the chosen 2732 * variant did or will not itself engage in transparent negotiation. 2733 * If not, set the appropriate headers, and fall through to 2734 * the normal variant handling 2735 */ 2736 2737 /* This catches the error that a transparent type map selects a 2738 * transparent multiviews resource as the best variant. 2739 * 2740 * XXX: We do not signal an error if a transparent type map 2741 * selects a _non_transparent multiviews resource as the best 2742 * variant, because we can generate a legal negotiation response 2743 * in this case. In this case, the vlist_validator of the 2744 * nontransparent subrequest will be lost however. This could 2745 * lead to cases in which a change in the set of variants or the 2746 * negotiation algorithm of the nontransparent resource is never 2747 * propagated up to a HTTP/1.1 cache which interprets Vary. To be 2748 * completely on the safe side we should return HTTP_VARIANT_ALSO_VARIES 2749 * for this type of recursive negotiation too. 2750 */ 2751 if (neg->is_transparent && 2752 apr_table_get(sub_req->err_headers_out, "TCN")) { 2753 return HTTP_VARIANT_ALSO_VARIES; 2754 } 2755 2756 /* This catches the error that a transparent type map recursively 2757 * selects, as the best variant, another type map which itself 2758 * causes transparent negotiation to be done. 2759 * 2760 * XXX: Actually, we catch this error by catching all cases of 2761 * type map recursion. There are some borderline recursive type 2762 * map arrangements which would not produce transparent 2763 * negotiation protocol errors or lack of cache propagation 2764 * problems, but such arrangements are very hard to detect at this 2765 * point in the control flow, so we do not bother to single them 2766 * out. 2767 * 2768 * Recursive type maps imply a recursive arrangement of negotiated 2769 * resources which is visible to outside clients, and this is not 2770 * supported by the transparent negotiation caching protocols, so 2771 * if we are to have generic support for recursive type maps, we 2772 * have to create some configuration setting which makes all type 2773 * maps non-transparent when recursion is enabled. Also, if we 2774 * want recursive type map support which ensures propagation of 2775 * type map changes into HTTP/1.1 caches that handle Vary, we 2776 * would have to extend the current mechanism for generating 2777 * variant list validators. 2778 */ 2779 if (sub_req->handler && strcmp(sub_req->handler, "type-map") == 0) { 2780 return HTTP_VARIANT_ALSO_VARIES; 2781 } 2782 2783 /* This adds an appropriate Variant-Vary header if the subrequest 2784 * is a multiviews resource. 2785 * 2786 * XXX: TODO: Note that this does _not_ handle any Vary header 2787 * returned by a CGI if sub_req is a CGI script, because we don't 2788 * see that Vary header yet at this point in the control flow. 2789 * This won't cause any cache consistency problems _unless_ the 2790 * CGI script also returns a Cache-Control header marking the 2791 * response as cachable. This needs to be fixed, also there are 2792 * problems if a CGI returns an Etag header which also need to be 2793 * fixed. 2794 */ 2795 if ((sub_vary = apr_table_get(sub_req->err_headers_out, "Vary")) != NULL) { 2796 apr_table_setn(r->err_headers_out, "Variant-Vary", sub_vary); 2797 2798 /* Move the subreq Vary header into the main request to 2799 * prevent having two Vary headers in the response, which 2800 * would be legal but strange. 2801 */ 2802 apr_table_setn(r->err_headers_out, "Vary", sub_vary); 2803 apr_table_unset(sub_req->err_headers_out, "Vary"); 2804 } 2805 2806 apr_table_setn(r->err_headers_out, "Content-Location", 2807 ap_escape_path_segment(r->pool, variant->file_name)); 2808 2809 set_neg_headers(r, neg, alg_choice); /* add Alternates and Vary */ 2810 2811 /* Still to do by caller: add Expires */ 2812 2813 return 0; 2814} 2815 2816/**************************************************************** 2817 * 2818 * Executive... 2819 */ 2820 2821static int do_negotiation(request_rec *r, negotiation_state *neg, 2822 var_rec **bestp, int prefer_scripts) 2823{ 2824 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2825 int alg_result; /* result of variant selection algorithm */ 2826 int res; 2827 int j; 2828 2829 /* Decide if resource is transparently negotiable */ 2830 2831 /* GET or HEAD? (HEAD has same method number as GET) */ 2832 if (r->method_number == M_GET) { 2833 2834 /* maybe this should be configurable, see also the comment 2835 * about recursive type maps in setup_choice_response() 2836 */ 2837 neg->is_transparent = 1; 2838 2839 /* We can't be transparent if we are a map file in the middle 2840 * of the request URI. 2841 */ 2842 if (r->path_info && *r->path_info) 2843 neg->is_transparent = 0; 2844 2845 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2846 var_rec *variant = &avail_recs[j]; 2847 2848 /* We can't be transparent, because of internal 2849 * assumptions in best_match(), if there is a 2850 * non-neighboring variant. We can have a non-neighboring 2851 * variant when processing a type map. 2852 */ 2853 if (ap_strchr_c(variant->file_name, '/')) 2854 neg->is_transparent = 0; 2855 2856 /* We can't be transparent, because of the behavior 2857 * of variant typemap bodies. 2858 */ 2859 if (variant->body) { 2860 neg->is_transparent = 0; 2861 } 2862 } 2863 } 2864 2865 if (neg->is_transparent) { 2866 parse_negotiate_header(r, neg); 2867 } 2868 else { /* configure negotiation on non-transparent resource */ 2869 neg->may_choose = 1; 2870 } 2871 2872 maybe_add_default_accepts(neg, prefer_scripts); 2873 2874 alg_result = best_match(neg, bestp); 2875 2876 /* alg_result is one of 2877 * alg_choice: a best variant is chosen 2878 * alg_list: no best variant is chosen 2879 */ 2880 2881 if (alg_result == alg_list) { 2882 /* send a list response or HTTP_NOT_ACCEPTABLE error response */ 2883 2884 neg->send_alternates = 1; /* always include Alternates header */ 2885 set_neg_headers(r, neg, alg_result); 2886 store_variant_list(r, neg); 2887 2888 if (neg->is_transparent && neg->ua_supports_trans) { 2889 /* XXX todo: expires? cachability? */ 2890 2891 /* Some HTTP/1.0 clients are known to choke when they get 2892 * a 300 (multiple choices) response without a Location 2893 * header. However the 300 code response we are are about 2894 * to generate will only reach 1.0 clients which support 2895 * transparent negotiation, and they should be OK. The 2896 * response should never reach older 1.0 clients, even if 2897 * we have CacheNegotiatedDocs enabled, because no 1.0 2898 * proxy cache (we know of) will cache and return 300 2899 * responses (they certainly won't if they conform to the 2900 * HTTP/1.0 specification). 2901 */ 2902 return HTTP_MULTIPLE_CHOICES; 2903 } 2904 2905 if (!*bestp) { 2906 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(00690) 2907 "no acceptable variant: %s", r->filename); 2908 return HTTP_NOT_ACCEPTABLE; 2909 } 2910 } 2911 2912 /* Variant selection chose a variant */ 2913 2914 /* XXX todo: merge the two cases in the if statement below */ 2915 if (neg->is_transparent) { 2916 2917 if ((res = setup_choice_response(r, neg, *bestp)) != 0) { 2918 return res; /* return if error */ 2919 } 2920 } 2921 else { 2922 set_neg_headers(r, neg, alg_result); 2923 } 2924 2925 /* Make sure caching works - Vary should handle HTTP/1.1, but for 2926 * HTTP/1.0, we can't allow caching at all. 2927 */ 2928 2929 /* XXX: Note that we only set r->no_cache to 1, which causes 2930 * Expires: <now> to be added, when responding to a HTTP/1.0 2931 * client. If we return the response to a 1.1 client, we do not 2932 * add Expires <now>, because doing so would degrade 1.1 cache 2933 * performance by preventing re-use of the response without prior 2934 * revalidation. On the other hand, if the 1.1 client is a proxy 2935 * which was itself contacted by a 1.0 client, or a proxy cache 2936 * which can be contacted later by 1.0 clients, then we currently 2937 * rely on this 1.1 proxy to add the Expires: <now> when it 2938 * forwards the response. 2939 * 2940 * XXX: TODO: Find out if the 1.1 spec requires proxies and 2941 * tunnels to add Expires: <now> when forwarding the response to 2942 * 1.0 clients. I (kh) recall it is rather vague on this point. 2943 * Testing actual 1.1 proxy implementations would also be nice. If 2944 * Expires: <now> is not added by proxies then we need to always 2945 * include Expires: <now> ourselves to ensure correct caching, but 2946 * this would degrade HTTP/1.1 cache efficiency unless we also add 2947 * Cache-Control: max-age=N, which we currently don't. 2948 * 2949 * Roy: No, we are not going to screw over HTTP future just to 2950 * ensure that people who can't be bothered to upgrade their 2951 * clients will always receive perfect server-side negotiation. 2952 * Hell, those clients are sending bogus accept headers anyway. 2953 * 2954 * Manual setting of cache-control/expires always overrides this 2955 * automated kluge, on purpose. 2956 */ 2957 2958 if ((!do_cache_negotiated_docs(r->server) 2959 && (r->proto_num < HTTP_VERSION(1,1))) 2960 && neg->count_multiviews_variants != 1) { 2961 r->no_cache = 1; 2962 } 2963 2964 return OK; 2965} 2966 2967static int handle_map_file(request_rec *r) 2968{ 2969 negotiation_state *neg; 2970 apr_file_t *map; 2971 var_rec *best; 2972 int res; 2973 char *udir; 2974 const char *new_req; 2975 2976 if(strcmp(r->handler,MAP_FILE_MAGIC_TYPE) && strcmp(r->handler,"type-map")) 2977 return DECLINED; 2978 2979 neg = parse_accept_headers(r); 2980 if ((res = read_type_map(&map, neg, r))) { 2981 return res; 2982 } 2983 2984 res = do_negotiation(r, neg, &best, 0); 2985 if (res != 0) return res; 2986 2987 if (best->body) 2988 { 2989 conn_rec *c = r->connection; 2990 apr_bucket_brigade *bb; 2991 apr_bucket *e; 2992 2993 ap_allow_standard_methods(r, REPLACE_ALLOW, M_GET, M_OPTIONS, 2994 M_POST, -1); 2995 /* XXX: ? 2996 * if (r->method_number == M_OPTIONS) { 2997 * return ap_send_http_options(r); 2998 *} 2999 */ 3000 if (r->method_number != M_GET && r->method_number != M_POST) { 3001 return HTTP_METHOD_NOT_ALLOWED; 3002 } 3003 3004 /* ### These may be implemented by adding some 'extra' info 3005 * of the file offset onto the etag 3006 * ap_update_mtime(r, r->finfo.mtime); 3007 * ap_set_last_modified(r); 3008 * ap_set_etag(r); 3009 */ 3010 ap_set_accept_ranges(r); 3011 ap_set_content_length(r, best->bytes); 3012 3013 /* set MIME type and charset as negotiated */ 3014 if (best->mime_type && *best->mime_type) { 3015 if (best->content_charset && *best->content_charset) { 3016 ap_set_content_type(r, apr_pstrcat(r->pool, 3017 best->mime_type, 3018 "; charset=", 3019 best->content_charset, 3020 NULL)); 3021 } 3022 else { 3023 ap_set_content_type(r, apr_pstrdup(r->pool, best->mime_type)); 3024 } 3025 } 3026 3027 /* set Content-language(s) as negotiated */ 3028 if (best->content_languages && best->content_languages->nelts) { 3029 r->content_languages = apr_array_copy(r->pool, 3030 best->content_languages); 3031 } 3032 3033 /* set Content-Encoding as negotiated */ 3034 if (best->content_encoding && *best->content_encoding) { 3035 r->content_encoding = apr_pstrdup(r->pool, 3036 best->content_encoding); 3037 } 3038 3039 if ((res = ap_meets_conditions(r)) != OK) { 3040 return res; 3041 } 3042 3043 if ((res = ap_discard_request_body(r)) != OK) { 3044 return res; 3045 } 3046 bb = apr_brigade_create(r->pool, c->bucket_alloc); 3047 3048 apr_brigade_insert_file(bb, map, best->body, best->bytes, r->pool); 3049 3050 e = apr_bucket_eos_create(c->bucket_alloc); 3051 APR_BRIGADE_INSERT_TAIL(bb, e); 3052 3053 return ap_pass_brigade_fchk(r, bb, NULL); 3054 } 3055 3056 if (r->path_info && *r->path_info) { 3057 /* remove any path_info from the end of the uri before trying 3058 * to change the filename. r->path_info from the original 3059 * request is passed along on the redirect. 3060 */ 3061 r->uri[ap_find_path_info(r->uri, r->path_info)] = '\0'; 3062 } 3063 udir = ap_make_dirstr_parent(r->pool, r->uri); 3064 udir = ap_escape_uri(r->pool, udir); 3065 if (r->args) { 3066 if (r->path_info) { 3067 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3068 r->path_info, "?", r->args, NULL); 3069 } 3070 else { 3071 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3072 "?", r->args, NULL); 3073 } 3074 } 3075 else { 3076 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3077 r->path_info, NULL); 3078 } 3079 ap_internal_redirect(new_req, r); 3080 return OK; 3081} 3082 3083static int handle_multi(request_rec *r) 3084{ 3085 negotiation_state *neg; 3086 var_rec *best, *avail_recs; 3087 request_rec *sub_req; 3088 int res; 3089 int j; 3090 3091 if (r->finfo.filetype != APR_NOFILE 3092 || !(ap_allow_options(r) & OPT_MULTI)) { 3093 return DECLINED; 3094 } 3095 3096 neg = parse_accept_headers(r); 3097 3098 if ((res = read_types_multi(neg))) { 3099 return_from_multi: 3100 /* free all allocated memory from subrequests */ 3101 avail_recs = (var_rec *) neg->avail_vars->elts; 3102 for (j = 0; j < neg->avail_vars->nelts; ++j) { 3103 var_rec *variant = &avail_recs[j]; 3104 if (variant->sub_req) { 3105 ap_destroy_sub_req(variant->sub_req); 3106 } 3107 } 3108 return res; 3109 } 3110 if (neg->avail_vars->nelts == 0) { 3111 return DECLINED; 3112 } 3113 3114 res = do_negotiation(r, neg, &best, 3115 (r->method_number != M_GET) || r->args || 3116 (r->path_info && *r->path_info)); 3117 if (res != 0) 3118 goto return_from_multi; 3119 3120 if (!(sub_req = best->sub_req)) { 3121 /* We got this out of a map file, so we don't actually have 3122 * a sub_req structure yet. Get one now. 3123 */ 3124 3125 sub_req = ap_sub_req_lookup_file(best->file_name, r, r->output_filters); 3126 if (sub_req->status != HTTP_OK) { 3127 res = sub_req->status; 3128 ap_destroy_sub_req(sub_req); 3129 goto return_from_multi; 3130 } 3131 } 3132 if (sub_req->args == NULL) { 3133 sub_req->args = r->args; 3134 } 3135 3136 /* now do a "fast redirect" ... promotes the sub_req into the main req */ 3137 ap_internal_fast_redirect(sub_req, r); 3138 3139 /* give no advise for time on this subrequest. Perhaps we 3140 * should tally the last mtime amoung all variants, and date 3141 * the most recent, but that could confuse the proxies. 3142 */ 3143 r->mtime = 0; 3144 3145 /* clean up all but our favorite variant, since that sub_req 3146 * is now merged into the main request! 3147 */ 3148 avail_recs = (var_rec *) neg->avail_vars->elts; 3149 for (j = 0; j < neg->avail_vars->nelts; ++j) { 3150 var_rec *variant = &avail_recs[j]; 3151 if (variant != best && variant->sub_req) { 3152 ap_destroy_sub_req(variant->sub_req); 3153 } 3154 } 3155 return OK; 3156} 3157 3158/********************************************************************** 3159 * There is a problem with content-encoding, as some clients send and 3160 * expect an x- token (e.g. x-gzip) while others expect the plain token 3161 * (i.e. gzip). To try and deal with this as best as possible we do 3162 * the following: if the client sent an Accept-Encoding header and it 3163 * contains a plain token corresponding to the content encoding of the 3164 * response, then set content encoding using the plain token. Else if 3165 * the A-E header contains the x- token use the x- token in the C-E 3166 * header. Else don't do anything. 3167 * 3168 * Note that if no A-E header was sent, or it does not contain a token 3169 * compatible with the final content encoding, then the token in the 3170 * C-E header will be whatever was specified in the AddEncoding 3171 * directive. 3172 */ 3173static int fix_encoding(request_rec *r) 3174{ 3175 const char *enc = r->content_encoding; 3176 char *x_enc = NULL; 3177 apr_array_header_t *accept_encodings; 3178 accept_rec *accept_recs; 3179 int i; 3180 3181 if (!enc || !*enc) { 3182 return DECLINED; 3183 } 3184 3185 if (enc[0] == 'x' && enc[1] == '-') { 3186 enc += 2; 3187 } 3188 3189 if ((accept_encodings = do_header_line(r->pool, 3190 apr_table_get(r->headers_in, "Accept-Encoding"))) == NULL) { 3191 return DECLINED; 3192 } 3193 3194 accept_recs = (accept_rec *) accept_encodings->elts; 3195 3196 for (i = 0; i < accept_encodings->nelts; ++i) { 3197 char *name = accept_recs[i].name; 3198 3199 if (!strcmp(name, enc)) { 3200 r->content_encoding = name; 3201 return OK; 3202 } 3203 3204 if (name[0] == 'x' && name[1] == '-' && !strcmp(name+2, enc)) { 3205 x_enc = name; 3206 } 3207 } 3208 3209 if (x_enc) { 3210 r->content_encoding = x_enc; 3211 return OK; 3212 } 3213 3214 return DECLINED; 3215} 3216 3217static void register_hooks(apr_pool_t *p) 3218{ 3219 ap_hook_fixups(fix_encoding,NULL,NULL,APR_HOOK_MIDDLE); 3220 ap_hook_type_checker(handle_multi,NULL,NULL,APR_HOOK_FIRST); 3221 ap_hook_handler(handle_map_file,NULL,NULL,APR_HOOK_MIDDLE); 3222} 3223 3224AP_DECLARE_MODULE(negotiation) = 3225{ 3226 STANDARD20_MODULE_STUFF, 3227 create_neg_dir_config, /* dir config creator */ 3228 merge_neg_dir_configs, /* dir merger --- default is to override */ 3229 NULL, /* server config */ 3230 NULL, /* merge server config */ 3231 negotiation_cmds, /* command apr_table_t */ 3232 register_hooks /* register hooks */ 3233}; 3234