1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_negotiation.c: keeps track of MIME types the client is willing to 19 * accept, and contains code to handle type arbitration. 20 * 21 * rst 22 */ 23 24#include "apr.h" 25#include "apr_strings.h" 26#include "apr_file_io.h" 27#include "apr_lib.h" 28 29#define APR_WANT_STRFUNC 30#include "apr_want.h" 31 32#include "ap_config.h" 33#include "httpd.h" 34#include "http_config.h" 35#include "http_request.h" 36#include "http_protocol.h" 37#include "http_core.h" 38#include "http_log.h" 39#include "util_script.h" 40 41 42#define MAP_FILE_MAGIC_TYPE "application/x-type-map" 43 44/* Commands --- configuring document caching on a per (virtual?) 45 * server basis... 46 */ 47 48typedef struct { 49 int forcelangpriority; 50 apr_array_header_t *language_priority; 51} neg_dir_config; 52 53/* forcelangpriority flags 54 */ 55#define FLP_UNDEF 0 /* Same as FLP_DEFAULT, but base overrides */ 56#define FLP_NONE 1 /* Return 406, HTTP_NOT_ACCEPTABLE */ 57#define FLP_PREFER 2 /* Use language_priority rather than MC */ 58#define FLP_FALLBACK 4 /* Use language_priority rather than NA */ 59 60#define FLP_DEFAULT FLP_PREFER 61 62/* env evaluation 63 */ 64#define DISCARD_ALL_ENCODINGS 1 /* no-gzip */ 65#define DISCARD_ALL_BUT_HTML 2 /* gzip-only-text/html */ 66 67module AP_MODULE_DECLARE_DATA negotiation_module; 68 69static void *create_neg_dir_config(apr_pool_t *p, char *dummy) 70{ 71 neg_dir_config *new = (neg_dir_config *) apr_palloc(p, 72 sizeof(neg_dir_config)); 73 74 new->forcelangpriority = FLP_UNDEF; 75 new->language_priority = NULL; 76 return new; 77} 78 79static void *merge_neg_dir_configs(apr_pool_t *p, void *basev, void *addv) 80{ 81 neg_dir_config *base = (neg_dir_config *) basev; 82 neg_dir_config *add = (neg_dir_config *) addv; 83 neg_dir_config *new = (neg_dir_config *) apr_palloc(p, 84 sizeof(neg_dir_config)); 85 86 /* give priority to the config in the subdirectory */ 87 new->forcelangpriority = (add->forcelangpriority != FLP_UNDEF) 88 ? add->forcelangpriority 89 : base->forcelangpriority; 90 new->language_priority = add->language_priority 91 ? add->language_priority 92 : base->language_priority; 93 return new; 94} 95 96static const char *set_language_priority(cmd_parms *cmd, void *n_, 97 const char *lang) 98{ 99 neg_dir_config *n = n_; 100 const char **langp; 101 102 if (!n->language_priority) 103 n->language_priority = apr_array_make(cmd->pool, 4, sizeof(char *)); 104 105 langp = (const char **) apr_array_push(n->language_priority); 106 *langp = lang; 107 return NULL; 108} 109 110static const char *set_force_priority(cmd_parms *cmd, void *n_, const char *w) 111{ 112 neg_dir_config *n = n_; 113 114 if (!strcasecmp(w, "None")) { 115 if (n->forcelangpriority & ~FLP_NONE) { 116 return "Cannot combine ForceLanguagePriority options with None"; 117 } 118 n->forcelangpriority = FLP_NONE; 119 } 120 else if (!strcasecmp(w, "Prefer")) { 121 if (n->forcelangpriority & FLP_NONE) { 122 return "Cannot combine ForceLanguagePriority options None and " 123 "Prefer"; 124 } 125 n->forcelangpriority |= FLP_PREFER; 126 } 127 else if (!strcasecmp(w, "Fallback")) { 128 if (n->forcelangpriority & FLP_NONE) { 129 return "Cannot combine ForceLanguagePriority options None and " 130 "Fallback"; 131 } 132 n->forcelangpriority |= FLP_FALLBACK; 133 } 134 else { 135 return apr_pstrcat(cmd->pool, "Invalid ForceLanguagePriority option ", 136 w, NULL); 137 } 138 139 return NULL; 140} 141 142static const char *cache_negotiated_docs(cmd_parms *cmd, void *dummy, 143 int arg) 144{ 145 ap_set_module_config(cmd->server->module_config, &negotiation_module, 146 (arg ? "Cache" : NULL)); 147 return NULL; 148} 149 150static int do_cache_negotiated_docs(server_rec *s) 151{ 152 return (ap_get_module_config(s->module_config, 153 &negotiation_module) != NULL); 154} 155 156static const command_rec negotiation_cmds[] = 157{ 158 AP_INIT_FLAG("CacheNegotiatedDocs", cache_negotiated_docs, NULL, RSRC_CONF, 159 "Either 'on' or 'off' (default)"), 160 AP_INIT_ITERATE("LanguagePriority", set_language_priority, NULL, 161 OR_FILEINFO, 162 "space-delimited list of MIME language abbreviations"), 163 AP_INIT_ITERATE("ForceLanguagePriority", set_force_priority, NULL, 164 OR_FILEINFO, 165 "Force LanguagePriority elections, either None, or " 166 "Fallback and/or Prefer"), 167 {NULL} 168}; 169 170/* 171 * Record of available info on a media type specified by the client 172 * (we also use 'em for encodings and languages) 173 */ 174 175typedef struct accept_rec { 176 char *name; /* MUST be lowercase */ 177 float quality; 178 float level; 179 char *charset; /* for content-type only */ 180} accept_rec; 181 182/* 183 * Record of available info on a particular variant 184 * 185 * Note that a few of these fields are updated by the actual negotiation 186 * code. These are: 187 * 188 * level_matched --- initialized to zero. Set to the value of level 189 * if the client actually accepts this media type at that 190 * level (and *not* if it got in on a wildcard). See level_cmp 191 * below. 192 * mime_stars -- initialized to zero. Set to the number of stars 193 * present in the best matching Accept header element. 194 * 1 for star/star, 2 for type/star and 3 for 195 * type/subtype. 196 * 197 * definite -- initialized to 1. Set to 0 if there is a match which 198 * makes the variant non-definite according to the rules 199 * in rfc2296. 200 */ 201 202typedef struct var_rec { 203 request_rec *sub_req; /* May be NULL (is, for map files) */ 204 const char *mime_type; /* MUST be lowercase */ 205 const char *file_name; /* Set to 'this' (for map file body content) */ 206 apr_off_t body; /* Only for map file body content */ 207 const char *content_encoding; 208 apr_array_header_t *content_languages; /* list of lang. for this variant */ 209 const char *content_charset; 210 const char *description; 211 212 /* The next five items give the quality values for the dimensions 213 * of negotiation for this variant. They are obtained from the 214 * appropriate header lines, except for source_quality, which 215 * is obtained from the variant itself (the 'qs' parameter value 216 * from the variant's mime-type). Apart from source_quality, 217 * these values are set when we find the quality for each variant 218 * (see best_match()). source_quality is set from the 'qs' parameter 219 * of the variant description or mime type: see set_mime_fields(). 220 */ 221 float lang_quality; /* quality of this variant's language */ 222 float encoding_quality; /* ditto encoding */ 223 float charset_quality; /* ditto charset */ 224 float mime_type_quality; /* ditto media type */ 225 float source_quality; /* source quality for this variant */ 226 227 /* Now some special values */ 228 float level; /* Auxiliary to content-type... */ 229 apr_off_t bytes; /* content length, if known */ 230 int lang_index; /* Index into LanguagePriority list */ 231 int is_pseudo_html; /* text/html, *or* the INCLUDES_MAGIC_TYPEs */ 232 233 /* Above are all written-once properties of the variant. The 234 * three fields below are changed during negotiation: 235 */ 236 237 float level_matched; 238 int mime_stars; 239 int definite; 240} var_rec; 241 242/* Something to carry around the state of negotiation (and to keep 243 * all of this thread-safe)... 244 */ 245 246typedef struct { 247 apr_pool_t *pool; 248 request_rec *r; 249 neg_dir_config *conf; 250 char *dir_name; 251 int accept_q; /* 1 if an Accept item has a q= param */ 252 float default_lang_quality; /* fiddle lang q for variants with no lang */ 253 254 /* the array pointers below are NULL if the corresponding accept 255 * headers are not present 256 */ 257 apr_array_header_t *accepts; /* accept_recs */ 258 apr_array_header_t *accept_encodings; /* accept_recs */ 259 apr_array_header_t *accept_charsets; /* accept_recs */ 260 apr_array_header_t *accept_langs; /* accept_recs */ 261 262 apr_array_header_t *avail_vars; /* available variants */ 263 264 int count_multiviews_variants; /* number of variants found on disk */ 265 266 int is_transparent; /* 1 if this resource is trans. negotiable */ 267 268 int dont_fiddle_headers; /* 1 if we may not fiddle with accept hdrs */ 269 int ua_supports_trans; /* 1 if ua supports trans negotiation */ 270 int send_alternates; /* 1 if we want to send an Alternates header */ 271 int may_choose; /* 1 if we may choose a variant for the client */ 272 int use_rvsa; /* 1 if we must use RVSA/1.0 negotiation algo */ 273} negotiation_state; 274 275/* A few functions to manipulate var_recs. 276 * Cleaning out the fields... 277 */ 278 279static void clean_var_rec(var_rec *mime_info) 280{ 281 mime_info->sub_req = NULL; 282 mime_info->mime_type = ""; 283 mime_info->file_name = ""; 284 mime_info->body = 0; 285 mime_info->content_encoding = NULL; 286 mime_info->content_languages = NULL; 287 mime_info->content_charset = ""; 288 mime_info->description = ""; 289 290 mime_info->is_pseudo_html = 0; 291 mime_info->level = 0.0f; 292 mime_info->level_matched = 0.0f; 293 mime_info->bytes = -1; 294 mime_info->lang_index = -1; 295 mime_info->mime_stars = 0; 296 mime_info->definite = 1; 297 298 mime_info->charset_quality = 1.0f; 299 mime_info->encoding_quality = 1.0f; 300 mime_info->lang_quality = 1.0f; 301 mime_info->mime_type_quality = 1.0f; 302 mime_info->source_quality = 0.0f; 303} 304 305/* Initializing the relevant fields of a variant record from the 306 * accept_info read out of its content-type, one way or another. 307 */ 308 309static void set_mime_fields(var_rec *var, accept_rec *mime_info) 310{ 311 var->mime_type = mime_info->name; 312 var->source_quality = mime_info->quality; 313 var->level = mime_info->level; 314 var->content_charset = mime_info->charset; 315 316 var->is_pseudo_html = (!strcmp(var->mime_type, "text/html") 317 || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE) 318 || !strcmp(var->mime_type, INCLUDES_MAGIC_TYPE3)); 319} 320 321/* Create a variant list validator in r using info from vlistr. */ 322 323static void set_vlist_validator(request_rec *r, request_rec *vlistr) 324{ 325 /* Calculating the variant list validator is similar to 326 * calculating an etag for the source of the variant list 327 * information, so we use ap_make_etag(). Note that this 328 * validator can be 'weak' in extreme case. 329 */ 330 ap_update_mtime(vlistr, vlistr->finfo.mtime); 331 r->vlist_validator = ap_make_etag(vlistr, 0); 332 333 /* ap_set_etag will later take r->vlist_validator into account 334 * when creating the etag header 335 */ 336} 337 338 339/***************************************************************** 340 * 341 * Parsing (lists of) media types and their parameters, as seen in 342 * HTTPD header lines and elsewhere. 343 */ 344 345/* 346 * parse quality value. atof(3) is not well-usable here, because it 347 * depends on the locale (argh). 348 * 349 * However, RFC 2616 states: 350 * 3.9 Quality Values 351 * 352 * [...] HTTP/1.1 applications MUST NOT generate more than three digits 353 * after the decimal point. User configuration of these values SHOULD also 354 * be limited in this fashion. 355 * 356 * qvalue = ( "0" [ "." 0*3DIGIT ] ) 357 * | ( "1" [ "." 0*3("0") ] ) 358 * 359 * This is quite easy. If the supplied string doesn't match the above 360 * definition (loosely), we simply return 1 (same as if there's no qvalue) 361 */ 362 363static float atoq(const char *string) 364{ 365 if (!string || !*string) { 366 return 1.0f; 367 } 368 369 while (*string && apr_isspace(*string)) { 370 ++string; 371 } 372 373 /* be tolerant and accept qvalues without leading zero 374 * (also for backwards compat, where atof() was in use) 375 */ 376 if (*string != '.' && *string++ != '0') { 377 return 1.0f; 378 } 379 380 if (*string == '.') { 381 /* better only one division later, than dealing with fscking 382 * IEEE format 0.1 factors ... 383 */ 384 int i = 0; 385 386 if (*++string >= '0' && *string <= '9') { 387 i += (*string - '0') * 100; 388 389 if (*++string >= '0' && *string <= '9') { 390 i += (*string - '0') * 10; 391 392 if (*++string > '0' && *string <= '9') { 393 i += (*string - '0'); 394 } 395 } 396 } 397 398 return (float)i / 1000.0f; 399 } 400 401 return 0.0f; 402} 403 404/* 405 * Get a single mime type entry --- one media type and parameters; 406 * enter the values we recognize into the argument accept_rec 407 */ 408 409static const char *get_entry(apr_pool_t *p, accept_rec *result, 410 const char *accept_line) 411{ 412 result->quality = 1.0f; 413 result->level = 0.0f; 414 result->charset = ""; 415 416 /* 417 * Note that this handles what I gather is the "old format", 418 * 419 * Accept: text/html text/plain moo/zot 420 * 421 * without any compatibility kludges --- if the token after the 422 * MIME type begins with a semicolon, we know we're looking at parms, 423 * otherwise, we know we aren't. (So why all the pissing and moaning 424 * in the CERN server code? I must be missing something). 425 */ 426 427 result->name = ap_get_token(p, &accept_line, 0); 428 ap_str_tolower(result->name); /* You want case insensitive, 429 * you'll *get* case insensitive. 430 */ 431 432 /* KLUDGE!!! Default HTML to level 2.0 unless the browser 433 * *explicitly* says something else. 434 */ 435 436 if (!strcmp(result->name, "text/html") && (result->level == 0.0)) { 437 result->level = 2.0f; 438 } 439 else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE)) { 440 result->level = 2.0f; 441 } 442 else if (!strcmp(result->name, INCLUDES_MAGIC_TYPE3)) { 443 result->level = 3.0f; 444 } 445 446 while (*accept_line == ';') { 447 /* Parameters ... */ 448 449 char *parm; 450 char *cp; 451 char *end; 452 453 ++accept_line; 454 parm = ap_get_token(p, &accept_line, 1); 455 456 /* Look for 'var = value' --- and make sure the var is in lcase. */ 457 458 for (cp = parm; (*cp && !apr_isspace(*cp) && *cp != '='); ++cp) { 459 *cp = apr_tolower(*cp); 460 } 461 462 if (!*cp) { 463 continue; /* No '='; just ignore it. */ 464 } 465 466 *cp++ = '\0'; /* Delimit var */ 467 while (*cp && (apr_isspace(*cp) || *cp == '=')) { 468 ++cp; 469 } 470 471 if (*cp == '"') { 472 ++cp; 473 for (end = cp; 474 (*end && *end != '\n' && *end != '\r' && *end != '\"'); 475 end++); 476 } 477 else { 478 for (end = cp; (*end && !apr_isspace(*end)); end++); 479 } 480 if (*end) { 481 *end = '\0'; /* strip ending quote or return */ 482 } 483 ap_str_tolower(cp); 484 485 if (parm[0] == 'q' 486 && (parm[1] == '\0' || (parm[1] == 's' && parm[2] == '\0'))) { 487 result->quality = atoq(cp); 488 } 489 else if (parm[0] == 'l' && !strcmp(&parm[1], "evel")) { 490 result->level = (float)atoi(cp); 491 } 492 else if (!strcmp(parm, "charset")) { 493 result->charset = cp; 494 } 495 } 496 497 if (*accept_line == ',') { 498 ++accept_line; 499 } 500 501 return accept_line; 502} 503 504/***************************************************************** 505 * 506 * Dealing with header lines ... 507 * 508 * Accept, Accept-Charset, Accept-Language and Accept-Encoding 509 * are handled by do_header_line() - they all have the same 510 * basic structure of a list of items of the format 511 * name; q=N; charset=TEXT 512 * 513 * where charset is only valid in Accept. 514 */ 515 516static apr_array_header_t *do_header_line(apr_pool_t *p, 517 const char *accept_line) 518{ 519 apr_array_header_t *accept_recs; 520 521 if (!accept_line) { 522 return NULL; 523 } 524 525 accept_recs = apr_array_make(p, 40, sizeof(accept_rec)); 526 527 while (*accept_line) { 528 accept_rec *new = (accept_rec *) apr_array_push(accept_recs); 529 accept_line = get_entry(p, new, accept_line); 530 } 531 532 return accept_recs; 533} 534 535/* Given the text of the Content-Languages: line from the var map file, 536 * return an array containing the languages of this variant 537 */ 538 539static apr_array_header_t *do_languages_line(apr_pool_t *p, 540 const char **lang_line) 541{ 542 apr_array_header_t *lang_recs = apr_array_make(p, 2, sizeof(char *)); 543 544 if (!lang_line) { 545 return lang_recs; 546 } 547 548 while (**lang_line) { 549 char **new = (char **) apr_array_push(lang_recs); 550 *new = ap_get_token(p, lang_line, 0); 551 ap_str_tolower(*new); 552 if (**lang_line == ',' || **lang_line == ';') { 553 ++(*lang_line); 554 } 555 } 556 557 return lang_recs; 558} 559 560/***************************************************************** 561 * 562 * Handling header lines from clients... 563 */ 564 565static negotiation_state *parse_accept_headers(request_rec *r) 566{ 567 negotiation_state *new = 568 (negotiation_state *) apr_pcalloc(r->pool, sizeof(negotiation_state)); 569 accept_rec *elts; 570 apr_table_t *hdrs = r->headers_in; 571 int i; 572 573 new->pool = r->pool; 574 new->r = r; 575 new->conf = (neg_dir_config *)ap_get_module_config(r->per_dir_config, 576 &negotiation_module); 577 578 new->dir_name = ap_make_dirstr_parent(r->pool, r->filename); 579 580 new->accepts = do_header_line(r->pool, apr_table_get(hdrs, "Accept")); 581 582 /* calculate new->accept_q value */ 583 if (new->accepts) { 584 elts = (accept_rec *) new->accepts->elts; 585 586 for (i = 0; i < new->accepts->nelts; ++i) { 587 if (elts[i].quality < 1.0) { 588 new->accept_q = 1; 589 } 590 } 591 } 592 593 new->accept_encodings = 594 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Encoding")); 595 new->accept_langs = 596 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Language")); 597 new->accept_charsets = 598 do_header_line(r->pool, apr_table_get(hdrs, "Accept-Charset")); 599 600 /* This is possibly overkill for some servers, heck, we have 601 * only 33 index.html variants in docs/docroot (today). 602 * Make this configurable? 603 */ 604 new->avail_vars = apr_array_make(r->pool, 40, sizeof(var_rec)); 605 606 return new; 607} 608 609 610static void parse_negotiate_header(request_rec *r, negotiation_state *neg) 611{ 612 const char *negotiate = apr_table_get(r->headers_in, "Negotiate"); 613 char *tok; 614 615 /* First, default to no TCN, no Alternates, and the original Apache 616 * negotiation algorithm with fiddles for broken browser configs. 617 * 618 * To save network bandwidth, we do not configure to send an 619 * Alternates header to the user agent by default. User 620 * agents that want an Alternates header for agent-driven 621 * negotiation will have to request it by sending an 622 * appropriate Negotiate header. 623 */ 624 neg->ua_supports_trans = 0; 625 neg->send_alternates = 0; 626 neg->may_choose = 1; 627 neg->use_rvsa = 0; 628 neg->dont_fiddle_headers = 0; 629 630 if (!negotiate) 631 return; 632 633 if (strcmp(negotiate, "trans") == 0) { 634 /* Lynx 2.7 and 2.8 send 'negotiate: trans' even though they 635 * do not support transparent content negotiation, so for Lynx we 636 * ignore the negotiate header when its contents are exactly "trans". 637 * If future versions of Lynx ever need to say 'negotiate: trans', 638 * they can send the equivalent 'negotiate: trans, trans' instead 639 * to avoid triggering the workaround below. 640 */ 641 const char *ua = apr_table_get(r->headers_in, "User-Agent"); 642 643 if (ua && (strncmp(ua, "Lynx", 4) == 0)) 644 return; 645 } 646 647 neg->may_choose = 0; /* An empty Negotiate would require 300 response */ 648 649 while ((tok = ap_get_list_item(neg->pool, &negotiate)) != NULL) { 650 651 if (strcmp(tok, "trans") == 0 || 652 strcmp(tok, "vlist") == 0 || 653 strcmp(tok, "guess-small") == 0 || 654 apr_isdigit(tok[0]) || 655 strcmp(tok, "*") == 0) { 656 657 /* The user agent supports transparent negotiation */ 658 neg->ua_supports_trans = 1; 659 660 /* Send-alternates could be configurable, but note 661 * that it must be 1 if we have 'vlist' in the 662 * negotiate header. 663 */ 664 neg->send_alternates = 1; 665 666 if (strcmp(tok, "1.0") == 0) { 667 /* we may use the RVSA/1.0 algorithm, configure for it */ 668 neg->may_choose = 1; 669 neg->use_rvsa = 1; 670 neg->dont_fiddle_headers = 1; 671 } 672 else if (tok[0] == '*') { 673 /* we may use any variant selection algorithm, configure 674 * to use the Apache algorithm 675 */ 676 neg->may_choose = 1; 677 678 /* We disable header fiddles on the assumption that a 679 * client sending Negotiate knows how to send correct 680 * headers which don't need fiddling. 681 */ 682 neg->dont_fiddle_headers = 1; 683 } 684 } 685 } 686 687#ifdef NEG_DEBUG 688 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, 689 "dont_fiddle_headers=%d use_rvsa=%d ua_supports_trans=%d " 690 "send_alternates=%d, may_choose=%d", 691 neg->dont_fiddle_headers, neg->use_rvsa, 692 neg->ua_supports_trans, neg->send_alternates, neg->may_choose); 693#endif 694 695} 696 697/* Sometimes clients will give us no Accept info at all; this routine sets 698 * up the standard default for that case, and also arranges for us to be 699 * willing to run a CGI script if we find one. (In fact, we set up to 700 * dramatically prefer CGI scripts in cases where that's appropriate, 701 * e.g., POST or when URI includes query args or extra path info). 702 */ 703static void maybe_add_default_accepts(negotiation_state *neg, 704 int prefer_scripts) 705{ 706 accept_rec *new_accept; 707 708 if (!neg->accepts) { 709 neg->accepts = apr_array_make(neg->pool, 4, sizeof(accept_rec)); 710 711 new_accept = (accept_rec *) apr_array_push(neg->accepts); 712 713 new_accept->name = "*/*"; 714 new_accept->quality = 1.0f; 715 new_accept->level = 0.0f; 716 } 717 718 new_accept = (accept_rec *) apr_array_push(neg->accepts); 719 720 new_accept->name = CGI_MAGIC_TYPE; 721 if (neg->use_rvsa) { 722 new_accept->quality = 0; 723 } 724 else { 725 new_accept->quality = prefer_scripts ? 2.0f : 0.001f; 726 } 727 new_accept->level = 0.0f; 728} 729 730/***************************************************************** 731 * 732 * Parsing type-map files, in Roy's meta/http format augmented with 733 * #-comments. 734 */ 735 736/* Reading RFC822-style header lines, ignoring #-comments and 737 * handling continuations. 738 */ 739 740enum header_state { 741 header_eof, header_seen, header_sep 742}; 743 744static enum header_state get_header_line(char *buffer, int len, apr_file_t *map) 745{ 746 char *buf_end = buffer + len; 747 char *cp; 748 char c; 749 750 /* Get a noncommented line */ 751 752 do { 753 if (apr_file_gets(buffer, MAX_STRING_LEN, map) != APR_SUCCESS) { 754 return header_eof; 755 } 756 } while (buffer[0] == '#'); 757 758 /* If blank, just return it --- this ends information on this variant */ 759 760 for (cp = buffer; (*cp && apr_isspace(*cp)); ++cp) { 761 continue; 762 } 763 764 if (*cp == '\0') { 765 return header_sep; 766 } 767 768 /* If non-blank, go looking for header lines, but note that we still 769 * have to treat comments specially... 770 */ 771 772 cp += strlen(cp); 773 774 /* We need to shortcut the rest of this block following the Body: 775 * tag - we will not look for continutation after this line. 776 */ 777 if (!strncasecmp(buffer, "Body:", 5)) 778 return header_seen; 779 780 while (apr_file_getc(&c, map) != APR_EOF) { 781 if (c == '#') { 782 /* Comment line */ 783 while (apr_file_getc(&c, map) != APR_EOF && c != '\n') { 784 continue; 785 } 786 } 787 else if (apr_isspace(c)) { 788 /* Leading whitespace. POSSIBLE continuation line 789 * Also, possibly blank --- if so, we ungetc() the final newline 790 * so that we will pick up the blank line the next time 'round. 791 */ 792 793 while (c != '\n' && apr_isspace(c)) { 794 if(apr_file_getc(&c, map) != APR_SUCCESS) 795 break; 796 } 797 798 apr_file_ungetc(c, map); 799 800 if (c == '\n') { 801 return header_seen; /* Blank line */ 802 } 803 804 /* Continuation */ 805 806 while ( cp < buf_end - 2 807 && (apr_file_getc(&c, map)) != APR_EOF 808 && c != '\n') { 809 *cp++ = c; 810 } 811 812 *cp++ = '\n'; 813 *cp = '\0'; 814 } 815 else { 816 817 /* Line beginning with something other than whitespace */ 818 819 apr_file_ungetc(c, map); 820 return header_seen; 821 } 822 } 823 824 return header_seen; 825} 826 827static apr_off_t get_body(char *buffer, apr_size_t *len, const char *tag, 828 apr_file_t *map) 829{ 830 char *endbody; 831 int bodylen; 832 int taglen; 833 apr_off_t pos; 834 835 taglen = strlen(tag); 836 *len -= taglen; 837 838 /* We are at the first character following a body:tag\n entry 839 * Suck in the body, then backspace to the first char after the 840 * closing tag entry. If we fail to read, find the tag or back 841 * up then we have a hosed file, so give up already 842 */ 843 if (apr_file_read(map, buffer, len) != APR_SUCCESS) { 844 return -1; 845 } 846 847 /* put a copy of the tag *after* the data read from the file 848 * so that strstr() will find something with no reliance on 849 * terminating '\0' 850 */ 851 memcpy(buffer + *len, tag, taglen); 852 endbody = strstr(buffer, tag); 853 if (endbody == buffer + *len) { 854 return -1; 855 } 856 bodylen = endbody - buffer; 857 endbody += taglen; 858 /* Skip all the trailing cruft after the end tag to the next line */ 859 while (*endbody) { 860 if (*endbody == '\n') { 861 ++endbody; 862 break; 863 } 864 ++endbody; 865 } 866 867 pos = -(apr_off_t)(*len - (endbody - buffer)); 868 if (apr_file_seek(map, APR_CUR, &pos) != APR_SUCCESS) { 869 return -1; 870 } 871 872 /* Give the caller back the actual body's file offset and length */ 873 *len = bodylen; 874 return pos - (endbody - buffer); 875} 876 877 878/* Stripping out RFC822 comments */ 879 880static void strip_paren_comments(char *hdr) 881{ 882 /* Hmmm... is this correct? In Roy's latest draft, (comments) can nest! */ 883 /* Nope, it isn't correct. Fails to handle backslash escape as well. */ 884 885 while (*hdr) { 886 if (*hdr == '"') { 887 hdr = strchr(hdr, '"'); 888 if (hdr == NULL) { 889 return; 890 } 891 ++hdr; 892 } 893 else if (*hdr == '(') { 894 while (*hdr && *hdr != ')') { 895 *hdr++ = ' '; 896 } 897 898 if (*hdr) { 899 *hdr++ = ' '; 900 } 901 } 902 else { 903 ++hdr; 904 } 905 } 906} 907 908/* Getting to a header body from the header */ 909 910static char *lcase_header_name_return_body(char *header, request_rec *r) 911{ 912 char *cp = header; 913 914 for ( ; *cp && *cp != ':' ; ++cp) { 915 *cp = apr_tolower(*cp); 916 } 917 918 if (!*cp) { 919 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 920 "Syntax error in type map, no ':' in %s for header %s", 921 r->filename, header); 922 return NULL; 923 } 924 925 do { 926 ++cp; 927 } while (*cp && apr_isspace(*cp)); 928 929 if (!*cp) { 930 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 931 "Syntax error in type map --- no header body: %s for %s", 932 r->filename, header); 933 return NULL; 934 } 935 936 return cp; 937} 938 939static int read_type_map(apr_file_t **map, negotiation_state *neg, 940 request_rec *rr) 941{ 942 request_rec *r = neg->r; 943 apr_file_t *map_ = NULL; 944 apr_status_t status; 945 char buffer[MAX_STRING_LEN]; 946 enum header_state hstate; 947 struct var_rec mime_info; 948 int has_content; 949 950 if (!map) 951 map = &map_; 952 953 /* We are not using multiviews */ 954 neg->count_multiviews_variants = 0; 955 956 if ((status = apr_file_open(map, rr->filename, APR_READ | APR_BUFFERED, 957 APR_OS_DEFAULT, neg->pool)) != APR_SUCCESS) { 958 ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, 959 "cannot access type map file: %s", rr->filename); 960 if (APR_STATUS_IS_ENOTDIR(status) || APR_STATUS_IS_ENOENT(status)) { 961 return HTTP_NOT_FOUND; 962 } 963 else { 964 return HTTP_FORBIDDEN; 965 } 966 } 967 968 clean_var_rec(&mime_info); 969 has_content = 0; 970 971 do { 972 hstate = get_header_line(buffer, MAX_STRING_LEN, *map); 973 974 if (hstate == header_seen) { 975 char *body1 = lcase_header_name_return_body(buffer, neg->r); 976 const char *body; 977 978 if (body1 == NULL) { 979 return HTTP_INTERNAL_SERVER_ERROR; 980 } 981 982 strip_paren_comments(body1); 983 body = body1; 984 985 if (!strncmp(buffer, "uri:", 4)) { 986 mime_info.file_name = ap_get_token(neg->pool, &body, 0); 987 } 988 else if (!strncmp(buffer, "content-type:", 13)) { 989 struct accept_rec accept_info; 990 991 get_entry(neg->pool, &accept_info, body); 992 set_mime_fields(&mime_info, &accept_info); 993 has_content = 1; 994 } 995 else if (!strncmp(buffer, "content-length:", 15)) { 996 char *errp; 997 apr_off_t number; 998 999 if (apr_strtoff(&number, body, &errp, 10) 1000 || *errp || number < 0) { 1001 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1002 "Parse error in type map, Content-Length: " 1003 "'%s' in %s is invalid.", 1004 body, r->filename); 1005 break; 1006 } 1007 mime_info.bytes = number; 1008 has_content = 1; 1009 } 1010 else if (!strncmp(buffer, "content-language:", 17)) { 1011 mime_info.content_languages = do_languages_line(neg->pool, 1012 &body); 1013 has_content = 1; 1014 } 1015 else if (!strncmp(buffer, "content-encoding:", 17)) { 1016 mime_info.content_encoding = ap_get_token(neg->pool, &body, 0); 1017 has_content = 1; 1018 } 1019 else if (!strncmp(buffer, "description:", 12)) { 1020 char *desc = apr_pstrdup(neg->pool, body); 1021 char *cp; 1022 1023 for (cp = desc; *cp; ++cp) { 1024 if (*cp=='\n') *cp=' '; 1025 } 1026 if (cp>desc) *(cp-1)=0; 1027 mime_info.description = desc; 1028 } 1029 else if (!strncmp(buffer, "body:", 5)) { 1030 char *tag = apr_pstrdup(neg->pool, body); 1031 char *eol = strchr(tag, '\0'); 1032 apr_size_t len = MAX_STRING_LEN; 1033 while (--eol >= tag && apr_isspace(*eol)) 1034 *eol = '\0'; 1035 if ((mime_info.body = get_body(buffer, &len, tag, *map)) < 0) { 1036 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1037 "Syntax error in type map, no end tag '%s'" 1038 "found in %s for Body: content.", 1039 tag, r->filename); 1040 break; 1041 } 1042 mime_info.bytes = len; 1043 mime_info.file_name = apr_filepath_name_get(rr->filename); 1044 } 1045 } 1046 else { 1047 if (*mime_info.file_name && has_content) { 1048 void *new_var = apr_array_push(neg->avail_vars); 1049 1050 memcpy(new_var, (void *) &mime_info, sizeof(var_rec)); 1051 } 1052 1053 clean_var_rec(&mime_info); 1054 has_content = 0; 1055 } 1056 } while (hstate != header_eof); 1057 1058 if (map_) 1059 apr_file_close(map_); 1060 1061 set_vlist_validator(r, rr); 1062 1063 return OK; 1064} 1065 1066 1067/* Sort function used by read_types_multi. */ 1068static int variantsortf(var_rec *a, var_rec *b) { 1069 1070 /* First key is the source quality, sort in descending order. */ 1071 1072 /* XXX: note that we currently implement no method of setting the 1073 * source quality for multiviews variants, so we are always comparing 1074 * 1.0 to 1.0 for now 1075 */ 1076 if (a->source_quality < b->source_quality) 1077 return 1; 1078 if (a->source_quality > b->source_quality) 1079 return -1; 1080 1081 /* Second key is the variant name */ 1082 return strcmp(a->file_name, b->file_name); 1083} 1084 1085/***************************************************************** 1086 * 1087 * Same as read_type_map, except we use a filtered directory listing 1088 * as the map... 1089 */ 1090 1091static int read_types_multi(negotiation_state *neg) 1092{ 1093 request_rec *r = neg->r; 1094 1095 char *filp; 1096 int prefix_len; 1097 apr_dir_t *dirp; 1098 apr_finfo_t dirent; 1099 apr_status_t status; 1100 struct var_rec mime_info; 1101 struct accept_rec accept_info; 1102 void *new_var; 1103 int anymatch = 0; 1104 1105 clean_var_rec(&mime_info); 1106 1107 if (r->proxyreq || !r->filename 1108 || !ap_os_is_path_absolute(neg->pool, r->filename)) { 1109 return DECLINED; 1110 } 1111 1112 /* Only absolute paths here */ 1113 if (!(filp = strrchr(r->filename, '/'))) { 1114 return DECLINED; 1115 } 1116 ++filp; 1117 prefix_len = strlen(filp); 1118 1119 if ((status = apr_dir_open(&dirp, neg->dir_name, 1120 neg->pool)) != APR_SUCCESS) { 1121 ap_log_rerror(APLOG_MARK, APLOG_ERR, status, r, 1122 "cannot read directory for multi: %s", neg->dir_name); 1123 return HTTP_FORBIDDEN; 1124 } 1125 1126 while (apr_dir_read(&dirent, APR_FINFO_DIRENT, dirp) == APR_SUCCESS) { 1127 apr_array_header_t *exception_list; 1128 request_rec *sub_req; 1129 1130 /* Do we have a match? */ 1131#ifdef CASE_BLIND_FILESYSTEM 1132 if (strncasecmp(dirent.name, filp, prefix_len)) { 1133#else 1134 if (strncmp(dirent.name, filp, prefix_len)) { 1135#endif 1136 continue; 1137 } 1138 if (dirent.name[prefix_len] != '.') { 1139 continue; 1140 } 1141 1142 /* Don't negotiate directories and other unusual files 1143 * Really shouldn't see anything but DIR/LNK/REG here, 1144 * and we aught to discover if the LNK was interesting. 1145 * 1146 * Of course, this only helps platforms that capture the 1147 * the filetype in apr_dir_read(), which most can once 1148 * they are optimized with some magic [it's known to the 1149 * dirent, not associated to the inode, on most FS's.] 1150 */ 1151 if ((dirent.valid & APR_FINFO_TYPE) && (dirent.filetype == APR_DIR)) 1152 continue; 1153 1154 /* Ok, something's here. Maybe nothing useful. Remember that 1155 * we tried, if we completely fail, so we can reject the request! 1156 */ 1157 anymatch = 1; 1158 1159 /* See if it's something which we have access to, and which 1160 * has a known type and encoding (as opposed to something 1161 * which we'll be slapping default_type on later). 1162 */ 1163 sub_req = ap_sub_req_lookup_dirent(&dirent, r, AP_SUBREQ_MERGE_ARGS, 1164 NULL); 1165 1166 /* Double check, we still don't multi-resolve non-ordinary files 1167 */ 1168 if (sub_req->finfo.filetype != APR_REG) { 1169 /* XXX sub req not destroyed -- may be a bug/unintentional ? */ 1170 continue; 1171 } 1172 1173 /* If it has a handler, we'll pretend it's a CGI script, 1174 * since that's a good indication of the sort of thing it 1175 * might be doing. 1176 */ 1177 if (sub_req->handler && !sub_req->content_type) { 1178 ap_set_content_type(sub_req, CGI_MAGIC_TYPE); 1179 } 1180 1181 /* 1182 * mod_mime will _always_ provide us the base name in the 1183 * ap-mime-exception-list, if it processed anything. If 1184 * this list is empty, give up immediately, there was 1185 * nothing interesting. For example, looking at the files 1186 * readme.txt and readme.foo, we will throw away .foo if 1187 * it's an insignificant file (e.g. did not identify a 1188 * language, charset, encoding, content type or handler,) 1189 */ 1190 exception_list = 1191 (apr_array_header_t *)apr_table_get(sub_req->notes, 1192 "ap-mime-exceptions-list"); 1193 1194 if (!exception_list) { 1195 ap_destroy_sub_req(sub_req); 1196 continue; 1197 } 1198 1199 /* Each unregonized bit better match our base name, in sequence. 1200 * A test of index.html.foo will match index.foo or index.html.foo, 1201 * but it will never transpose the segments and allow index.foo.html 1202 * because that would introduce too much CPU consumption. Better that 1203 * we don't attempt a many-to-many match here. 1204 */ 1205 { 1206 int nexcept = exception_list->nelts; 1207 char **cur_except = (char**)exception_list->elts; 1208 char *segstart = filp, *segend, saveend; 1209 1210 while (*segstart && nexcept) { 1211 if (!(segend = strchr(segstart, '.'))) 1212 segend = strchr(segstart, '\0'); 1213 saveend = *segend; 1214 *segend = '\0'; 1215 1216#ifdef CASE_BLIND_FILESYSTEM 1217 if (strcasecmp(segstart, *cur_except) == 0) { 1218#else 1219 if (strcmp(segstart, *cur_except) == 0) { 1220#endif 1221 --nexcept; 1222 ++cur_except; 1223 } 1224 1225 if (!saveend) 1226 break; 1227 1228 *segend = saveend; 1229 segstart = segend + 1; 1230 } 1231 1232 if (nexcept) { 1233 /* Something you don't know is, something you don't know... 1234 */ 1235 ap_destroy_sub_req(sub_req); 1236 continue; 1237 } 1238 } 1239 1240 /* 1241 * ###: be warned, the _default_ content type is already 1242 * picked up here! If we failed the subrequest, or don't 1243 * know what we are serving, then continue. 1244 */ 1245 if (sub_req->status != HTTP_OK || (!sub_req->content_type)) { 1246 ap_destroy_sub_req(sub_req); 1247 continue; 1248 } 1249 1250 /* If it's a map file, we use that instead of the map 1251 * we're building... 1252 */ 1253 if (((sub_req->content_type) && 1254 !strcmp(sub_req->content_type, MAP_FILE_MAGIC_TYPE)) || 1255 ((sub_req->handler) && 1256 !strcmp(sub_req->handler, "type-map"))) { 1257 1258 apr_dir_close(dirp); 1259 neg->avail_vars->nelts = 0; 1260 if (sub_req->status != HTTP_OK) { 1261 return sub_req->status; 1262 } 1263 return read_type_map(NULL, neg, sub_req); 1264 } 1265 1266 /* Have reasonable variant --- gather notes. */ 1267 1268 mime_info.sub_req = sub_req; 1269 mime_info.file_name = apr_pstrdup(neg->pool, dirent.name); 1270 if (sub_req->content_encoding) { 1271 mime_info.content_encoding = sub_req->content_encoding; 1272 } 1273 if (sub_req->content_languages) { 1274 mime_info.content_languages = sub_req->content_languages; 1275 } 1276 1277 get_entry(neg->pool, &accept_info, sub_req->content_type); 1278 set_mime_fields(&mime_info, &accept_info); 1279 1280 new_var = apr_array_push(neg->avail_vars); 1281 memcpy(new_var, (void *) &mime_info, sizeof(var_rec)); 1282 1283 neg->count_multiviews_variants++; 1284 1285 clean_var_rec(&mime_info); 1286 } 1287 1288 apr_dir_close(dirp); 1289 1290 /* We found some file names that matched. None could be served. 1291 * Rather than fall out to autoindex or some other mapper, this 1292 * request must die. 1293 */ 1294 if (anymatch && !neg->avail_vars->nelts) { 1295 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 1296 "Negotiation: discovered file(s) matching request: %s" 1297 " (None could be negotiated).", 1298 r->filename); 1299 return HTTP_NOT_FOUND; 1300 } 1301 1302 set_vlist_validator(r, r); 1303 1304 /* Sort the variants into a canonical order. The negotiation 1305 * result sometimes depends on the order of the variants. By 1306 * sorting the variants into a canonical order, rather than using 1307 * the order in which readdir() happens to return them, we ensure 1308 * that the negotiation result will be consistent over filesystem 1309 * backup/restores and over all mirror sites. 1310 */ 1311 1312 qsort((void *) neg->avail_vars->elts, neg->avail_vars->nelts, 1313 sizeof(var_rec), (int (*)(const void *, const void *)) variantsortf); 1314 1315 return OK; 1316} 1317 1318 1319/***************************************************************** 1320 * And now for the code you've been waiting for... actually 1321 * finding a match to the client's requirements. 1322 */ 1323 1324/* Matching MIME types ... the star/star and foo/star commenting conventions 1325 * are implemented here. (You know what I mean by star/star, but just 1326 * try mentioning those three characters in a C comment). Using strcmp() 1327 * is legit, because everything has already been smashed to lowercase. 1328 * 1329 * Note also that if we get an exact match on the media type, we update 1330 * level_matched for use in level_cmp below... 1331 * 1332 * We also give a value for mime_stars, which is used later. It should 1333 * be 1 for star/star, 2 for type/star and 3 for type/subtype. 1334 */ 1335 1336static int mime_match(accept_rec *accept_r, var_rec *avail) 1337{ 1338 const char *accept_type = accept_r->name; 1339 const char *avail_type = avail->mime_type; 1340 int len = strlen(accept_type); 1341 1342 if (accept_type[0] == '*') { /* Anything matches star/star */ 1343 if (avail->mime_stars < 1) { 1344 avail->mime_stars = 1; 1345 } 1346 return 1; 1347 } 1348 else if ((accept_type[len - 1] == '*') && 1349 !strncmp(accept_type, avail_type, len - 2)) { 1350 if (avail->mime_stars < 2) { 1351 avail->mime_stars = 2; 1352 } 1353 return 1; 1354 } 1355 else if (!strcmp(accept_type, avail_type) 1356 || (!strcmp(accept_type, "text/html") 1357 && (!strcmp(avail_type, INCLUDES_MAGIC_TYPE) 1358 || !strcmp(avail_type, INCLUDES_MAGIC_TYPE3)))) { 1359 if (accept_r->level >= avail->level) { 1360 avail->level_matched = avail->level; 1361 avail->mime_stars = 3; 1362 return 1; 1363 } 1364 } 1365 1366 return OK; 1367} 1368 1369/* This code implements a piece of the tie-breaking algorithm between 1370 * variants of equal quality. This piece is the treatment of variants 1371 * of the same base media type, but different levels. What we want to 1372 * return is the variant at the highest level that the client explicitly 1373 * claimed to accept. 1374 * 1375 * If all the variants available are at a higher level than that, or if 1376 * the client didn't say anything specific about this media type at all 1377 * and these variants just got in on a wildcard, we prefer the lowest 1378 * level, on grounds that that's the one that the client is least likely 1379 * to choke on. 1380 * 1381 * (This is all motivated by treatment of levels in HTML --- we only 1382 * want to give level 3 to browsers that explicitly ask for it; browsers 1383 * that don't, including HTTP/0.9 browsers that only get the implicit 1384 * "Accept: * / *" [space added to avoid confusing cpp --- no, that 1385 * syntax doesn't really work] should get HTML2 if available). 1386 * 1387 * (Note that this code only comes into play when we are choosing among 1388 * variants of equal quality, where the draft standard gives us a fair 1389 * bit of leeway about what to do. It ain't specified by the standard; 1390 * rather, it is a choice made by this server about what to do in cases 1391 * where the standard does not specify a unique course of action). 1392 */ 1393 1394static int level_cmp(var_rec *var1, var_rec *var2) 1395{ 1396 /* Levels are only comparable between matching media types */ 1397 1398 if (var1->is_pseudo_html && !var2->is_pseudo_html) { 1399 return 0; 1400 } 1401 1402 if (!var1->is_pseudo_html && strcmp(var1->mime_type, var2->mime_type)) { 1403 return 0; 1404 } 1405 /* The result of the above if statements is that, if we get to 1406 * here, both variants have the same mime_type or both are 1407 * pseudo-html. 1408 */ 1409 1410 /* Take highest level that matched, if either did match. */ 1411 1412 if (var1->level_matched > var2->level_matched) { 1413 return 1; 1414 } 1415 if (var1->level_matched < var2->level_matched) { 1416 return -1; 1417 } 1418 1419 /* Neither matched. Take lowest level, if there's a difference. */ 1420 1421 if (var1->level < var2->level) { 1422 return 1; 1423 } 1424 if (var1->level > var2->level) { 1425 return -1; 1426 } 1427 1428 /* Tied */ 1429 1430 return 0; 1431} 1432 1433/* Finding languages. The main entry point is set_language_quality() 1434 * which is called for each variant. It sets two elements in the 1435 * variant record: 1436 * language_quality - the 'q' value of the 'best' matching language 1437 * from Accept-Language: header (HTTP/1.1) 1438 * lang_index - Non-negotiated language priority, using 1439 * position of language on the Accept-Language: 1440 * header, if present, else LanguagePriority 1441 * directive order. 1442 * 1443 * When we do the variant checking for best variant, we use language 1444 * quality first, and if a tie, language_index next (this only applies 1445 * when _not_ using the RVSA/1.0 algorithm). If using the RVSA/1.0 1446 * algorithm, lang_index is never used. 1447 * 1448 * set_language_quality() calls find_lang_index() and find_default_index() 1449 * to set lang_index. 1450 */ 1451 1452static int find_lang_index(apr_array_header_t *accept_langs, char *lang) 1453{ 1454 const char **alang; 1455 int i; 1456 1457 if (!lang || !accept_langs) { 1458 return -1; 1459 } 1460 1461 alang = (const char **) accept_langs->elts; 1462 1463 for (i = 0; i < accept_langs->nelts; ++i) { 1464 if (!strncmp(lang, *alang, strlen(*alang))) { 1465 return i; 1466 } 1467 alang += (accept_langs->elt_size / sizeof(char*)); 1468 } 1469 1470 return -1; 1471} 1472 1473/* set_default_lang_quality() sets the quality we apply to variants 1474 * which have no language assigned to them. If none of the variants 1475 * have a language, we are not negotiating on language, so all are 1476 * acceptable, and we set the default q value to 1.0. However if 1477 * some of the variants have languages, we set this default to 0.0001. 1478 * The value of this default will be applied to all variants with 1479 * no explicit language -- which will have the effect of making them 1480 * acceptable, but only if no variants with an explicit language 1481 * are acceptable. The default q value set here is assigned to variants 1482 * with no language type in set_language_quality(). 1483 * 1484 * Note that if using the RVSA/1.0 algorithm, we don't use this 1485 * fiddle. 1486 */ 1487 1488static void set_default_lang_quality(negotiation_state *neg) 1489{ 1490 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 1491 int j; 1492 1493 if (!neg->dont_fiddle_headers) { 1494 for (j = 0; j < neg->avail_vars->nelts; ++j) { 1495 var_rec *variant = &avail_recs[j]; 1496 if (variant->content_languages && 1497 variant->content_languages->nelts) { 1498 neg->default_lang_quality = 0.0001f; 1499 return; 1500 } 1501 } 1502 } 1503 1504 neg->default_lang_quality = 1.0f; 1505} 1506 1507/* Set the language_quality value in the variant record. Also 1508 * assigns lang_index for ForceLanguagePriority. 1509 * 1510 * To find the language_quality value, we look for the 'q' value 1511 * of the 'best' matching language on the Accept-Language 1512 * header. The 'best' match is the language on Accept-Language 1513 * header which matches the language of this variant either fully, 1514 * or as far as the prefix marker (-). If two or more languages 1515 * match, use the longest string from the Accept-Language header 1516 * (see HTTP/1.1 [14.4]) 1517 * 1518 * When a variant has multiple languages, we find the 'best' 1519 * match for each variant language tag as above, then select the 1520 * one with the highest q value. Because both the accept-header 1521 * and variant can have multiple languages, we now have a hairy 1522 * loop-within-a-loop here. 1523 * 1524 * If the variant has no language and we have no Accept-Language 1525 * items, leave the quality at 1.0 and return. 1526 * 1527 * If the variant has no language, we use the default as set by 1528 * set_default_lang_quality() (1.0 if we are not negotiating on 1529 * language, 0.001 if we are). 1530 * 1531 * Following the setting of the language quality, we drop through to 1532 * set the old 'lang_index'. This is set based on either the order 1533 * of the languages on the Accept-Language header, or the 1534 * order on the LanguagePriority directive. This is only used 1535 * in the negotiation if the language qualities tie. 1536 */ 1537 1538static void set_language_quality(negotiation_state *neg, var_rec *variant) 1539{ 1540 int forcepriority = neg->conf->forcelangpriority; 1541 if (forcepriority == FLP_UNDEF) { 1542 forcepriority = FLP_DEFAULT; 1543 } 1544 1545 if (!variant->content_languages || !variant->content_languages->nelts) { 1546 /* This variant has no content-language, so use the default 1547 * quality factor for variants with no content-language 1548 * (previously set by set_default_lang_quality()). 1549 * Leave the factor alone (it remains at 1.0) when we may not fiddle 1550 * with the headers. 1551 */ 1552 if (!neg->dont_fiddle_headers) { 1553 variant->lang_quality = neg->default_lang_quality; 1554 } 1555 if (!neg->accept_langs) { 1556 return; /* no accept-language header */ 1557 } 1558 return; 1559 } 1560 else { 1561 /* Variant has one (or more) languages. Look for the best 1562 * match. We do this by going through each language on the 1563 * variant description looking for a match on the 1564 * Accept-Language header. The best match is the longest 1565 * matching language on the header. The final result is the 1566 * best q value from all the languages on the variant 1567 * description. 1568 */ 1569 1570 if (!neg->accept_langs) { 1571 /* no accept-language header makes the variant indefinite */ 1572 variant->definite = 0; 1573 } 1574 else { /* There is an accept-language with 0 or more items */ 1575 accept_rec *accs = (accept_rec *) neg->accept_langs->elts; 1576 accept_rec *best = NULL, *star = NULL; 1577 accept_rec *bestthistag; 1578 char *lang, *p; 1579 float fiddle_q = 0.0f; 1580 int any_match_on_star = 0; 1581 int i, j; 1582 apr_size_t alen, longest_lang_range_len; 1583 1584 for (j = 0; j < variant->content_languages->nelts; ++j) { 1585 p = NULL; 1586 bestthistag = NULL; 1587 longest_lang_range_len = 0; 1588 alen = 0; 1589 1590 /* lang is the variant's language-tag, which is the one 1591 * we are allowed to use the prefix of in HTTP/1.1 1592 */ 1593 lang = ((char **) (variant->content_languages->elts))[j]; 1594 1595 /* now find the best (i.e. longest) matching 1596 * Accept-Language header language. We put the best match 1597 * for this tag in bestthistag. We cannot update the 1598 * overall best (based on q value) because the best match 1599 * for this tag is the longest language item on the accept 1600 * header, not necessarily the highest q. 1601 */ 1602 for (i = 0; i < neg->accept_langs->nelts; ++i) { 1603 if (!strcmp(accs[i].name, "*")) { 1604 if (!star) { 1605 star = &accs[i]; 1606 } 1607 continue; 1608 } 1609 /* Find language. We match if either the variant 1610 * language tag exactly matches the language range 1611 * from the accept header, or a prefix of the variant 1612 * language tag up to a '-' character matches the 1613 * whole of the language range in the Accept-Language 1614 * header. Note that HTTP/1.x allows any number of 1615 * '-' characters in a tag or range, currently only 1616 * tags with zero or one '-' characters are defined 1617 * for general use (see rfc1766). 1618 * 1619 * We only use language range in the Accept-Language 1620 * header the best match for the variant language tag 1621 * if it is longer than the previous best match. 1622 */ 1623 1624 alen = strlen(accs[i].name); 1625 1626 if ((strlen(lang) >= alen) && 1627 !strncmp(lang, accs[i].name, alen) && 1628 ((lang[alen] == 0) || (lang[alen] == '-')) ) { 1629 1630 if (alen > longest_lang_range_len) { 1631 longest_lang_range_len = alen; 1632 bestthistag = &accs[i]; 1633 } 1634 } 1635 1636 if (!bestthistag && !neg->dont_fiddle_headers) { 1637 /* The next bit is a fiddle. Some browsers might 1638 * be configured to send more specific language 1639 * ranges than desirable. For example, an 1640 * Accept-Language of en-US should never match 1641 * variants with languages en or en-GB. But US 1642 * English speakers might pick en-US as their 1643 * language choice. So this fiddle checks if the 1644 * language range has a prefix, and if so, it 1645 * matches variants which match that prefix with a 1646 * priority of 0.001. So a request for en-US would 1647 * match variants of types en and en-GB, but at 1648 * much lower priority than matches of en-US 1649 * directly, or of any other language listed on 1650 * the Accept-Language header. Note that this 1651 * fiddle does not handle multi-level prefixes. 1652 */ 1653 if ((p = strchr(accs[i].name, '-'))) { 1654 int plen = p - accs[i].name; 1655 1656 if (!strncmp(lang, accs[i].name, plen)) { 1657 fiddle_q = 0.001f; 1658 } 1659 } 1660 } 1661 } 1662 /* Finished looking at Accept-Language headers, the best 1663 * (longest) match is in bestthistag, or NULL if no match 1664 */ 1665 if (!best || 1666 (bestthistag && bestthistag->quality > best->quality)) { 1667 best = bestthistag; 1668 } 1669 1670 /* See if the tag matches on a * in the Accept-Language 1671 * header. If so, record this fact for later use 1672 */ 1673 if (!bestthistag && star) { 1674 any_match_on_star = 1; 1675 } 1676 } 1677 1678 /* If one of the language tags of the variant matched on *, we 1679 * need to see if its q is better than that of any non-* match 1680 * on any other tag of the variant. If so the * match takes 1681 * precedence and the overall match is not definite. 1682 */ 1683 if ( any_match_on_star && 1684 ((best && star->quality > best->quality) || 1685 (!best)) ) { 1686 best = star; 1687 variant->definite = 0; 1688 } 1689 1690 variant->lang_quality = best ? best->quality : fiddle_q; 1691 } 1692 } 1693 1694 /* Handle the ForceDefaultLanguage overrides, based on the best match 1695 * to LanguagePriority order. The best match is the lowest index of 1696 * any LanguagePriority match. 1697 */ 1698 if (((forcepriority & FLP_PREFER) 1699 && (variant->lang_index < 0)) 1700 || ((forcepriority & FLP_FALLBACK) 1701 && !variant->lang_quality)) 1702 { 1703 int bestidx = -1; 1704 int j; 1705 1706 for (j = 0; j < variant->content_languages->nelts; ++j) 1707 { 1708 /* lang is the variant's language-tag, which is the one 1709 * we are allowed to use the prefix of in HTTP/1.1 1710 */ 1711 char *lang = ((char **) (variant->content_languages->elts))[j]; 1712 int idx = -1; 1713 1714 /* If we wish to fallback or 1715 * we use our own LanguagePriority index. 1716 */ 1717 idx = find_lang_index(neg->conf->language_priority, lang); 1718 if ((idx >= 0) && ((bestidx == -1) || (idx < bestidx))) { 1719 bestidx = idx; 1720 } 1721 } 1722 1723 if (bestidx >= 0) { 1724 if (variant->lang_quality) { 1725 if (forcepriority & FLP_PREFER) { 1726 variant->lang_index = bestidx; 1727 } 1728 } 1729 else { 1730 if (forcepriority & FLP_FALLBACK) { 1731 variant->lang_index = bestidx; 1732 variant->lang_quality = .0001f; 1733 variant->definite = 0; 1734 } 1735 } 1736 } 1737 } 1738 return; 1739} 1740 1741/* Determining the content length --- if the map didn't tell us, 1742 * we have to do a stat() and remember for next time. 1743 */ 1744 1745static apr_off_t find_content_length(negotiation_state *neg, var_rec *variant) 1746{ 1747 apr_finfo_t statb; 1748 1749 if (variant->bytes < 0) { 1750 if ( variant->sub_req 1751 && (variant->sub_req->finfo.valid & APR_FINFO_SIZE)) { 1752 variant->bytes = variant->sub_req->finfo.size; 1753 } 1754 else { 1755 char *fullname = ap_make_full_path(neg->pool, neg->dir_name, 1756 variant->file_name); 1757 1758 if (apr_stat(&statb, fullname, 1759 APR_FINFO_SIZE, neg->pool) == APR_SUCCESS) { 1760 variant->bytes = statb.size; 1761 } 1762 } 1763 } 1764 1765 return variant->bytes; 1766} 1767 1768/* For a given variant, find the best matching Accept: header 1769 * and assign the Accept: header's quality value to the 1770 * mime_type_quality field of the variant, for later use in 1771 * determining the best matching variant. 1772 */ 1773 1774static void set_accept_quality(negotiation_state *neg, var_rec *variant) 1775{ 1776 int i; 1777 accept_rec *accept_recs; 1778 float q = 0.0f; 1779 int q_definite = 1; 1780 1781 /* if no Accept: header, leave quality alone (will 1782 * remain at the default value of 1) 1783 * 1784 * XXX: This if is currently never true because of the effect of 1785 * maybe_add_default_accepts(). 1786 */ 1787 if (!neg->accepts) { 1788 if (variant->mime_type && *variant->mime_type) 1789 variant->definite = 0; 1790 return; 1791 } 1792 1793 accept_recs = (accept_rec *) neg->accepts->elts; 1794 1795 /* 1796 * Go through each of the ranges on the Accept: header, 1797 * looking for the 'best' match with this variant's 1798 * content-type. We use the best match's quality 1799 * value (from the Accept: header) for this variant's 1800 * mime_type_quality field. 1801 * 1802 * The best match is determined like this: 1803 * type/type is better than type/ * is better than * / * 1804 * if match is type/type, use the level mime param if available 1805 */ 1806 for (i = 0; i < neg->accepts->nelts; ++i) { 1807 1808 accept_rec *type = &accept_recs[i]; 1809 int prev_mime_stars; 1810 1811 prev_mime_stars = variant->mime_stars; 1812 1813 if (!mime_match(type, variant)) { 1814 continue; /* didn't match the content type at all */ 1815 } 1816 else { 1817 /* did match - see if there were less or more stars than 1818 * in previous match 1819 */ 1820 if (prev_mime_stars == variant->mime_stars) { 1821 continue; /* more stars => not as good a match */ 1822 } 1823 } 1824 1825 /* If we are allowed to mess with the q-values 1826 * and have no explicit q= parameters in the accept header, 1827 * make wildcards very low, so we have a low chance 1828 * of ending up with them if there's something better. 1829 */ 1830 1831 if (!neg->dont_fiddle_headers && !neg->accept_q && 1832 variant->mime_stars == 1) { 1833 q = 0.01f; 1834 } 1835 else if (!neg->dont_fiddle_headers && !neg->accept_q && 1836 variant->mime_stars == 2) { 1837 q = 0.02f; 1838 } 1839 else { 1840 q = type->quality; 1841 } 1842 1843 q_definite = (variant->mime_stars == 3); 1844 } 1845 variant->mime_type_quality = q; 1846 variant->definite = variant->definite && q_definite; 1847 1848} 1849 1850/* For a given variant, find the 'q' value of the charset given 1851 * on the Accept-Charset line. If no charsets are listed, 1852 * assume value of '1'. 1853 */ 1854static void set_charset_quality(negotiation_state *neg, var_rec *variant) 1855{ 1856 int i; 1857 accept_rec *accept_recs; 1858 const char *charset = variant->content_charset; 1859 accept_rec *star = NULL; 1860 1861 /* if no Accept-Charset: header, leave quality alone (will 1862 * remain at the default value of 1) 1863 */ 1864 if (!neg->accept_charsets) { 1865 if (charset && *charset) 1866 variant->definite = 0; 1867 return; 1868 } 1869 1870 accept_recs = (accept_rec *) neg->accept_charsets->elts; 1871 1872 if (charset == NULL || !*charset) { 1873 /* Charset of variant not known */ 1874 1875 /* if not a text / * type, leave quality alone */ 1876 if (!(!strncmp(variant->mime_type, "text/", 5) 1877 || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE) 1878 || !strcmp(variant->mime_type, INCLUDES_MAGIC_TYPE3) 1879 )) 1880 return; 1881 1882 /* Don't go guessing if we are in strict header mode, 1883 * e.g. when running the rvsa, as any guess won't be reflected 1884 * in the variant list or content-location headers. 1885 */ 1886 if (neg->dont_fiddle_headers) 1887 return; 1888 1889 charset = "iso-8859-1"; /* The default charset for HTTP text types */ 1890 } 1891 1892 /* 1893 * Go through each of the items on the Accept-Charset header, 1894 * looking for a match with this variant's charset. If none 1895 * match, charset is unacceptable, so set quality to 0. 1896 */ 1897 for (i = 0; i < neg->accept_charsets->nelts; ++i) { 1898 1899 accept_rec *type = &accept_recs[i]; 1900 1901 if (!strcmp(type->name, charset)) { 1902 variant->charset_quality = type->quality; 1903 return; 1904 } 1905 else if (strcmp(type->name, "*") == 0) { 1906 star = type; 1907 } 1908 } 1909 /* No explicit match */ 1910 if (star) { 1911 variant->charset_quality = star->quality; 1912 variant->definite = 0; 1913 return; 1914 } 1915 /* If this variant is in charset iso-8859-1, the default is 1.0 */ 1916 if (strcmp(charset, "iso-8859-1") == 0) { 1917 variant->charset_quality = 1.0f; 1918 } 1919 else { 1920 variant->charset_quality = 0.0f; 1921 } 1922} 1923 1924 1925/* is_identity_encoding is included for back-compat, but does anyone 1926 * use 7bit, 8bin or binary in their var files?? 1927 */ 1928 1929static int is_identity_encoding(const char *enc) 1930{ 1931 return (!enc || !enc[0] || !strcmp(enc, "7bit") || !strcmp(enc, "8bit") 1932 || !strcmp(enc, "binary")); 1933} 1934 1935/* 1936 * set_encoding_quality determines whether the encoding for a particular 1937 * variant is acceptable for the user-agent. 1938 * 1939 * The rules for encoding are that if the user-agent does not supply 1940 * any Accept-Encoding header, then all encodings are allowed but a 1941 * variant with no encoding should be preferred. 1942 * If there is an empty Accept-Encoding header, then no encodings are 1943 * acceptable. If there is a non-empty Accept-Encoding header, then 1944 * any of the listed encodings are acceptable, as well as no encoding 1945 * unless the "identity" encoding is specifically excluded. 1946 */ 1947static void set_encoding_quality(negotiation_state *neg, var_rec *variant) 1948{ 1949 accept_rec *accept_recs; 1950 const char *enc = variant->content_encoding; 1951 accept_rec *star = NULL; 1952 float value_if_not_found = 0.0f; 1953 int i; 1954 1955 if (!neg->accept_encodings) { 1956 /* We had no Accept-Encoding header, assume that all 1957 * encodings are acceptable with a low quality, 1958 * but we prefer no encoding if available. 1959 */ 1960 if (!enc || is_identity_encoding(enc)) 1961 variant->encoding_quality = 1.0f; 1962 else 1963 variant->encoding_quality = 0.5f; 1964 1965 return; 1966 } 1967 1968 if (!enc || is_identity_encoding(enc)) { 1969 enc = "identity"; 1970 value_if_not_found = 0.0001f; 1971 } 1972 1973 accept_recs = (accept_rec *) neg->accept_encodings->elts; 1974 1975 /* Go through each of the encodings on the Accept-Encoding: header, 1976 * looking for a match with our encoding. x- prefixes are ignored. 1977 */ 1978 if (enc[0] == 'x' && enc[1] == '-') { 1979 enc += 2; 1980 } 1981 for (i = 0; i < neg->accept_encodings->nelts; ++i) { 1982 1983 char *name = accept_recs[i].name; 1984 1985 if (name[0] == 'x' && name[1] == '-') { 1986 name += 2; 1987 } 1988 1989 if (!strcmp(name, enc)) { 1990 variant->encoding_quality = accept_recs[i].quality; 1991 return; 1992 } 1993 1994 if (strcmp(name, "*") == 0) { 1995 star = &accept_recs[i]; 1996 } 1997 1998 } 1999 /* No explicit match */ 2000 if (star) { 2001 variant->encoding_quality = star->quality; 2002 return; 2003 } 2004 2005 /* Encoding not found on Accept-Encoding: header, so it is 2006 * _not_ acceptable unless it is the identity (no encoding) 2007 */ 2008 variant->encoding_quality = value_if_not_found; 2009} 2010 2011/************************************************************* 2012 * Possible results of the variant selection algorithm 2013 */ 2014enum algorithm_results { 2015 alg_choice = 1, /* choose variant */ 2016 alg_list /* list variants */ 2017}; 2018 2019/* Below is the 'best_match' function. It returns an int, which has 2020 * one of the two values alg_choice or alg_list, which give the result 2021 * of the variant selection algorithm. alg_list means that no best 2022 * variant was found by the algorithm, alg_choice means that a best 2023 * variant was found and should be returned. The list/choice 2024 * terminology comes from TCN (rfc2295), but is used in a more generic 2025 * way here. The best variant is returned in *pbest. best_match has 2026 * two possible algorithms for determining the best variant: the 2027 * RVSA/1.0 algorithm (from RFC2296), and the standard Apache 2028 * algorithm. These are split out into separate functions 2029 * (is_variant_better_rvsa() and is_variant_better()). Selection of 2030 * one is through the neg->use_rvsa flag. 2031 * 2032 * The call to best_match also creates full information, including 2033 * language, charset, etc quality for _every_ variant. This is needed 2034 * for generating a correct Vary header, and can be used for the 2035 * Alternates header, the human-readable list responses and 406 errors. 2036 */ 2037 2038/* Firstly, the RVSA/1.0 (HTTP Remote Variant Selection Algorithm 2039 * v1.0) from rfc2296. This is the algorithm that goes together with 2040 * transparent content negotiation (TCN). 2041 */ 2042static int is_variant_better_rvsa(negotiation_state *neg, var_rec *variant, 2043 var_rec *best, float *p_bestq) 2044{ 2045 float bestq = *p_bestq, q; 2046 2047 /* TCN does not cover negotiation on content-encoding. For now, 2048 * we ignore the encoding unless it was explicitly excluded. 2049 */ 2050 if (variant->encoding_quality == 0.0f) 2051 return 0; 2052 2053 q = variant->mime_type_quality * 2054 variant->source_quality * 2055 variant->charset_quality * 2056 variant->lang_quality; 2057 2058 /* RFC 2296 calls for the result to be rounded to 5 decimal places, 2059 * but we don't do that because it serves no useful purpose other 2060 * than to ensure that a remote algorithm operates on the same 2061 * precision as ours. That is silly, since what we obviously want 2062 * is for the algorithm to operate on the best available precision 2063 * regardless of who runs it. Since the above calculation may 2064 * result in significant variance at 1e-12, rounding would be bogus. 2065 */ 2066 2067#ifdef NEG_DEBUG 2068 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, 2069 "Variant: file=%s type=%s lang=%s sourceq=%1.3f " 2070 "mimeq=%1.3f langq=%1.3f charq=%1.3f encq=%1.3f " 2071 "q=%1.5f definite=%d", 2072 (variant->file_name ? variant->file_name : ""), 2073 (variant->mime_type ? variant->mime_type : ""), 2074 (variant->content_languages 2075 ? apr_array_pstrcat(neg->pool, variant->content_languages, ',') 2076 : ""), 2077 variant->source_quality, 2078 variant->mime_type_quality, 2079 variant->lang_quality, 2080 variant->charset_quality, 2081 variant->encoding_quality, 2082 q, 2083 variant->definite); 2084#endif 2085 2086 if (q <= 0.0f) { 2087 return 0; 2088 } 2089 if (q > bestq) { 2090 *p_bestq = q; 2091 return 1; 2092 } 2093 if (q == bestq) { 2094 /* If the best variant's encoding is of lesser quality than 2095 * this variant, then we prefer this variant 2096 */ 2097 if (variant->encoding_quality > best->encoding_quality) { 2098 *p_bestq = q; 2099 return 1; 2100 } 2101 } 2102 return 0; 2103} 2104 2105/* Negotiation algorithm as used by previous versions of Apache 2106 * (just about). 2107 */ 2108 2109static int is_variant_better(negotiation_state *neg, var_rec *variant, 2110 var_rec *best, float *p_bestq) 2111{ 2112 float bestq = *p_bestq, q; 2113 int levcmp; 2114 2115 /* For non-transparent negotiation, server can choose how 2116 * to handle the negotiation. We'll use the following in 2117 * order: content-type, language, content-type level, charset, 2118 * content encoding, content length. 2119 * 2120 * For each check, we have three possible outcomes: 2121 * This variant is worse than current best: return 0 2122 * This variant is better than the current best: 2123 * assign this variant's q to *p_bestq, and return 1 2124 * This variant is just as desirable as the current best: 2125 * drop through to the next test. 2126 * 2127 * This code is written in this long-winded way to allow future 2128 * customisation, either by the addition of additional 2129 * checks, or to allow the order of the checks to be determined 2130 * by configuration options (e.g. we might prefer to check 2131 * language quality _before_ content type). 2132 */ 2133 2134 /* First though, eliminate this variant if it is not 2135 * acceptable by type, charset, encoding or language. 2136 */ 2137 2138#ifdef NEG_DEBUG 2139 ap_log_error(APLOG_MARK, APLOG_STARTUP, 0, NULL, 2140 "Variant: file=%s type=%s lang=%s sourceq=%1.3f " 2141 "mimeq=%1.3f langq=%1.3f langidx=%d charq=%1.3f encq=%1.3f ", 2142 (variant->file_name ? variant->file_name : ""), 2143 (variant->mime_type ? variant->mime_type : ""), 2144 (variant->content_languages 2145 ? apr_array_pstrcat(neg->pool, variant->content_languages, ',') 2146 : ""), 2147 variant->source_quality, 2148 variant->mime_type_quality, 2149 variant->lang_quality, 2150 variant->lang_index, 2151 variant->charset_quality, 2152 variant->encoding_quality); 2153#endif 2154 2155 if (variant->encoding_quality == 0.0f || 2156 variant->lang_quality == 0.0f || 2157 variant->source_quality == 0.0f || 2158 variant->charset_quality == 0.0f || 2159 variant->mime_type_quality == 0.0f) { 2160 return 0; /* don't consider unacceptables */ 2161 } 2162 2163 q = variant->mime_type_quality * variant->source_quality; 2164 if (q == 0.0 || q < bestq) { 2165 return 0; 2166 } 2167 if (q > bestq || !best) { 2168 *p_bestq = q; 2169 return 1; 2170 } 2171 2172 /* language */ 2173 if (variant->lang_quality < best->lang_quality) { 2174 return 0; 2175 } 2176 if (variant->lang_quality > best->lang_quality) { 2177 *p_bestq = q; 2178 return 1; 2179 } 2180 2181 /* if language qualities were equal, try the LanguagePriority stuff */ 2182 if (best->lang_index != -1 && 2183 (variant->lang_index == -1 || variant->lang_index > best->lang_index)) { 2184 return 0; 2185 } 2186 if (variant->lang_index != -1 && 2187 (best->lang_index == -1 || variant->lang_index < best->lang_index)) { 2188 *p_bestq = q; 2189 return 1; 2190 } 2191 2192 /* content-type level (sometimes used with text/html, though we 2193 * support it on other types too) 2194 */ 2195 levcmp = level_cmp(variant, best); 2196 if (levcmp == -1) { 2197 return 0; 2198 } 2199 if (levcmp == 1) { 2200 *p_bestq = q; 2201 return 1; 2202 } 2203 2204 /* charset */ 2205 if (variant->charset_quality < best->charset_quality) { 2206 return 0; 2207 } 2208 /* If the best variant's charset is ISO-8859-1 and this variant has 2209 * the same charset quality, then we prefer this variant 2210 */ 2211 2212 if (variant->charset_quality > best->charset_quality || 2213 ((variant->content_charset != NULL && 2214 *variant->content_charset != '\0' && 2215 strcmp(variant->content_charset, "iso-8859-1") != 0) && 2216 (best->content_charset == NULL || 2217 *best->content_charset == '\0' || 2218 strcmp(best->content_charset, "iso-8859-1") == 0))) { 2219 *p_bestq = q; 2220 return 1; 2221 } 2222 2223 /* Prefer the highest value for encoding_quality. 2224 */ 2225 if (variant->encoding_quality < best->encoding_quality) { 2226 return 0; 2227 } 2228 if (variant->encoding_quality > best->encoding_quality) { 2229 *p_bestq = q; 2230 return 1; 2231 } 2232 2233 /* content length if all else equal */ 2234 if (find_content_length(neg, variant) >= find_content_length(neg, best)) { 2235 return 0; 2236 } 2237 2238 /* ok, to get here means every thing turned out equal, except 2239 * we have a shorter content length, so use this variant 2240 */ 2241 *p_bestq = q; 2242 return 1; 2243} 2244 2245/* figure out, whether a variant is in a specific language 2246 * it returns also false, if the variant has no language. 2247 */ 2248static int variant_has_language(var_rec *variant, const char *lang) 2249{ 2250 int j, max; 2251 2252 /* fast exit */ 2253 if ( !lang 2254 || !variant->content_languages 2255 || !(max = variant->content_languages->nelts)) { 2256 return 0; 2257 } 2258 2259 for (j = 0; j < max; ++j) { 2260 if (!strcmp(lang, 2261 ((char **) (variant->content_languages->elts))[j])) { 2262 return 1; 2263 } 2264 } 2265 2266 return 0; 2267} 2268 2269/* check for environment variables 'no-gzip' and 2270 * 'gzip-only-text/html' to get a behaviour similiar 2271 * to mod_deflate 2272 */ 2273static int discard_variant_by_env(var_rec *variant, int discard) 2274{ 2275 if ( is_identity_encoding(variant->content_encoding) 2276 || !strcmp(variant->content_encoding, "identity")) { 2277 return 0; 2278 } 2279 2280 return ( (discard == DISCARD_ALL_ENCODINGS) 2281 || (discard == DISCARD_ALL_BUT_HTML 2282 && (!variant->mime_type 2283 || strncmp(variant->mime_type, "text/html", 9)))); 2284} 2285 2286static int best_match(negotiation_state *neg, var_rec **pbest) 2287{ 2288 int j; 2289 var_rec *best; 2290 float bestq = 0.0f; 2291 enum algorithm_results algorithm_result; 2292 int may_discard = 0; 2293 2294 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2295 2296 /* fetch request dependent variables 2297 * prefer-language: prefer a certain language. 2298 */ 2299 const char *preferred_language = apr_table_get(neg->r->subprocess_env, 2300 "prefer-language"); 2301 2302 /* no-gzip: do not send encoded documents */ 2303 if (apr_table_get(neg->r->subprocess_env, "no-gzip")) { 2304 may_discard = DISCARD_ALL_ENCODINGS; 2305 } 2306 2307 /* gzip-only-text/html: send encoded documents only 2308 * if they are text/html. (no-gzip has a higher priority). 2309 */ 2310 else { 2311 const char *env_value = apr_table_get(neg->r->subprocess_env, 2312 "gzip-only-text/html"); 2313 2314 if (env_value && !strcmp(env_value, "1")) { 2315 may_discard = DISCARD_ALL_BUT_HTML; 2316 } 2317 } 2318 2319 set_default_lang_quality(neg); 2320 2321 /* 2322 * Find the 'best' variant 2323 * We run the loop possibly twice: if "prefer-language" 2324 * environment variable is set but we did not find an appropriate 2325 * best variant. In that case forget the preferred language and 2326 * negotiate over all variants. 2327 */ 2328 2329 do { 2330 best = NULL; 2331 2332 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2333 var_rec *variant = &avail_recs[j]; 2334 2335 /* if this variant is encoded somehow and there are special 2336 * variables set, we do not negotiate it. see above. 2337 */ 2338 if ( may_discard 2339 && discard_variant_by_env(variant, may_discard)) { 2340 continue; 2341 } 2342 2343 /* if a language is preferred, but the current variant 2344 * is not in that language, then drop it for now 2345 */ 2346 if ( preferred_language 2347 && !variant_has_language(variant, preferred_language)) { 2348 continue; 2349 } 2350 2351 /* Find all the relevant 'quality' values from the 2352 * Accept... headers, and store in the variant. This also 2353 * prepares for sending an Alternates header etc so we need to 2354 * do it even if we do not actually plan to find a best 2355 * variant. 2356 */ 2357 set_accept_quality(neg, variant); 2358 /* accept the preferred language, even when it's not listed within 2359 * the Accept-Language header 2360 */ 2361 if (preferred_language) { 2362 variant->lang_quality = 1.0f; 2363 variant->definite = 1; 2364 } 2365 else { 2366 set_language_quality(neg, variant); 2367 } 2368 set_encoding_quality(neg, variant); 2369 set_charset_quality(neg, variant); 2370 2371 /* Only do variant selection if we may actually choose a 2372 * variant for the client 2373 */ 2374 if (neg->may_choose) { 2375 2376 /* Now find out if this variant is better than the current 2377 * best, either using the RVSA/1.0 algorithm, or Apache's 2378 * internal server-driven algorithm. Presumably other 2379 * server-driven algorithms are possible, and could be 2380 * implemented here. 2381 */ 2382 2383 if (neg->use_rvsa) { 2384 if (is_variant_better_rvsa(neg, variant, best, &bestq)) { 2385 best = variant; 2386 } 2387 } 2388 else { 2389 if (is_variant_better(neg, variant, best, &bestq)) { 2390 best = variant; 2391 } 2392 } 2393 } 2394 } 2395 2396 /* We now either have a best variant, or no best variant */ 2397 2398 if (neg->use_rvsa) { 2399 /* calculate result for RVSA/1.0 algorithm: 2400 * only a choice response if the best variant has q>0 2401 * and is definite 2402 */ 2403 algorithm_result = (best && best->definite) && (bestq > 0) ? 2404 alg_choice : alg_list; 2405 } 2406 else { 2407 /* calculate result for Apache negotiation algorithm */ 2408 algorithm_result = bestq > 0 ? alg_choice : alg_list; 2409 } 2410 2411 /* run the loop again, if the "prefer-language" got no clear result */ 2412 if (preferred_language && (!best || algorithm_result != alg_choice)) { 2413 preferred_language = NULL; 2414 continue; 2415 } 2416 2417 break; 2418 } while (1); 2419 2420 /* Returning a choice response with a non-neighboring variant is a 2421 * protocol security error in TCN (see rfc2295). We do *not* 2422 * verify here that the variant and URI are neighbors, even though 2423 * we may return alg_choice. We depend on the environment (the 2424 * caller) to only declare the resource transparently negotiable if 2425 * all variants are neighbors. 2426 */ 2427 *pbest = best; 2428 return algorithm_result; 2429} 2430 2431/* Sets response headers for a negotiated response. 2432 * neg->is_transparent determines whether a transparently negotiated 2433 * response or a plain `server driven negotiation' response is 2434 * created. Applicable headers are Alternates, Vary, and TCN. 2435 * 2436 * The Vary header we create is sometimes longer than is required for 2437 * the correct caching of negotiated results by HTTP/1.1 caches. For 2438 * example if we have 3 variants x.html, x.ps.en and x.ps.nl, and if 2439 * the Accept: header assigns a 0 quality to .ps, then the results of 2440 * the two server-side negotiation algorithms we currently implement 2441 * will never depend on Accept-Language so we could return `Vary: 2442 * negotiate, accept' instead of the longer 'Vary: negotiate, accept, 2443 * accept-language' which the code below will return. A routine for 2444 * computing the exact minimal Vary header would be a huge pain to code 2445 * and maintain though, especially because we need to take all possible 2446 * twiddles in the server-side negotiation algorithms into account. 2447 */ 2448static void set_neg_headers(request_rec *r, negotiation_state *neg, 2449 int alg_result) 2450{ 2451 apr_table_t *hdrs; 2452 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2453 const char *sample_type = NULL; 2454 const char *sample_language = NULL; 2455 const char *sample_encoding = NULL; 2456 const char *sample_charset = NULL; 2457 char *lang; 2458 char *qstr; 2459 apr_off_t len; 2460 apr_array_header_t *arr; 2461 int max_vlist_array = (neg->avail_vars->nelts * 21); 2462 int first_variant = 1; 2463 int vary_by_type = 0; 2464 int vary_by_language = 0; 2465 int vary_by_charset = 0; 2466 int vary_by_encoding = 0; 2467 int j; 2468 2469 /* In order to avoid O(n^2) memory copies in building Alternates, 2470 * we preallocate a apr_table_t with the maximum substrings possible, 2471 * fill it with the variant list, and then concatenate the entire array. 2472 * Note that if you change the number of substrings pushed, you also 2473 * need to change the calculation of max_vlist_array above. 2474 */ 2475 if (neg->send_alternates && neg->avail_vars->nelts) 2476 arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *)); 2477 else 2478 arr = NULL; 2479 2480 /* Put headers into err_headers_out, since send_http_header() 2481 * outputs both headers_out and err_headers_out. 2482 */ 2483 hdrs = r->err_headers_out; 2484 2485 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2486 var_rec *variant = &avail_recs[j]; 2487 2488 if (variant->content_languages && variant->content_languages->nelts) { 2489 lang = apr_array_pstrcat(r->pool, variant->content_languages, ','); 2490 } 2491 else { 2492 lang = NULL; 2493 } 2494 2495 /* Calculate Vary by looking for any difference between variants */ 2496 2497 if (first_variant) { 2498 sample_type = variant->mime_type; 2499 sample_charset = variant->content_charset; 2500 sample_language = lang; 2501 sample_encoding = variant->content_encoding; 2502 } 2503 else { 2504 if (!vary_by_type && 2505 strcmp(sample_type ? sample_type : "", 2506 variant->mime_type ? variant->mime_type : "")) { 2507 vary_by_type = 1; 2508 } 2509 if (!vary_by_charset && 2510 strcmp(sample_charset ? sample_charset : "", 2511 variant->content_charset ? 2512 variant->content_charset : "")) { 2513 vary_by_charset = 1; 2514 } 2515 if (!vary_by_language && 2516 strcmp(sample_language ? sample_language : "", 2517 lang ? lang : "")) { 2518 vary_by_language = 1; 2519 } 2520 if (!vary_by_encoding && 2521 strcmp(sample_encoding ? sample_encoding : "", 2522 variant->content_encoding ? 2523 variant->content_encoding : "")) { 2524 vary_by_encoding = 1; 2525 } 2526 } 2527 first_variant = 0; 2528 2529 if (!neg->send_alternates) 2530 continue; 2531 2532 /* Generate the string components for this Alternates entry */ 2533 2534 *((const char **) apr_array_push(arr)) = "{\""; 2535 *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, variant->file_name); 2536 *((const char **) apr_array_push(arr)) = "\" "; 2537 2538 qstr = (char *) apr_palloc(r->pool, 6); 2539 apr_snprintf(qstr, 6, "%1.3f", variant->source_quality); 2540 2541 /* Strip trailing zeros (saves those valuable network bytes) */ 2542 if (qstr[4] == '0') { 2543 qstr[4] = '\0'; 2544 if (qstr[3] == '0') { 2545 qstr[3] = '\0'; 2546 if (qstr[2] == '0') { 2547 qstr[1] = '\0'; 2548 } 2549 } 2550 } 2551 *((const char **) apr_array_push(arr)) = qstr; 2552 2553 if (variant->mime_type && *variant->mime_type) { 2554 *((const char **) apr_array_push(arr)) = " {type "; 2555 *((const char **) apr_array_push(arr)) = variant->mime_type; 2556 *((const char **) apr_array_push(arr)) = "}"; 2557 } 2558 if (variant->content_charset && *variant->content_charset) { 2559 *((const char **) apr_array_push(arr)) = " {charset "; 2560 *((const char **) apr_array_push(arr)) = variant->content_charset; 2561 *((const char **) apr_array_push(arr)) = "}"; 2562 } 2563 if (lang) { 2564 *((const char **) apr_array_push(arr)) = " {language "; 2565 *((const char **) apr_array_push(arr)) = lang; 2566 *((const char **) apr_array_push(arr)) = "}"; 2567 } 2568 if (variant->content_encoding && *variant->content_encoding) { 2569 /* Strictly speaking, this is non-standard, but so is TCN */ 2570 2571 *((const char **) apr_array_push(arr)) = " {encoding "; 2572 *((const char **) apr_array_push(arr)) = variant->content_encoding; 2573 *((const char **) apr_array_push(arr)) = "}"; 2574 } 2575 2576 /* Note that the Alternates specification (in rfc2295) does 2577 * not require that we include {length x}, so we could omit it 2578 * if determining the length is too expensive. We currently 2579 * always include it though. 2580 * 2581 * If the variant is a CGI script, find_content_length would 2582 * return the length of the script, not the output it 2583 * produces, so we check for the presence of a handler and if 2584 * there is one we don't add a length. 2585 * 2586 * XXX: TODO: This check does not detect a CGI script if we 2587 * get the variant from a type map. This needs to be fixed 2588 * (without breaking things if the type map specifies a 2589 * content-length, which currently leads to the correct result). 2590 */ 2591 if (!(variant->sub_req && variant->sub_req->handler) 2592 && (len = find_content_length(neg, variant)) >= 0) { 2593 2594 *((const char **) apr_array_push(arr)) = " {length "; 2595 *((const char **) apr_array_push(arr)) = apr_off_t_toa(r->pool, 2596 len); 2597 *((const char **) apr_array_push(arr)) = "}"; 2598 } 2599 2600 *((const char **) apr_array_push(arr)) = "}"; 2601 *((const char **) apr_array_push(arr)) = ", "; /* trimmed below */ 2602 } 2603 2604 if (neg->send_alternates && neg->avail_vars->nelts) { 2605 arr->nelts--; /* remove last comma */ 2606 apr_table_mergen(hdrs, "Alternates", 2607 apr_array_pstrcat(r->pool, arr, '\0')); 2608 } 2609 2610 if (neg->is_transparent || vary_by_type || vary_by_language || 2611 vary_by_language || vary_by_charset || vary_by_encoding) { 2612 2613 apr_table_mergen(hdrs, "Vary", 2 + apr_pstrcat(r->pool, 2614 neg->is_transparent ? ", negotiate" : "", 2615 vary_by_type ? ", accept" : "", 2616 vary_by_language ? ", accept-language" : "", 2617 vary_by_charset ? ", accept-charset" : "", 2618 vary_by_encoding ? ", accept-encoding" : "", NULL)); 2619 } 2620 2621 if (neg->is_transparent) { /* Create TCN response header */ 2622 apr_table_setn(hdrs, "TCN", 2623 alg_result == alg_list ? "list" : "choice"); 2624 } 2625} 2626 2627/********************************************************************** 2628 * 2629 * Return an HTML list of variants. This is output as part of the 2630 * choice response or 406 status body. 2631 */ 2632 2633static char *make_variant_list(request_rec *r, negotiation_state *neg) 2634{ 2635 apr_array_header_t *arr; 2636 int i; 2637 int max_vlist_array = (neg->avail_vars->nelts * 15) + 2; 2638 2639 /* In order to avoid O(n^2) memory copies in building the list, 2640 * we preallocate a apr_table_t with the maximum substrings possible, 2641 * fill it with the variant list, and then concatenate the entire array. 2642 */ 2643 arr = apr_array_make(r->pool, max_vlist_array, sizeof(char *)); 2644 2645 *((const char **) apr_array_push(arr)) = "Available variants:\n<ul>\n"; 2646 2647 for (i = 0; i < neg->avail_vars->nelts; ++i) { 2648 var_rec *variant = &((var_rec *) neg->avail_vars->elts)[i]; 2649 const char *filename = variant->file_name ? variant->file_name : ""; 2650 apr_array_header_t *languages = variant->content_languages; 2651 const char *description = variant->description 2652 ? variant->description 2653 : ""; 2654 2655 /* The format isn't very neat, and it would be nice to make 2656 * the tags human readable (eg replace 'language en' with 'English'). 2657 * Note that if you change the number of substrings pushed, you also 2658 * need to change the calculation of max_vlist_array above. 2659 */ 2660 *((const char **) apr_array_push(arr)) = "<li><a href=\""; 2661 *((const char **) apr_array_push(arr)) = ap_escape_path_segment(r->pool, filename); 2662 *((const char **) apr_array_push(arr)) = "\">"; 2663 *((const char **) apr_array_push(arr)) = ap_escape_html(r->pool, filename); 2664 *((const char **) apr_array_push(arr)) = "</a> "; 2665 *((const char **) apr_array_push(arr)) = description; 2666 2667 if (variant->mime_type && *variant->mime_type) { 2668 *((const char **) apr_array_push(arr)) = ", type "; 2669 *((const char **) apr_array_push(arr)) = variant->mime_type; 2670 } 2671 if (languages && languages->nelts) { 2672 *((const char **) apr_array_push(arr)) = ", language "; 2673 *((const char **) apr_array_push(arr)) = apr_array_pstrcat(r->pool, 2674 languages, ','); 2675 } 2676 if (variant->content_charset && *variant->content_charset) { 2677 *((const char **) apr_array_push(arr)) = ", charset "; 2678 *((const char **) apr_array_push(arr)) = variant->content_charset; 2679 } 2680 if (variant->content_encoding) { 2681 *((const char **) apr_array_push(arr)) = ", encoding "; 2682 *((const char **) apr_array_push(arr)) = variant->content_encoding; 2683 } 2684 *((const char **) apr_array_push(arr)) = "</li>\n"; 2685 } 2686 *((const char **) apr_array_push(arr)) = "</ul>\n"; 2687 2688 return apr_array_pstrcat(r->pool, arr, '\0'); 2689} 2690 2691static void store_variant_list(request_rec *r, negotiation_state *neg) 2692{ 2693 if (r->main == NULL) { 2694 apr_table_setn(r->notes, "variant-list", make_variant_list(r, neg)); 2695 } 2696 else { 2697 apr_table_setn(r->main->notes, "variant-list", 2698 make_variant_list(r->main, neg)); 2699 } 2700} 2701 2702/* Called if we got a "Choice" response from the variant selection algorithm. 2703 * It checks the result of the chosen variant to see if it 2704 * is itself negotiated (if so, return error HTTP_VARIANT_ALSO_VARIES). 2705 * Otherwise, add the appropriate headers to the current response. 2706 */ 2707 2708static int setup_choice_response(request_rec *r, negotiation_state *neg, 2709 var_rec *variant) 2710{ 2711 request_rec *sub_req; 2712 const char *sub_vary; 2713 2714 if (!variant->sub_req) { 2715 int status; 2716 2717 sub_req = ap_sub_req_lookup_file(variant->file_name, r, r->output_filters); 2718 status = sub_req->status; 2719 2720 if (status != HTTP_OK && 2721 !apr_table_get(sub_req->err_headers_out, "TCN")) { 2722 ap_destroy_sub_req(sub_req); 2723 return status; 2724 } 2725 variant->sub_req = sub_req; 2726 } 2727 else { 2728 sub_req = variant->sub_req; 2729 } 2730 2731 /* The variant selection algorithm told us to return a "Choice" 2732 * response. This is the normal variant response, with 2733 * some extra headers. First, ensure that the chosen 2734 * variant did or will not itself engage in transparent negotiation. 2735 * If not, set the appropriate headers, and fall through to 2736 * the normal variant handling 2737 */ 2738 2739 /* This catches the error that a transparent type map selects a 2740 * transparent multiviews resource as the best variant. 2741 * 2742 * XXX: We do not signal an error if a transparent type map 2743 * selects a _non_transparent multiviews resource as the best 2744 * variant, because we can generate a legal negotiation response 2745 * in this case. In this case, the vlist_validator of the 2746 * nontransparent subrequest will be lost however. This could 2747 * lead to cases in which a change in the set of variants or the 2748 * negotiation algorithm of the nontransparent resource is never 2749 * propagated up to a HTTP/1.1 cache which interprets Vary. To be 2750 * completely on the safe side we should return HTTP_VARIANT_ALSO_VARIES 2751 * for this type of recursive negotiation too. 2752 */ 2753 if (neg->is_transparent && 2754 apr_table_get(sub_req->err_headers_out, "TCN")) { 2755 return HTTP_VARIANT_ALSO_VARIES; 2756 } 2757 2758 /* This catches the error that a transparent type map recursively 2759 * selects, as the best variant, another type map which itself 2760 * causes transparent negotiation to be done. 2761 * 2762 * XXX: Actually, we catch this error by catching all cases of 2763 * type map recursion. There are some borderline recursive type 2764 * map arrangements which would not produce transparent 2765 * negotiation protocol errors or lack of cache propagation 2766 * problems, but such arrangements are very hard to detect at this 2767 * point in the control flow, so we do not bother to single them 2768 * out. 2769 * 2770 * Recursive type maps imply a recursive arrangement of negotiated 2771 * resources which is visible to outside clients, and this is not 2772 * supported by the transparent negotiation caching protocols, so 2773 * if we are to have generic support for recursive type maps, we 2774 * have to create some configuration setting which makes all type 2775 * maps non-transparent when recursion is enabled. Also, if we 2776 * want recursive type map support which ensures propagation of 2777 * type map changes into HTTP/1.1 caches that handle Vary, we 2778 * would have to extend the current mechanism for generating 2779 * variant list validators. 2780 */ 2781 if (sub_req->handler && strcmp(sub_req->handler, "type-map") == 0) { 2782 return HTTP_VARIANT_ALSO_VARIES; 2783 } 2784 2785 /* This adds an appropriate Variant-Vary header if the subrequest 2786 * is a multiviews resource. 2787 * 2788 * XXX: TODO: Note that this does _not_ handle any Vary header 2789 * returned by a CGI if sub_req is a CGI script, because we don't 2790 * see that Vary header yet at this point in the control flow. 2791 * This won't cause any cache consistency problems _unless_ the 2792 * CGI script also returns a Cache-Control header marking the 2793 * response as cachable. This needs to be fixed, also there are 2794 * problems if a CGI returns an Etag header which also need to be 2795 * fixed. 2796 */ 2797 if ((sub_vary = apr_table_get(sub_req->err_headers_out, "Vary")) != NULL) { 2798 apr_table_setn(r->err_headers_out, "Variant-Vary", sub_vary); 2799 2800 /* Move the subreq Vary header into the main request to 2801 * prevent having two Vary headers in the response, which 2802 * would be legal but strange. 2803 */ 2804 apr_table_setn(r->err_headers_out, "Vary", sub_vary); 2805 apr_table_unset(sub_req->err_headers_out, "Vary"); 2806 } 2807 2808 apr_table_setn(r->err_headers_out, "Content-Location", 2809 ap_escape_path_segment(r->pool, variant->file_name)); 2810 2811 set_neg_headers(r, neg, alg_choice); /* add Alternates and Vary */ 2812 2813 /* Still to do by caller: add Expires */ 2814 2815 return 0; 2816} 2817 2818/**************************************************************** 2819 * 2820 * Executive... 2821 */ 2822 2823static int do_negotiation(request_rec *r, negotiation_state *neg, 2824 var_rec **bestp, int prefer_scripts) 2825{ 2826 var_rec *avail_recs = (var_rec *) neg->avail_vars->elts; 2827 int alg_result; /* result of variant selection algorithm */ 2828 int res; 2829 int j; 2830 2831 /* Decide if resource is transparently negotiable */ 2832 2833 /* GET or HEAD? (HEAD has same method number as GET) */ 2834 if (r->method_number == M_GET) { 2835 2836 /* maybe this should be configurable, see also the comment 2837 * about recursive type maps in setup_choice_response() 2838 */ 2839 neg->is_transparent = 1; 2840 2841 /* We can't be transparent if we are a map file in the middle 2842 * of the request URI. 2843 */ 2844 if (r->path_info && *r->path_info) 2845 neg->is_transparent = 0; 2846 2847 for (j = 0; j < neg->avail_vars->nelts; ++j) { 2848 var_rec *variant = &avail_recs[j]; 2849 2850 /* We can't be transparent, because of internal 2851 * assumptions in best_match(), if there is a 2852 * non-neighboring variant. We can have a non-neighboring 2853 * variant when processing a type map. 2854 */ 2855 if (ap_strchr_c(variant->file_name, '/')) 2856 neg->is_transparent = 0; 2857 2858 /* We can't be transparent, because of the behavior 2859 * of variant typemap bodies. 2860 */ 2861 if (variant->body) { 2862 neg->is_transparent = 0; 2863 } 2864 } 2865 } 2866 2867 if (neg->is_transparent) { 2868 parse_negotiate_header(r, neg); 2869 } 2870 else { /* configure negotiation on non-transparent resource */ 2871 neg->may_choose = 1; 2872 } 2873 2874 maybe_add_default_accepts(neg, prefer_scripts); 2875 2876 alg_result = best_match(neg, bestp); 2877 2878 /* alg_result is one of 2879 * alg_choice: a best variant is chosen 2880 * alg_list: no best variant is chosen 2881 */ 2882 2883 if (alg_result == alg_list) { 2884 /* send a list response or HTTP_NOT_ACCEPTABLE error response */ 2885 2886 neg->send_alternates = 1; /* always include Alternates header */ 2887 set_neg_headers(r, neg, alg_result); 2888 store_variant_list(r, neg); 2889 2890 if (neg->is_transparent && neg->ua_supports_trans) { 2891 /* XXX todo: expires? cachability? */ 2892 2893 /* Some HTTP/1.0 clients are known to choke when they get 2894 * a 300 (multiple choices) response without a Location 2895 * header. However the 300 code response we are are about 2896 * to generate will only reach 1.0 clients which support 2897 * transparent negotiation, and they should be OK. The 2898 * response should never reach older 1.0 clients, even if 2899 * we have CacheNegotiatedDocs enabled, because no 1.0 2900 * proxy cache (we know of) will cache and return 300 2901 * responses (they certainly won't if they conform to the 2902 * HTTP/1.0 specification). 2903 */ 2904 return HTTP_MULTIPLE_CHOICES; 2905 } 2906 2907 if (!*bestp) { 2908 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, 2909 "no acceptable variant: %s", r->filename); 2910 return HTTP_NOT_ACCEPTABLE; 2911 } 2912 } 2913 2914 /* Variant selection chose a variant */ 2915 2916 /* XXX todo: merge the two cases in the if statement below */ 2917 if (neg->is_transparent) { 2918 2919 if ((res = setup_choice_response(r, neg, *bestp)) != 0) { 2920 return res; /* return if error */ 2921 } 2922 } 2923 else { 2924 set_neg_headers(r, neg, alg_result); 2925 } 2926 2927 /* Make sure caching works - Vary should handle HTTP/1.1, but for 2928 * HTTP/1.0, we can't allow caching at all. 2929 */ 2930 2931 /* XXX: Note that we only set r->no_cache to 1, which causes 2932 * Expires: <now> to be added, when responding to a HTTP/1.0 2933 * client. If we return the response to a 1.1 client, we do not 2934 * add Expires <now>, because doing so would degrade 1.1 cache 2935 * performance by preventing re-use of the response without prior 2936 * revalidation. On the other hand, if the 1.1 client is a proxy 2937 * which was itself contacted by a 1.0 client, or a proxy cache 2938 * which can be contacted later by 1.0 clients, then we currently 2939 * rely on this 1.1 proxy to add the Expires: <now> when it 2940 * forwards the response. 2941 * 2942 * XXX: TODO: Find out if the 1.1 spec requires proxies and 2943 * tunnels to add Expires: <now> when forwarding the response to 2944 * 1.0 clients. I (kh) recall it is rather vague on this point. 2945 * Testing actual 1.1 proxy implementations would also be nice. If 2946 * Expires: <now> is not added by proxies then we need to always 2947 * include Expires: <now> ourselves to ensure correct caching, but 2948 * this would degrade HTTP/1.1 cache efficiency unless we also add 2949 * Cache-Control: max-age=N, which we currently don't. 2950 * 2951 * Roy: No, we are not going to screw over HTTP future just to 2952 * ensure that people who can't be bothered to upgrade their 2953 * clients will always receive perfect server-side negotiation. 2954 * Hell, those clients are sending bogus accept headers anyway. 2955 * 2956 * Manual setting of cache-control/expires always overrides this 2957 * automated kluge, on purpose. 2958 */ 2959 2960 if ((!do_cache_negotiated_docs(r->server) 2961 && (r->proto_num < HTTP_VERSION(1,1))) 2962 && neg->count_multiviews_variants != 1) { 2963 r->no_cache = 1; 2964 } 2965 2966 return OK; 2967} 2968 2969static int handle_map_file(request_rec *r) 2970{ 2971 negotiation_state *neg; 2972 apr_file_t *map; 2973 var_rec *best; 2974 int res; 2975 char *udir; 2976 const char *new_req; 2977 2978 if(strcmp(r->handler,MAP_FILE_MAGIC_TYPE) && strcmp(r->handler,"type-map")) 2979 return DECLINED; 2980 2981 neg = parse_accept_headers(r); 2982 if ((res = read_type_map(&map, neg, r))) { 2983 return res; 2984 } 2985 2986 res = do_negotiation(r, neg, &best, 0); 2987 if (res != 0) return res; 2988 2989 if (best->body) 2990 { 2991 conn_rec *c = r->connection; 2992 apr_bucket_brigade *bb; 2993 apr_bucket *e; 2994 2995 ap_allow_standard_methods(r, REPLACE_ALLOW, M_GET, M_OPTIONS, 2996 M_POST, -1); 2997 /* XXX: ? 2998 * if (r->method_number == M_OPTIONS) { 2999 * return ap_send_http_options(r); 3000 *} 3001 */ 3002 if (r->method_number != M_GET && r->method_number != M_POST) { 3003 return HTTP_METHOD_NOT_ALLOWED; 3004 } 3005 3006 /* ### These may be implemented by adding some 'extra' info 3007 * of the file offset onto the etag 3008 * ap_update_mtime(r, r->finfo.mtime); 3009 * ap_set_last_modified(r); 3010 * ap_set_etag(r); 3011 */ 3012 ap_set_accept_ranges(r); 3013 ap_set_content_length(r, best->bytes); 3014 3015 /* set MIME type and charset as negotiated */ 3016 if (best->mime_type && *best->mime_type) { 3017 if (best->content_charset && *best->content_charset) { 3018 ap_set_content_type(r, apr_pstrcat(r->pool, 3019 best->mime_type, 3020 "; charset=", 3021 best->content_charset, 3022 NULL)); 3023 } 3024 else { 3025 ap_set_content_type(r, apr_pstrdup(r->pool, best->mime_type)); 3026 } 3027 } 3028 3029 /* set Content-language(s) as negotiated */ 3030 if (best->content_languages && best->content_languages->nelts) { 3031 r->content_languages = apr_array_copy(r->pool, 3032 best->content_languages); 3033 } 3034 3035 /* set Content-Encoding as negotiated */ 3036 if (best->content_encoding && *best->content_encoding) { 3037 r->content_encoding = apr_pstrdup(r->pool, 3038 best->content_encoding); 3039 } 3040 3041 if ((res = ap_meets_conditions(r)) != OK) { 3042 return res; 3043 } 3044 3045 if ((res = ap_discard_request_body(r)) != OK) { 3046 return res; 3047 } 3048 bb = apr_brigade_create(r->pool, c->bucket_alloc); 3049 e = apr_bucket_file_create(map, best->body, 3050 (apr_size_t)best->bytes, r->pool, 3051 c->bucket_alloc); 3052 APR_BRIGADE_INSERT_TAIL(bb, e); 3053 e = apr_bucket_eos_create(c->bucket_alloc); 3054 APR_BRIGADE_INSERT_TAIL(bb, e); 3055 3056 return ap_pass_brigade(r->output_filters, bb); 3057 } 3058 3059 if (r->path_info && *r->path_info) { 3060 /* remove any path_info from the end of the uri before trying 3061 * to change the filename. r->path_info from the original 3062 * request is passed along on the redirect. 3063 */ 3064 r->uri[ap_find_path_info(r->uri, r->path_info)] = '\0'; 3065 } 3066 udir = ap_make_dirstr_parent(r->pool, r->uri); 3067 udir = ap_escape_uri(r->pool, udir); 3068 if (r->args) { 3069 if (r->path_info) { 3070 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3071 r->path_info, "?", r->args, NULL); 3072 } 3073 else { 3074 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3075 "?", r->args, NULL); 3076 } 3077 } 3078 else { 3079 new_req = apr_pstrcat(r->pool, udir, best->file_name, 3080 r->path_info, NULL); 3081 } 3082 ap_internal_redirect(new_req, r); 3083 return OK; 3084} 3085 3086static int handle_multi(request_rec *r) 3087{ 3088 negotiation_state *neg; 3089 var_rec *best, *avail_recs; 3090 request_rec *sub_req; 3091 int res; 3092 int j; 3093 3094 if (r->finfo.filetype != APR_NOFILE 3095 || !(ap_allow_options(r) & OPT_MULTI)) { 3096 return DECLINED; 3097 } 3098 3099 neg = parse_accept_headers(r); 3100 3101 if ((res = read_types_multi(neg))) { 3102 return_from_multi: 3103 /* free all allocated memory from subrequests */ 3104 avail_recs = (var_rec *) neg->avail_vars->elts; 3105 for (j = 0; j < neg->avail_vars->nelts; ++j) { 3106 var_rec *variant = &avail_recs[j]; 3107 if (variant->sub_req) { 3108 ap_destroy_sub_req(variant->sub_req); 3109 } 3110 } 3111 return res; 3112 } 3113 if (neg->avail_vars->nelts == 0) { 3114 return DECLINED; 3115 } 3116 3117 res = do_negotiation(r, neg, &best, 3118 (r->method_number != M_GET) || r->args || 3119 (r->path_info && *r->path_info)); 3120 if (res != 0) 3121 goto return_from_multi; 3122 3123 if (!(sub_req = best->sub_req)) { 3124 /* We got this out of a map file, so we don't actually have 3125 * a sub_req structure yet. Get one now. 3126 */ 3127 3128 sub_req = ap_sub_req_lookup_file(best->file_name, r, r->output_filters); 3129 if (sub_req->status != HTTP_OK) { 3130 res = sub_req->status; 3131 ap_destroy_sub_req(sub_req); 3132 goto return_from_multi; 3133 } 3134 } 3135 if (sub_req->args == NULL) { 3136 sub_req->args = r->args; 3137 } 3138 3139 /* now do a "fast redirect" ... promotes the sub_req into the main req */ 3140 ap_internal_fast_redirect(sub_req, r); 3141 3142 /* give no advise for time on this subrequest. Perhaps we 3143 * should tally the last mtime amoung all variants, and date 3144 * the most recent, but that could confuse the proxies. 3145 */ 3146 r->mtime = 0; 3147 3148 /* clean up all but our favorite variant, since that sub_req 3149 * is now merged into the main request! 3150 */ 3151 avail_recs = (var_rec *) neg->avail_vars->elts; 3152 for (j = 0; j < neg->avail_vars->nelts; ++j) { 3153 var_rec *variant = &avail_recs[j]; 3154 if (variant != best && variant->sub_req) { 3155 ap_destroy_sub_req(variant->sub_req); 3156 } 3157 } 3158 return OK; 3159} 3160 3161/********************************************************************** 3162 * There is a problem with content-encoding, as some clients send and 3163 * expect an x- token (e.g. x-gzip) while others expect the plain token 3164 * (i.e. gzip). To try and deal with this as best as possible we do 3165 * the following: if the client sent an Accept-Encoding header and it 3166 * contains a plain token corresponding to the content encoding of the 3167 * response, then set content encoding using the plain token. Else if 3168 * the A-E header contains the x- token use the x- token in the C-E 3169 * header. Else don't do anything. 3170 * 3171 * Note that if no A-E header was sent, or it does not contain a token 3172 * compatible with the final content encoding, then the token in the 3173 * C-E header will be whatever was specified in the AddEncoding 3174 * directive. 3175 */ 3176static int fix_encoding(request_rec *r) 3177{ 3178 const char *enc = r->content_encoding; 3179 char *x_enc = NULL; 3180 apr_array_header_t *accept_encodings; 3181 accept_rec *accept_recs; 3182 int i; 3183 3184 if (!enc || !*enc) { 3185 return DECLINED; 3186 } 3187 3188 if (enc[0] == 'x' && enc[1] == '-') { 3189 enc += 2; 3190 } 3191 3192 if ((accept_encodings = do_header_line(r->pool, 3193 apr_table_get(r->headers_in, "Accept-Encoding"))) == NULL) { 3194 return DECLINED; 3195 } 3196 3197 accept_recs = (accept_rec *) accept_encodings->elts; 3198 3199 for (i = 0; i < accept_encodings->nelts; ++i) { 3200 char *name = accept_recs[i].name; 3201 3202 if (!strcmp(name, enc)) { 3203 r->content_encoding = name; 3204 return OK; 3205 } 3206 3207 if (name[0] == 'x' && name[1] == '-' && !strcmp(name+2, enc)) { 3208 x_enc = name; 3209 } 3210 } 3211 3212 if (x_enc) { 3213 r->content_encoding = x_enc; 3214 return OK; 3215 } 3216 3217 return DECLINED; 3218} 3219 3220static void register_hooks(apr_pool_t *p) 3221{ 3222 ap_hook_fixups(fix_encoding,NULL,NULL,APR_HOOK_MIDDLE); 3223 ap_hook_type_checker(handle_multi,NULL,NULL,APR_HOOK_FIRST); 3224 ap_hook_handler(handle_map_file,NULL,NULL,APR_HOOK_MIDDLE); 3225} 3226 3227module AP_MODULE_DECLARE_DATA negotiation_module = 3228{ 3229 STANDARD20_MODULE_STUFF, 3230 create_neg_dir_config, /* dir config creator */ 3231 merge_neg_dir_configs, /* dir merger --- default is to override */ 3232 NULL, /* server config */ 3233 NULL, /* merge server config */ 3234 negotiation_cmds, /* command apr_table_t */ 3235 register_hooks /* register hooks */ 3236}; 3237