1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * simple hokey charset recoding configuration module 19 * 20 * See mod_ebcdic and mod_charset for more thought-out examples. This 21 * one is just so Jeff can learn how a module works and experiment with 22 * basic character set recoding configuration. 23 * 24 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!! 25 */ 26 27#include "httpd.h" 28#include "http_config.h" 29 30#include "http_core.h" 31#include "http_log.h" 32#include "http_main.h" 33#include "http_protocol.h" 34#include "http_request.h" 35#include "util_charset.h" 36#include "apr_buckets.h" 37#include "util_filter.h" 38#include "apr_strings.h" 39#include "apr_lib.h" 40#include "apr_xlate.h" 41#define APR_WANT_STRFUNC 42#include "apr_want.h" 43 44#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */ 45#define INPUT_XLATE_BUF_SIZE (8*1024) /* size of translation buffer used on input */ 46 47#define XLATE_MIN_BUFF_LEFT 128 /* flush once there is no more than this much 48 * space left in the translation buffer 49 */ 50 51#define FATTEST_CHAR 8 /* we don't handle chars wider than this that straddle 52 * two buckets 53 */ 54 55/* extended error status codes; this is used in addition to an apr_status_t to 56 * track errors in the translation filter 57 */ 58typedef enum { 59 EES_INIT = 0, /* no error info yet; value must be 0 for easy init */ 60 EES_LIMIT, /* built-in restriction encountered */ 61 EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */ 62 EES_BUCKET_READ, 63 EES_DOWNSTREAM, /* something bad happened in a filter below xlate */ 64 EES_BAD_INPUT /* input data invalid */ 65} ees_t; 66 67/* registered name of the output translation filter */ 68#define XLATEOUT_FILTER_NAME "XLATEOUT" 69/* registered name of input translation filter */ 70#define XLATEIN_FILTER_NAME "XLATEIN" 71 72typedef struct charset_dir_t { 73 const char *charset_source; /* source encoding */ 74 const char *charset_default; /* how to ship on wire */ 75 /** module does ap_add_*_filter()? */ 76 enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add; 77 /** treat all mimetypes as text? */ 78 enum {FX_INIT, FX_FORCE, FX_NOFORCE} force_xlate; 79} charset_dir_t; 80 81/* charset_filter_ctx_t is created for each filter instance; because the same 82 * filter code is used for translating in both directions, we need this context 83 * data to tell the filter which translation handle to use; it also can hold a 84 * character which was split between buckets 85 */ 86typedef struct charset_filter_ctx_t { 87 apr_xlate_t *xlate; 88 int is_sb; /* single-byte translation? */ 89 charset_dir_t *dc; 90 ees_t ees; /* extended error status */ 91 apr_size_t saved; 92 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */ 93 int ran; /* has filter instance run before? */ 94 int noop; /* should we pass brigades through unchanged? */ 95 char *tmp; /* buffer for input filtering */ 96 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */ 97 apr_bucket_brigade *tmpbb; /* used for passing downstream */ 98} charset_filter_ctx_t; 99 100/* charset_req_t is available via r->request_config if any translation is 101 * being performed 102 */ 103typedef struct charset_req_t { 104 charset_dir_t *dc; 105 charset_filter_ctx_t *output_ctx, *input_ctx; 106} charset_req_t; 107 108module AP_MODULE_DECLARE_DATA charset_lite_module; 109 110static void *create_charset_dir_conf(apr_pool_t *p,char *dummy) 111{ 112 charset_dir_t *dc = (charset_dir_t *)apr_pcalloc(p,sizeof(charset_dir_t)); 113 114 return dc; 115} 116 117static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv) 118{ 119 charset_dir_t *a = (charset_dir_t *)apr_pcalloc (p, sizeof(charset_dir_t)); 120 charset_dir_t *base = (charset_dir_t *)basev, 121 *over = (charset_dir_t *)overridesv; 122 123 /* If it is defined in the current container, use it. Otherwise, use the one 124 * from the enclosing container. 125 */ 126 127 a->charset_default = 128 over->charset_default ? over->charset_default : base->charset_default; 129 a->charset_source = 130 over->charset_source ? over->charset_source : base->charset_source; 131 a->implicit_add = 132 over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add; 133 a->force_xlate= 134 over->force_xlate != FX_INIT ? over->force_xlate : base->force_xlate; 135 return a; 136} 137 138/* CharsetSourceEnc charset 139 */ 140static const char *add_charset_source(cmd_parms *cmd, void *in_dc, 141 const char *name) 142{ 143 charset_dir_t *dc = in_dc; 144 145 dc->charset_source = name; 146 return NULL; 147} 148 149/* CharsetDefault charset 150 */ 151static const char *add_charset_default(cmd_parms *cmd, void *in_dc, 152 const char *name) 153{ 154 charset_dir_t *dc = in_dc; 155 156 dc->charset_default = name; 157 return NULL; 158} 159 160/* CharsetOptions optionflag... 161 */ 162static const char *add_charset_options(cmd_parms *cmd, void *in_dc, 163 const char *flag) 164{ 165 charset_dir_t *dc = in_dc; 166 167 if (!strcasecmp(flag, "ImplicitAdd")) { 168 dc->implicit_add = IA_IMPADD; 169 } 170 else if (!strcasecmp(flag, "NoImplicitAdd")) { 171 dc->implicit_add = IA_NOIMPADD; 172 } 173 else if (!strcasecmp(flag, "TranslateAllMimeTypes")) { 174 dc->force_xlate = FX_FORCE; 175 } 176 else if (!strcasecmp(flag, "NoTranslateAllMimeTypes")) { 177 dc->force_xlate = FX_NOFORCE; 178 } 179 else { 180 return apr_pstrcat(cmd->temp_pool, 181 "Invalid CharsetOptions option: ", 182 flag, 183 NULL); 184 } 185 186 return NULL; 187} 188 189/* find_code_page() is a fixup hook that checks if the module is 190 * configured and the input or output potentially need to be translated. 191 * If so, context is initialized for the filters. 192 */ 193static int find_code_page(request_rec *r) 194{ 195 charset_dir_t *dc = ap_get_module_config(r->per_dir_config, 196 &charset_lite_module); 197 charset_req_t *reqinfo; 198 charset_filter_ctx_t *input_ctx, *output_ctx; 199 apr_status_t rv; 200 201 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r, 202 "uri: %s file: %s method: %d " 203 "imt: %s flags: %s%s%s %s->%s", 204 r->uri, 205 r->filename ? r->filename : "(none)", 206 r->method_number, 207 r->content_type ? r->content_type : "(unknown)", 208 r->main ? "S" : "", /* S if subrequest */ 209 r->prev ? "R" : "", /* R if redirect */ 210 r->proxyreq ? "P" : "", /* P if proxy */ 211 dc->charset_source, dc->charset_default); 212 213 /* If we don't have a full directory configuration, bail out. 214 */ 215 if (!dc->charset_source || !dc->charset_default) { 216 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01448) 217 "incomplete configuration: src %s, dst %s", 218 dc->charset_source ? dc->charset_source : "unspecified", 219 dc->charset_default ? dc->charset_default : "unspecified"); 220 return DECLINED; 221 } 222 223 /* catch proxy requests */ 224 if (r->proxyreq) { 225 return DECLINED; 226 } 227 228 /* mod_rewrite indicators */ 229 if (r->filename 230 && (!strncmp(r->filename, "redirect:", 9) 231 || !strncmp(r->filename, "gone:", 5) 232 || !strncmp(r->filename, "passthrough:", 12) 233 || !strncmp(r->filename, "forbidden:", 10))) { 234 return DECLINED; 235 } 236 237 /* no translation when server and network charsets are set to the same value */ 238 if (!strcasecmp(dc->charset_source, dc->charset_default)) { 239 return DECLINED; 240 } 241 242 /* Get storage for the request data and the output filter context. 243 * We rarely need the input filter context, so allocate that separately. 244 */ 245 reqinfo = (charset_req_t *)apr_pcalloc(r->pool, 246 sizeof(charset_req_t) + 247 sizeof(charset_filter_ctx_t)); 248 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1); 249 250 reqinfo->dc = dc; 251 output_ctx->dc = dc; 252 output_ctx->tmpbb = apr_brigade_create(r->pool, 253 r->connection->bucket_alloc); 254 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo); 255 256 reqinfo->output_ctx = output_ctx; 257 258 switch (r->method_number) { 259 case M_PUT: 260 case M_POST: 261 /* Set up input translation. Note: A request body can be included 262 * with the OPTIONS method, but for now we don't set up translation 263 * of it. 264 */ 265 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t)); 266 input_ctx->bb = apr_brigade_create(r->pool, 267 r->connection->bucket_alloc); 268 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE); 269 input_ctx->dc = dc; 270 reqinfo->input_ctx = input_ctx; 271 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source, 272 dc->charset_default, r->pool); 273 if (rv != APR_SUCCESS) { 274 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01449) 275 "can't open translation %s->%s", 276 dc->charset_default, dc->charset_source); 277 return HTTP_INTERNAL_SERVER_ERROR; 278 } 279 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) { 280 input_ctx->is_sb = 0; 281 } 282 } 283 284 return DECLINED; 285} 286 287static int configured_in_list(request_rec *r, const char *filter_name, 288 struct ap_filter_t *filter_list) 289{ 290 struct ap_filter_t *filter = filter_list; 291 292 while (filter) { 293 if (!strcasecmp(filter_name, filter->frec->name)) { 294 return 1; 295 } 296 filter = filter->next; 297 } 298 return 0; 299} 300 301static int configured_on_input(request_rec *r, const char *filter_name) 302{ 303 return configured_in_list(r, filter_name, r->input_filters); 304} 305 306static int configured_on_output(request_rec *r, const char *filter_name) 307{ 308 return configured_in_list(r, filter_name, r->output_filters); 309} 310 311/* xlate_insert_filter() is a filter hook which decides whether or not 312 * to insert a translation filter for the current request. 313 */ 314static void xlate_insert_filter(request_rec *r) 315{ 316 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */ 317 charset_req_t *reqinfo = ap_get_module_config(r->request_config, 318 &charset_lite_module); 319 charset_dir_t *dc = ap_get_module_config(r->per_dir_config, 320 &charset_lite_module); 321 322 if (dc && (dc->implicit_add == IA_NOIMPADD)) { 323 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, r, 324 "xlate output filter not added implicitly because " 325 "CharsetOptions included 'NoImplicitAdd'"); 326 return; 327 } 328 329 if (reqinfo) { 330 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) { 331 ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r, 332 r->connection); 333 } 334 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r, 335 "xlate output filter not added implicitly because %s", 336 !reqinfo->output_ctx ? 337 "no output configuration available" : 338 "another module added the filter"); 339 340 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) { 341 ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r, 342 r->connection); 343 } 344 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, r, 345 "xlate input filter not added implicitly because %s", 346 !reqinfo->input_ctx ? 347 "no input configuration available" : 348 "another module added the filter"); 349 } 350} 351 352/* stuff that sucks that I know of: 353 * 354 * bucket handling: 355 * why create an eos bucket when we see it come down the stream? just send the one 356 * passed as input... news flash: this will be fixed when xlate_out_filter() starts 357 * using the more generic xlate_brigade() 358 * 359 * translation mechanics: 360 * we don't handle characters that straddle more than two buckets; an error 361 * will be generated 362 */ 363 364static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b) 365{ 366 charset_filter_ctx_t *ctx = f->ctx; 367 apr_status_t rv; 368 369 APR_BRIGADE_INSERT_TAIL(ctx->tmpbb, b); 370 rv = ap_pass_brigade(f->next, ctx->tmpbb); 371 if (rv != APR_SUCCESS) { 372 ctx->ees = EES_DOWNSTREAM; 373 } 374 apr_brigade_cleanup(ctx->tmpbb); 375 return rv; 376} 377 378/* send_downstream() is passed the translated data; it puts it in a single- 379 * bucket brigade and passes the brigade to the next filter 380 */ 381static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len) 382{ 383 request_rec *r = f->r; 384 conn_rec *c = r->connection; 385 apr_bucket *b; 386 387 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc); 388 return send_bucket_downstream(f, b); 389} 390 391static apr_status_t send_eos(ap_filter_t *f) 392{ 393 request_rec *r = f->r; 394 conn_rec *c = r->connection; 395 apr_bucket_brigade *bb; 396 apr_bucket *b; 397 charset_filter_ctx_t *ctx = f->ctx; 398 apr_status_t rv; 399 400 bb = apr_brigade_create(r->pool, c->bucket_alloc); 401 b = apr_bucket_eos_create(c->bucket_alloc); 402 APR_BRIGADE_INSERT_TAIL(bb, b); 403 rv = ap_pass_brigade(f->next, bb); 404 if (rv != APR_SUCCESS) { 405 ctx->ees = EES_DOWNSTREAM; 406 } 407 return rv; 408} 409 410static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx, 411 const char *partial, 412 apr_size_t partial_len) 413{ 414 apr_status_t rv; 415 416 if (sizeof(ctx->buf) > partial_len) { 417 ctx->saved = partial_len; 418 memcpy(ctx->buf, partial, partial_len); 419 rv = APR_SUCCESS; 420 } 421 else { 422 rv = APR_INCOMPLETE; 423 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle 424 * buckets 425 */ 426 } 427 return rv; 428} 429 430static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx, 431 /* input buffer: */ 432 const char **cur_str, 433 apr_size_t *cur_len, 434 /* output buffer: */ 435 char **out_str, 436 apr_size_t *out_len) 437{ 438 apr_status_t rv; 439 apr_size_t tmp_input_len; 440 441 /* Keep adding bytes from the input string to the saved string until we 442 * 1) finish the input char 443 * 2) get an error 444 * or 3) run out of bytes to add 445 */ 446 447 do { 448 ctx->buf[ctx->saved] = **cur_str; 449 ++ctx->saved; 450 ++*cur_str; 451 --*cur_len; 452 tmp_input_len = ctx->saved; 453 rv = apr_xlate_conv_buffer(ctx->xlate, 454 ctx->buf, 455 &tmp_input_len, 456 *out_str, 457 out_len); 458 } while (rv == APR_INCOMPLETE && *cur_len); 459 460 if (rv == APR_SUCCESS) { 461 ctx->saved = 0; 462 } 463 else { 464 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars 465 * straddling more than two buckets 466 */ 467 } 468 469 return rv; 470} 471 472static void log_xlate_error(ap_filter_t *f, apr_status_t rv) 473{ 474 charset_filter_ctx_t *ctx = f->ctx; 475 const char *msg; 476 char msgbuf[100]; 477 apr_size_t len; 478 479 switch(ctx->ees) { 480 case EES_LIMIT: 481 rv = 0; 482 msg = APLOGNO(02193) "xlate filter - a built-in restriction was encountered"; 483 break; 484 case EES_BAD_INPUT: 485 rv = 0; 486 msg = APLOGNO(02194) "xlate filter - an input character was invalid"; 487 break; 488 case EES_BUCKET_READ: 489 rv = 0; 490 msg = APLOGNO(02195) "xlate filter - bucket read routine failed"; 491 break; 492 case EES_INCOMPLETE_CHAR: 493 rv = 0; 494 strcpy(msgbuf, APLOGNO(02196) "xlate filter - incomplete char at end of input - "); 495 len = ctx->saved; 496 497 /* We must ensure not to process more than what would fit in the 498 * remaining of the destination buffer, including terminating NULL */ 499 if (len > (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2) 500 len = (sizeof(msgbuf) - strlen(msgbuf) - 1) / 2; 501 502 ap_bin2hex(ctx->buf, len, msgbuf + strlen(msgbuf)); 503 msg = msgbuf; 504 break; 505 case EES_DOWNSTREAM: 506 msg = APLOGNO(02197) "xlate filter - an error occurred in a lower filter"; 507 break; 508 default: 509 msg = APLOGNO(02198) "xlate filter - returning error"; 510 } 511 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, "%s", msg); 512} 513 514/* chk_filter_chain() is called once per filter instance; it tries to 515 * determine if the current filter instance should be disabled because 516 * its translation is incompatible with the translation of an existing 517 * instance of the translate filter 518 * 519 * Example bad scenario: 520 * 521 * configured filter chain for the request: 522 * INCLUDES XLATEOUT(8859-1->UTS-16) 523 * configured filter chain for the subrequest: 524 * XLATEOUT(8859-1->UTS-16) 525 * 526 * When the subrequest is processed, the filter chain will be 527 * XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16) 528 * This makes no sense, so the instance of XLATEOUT added for the 529 * subrequest will be noop-ed. 530 * 531 * Example good scenario: 532 * 533 * configured filter chain for the request: 534 * INCLUDES XLATEOUT(8859-1->UTS-16) 535 * configured filter chain for the subrequest: 536 * XLATEOUT(IBM-1047->8859-1) 537 * 538 * When the subrequest is processed, the filter chain will be 539 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16) 540 * This makes sense, so the instance of XLATEOUT added for the 541 * subrequest will be left alone and it will translate from 542 * IBM-1047->8859-1. 543 */ 544static void chk_filter_chain(ap_filter_t *f) 545{ 546 ap_filter_t *curf; 547 charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL, 548 *ctx = f->ctx; 549 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME); 550 551 if (ctx->noop) { 552 return; 553 } 554 555 /* walk the filter chain; see if it makes sense for our filter to 556 * do any translation 557 */ 558 curf = output ? f->r->output_filters : f->r->input_filters; 559 while (curf) { 560 if (!strcasecmp(curf->frec->name, f->frec->name) && 561 curf->ctx) { 562 curctx = (charset_filter_ctx_t *)curf->ctx; 563 if (!last_xlate_ctx) { 564 last_xlate_ctx = curctx; 565 } 566 else { 567 if (strcmp(last_xlate_ctx->dc->charset_default, 568 curctx->dc->charset_source)) { 569 /* incompatible translation 570 * if our filter instance is incompatible with an instance 571 * already in place, noop our instance 572 * Notes: 573 * . We are only willing to noop our own instance. 574 * . It is possible to noop another instance which has not 575 * yet run, but this is not currently implemented. 576 * Hopefully it will not be needed. 577 * . It is not possible to noop an instance which has 578 * already run. 579 */ 580 if (last_xlate_ctx == f->ctx) { 581 last_xlate_ctx->noop = 1; 582 if (APLOGrtrace1(f->r)) { 583 const char *symbol = output ? "->" : "<-"; 584 585 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 586 0, f->r, APLOGNO(01451) 587 "%s %s - disabling " 588 "translation %s%s%s; existing " 589 "translation %s%s%s", 590 f->r->uri ? "uri" : "file", 591 f->r->uri ? f->r->uri : f->r->filename, 592 last_xlate_ctx->dc->charset_source, 593 symbol, 594 last_xlate_ctx->dc->charset_default, 595 curctx->dc->charset_source, 596 symbol, 597 curctx->dc->charset_default); 598 } 599 } 600 else { 601 const char *symbol = output ? "->" : "<-"; 602 603 ap_log_rerror(APLOG_MARK, APLOG_ERR, 604 0, f->r, APLOGNO(01452) 605 "chk_filter_chain() - can't disable " 606 "translation %s%s%s; existing " 607 "translation %s%s%s", 608 last_xlate_ctx->dc->charset_source, 609 symbol, 610 last_xlate_ctx->dc->charset_default, 611 curctx->dc->charset_source, 612 symbol, 613 curctx->dc->charset_default); 614 } 615 break; 616 } 617 } 618 } 619 curf = curf->next; 620 } 621} 622 623/* xlate_brigade() is used to filter request and response bodies 624 * 625 * we'll stop when one of the following occurs: 626 * . we run out of buckets 627 * . we run out of space in the output buffer 628 * . we hit an error or metadata 629 * 630 * inputs: 631 * bb: brigade to process 632 * buffer: storage to hold the translated characters 633 * buffer_avail: size of buffer 634 * (and a few more uninteresting parms) 635 * 636 * outputs: 637 * return value: APR_SUCCESS or some error code 638 * bb: we've removed any buckets representing the 639 * translated characters; the eos bucket, if 640 * present, will be left in the brigade 641 * buffer: filled in with translated characters 642 * buffer_avail: updated with the bytes remaining 643 * hit_eos: did we hit an EOS bucket? 644 */ 645static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx, 646 apr_bucket_brigade *bb, 647 char *buffer, 648 apr_size_t *buffer_avail, 649 int *hit_eos) 650{ 651 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */ 652 apr_bucket *consumed_bucket; 653 const char *bucket; 654 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */ 655 apr_size_t bucket_avail; /* bytes left in current bucket */ 656 apr_status_t rv = APR_SUCCESS; 657 658 *hit_eos = 0; 659 bucket_avail = 0; 660 consumed_bucket = NULL; 661 while (1) { 662 if (!bucket_avail) { /* no bytes left to process in the current bucket... */ 663 if (consumed_bucket) { 664 apr_bucket_delete(consumed_bucket); 665 consumed_bucket = NULL; 666 } 667 b = APR_BRIGADE_FIRST(bb); 668 if (b == APR_BRIGADE_SENTINEL(bb) || 669 APR_BUCKET_IS_METADATA(b)) { 670 break; 671 } 672 rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ); 673 if (rv != APR_SUCCESS) { 674 ctx->ees = EES_BUCKET_READ; 675 break; 676 } 677 bucket_avail = bytes_in_bucket; 678 consumed_bucket = b; /* for axing when we're done reading it */ 679 } 680 if (bucket_avail) { 681 /* We've got data, so translate it. */ 682 if (ctx->saved) { 683 /* Rats... we need to finish a partial character from the previous 684 * bucket. 685 * 686 * Strangely, finish_partial_char() increments the input buffer 687 * pointer but does not increment the output buffer pointer. 688 */ 689 apr_size_t old_buffer_avail = *buffer_avail; 690 rv = finish_partial_char(ctx, 691 &bucket, &bucket_avail, 692 &buffer, buffer_avail); 693 buffer += old_buffer_avail - *buffer_avail; 694 } 695 else { 696 apr_size_t old_buffer_avail = *buffer_avail; 697 apr_size_t old_bucket_avail = bucket_avail; 698 rv = apr_xlate_conv_buffer(ctx->xlate, 699 bucket, &bucket_avail, 700 buffer, 701 buffer_avail); 702 buffer += old_buffer_avail - *buffer_avail; 703 bucket += old_bucket_avail - bucket_avail; 704 705 if (rv == APR_INCOMPLETE) { /* partial character at end of input */ 706 /* We need to save the final byte(s) for next time; we can't 707 * convert it until we look at the next bucket. 708 */ 709 rv = set_aside_partial_char(ctx, bucket, bucket_avail); 710 bucket_avail = 0; 711 } 712 } 713 if (rv != APR_SUCCESS) { 714 /* bad input byte or partial char too big to store */ 715 break; 716 } 717 if (*buffer_avail < XLATE_MIN_BUFF_LEFT) { 718 /* if any data remains in the current bucket, split there */ 719 if (bucket_avail) { 720 apr_bucket_split(b, bytes_in_bucket - bucket_avail); 721 } 722 apr_bucket_delete(b); 723 break; 724 } 725 } 726 } 727 728 if (!APR_BRIGADE_EMPTY(bb)) { 729 b = APR_BRIGADE_FIRST(bb); 730 if (APR_BUCKET_IS_EOS(b)) { 731 /* Leave the eos bucket in the brigade for reporting to 732 * subsequent filters. 733 */ 734 *hit_eos = 1; 735 if (ctx->saved) { 736 /* Oops... we have a partial char from the previous bucket 737 * that won't be completed because there's no more data. 738 */ 739 rv = APR_INCOMPLETE; 740 ctx->ees = EES_INCOMPLETE_CHAR; 741 } 742 } 743 } 744 745 return rv; 746} 747 748/* xlate_out_filter() handles (almost) arbitrary conversions from one charset 749 * to another... 750 * translation is determined in the fixup hook (find_code_page), which is 751 * where the filter's context data is set up... the context data gives us 752 * the translation handle 753 */ 754static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) 755{ 756 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, 757 &charset_lite_module); 758 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, 759 &charset_lite_module); 760 charset_filter_ctx_t *ctx = f->ctx; 761 apr_bucket *dptr, *consumed_bucket; 762 const char *cur_str; 763 apr_size_t cur_len, cur_avail; 764 char tmp[OUTPUT_XLATE_BUF_SIZE]; 765 apr_size_t space_avail; 766 int done; 767 apr_status_t rv = APR_SUCCESS; 768 769 if (!ctx) { 770 /* this is SetOutputFilter path; grab the preallocated context, 771 * if any; note that if we decided not to do anything in an earlier 772 * handler, we won't even have a reqinfo 773 */ 774 if (reqinfo) { 775 ctx = f->ctx = reqinfo->output_ctx; 776 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice 777 * in the filter chain; we can't have two 778 * instances using the same context 779 */ 780 } 781 if (!ctx) { /* no idea how to translate; don't do anything */ 782 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); 783 ctx->dc = dc; 784 ctx->noop = 1; 785 } 786 } 787 788 /* Check the mime type to see if translation should be performed. 789 */ 790 if (!ctx->noop && ctx->xlate == NULL) { 791 const char *mime_type = f->r->content_type; 792 793 if (mime_type && (strncasecmp(mime_type, "text/", 5) == 0 || 794#if APR_CHARSET_EBCDIC 795 /* On an EBCDIC machine, be willing to translate mod_autoindex- 796 * generated output. Otherwise, it doesn't look too cool. 797 * 798 * XXX This isn't a perfect fix because this doesn't trigger us 799 * to convert from the charset of the source code to ASCII. The 800 * general solution seems to be to allow a generator to set an 801 * indicator in the r specifying that the body is coded in the 802 * implementation character set (i.e., the charset of the source 803 * code). This would get several different types of documents 804 * translated properly: mod_autoindex output, mod_status output, 805 * mod_info output, hard-coded error documents, etc. 806 */ 807 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 || 808#endif 809 strncasecmp(mime_type, "message/", 8) == 0 || 810 dc->force_xlate == FX_FORCE)) { 811 812 rv = apr_xlate_open(&ctx->xlate, 813 dc->charset_default, dc->charset_source, f->r->pool); 814 if (rv != APR_SUCCESS) { 815 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01453) 816 "can't open translation %s->%s", 817 dc->charset_source, dc->charset_default); 818 ctx->noop = 1; 819 } 820 else { 821 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) { 822 ctx->is_sb = 0; 823 } 824 } 825 } 826 else { 827 ctx->noop = 1; 828 if (mime_type) { 829 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, 830 "mime type is %s; no translation selected", 831 mime_type); 832 } 833 } 834 } 835 836 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, 837 "xlate_out_filter() - " 838 "charset_source: %s charset_default: %s", 839 dc && dc->charset_source ? dc->charset_source : "(none)", 840 dc && dc->charset_default ? dc->charset_default : "(none)"); 841 842 if (!ctx->ran) { /* filter never ran before */ 843 chk_filter_chain(f); 844 ctx->ran = 1; 845 if (!ctx->noop && !ctx->is_sb) { 846 /* We're not converting between two single-byte charsets, so unset 847 * Content-Length since it is unlikely to remain the same. 848 */ 849 apr_table_unset(f->r->headers_out, "Content-Length"); 850 } 851 } 852 853 if (ctx->noop) { 854 return ap_pass_brigade(f->next, bb); 855 } 856 857 dptr = APR_BRIGADE_FIRST(bb); 858 done = 0; 859 cur_len = 0; 860 space_avail = sizeof(tmp); 861 consumed_bucket = NULL; 862 while (!done) { 863 if (!cur_len) { /* no bytes left to process in the current bucket... */ 864 if (consumed_bucket) { 865 apr_bucket_delete(consumed_bucket); 866 consumed_bucket = NULL; 867 } 868 if (dptr == APR_BRIGADE_SENTINEL(bb)) { 869 break; 870 } 871 if (APR_BUCKET_IS_EOS(dptr)) { 872 cur_len = -1; /* XXX yuck, but that tells us to send 873 * eos down; when we minimize our bb construction 874 * we'll fix this crap */ 875 if (ctx->saved) { 876 /* Oops... we have a partial char from the previous bucket 877 * that won't be completed because there's no more data. 878 */ 879 rv = APR_INCOMPLETE; 880 ctx->ees = EES_INCOMPLETE_CHAR; 881 } 882 break; 883 } 884 if (APR_BUCKET_IS_METADATA(dptr)) { 885 apr_bucket *metadata_bucket; 886 metadata_bucket = dptr; 887 dptr = APR_BUCKET_NEXT(dptr); 888 APR_BUCKET_REMOVE(metadata_bucket); 889 rv = send_bucket_downstream(f, metadata_bucket); 890 if (rv != APR_SUCCESS) { 891 done = 1; 892 } 893 continue; 894 } 895 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ); 896 if (rv != APR_SUCCESS) { 897 ctx->ees = EES_BUCKET_READ; 898 break; 899 } 900 consumed_bucket = dptr; /* for axing when we're done reading it */ 901 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the 902 * next bucket */ 903 } 904 /* Try to fill up our tmp buffer with translated data. */ 905 cur_avail = cur_len; 906 907 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */ 908 if (ctx->saved) { 909 /* Rats... we need to finish a partial character from the previous 910 * bucket. 911 */ 912 char *tmp_tmp; 913 914 tmp_tmp = tmp + sizeof(tmp) - space_avail; 915 rv = finish_partial_char(ctx, 916 &cur_str, &cur_len, 917 &tmp_tmp, &space_avail); 918 } 919 else { 920 rv = apr_xlate_conv_buffer(ctx->xlate, 921 cur_str, &cur_avail, 922 tmp + sizeof(tmp) - space_avail, &space_avail); 923 924 /* Update input ptr and len after consuming some bytes */ 925 cur_str += cur_len - cur_avail; 926 cur_len = cur_avail; 927 928 if (rv == APR_INCOMPLETE) { /* partial character at end of input */ 929 /* We need to save the final byte(s) for next time; we can't 930 * convert it until we look at the next bucket. 931 */ 932 rv = set_aside_partial_char(ctx, cur_str, cur_len); 933 cur_len = 0; 934 } 935 } 936 } 937 938 if (rv != APR_SUCCESS) { 939 /* bad input byte or partial char too big to store */ 940 done = 1; 941 } 942 943 if (space_avail < XLATE_MIN_BUFF_LEFT) { 944 /* It is time to flush, as there is not enough space left in the 945 * current output buffer to bother with converting more data. 946 */ 947 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); 948 if (rv != APR_SUCCESS) { 949 done = 1; 950 } 951 952 /* tmp is now empty */ 953 space_avail = sizeof(tmp); 954 } 955 } 956 957 if (rv == APR_SUCCESS) { 958 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */ 959 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); 960 } 961 } 962 if (rv == APR_SUCCESS) { 963 if (cur_len == -1) { 964 rv = send_eos(f); 965 } 966 } 967 else { 968 log_xlate_error(f, rv); 969 } 970 971 return rv; 972} 973 974static apr_status_t xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb, 975 ap_input_mode_t mode, apr_read_type_e block, 976 apr_off_t readbytes) 977{ 978 apr_status_t rv; 979 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, 980 &charset_lite_module); 981 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, 982 &charset_lite_module); 983 charset_filter_ctx_t *ctx = f->ctx; 984 apr_size_t buffer_size; 985 int hit_eos; 986 987 if (!ctx) { 988 /* this is SetInputFilter path; grab the preallocated context, 989 * if any; note that if we decided not to do anything in an earlier 990 * handler, we won't even have a reqinfo 991 */ 992 if (reqinfo) { 993 ctx = f->ctx = reqinfo->input_ctx; 994 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice 995 * in the filter chain; we can't have two 996 * instances using the same context 997 */ 998 } 999 if (!ctx) { /* no idea how to translate; don't do anything */ 1000 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); 1001 ctx->dc = dc; 1002 ctx->noop = 1; 1003 } 1004 } 1005 1006 ap_log_rerror(APLOG_MARK, APLOG_TRACE6, 0, f->r, 1007 "xlate_in_filter() - " 1008 "charset_source: %s charset_default: %s", 1009 dc && dc->charset_source ? dc->charset_source : "(none)", 1010 dc && dc->charset_default ? dc->charset_default : "(none)"); 1011 1012 if (!ctx->ran) { /* filter never ran before */ 1013 chk_filter_chain(f); 1014 ctx->ran = 1; 1015 if (!ctx->noop && !ctx->is_sb 1016 && apr_table_get(f->r->headers_in, "Content-Length")) { 1017 /* A Content-Length header is present, but it won't be valid after 1018 * conversion because we're not converting between two single-byte 1019 * charsets. This will affect most CGI scripts and may affect 1020 * some modules. 1021 * Content-Length can't be unset here because that would break 1022 * being able to read the request body. 1023 * Processing of chunked request bodies is not impacted by this 1024 * filter since the the length was not declared anyway. 1025 */ 1026 ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r, 1027 "Request body length may change, resulting in " 1028 "misprocessing by some modules or scripts"); 1029 } 1030 } 1031 1032 if (ctx->noop) { 1033 return ap_get_brigade(f->next, bb, mode, block, readbytes); 1034 } 1035 1036 if (APR_BRIGADE_EMPTY(ctx->bb)) { 1037 if ((rv = ap_get_brigade(f->next, bb, mode, block, 1038 readbytes)) != APR_SUCCESS) { 1039 return rv; 1040 } 1041 } 1042 else { 1043 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */ 1044 } 1045 1046 buffer_size = INPUT_XLATE_BUF_SIZE; 1047 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos); 1048 if (rv == APR_SUCCESS) { 1049 if (!hit_eos) { 1050 /* move anything leftover into our context for next time; 1051 * we don't currently "set aside" since the data came from 1052 * down below, but I suspect that for long-term we need to 1053 * do that 1054 */ 1055 APR_BRIGADE_CONCAT(ctx->bb, bb); 1056 } 1057 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */ 1058 apr_bucket *e; 1059 1060 e = apr_bucket_heap_create(ctx->tmp, 1061 INPUT_XLATE_BUF_SIZE - buffer_size, 1062 NULL, f->r->connection->bucket_alloc); 1063 /* make sure we insert at the head, because there may be 1064 * an eos bucket already there, and the eos bucket should 1065 * come after the data 1066 */ 1067 APR_BRIGADE_INSERT_HEAD(bb, e); 1068 } 1069 else { 1070 /* XXX need to get some more data... what if the last brigade 1071 * we got had only the first byte of a multibyte char? we need 1072 * to grab more data from the network instead of returning an 1073 * empty brigade 1074 */ 1075 } 1076 /* If we have any metadata at the head of ctx->bb, go ahead and move it 1077 * onto the end of bb to be returned to our caller. 1078 */ 1079 if (!APR_BRIGADE_EMPTY(ctx->bb)) { 1080 apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb); 1081 while (b != APR_BRIGADE_SENTINEL(ctx->bb) 1082 && APR_BUCKET_IS_METADATA(b)) { 1083 APR_BUCKET_REMOVE(b); 1084 APR_BRIGADE_INSERT_TAIL(bb, b); 1085 b = APR_BRIGADE_FIRST(ctx->bb); 1086 } 1087 } 1088 } 1089 else { 1090 log_xlate_error(f, rv); 1091 } 1092 1093 return rv; 1094} 1095 1096static const command_rec cmds[] = 1097{ 1098 AP_INIT_TAKE1("CharsetSourceEnc", 1099 add_charset_source, 1100 NULL, 1101 OR_FILEINFO, 1102 "source (html,cgi,ssi) file charset"), 1103 AP_INIT_TAKE1("CharsetDefault", 1104 add_charset_default, 1105 NULL, 1106 OR_FILEINFO, 1107 "name of default charset"), 1108 AP_INIT_ITERATE("CharsetOptions", 1109 add_charset_options, 1110 NULL, 1111 OR_FILEINFO, 1112 "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, " 1113 "NoTranslateAllMimeTypes"), 1114 {NULL} 1115}; 1116 1117static void charset_register_hooks(apr_pool_t *p) 1118{ 1119 ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE); 1120 ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST); 1121 ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL, 1122 AP_FTYPE_RESOURCE); 1123 ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL, 1124 AP_FTYPE_RESOURCE); 1125} 1126 1127AP_DECLARE_MODULE(charset_lite) = 1128{ 1129 STANDARD20_MODULE_STUFF, 1130 create_charset_dir_conf, 1131 merge_charset_dir_conf, 1132 NULL, 1133 NULL, 1134 cmds, 1135 charset_register_hooks 1136}; 1137 1138