1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * simple hokey charset recoding configuration module 19 * 20 * See mod_ebcdic and mod_charset for more thought-out examples. This 21 * one is just so Jeff can learn how a module works and experiment with 22 * basic character set recoding configuration. 23 * 24 * !!!This is an extremely cheap ripoff of mod_charset.c from Russian Apache!!! 25 */ 26 27#include "httpd.h" 28#include "http_config.h" 29#define CORE_PRIVATE 30#include "http_core.h" 31#include "http_log.h" 32#include "http_main.h" 33#include "http_protocol.h" 34#include "http_request.h" 35#include "util_charset.h" 36#include "apr_buckets.h" 37#include "util_filter.h" 38#include "apr_strings.h" 39#include "apr_lib.h" 40#include "apr_xlate.h" 41#define APR_WANT_STRFUNC 42#include "apr_want.h" 43 44#define OUTPUT_XLATE_BUF_SIZE (16*1024) /* size of translation buffer used on output */ 45#define INPUT_XLATE_BUF_SIZE (8*1024) /* size of translation buffer used on input */ 46 47#define XLATE_MIN_BUFF_LEFT 128 /* flush once there is no more than this much 48 * space left in the translation buffer 49 */ 50 51#define FATTEST_CHAR 8 /* we don't handle chars wider than this that straddle 52 * two buckets 53 */ 54 55/* extended error status codes; this is used in addition to an apr_status_t to 56 * track errors in the translation filter 57 */ 58typedef enum { 59 EES_INIT = 0, /* no error info yet; value must be 0 for easy init */ 60 EES_LIMIT, /* built-in restriction encountered */ 61 EES_INCOMPLETE_CHAR, /* incomplete multi-byte char at end of content */ 62 EES_BUCKET_READ, 63 EES_DOWNSTREAM, /* something bad happened in a filter below xlate */ 64 EES_BAD_INPUT /* input data invalid */ 65} ees_t; 66 67/* registered name of the output translation filter */ 68#define XLATEOUT_FILTER_NAME "XLATEOUT" 69/* registered name of input translation filter */ 70#define XLATEIN_FILTER_NAME "XLATEIN" 71 72typedef struct charset_dir_t { 73 /** debug level; -1 means uninitialized, 0 means no debug */ 74 int debug; 75 const char *charset_source; /* source encoding */ 76 const char *charset_default; /* how to ship on wire */ 77 /** module does ap_add_*_filter()? */ 78 enum {IA_INIT, IA_IMPADD, IA_NOIMPADD} implicit_add; 79 /** treat all mimetypes as text? */ 80 enum {FX_INIT, FX_FORCE, FX_NOFORCE} force_xlate; 81} charset_dir_t; 82 83/* charset_filter_ctx_t is created for each filter instance; because the same 84 * filter code is used for translating in both directions, we need this context 85 * data to tell the filter which translation handle to use; it also can hold a 86 * character which was split between buckets 87 */ 88typedef struct charset_filter_ctx_t { 89 apr_xlate_t *xlate; 90 int is_sb; /* single-byte translation? */ 91 charset_dir_t *dc; 92 ees_t ees; /* extended error status */ 93 apr_size_t saved; 94 char buf[FATTEST_CHAR]; /* we want to be able to build a complete char here */ 95 int ran; /* has filter instance run before? */ 96 int noop; /* should we pass brigades through unchanged? */ 97 char *tmp; /* buffer for input filtering */ 98 apr_bucket_brigade *bb; /* input buckets we couldn't finish translating */ 99 apr_bucket_brigade *tmpbb; /* used for passing downstream */ 100} charset_filter_ctx_t; 101 102/* charset_req_t is available via r->request_config if any translation is 103 * being performed 104 */ 105typedef struct charset_req_t { 106 charset_dir_t *dc; 107 charset_filter_ctx_t *output_ctx, *input_ctx; 108} charset_req_t; 109 110/* debug level definitions */ 111#define DBGLVL_GORY 9 /* gory details */ 112#define DBGLVL_FLOW 4 /* enough messages to see what happens on 113 * each request */ 114#define DBGLVL_PMC 2 /* messages about possible misconfiguration */ 115 116module AP_MODULE_DECLARE_DATA charset_lite_module; 117 118static void *create_charset_dir_conf(apr_pool_t *p,char *dummy) 119{ 120 charset_dir_t *dc = (charset_dir_t *)apr_pcalloc(p,sizeof(charset_dir_t)); 121 122 dc->debug = -1; 123 return dc; 124} 125 126static void *merge_charset_dir_conf(apr_pool_t *p, void *basev, void *overridesv) 127{ 128 charset_dir_t *a = (charset_dir_t *)apr_pcalloc (p, sizeof(charset_dir_t)); 129 charset_dir_t *base = (charset_dir_t *)basev, 130 *over = (charset_dir_t *)overridesv; 131 132 /* If it is defined in the current container, use it. Otherwise, use the one 133 * from the enclosing container. 134 */ 135 136 a->debug = 137 over->debug != -1 ? over->debug : base->debug; 138 a->charset_default = 139 over->charset_default ? over->charset_default : base->charset_default; 140 a->charset_source = 141 over->charset_source ? over->charset_source : base->charset_source; 142 a->implicit_add = 143 over->implicit_add != IA_INIT ? over->implicit_add : base->implicit_add; 144 a->force_xlate= 145 over->force_xlate != FX_INIT ? over->force_xlate : base->force_xlate; 146 return a; 147} 148 149/* CharsetSourceEnc charset 150 */ 151static const char *add_charset_source(cmd_parms *cmd, void *in_dc, 152 const char *name) 153{ 154 charset_dir_t *dc = in_dc; 155 156 dc->charset_source = name; 157 return NULL; 158} 159 160/* CharsetDefault charset 161 */ 162static const char *add_charset_default(cmd_parms *cmd, void *in_dc, 163 const char *name) 164{ 165 charset_dir_t *dc = in_dc; 166 167 dc->charset_default = name; 168 return NULL; 169} 170 171/* CharsetOptions optionflag... 172 */ 173static const char *add_charset_options(cmd_parms *cmd, void *in_dc, 174 const char *flag) 175{ 176 charset_dir_t *dc = in_dc; 177 178 if (!strcasecmp(flag, "ImplicitAdd")) { 179 dc->implicit_add = IA_IMPADD; 180 } 181 else if (!strcasecmp(flag, "NoImplicitAdd")) { 182 dc->implicit_add = IA_NOIMPADD; 183 } 184 else if (!strcasecmp(flag, "TranslateAllMimeTypes")) { 185 dc->force_xlate = FX_FORCE; 186 } 187 else if (!strcasecmp(flag, "NoTranslateAllMimeTypes")) { 188 dc->force_xlate = FX_NOFORCE; 189 } 190 else if (!strncasecmp(flag, "DebugLevel=", 11)) { 191 dc->debug = atoi(flag + 11); 192 } 193 else { 194 return apr_pstrcat(cmd->temp_pool, 195 "Invalid CharsetOptions option: ", 196 flag, 197 NULL); 198 } 199 200 return NULL; 201} 202 203/* find_code_page() is a fixup hook that checks if the module is 204 * configured and the input or output potentially need to be translated. 205 * If so, context is initialized for the filters. 206 */ 207static int find_code_page(request_rec *r) 208{ 209 charset_dir_t *dc = ap_get_module_config(r->per_dir_config, 210 &charset_lite_module); 211 charset_req_t *reqinfo; 212 charset_filter_ctx_t *input_ctx, *output_ctx; 213 apr_status_t rv; 214 215 if (dc->debug >= DBGLVL_FLOW) { 216 ap_log_rerror(APLOG_MARK,APLOG_DEBUG, 0, r, 217 "uri: %s file: %s method: %d " 218 "imt: %s flags: %s%s%s %s->%s", 219 r->uri, 220 r->filename ? r->filename : "(none)", 221 r->method_number, 222 r->content_type ? r->content_type : "(unknown)", 223 r->main ? "S" : "", /* S if subrequest */ 224 r->prev ? "R" : "", /* R if redirect */ 225 r->proxyreq ? "P" : "", /* P if proxy */ 226 dc->charset_source, dc->charset_default); 227 } 228 229 /* If we don't have a full directory configuration, bail out. 230 */ 231 if (!dc->charset_source || !dc->charset_default) { 232 if (dc->debug >= DBGLVL_PMC) { 233 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 234 "incomplete configuration: src %s, dst %s", 235 dc->charset_source ? dc->charset_source : "unspecified", 236 dc->charset_default ? dc->charset_default : "unspecified"); 237 } 238 return DECLINED; 239 } 240 241 /* catch proxy requests */ 242 if (r->proxyreq) { 243 return DECLINED; 244 } 245 246 /* mod_rewrite indicators */ 247 if (r->filename 248 && (!strncmp(r->filename, "redirect:", 9) 249 || !strncmp(r->filename, "gone:", 5) 250 || !strncmp(r->filename, "passthrough:", 12) 251 || !strncmp(r->filename, "forbidden:", 10))) { 252 return DECLINED; 253 } 254 255 /* no translation when server and network charsets are set to the same value */ 256 if (!strcasecmp(dc->charset_source, dc->charset_default)) { 257 return DECLINED; 258 } 259 260 /* Get storage for the request data and the output filter context. 261 * We rarely need the input filter context, so allocate that separately. 262 */ 263 reqinfo = (charset_req_t *)apr_pcalloc(r->pool, 264 sizeof(charset_req_t) + 265 sizeof(charset_filter_ctx_t)); 266 output_ctx = (charset_filter_ctx_t *)(reqinfo + 1); 267 268 reqinfo->dc = dc; 269 output_ctx->dc = dc; 270 output_ctx->tmpbb = apr_brigade_create(r->pool, 271 r->connection->bucket_alloc); 272 ap_set_module_config(r->request_config, &charset_lite_module, reqinfo); 273 274 reqinfo->output_ctx = output_ctx; 275 276 switch (r->method_number) { 277 case M_PUT: 278 case M_POST: 279 /* Set up input translation. Note: A request body can be included 280 * with the OPTIONS method, but for now we don't set up translation 281 * of it. 282 */ 283 input_ctx = apr_pcalloc(r->pool, sizeof(charset_filter_ctx_t)); 284 input_ctx->bb = apr_brigade_create(r->pool, 285 r->connection->bucket_alloc); 286 input_ctx->tmp = apr_palloc(r->pool, INPUT_XLATE_BUF_SIZE); 287 input_ctx->dc = dc; 288 reqinfo->input_ctx = input_ctx; 289 rv = apr_xlate_open(&input_ctx->xlate, dc->charset_source, 290 dc->charset_default, r->pool); 291 if (rv != APR_SUCCESS) { 292 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, 293 "can't open translation %s->%s", 294 dc->charset_default, dc->charset_source); 295 return HTTP_INTERNAL_SERVER_ERROR; 296 } 297 if (apr_xlate_sb_get(input_ctx->xlate, &input_ctx->is_sb) != APR_SUCCESS) { 298 input_ctx->is_sb = 0; 299 } 300 } 301 302 return DECLINED; 303} 304 305static int configured_in_list(request_rec *r, const char *filter_name, 306 struct ap_filter_t *filter_list) 307{ 308 struct ap_filter_t *filter = filter_list; 309 310 while (filter) { 311 if (!strcasecmp(filter_name, filter->frec->name)) { 312 return 1; 313 } 314 filter = filter->next; 315 } 316 return 0; 317} 318 319static int configured_on_input(request_rec *r, const char *filter_name) 320{ 321 return configured_in_list(r, filter_name, r->input_filters); 322} 323 324static int configured_on_output(request_rec *r, const char *filter_name) 325{ 326 return configured_in_list(r, filter_name, r->output_filters); 327} 328 329/* xlate_insert_filter() is a filter hook which decides whether or not 330 * to insert a translation filter for the current request. 331 */ 332static void xlate_insert_filter(request_rec *r) 333{ 334 /* Hey... don't be so quick to use reqinfo->dc here; reqinfo may be NULL */ 335 charset_req_t *reqinfo = ap_get_module_config(r->request_config, 336 &charset_lite_module); 337 charset_dir_t *dc = ap_get_module_config(r->per_dir_config, 338 &charset_lite_module); 339 340 if (dc && (dc->implicit_add == IA_NOIMPADD)) { 341 if (dc->debug >= DBGLVL_GORY) { 342 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 343 "xlate output filter not added implicitly because " 344 "CharsetOptions included 'NoImplicitAdd'"); 345 } 346 return; 347 } 348 349 if (reqinfo) { 350 if (reqinfo->output_ctx && !configured_on_output(r, XLATEOUT_FILTER_NAME)) { 351 ap_add_output_filter(XLATEOUT_FILTER_NAME, reqinfo->output_ctx, r, 352 r->connection); 353 } 354 else if (dc->debug >= DBGLVL_FLOW) { 355 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 356 "xlate output filter not added implicitly because %s", 357 !reqinfo->output_ctx ? 358 "no output configuration available" : 359 "another module added the filter"); 360 } 361 362 if (reqinfo->input_ctx && !configured_on_input(r, XLATEIN_FILTER_NAME)) { 363 ap_add_input_filter(XLATEIN_FILTER_NAME, reqinfo->input_ctx, r, 364 r->connection); 365 } 366 else if (dc->debug >= DBGLVL_FLOW) { 367 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, 368 "xlate input filter not added implicitly because %s", 369 !reqinfo->input_ctx ? 370 "no input configuration available" : 371 "another module added the filter"); 372 } 373 } 374} 375 376/* stuff that sucks that I know of: 377 * 378 * bucket handling: 379 * why create an eos bucket when we see it come down the stream? just send the one 380 * passed as input... news flash: this will be fixed when xlate_out_filter() starts 381 * using the more generic xlate_brigade() 382 * 383 * translation mechanics: 384 * we don't handle characters that straddle more than two buckets; an error 385 * will be generated 386 */ 387 388static apr_status_t send_bucket_downstream(ap_filter_t *f, apr_bucket *b) 389{ 390 charset_filter_ctx_t *ctx = f->ctx; 391 apr_status_t rv; 392 393 APR_BRIGADE_INSERT_TAIL(ctx->tmpbb, b); 394 rv = ap_pass_brigade(f->next, ctx->tmpbb); 395 if (rv != APR_SUCCESS) { 396 ctx->ees = EES_DOWNSTREAM; 397 } 398 apr_brigade_cleanup(ctx->tmpbb); 399 return rv; 400} 401 402/* send_downstream() is passed the translated data; it puts it in a single- 403 * bucket brigade and passes the brigade to the next filter 404 */ 405static apr_status_t send_downstream(ap_filter_t *f, const char *tmp, apr_size_t len) 406{ 407 request_rec *r = f->r; 408 conn_rec *c = r->connection; 409 apr_bucket *b; 410 411 b = apr_bucket_transient_create(tmp, len, c->bucket_alloc); 412 return send_bucket_downstream(f, b); 413} 414 415static apr_status_t send_eos(ap_filter_t *f) 416{ 417 request_rec *r = f->r; 418 conn_rec *c = r->connection; 419 apr_bucket_brigade *bb; 420 apr_bucket *b; 421 charset_filter_ctx_t *ctx = f->ctx; 422 apr_status_t rv; 423 424 bb = apr_brigade_create(r->pool, c->bucket_alloc); 425 b = apr_bucket_eos_create(c->bucket_alloc); 426 APR_BRIGADE_INSERT_TAIL(bb, b); 427 rv = ap_pass_brigade(f->next, bb); 428 if (rv != APR_SUCCESS) { 429 ctx->ees = EES_DOWNSTREAM; 430 } 431 return rv; 432} 433 434static apr_status_t set_aside_partial_char(charset_filter_ctx_t *ctx, 435 const char *partial, 436 apr_size_t partial_len) 437{ 438 apr_status_t rv; 439 440 if (sizeof(ctx->buf) > partial_len) { 441 ctx->saved = partial_len; 442 memcpy(ctx->buf, partial, partial_len); 443 rv = APR_SUCCESS; 444 } 445 else { 446 rv = APR_INCOMPLETE; 447 ctx->ees = EES_LIMIT; /* we don't handle chars this wide which straddle 448 * buckets 449 */ 450 } 451 return rv; 452} 453 454static apr_status_t finish_partial_char(charset_filter_ctx_t *ctx, 455 /* input buffer: */ 456 const char **cur_str, 457 apr_size_t *cur_len, 458 /* output buffer: */ 459 char **out_str, 460 apr_size_t *out_len) 461{ 462 apr_status_t rv; 463 apr_size_t tmp_input_len; 464 465 /* Keep adding bytes from the input string to the saved string until we 466 * 1) finish the input char 467 * 2) get an error 468 * or 3) run out of bytes to add 469 */ 470 471 do { 472 ctx->buf[ctx->saved] = **cur_str; 473 ++ctx->saved; 474 ++*cur_str; 475 --*cur_len; 476 tmp_input_len = ctx->saved; 477 rv = apr_xlate_conv_buffer(ctx->xlate, 478 ctx->buf, 479 &tmp_input_len, 480 *out_str, 481 out_len); 482 } while (rv == APR_INCOMPLETE && *cur_len); 483 484 if (rv == APR_SUCCESS) { 485 ctx->saved = 0; 486 } 487 else { 488 ctx->ees = EES_LIMIT; /* code isn't smart enough to handle chars 489 * straddling more than two buckets 490 */ 491 } 492 493 return rv; 494} 495 496static void log_xlate_error(ap_filter_t *f, apr_status_t rv) 497{ 498 charset_filter_ctx_t *ctx = f->ctx; 499 const char *msg; 500 char msgbuf[100]; 501 int cur; 502 503 switch(ctx->ees) { 504 case EES_LIMIT: 505 rv = 0; 506 msg = "xlate filter - a built-in restriction was encountered"; 507 break; 508 case EES_BAD_INPUT: 509 rv = 0; 510 msg = "xlate filter - an input character was invalid"; 511 break; 512 case EES_BUCKET_READ: 513 rv = 0; 514 msg = "xlate filter - bucket read routine failed"; 515 break; 516 case EES_INCOMPLETE_CHAR: 517 rv = 0; 518 strcpy(msgbuf, "xlate filter - incomplete char at end of input - "); 519 cur = 0; 520 while ((apr_size_t)cur < ctx->saved) { 521 apr_snprintf(msgbuf + strlen(msgbuf), sizeof(msgbuf) - strlen(msgbuf), 522 "%02X", (unsigned)ctx->buf[cur]); 523 ++cur; 524 } 525 msg = msgbuf; 526 break; 527 case EES_DOWNSTREAM: 528 msg = "xlate filter - an error occurred in a lower filter"; 529 break; 530 default: 531 msg = "xlate filter - returning error"; 532 } 533 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, 534 "%s", msg); 535} 536 537/* chk_filter_chain() is called once per filter instance; it tries to 538 * determine if the current filter instance should be disabled because 539 * its translation is incompatible with the translation of an existing 540 * instance of the translate filter 541 * 542 * Example bad scenario: 543 * 544 * configured filter chain for the request: 545 * INCLUDES XLATEOUT(8859-1->UTS-16) 546 * configured filter chain for the subrequest: 547 * XLATEOUT(8859-1->UTS-16) 548 * 549 * When the subrequest is processed, the filter chain will be 550 * XLATEOUT(8859-1->UTS-16) XLATEOUT(8859-1->UTS-16) 551 * This makes no sense, so the instance of XLATEOUT added for the 552 * subrequest will be noop-ed. 553 * 554 * Example good scenario: 555 * 556 * configured filter chain for the request: 557 * INCLUDES XLATEOUT(8859-1->UTS-16) 558 * configured filter chain for the subrequest: 559 * XLATEOUT(IBM-1047->8859-1) 560 * 561 * When the subrequest is processed, the filter chain will be 562 * XLATEOUT(IBM-1047->8859-1) XLATEOUT(8859-1->UTS-16) 563 * This makes sense, so the instance of XLATEOUT added for the 564 * subrequest will be left alone and it will translate from 565 * IBM-1047->8859-1. 566 */ 567static void chk_filter_chain(ap_filter_t *f) 568{ 569 ap_filter_t *curf; 570 charset_filter_ctx_t *curctx, *last_xlate_ctx = NULL, 571 *ctx = f->ctx; 572 int debug = ctx->dc->debug; 573 int output = !strcasecmp(f->frec->name, XLATEOUT_FILTER_NAME); 574 575 if (ctx->noop) { 576 return; 577 } 578 579 /* walk the filter chain; see if it makes sense for our filter to 580 * do any translation 581 */ 582 curf = output ? f->r->output_filters : f->r->input_filters; 583 while (curf) { 584 if (!strcasecmp(curf->frec->name, f->frec->name) && 585 curf->ctx) { 586 curctx = (charset_filter_ctx_t *)curf->ctx; 587 if (!last_xlate_ctx) { 588 last_xlate_ctx = curctx; 589 } 590 else { 591 if (strcmp(last_xlate_ctx->dc->charset_default, 592 curctx->dc->charset_source)) { 593 /* incompatible translation 594 * if our filter instance is incompatible with an instance 595 * already in place, noop our instance 596 * Notes: 597 * . We are only willing to noop our own instance. 598 * . It is possible to noop another instance which has not 599 * yet run, but this is not currently implemented. 600 * Hopefully it will not be needed. 601 * . It is not possible to noop an instance which has 602 * already run. 603 */ 604 if (last_xlate_ctx == f->ctx) { 605 last_xlate_ctx->noop = 1; 606 if (debug >= DBGLVL_PMC) { 607 const char *symbol = output ? "->" : "<-"; 608 609 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 610 0, f->r, 611 "%s %s - disabling " 612 "translation %s%s%s; existing " 613 "translation %s%s%s", 614 f->r->uri ? "uri" : "file", 615 f->r->uri ? f->r->uri : f->r->filename, 616 last_xlate_ctx->dc->charset_source, 617 symbol, 618 last_xlate_ctx->dc->charset_default, 619 curctx->dc->charset_source, 620 symbol, 621 curctx->dc->charset_default); 622 } 623 } 624 else { 625 const char *symbol = output ? "->" : "<-"; 626 627 ap_log_rerror(APLOG_MARK, APLOG_ERR, 628 0, f->r, 629 "chk_filter_chain() - can't disable " 630 "translation %s%s%s; existing " 631 "translation %s%s%s", 632 last_xlate_ctx->dc->charset_source, 633 symbol, 634 last_xlate_ctx->dc->charset_default, 635 curctx->dc->charset_source, 636 symbol, 637 curctx->dc->charset_default); 638 } 639 break; 640 } 641 } 642 } 643 curf = curf->next; 644 } 645} 646 647/* xlate_brigade() is used to filter request and response bodies 648 * 649 * we'll stop when one of the following occurs: 650 * . we run out of buckets 651 * . we run out of space in the output buffer 652 * . we hit an error or metadata 653 * 654 * inputs: 655 * bb: brigade to process 656 * buffer: storage to hold the translated characters 657 * buffer_avail: size of buffer 658 * (and a few more uninteresting parms) 659 * 660 * outputs: 661 * return value: APR_SUCCESS or some error code 662 * bb: we've removed any buckets representing the 663 * translated characters; the eos bucket, if 664 * present, will be left in the brigade 665 * buffer: filled in with translated characters 666 * buffer_avail: updated with the bytes remaining 667 * hit_eos: did we hit an EOS bucket? 668 */ 669static apr_status_t xlate_brigade(charset_filter_ctx_t *ctx, 670 apr_bucket_brigade *bb, 671 char *buffer, 672 apr_size_t *buffer_avail, 673 int *hit_eos) 674{ 675 apr_bucket *b = NULL; /* set to NULL only to quiet some gcc */ 676 apr_bucket *consumed_bucket; 677 const char *bucket; 678 apr_size_t bytes_in_bucket; /* total bytes read from current bucket */ 679 apr_size_t bucket_avail; /* bytes left in current bucket */ 680 apr_status_t rv = APR_SUCCESS; 681 682 *hit_eos = 0; 683 bucket_avail = 0; 684 consumed_bucket = NULL; 685 while (1) { 686 if (!bucket_avail) { /* no bytes left to process in the current bucket... */ 687 if (consumed_bucket) { 688 apr_bucket_delete(consumed_bucket); 689 consumed_bucket = NULL; 690 } 691 b = APR_BRIGADE_FIRST(bb); 692 if (b == APR_BRIGADE_SENTINEL(bb) || 693 APR_BUCKET_IS_METADATA(b)) { 694 break; 695 } 696 rv = apr_bucket_read(b, &bucket, &bytes_in_bucket, APR_BLOCK_READ); 697 if (rv != APR_SUCCESS) { 698 ctx->ees = EES_BUCKET_READ; 699 break; 700 } 701 bucket_avail = bytes_in_bucket; 702 consumed_bucket = b; /* for axing when we're done reading it */ 703 } 704 if (bucket_avail) { 705 /* We've got data, so translate it. */ 706 if (ctx->saved) { 707 /* Rats... we need to finish a partial character from the previous 708 * bucket. 709 * 710 * Strangely, finish_partial_char() increments the input buffer 711 * pointer but does not increment the output buffer pointer. 712 */ 713 apr_size_t old_buffer_avail = *buffer_avail; 714 rv = finish_partial_char(ctx, 715 &bucket, &bucket_avail, 716 &buffer, buffer_avail); 717 buffer += old_buffer_avail - *buffer_avail; 718 } 719 else { 720 apr_size_t old_buffer_avail = *buffer_avail; 721 apr_size_t old_bucket_avail = bucket_avail; 722 rv = apr_xlate_conv_buffer(ctx->xlate, 723 bucket, &bucket_avail, 724 buffer, 725 buffer_avail); 726 buffer += old_buffer_avail - *buffer_avail; 727 bucket += old_bucket_avail - bucket_avail; 728 729 if (rv == APR_INCOMPLETE) { /* partial character at end of input */ 730 /* We need to save the final byte(s) for next time; we can't 731 * convert it until we look at the next bucket. 732 */ 733 rv = set_aside_partial_char(ctx, bucket, bucket_avail); 734 bucket_avail = 0; 735 } 736 } 737 if (rv != APR_SUCCESS) { 738 /* bad input byte or partial char too big to store */ 739 break; 740 } 741 if (*buffer_avail < XLATE_MIN_BUFF_LEFT) { 742 /* if any data remains in the current bucket, split there */ 743 if (bucket_avail) { 744 apr_bucket_split(b, bytes_in_bucket - bucket_avail); 745 } 746 apr_bucket_delete(b); 747 break; 748 } 749 } 750 } 751 752 if (!APR_BRIGADE_EMPTY(bb)) { 753 b = APR_BRIGADE_FIRST(bb); 754 if (APR_BUCKET_IS_EOS(b)) { 755 /* Leave the eos bucket in the brigade for reporting to 756 * subsequent filters. 757 */ 758 *hit_eos = 1; 759 if (ctx->saved) { 760 /* Oops... we have a partial char from the previous bucket 761 * that won't be completed because there's no more data. 762 */ 763 rv = APR_INCOMPLETE; 764 ctx->ees = EES_INCOMPLETE_CHAR; 765 } 766 } 767 } 768 769 return rv; 770} 771 772/* xlate_out_filter() handles (almost) arbitrary conversions from one charset 773 * to another... 774 * translation is determined in the fixup hook (find_code_page), which is 775 * where the filter's context data is set up... the context data gives us 776 * the translation handle 777 */ 778static apr_status_t xlate_out_filter(ap_filter_t *f, apr_bucket_brigade *bb) 779{ 780 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, 781 &charset_lite_module); 782 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, 783 &charset_lite_module); 784 charset_filter_ctx_t *ctx = f->ctx; 785 apr_bucket *dptr, *consumed_bucket; 786 const char *cur_str; 787 apr_size_t cur_len, cur_avail; 788 char tmp[OUTPUT_XLATE_BUF_SIZE]; 789 apr_size_t space_avail; 790 int done; 791 apr_status_t rv = APR_SUCCESS; 792 793 if (!ctx) { 794 /* this is SetOutputFilter path; grab the preallocated context, 795 * if any; note that if we decided not to do anything in an earlier 796 * handler, we won't even have a reqinfo 797 */ 798 if (reqinfo) { 799 ctx = f->ctx = reqinfo->output_ctx; 800 reqinfo->output_ctx = NULL; /* prevent SNAFU if user coded us twice 801 * in the filter chain; we can't have two 802 * instances using the same context 803 */ 804 } 805 if (!ctx) { /* no idea how to translate; don't do anything */ 806 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); 807 ctx->dc = dc; 808 ctx->noop = 1; 809 } 810 } 811 812 /* Check the mime type to see if translation should be performed. 813 */ 814 if (!ctx->noop && ctx->xlate == NULL) { 815 const char *mime_type = f->r->content_type ? f->r->content_type : ap_default_type(f->r); 816 817 if (strncasecmp(mime_type, "text/", 5) == 0 || 818#if APR_CHARSET_EBCDIC 819 /* On an EBCDIC machine, be willing to translate mod_autoindex- 820 * generated output. Otherwise, it doesn't look too cool. 821 * 822 * XXX This isn't a perfect fix because this doesn't trigger us 823 * to convert from the charset of the source code to ASCII. The 824 * general solution seems to be to allow a generator to set an 825 * indicator in the r specifying that the body is coded in the 826 * implementation character set (i.e., the charset of the source 827 * code). This would get several different types of documents 828 * translated properly: mod_autoindex output, mod_status output, 829 * mod_info output, hard-coded error documents, etc. 830 */ 831 strcmp(mime_type, DIR_MAGIC_TYPE) == 0 || 832#endif 833 strncasecmp(mime_type, "message/", 8) == 0 || 834 dc->force_xlate == FX_FORCE) { 835 836 rv = apr_xlate_open(&ctx->xlate, 837 dc->charset_default, dc->charset_source, f->r->pool); 838 if (rv != APR_SUCCESS) { 839 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, 840 "can't open translation %s->%s", 841 dc->charset_source, dc->charset_default); 842 ctx->noop = 1; 843 } 844 else { 845 if (apr_xlate_sb_get(ctx->xlate, &ctx->is_sb) != APR_SUCCESS) { 846 ctx->is_sb = 0; 847 } 848 } 849 } 850 else { 851 ctx->noop = 1; 852 if (dc->debug >= DBGLVL_GORY) { 853 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, 854 "mime type is %s; no translation selected", 855 mime_type); 856 } 857 } 858 } 859 860 if (dc->debug >= DBGLVL_GORY) { 861 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, 862 "xlate_out_filter() - " 863 "charset_source: %s charset_default: %s", 864 dc && dc->charset_source ? dc->charset_source : "(none)", 865 dc && dc->charset_default ? dc->charset_default : "(none)"); 866 } 867 868 if (!ctx->ran) { /* filter never ran before */ 869 chk_filter_chain(f); 870 ctx->ran = 1; 871 if (!ctx->noop && !ctx->is_sb) { 872 /* We're not converting between two single-byte charsets, so unset 873 * Content-Length since it is unlikely to remain the same. 874 */ 875 apr_table_unset(f->r->headers_out, "Content-Length"); 876 } 877 } 878 879 if (ctx->noop) { 880 return ap_pass_brigade(f->next, bb); 881 } 882 883 dptr = APR_BRIGADE_FIRST(bb); 884 done = 0; 885 cur_len = 0; 886 space_avail = sizeof(tmp); 887 consumed_bucket = NULL; 888 while (!done) { 889 if (!cur_len) { /* no bytes left to process in the current bucket... */ 890 if (consumed_bucket) { 891 apr_bucket_delete(consumed_bucket); 892 consumed_bucket = NULL; 893 } 894 if (dptr == APR_BRIGADE_SENTINEL(bb)) { 895 done = 1; 896 break; 897 } 898 if (APR_BUCKET_IS_EOS(dptr)) { 899 done = 1; 900 cur_len = -1; /* XXX yuck, but that tells us to send 901 * eos down; when we minimize our bb construction 902 * we'll fix this crap */ 903 if (ctx->saved) { 904 /* Oops... we have a partial char from the previous bucket 905 * that won't be completed because there's no more data. 906 */ 907 rv = APR_INCOMPLETE; 908 ctx->ees = EES_INCOMPLETE_CHAR; 909 } 910 break; 911 } 912 if (APR_BUCKET_IS_METADATA(dptr)) { 913 apr_bucket *metadata_bucket; 914 metadata_bucket = dptr; 915 dptr = APR_BUCKET_NEXT(dptr); 916 APR_BUCKET_REMOVE(metadata_bucket); 917 rv = send_bucket_downstream(f, metadata_bucket); 918 if (rv != APR_SUCCESS) { 919 done = 1; 920 } 921 continue; 922 } 923 rv = apr_bucket_read(dptr, &cur_str, &cur_len, APR_BLOCK_READ); 924 if (rv != APR_SUCCESS) { 925 done = 1; 926 ctx->ees = EES_BUCKET_READ; 927 break; 928 } 929 consumed_bucket = dptr; /* for axing when we're done reading it */ 930 dptr = APR_BUCKET_NEXT(dptr); /* get ready for when we access the 931 * next bucket */ 932 } 933 /* Try to fill up our tmp buffer with translated data. */ 934 cur_avail = cur_len; 935 936 if (cur_len) { /* maybe we just hit the end of a pipe (len = 0) ? */ 937 if (ctx->saved) { 938 /* Rats... we need to finish a partial character from the previous 939 * bucket. 940 */ 941 char *tmp_tmp; 942 943 tmp_tmp = tmp + sizeof(tmp) - space_avail; 944 rv = finish_partial_char(ctx, 945 &cur_str, &cur_len, 946 &tmp_tmp, &space_avail); 947 } 948 else { 949 rv = apr_xlate_conv_buffer(ctx->xlate, 950 cur_str, &cur_avail, 951 tmp + sizeof(tmp) - space_avail, &space_avail); 952 953 /* Update input ptr and len after consuming some bytes */ 954 cur_str += cur_len - cur_avail; 955 cur_len = cur_avail; 956 957 if (rv == APR_INCOMPLETE) { /* partial character at end of input */ 958 /* We need to save the final byte(s) for next time; we can't 959 * convert it until we look at the next bucket. 960 */ 961 rv = set_aside_partial_char(ctx, cur_str, cur_len); 962 cur_len = 0; 963 } 964 } 965 } 966 967 if (rv != APR_SUCCESS) { 968 /* bad input byte or partial char too big to store */ 969 done = 1; 970 } 971 972 if (space_avail < XLATE_MIN_BUFF_LEFT) { 973 /* It is time to flush, as there is not enough space left in the 974 * current output buffer to bother with converting more data. 975 */ 976 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); 977 if (rv != APR_SUCCESS) { 978 done = 1; 979 } 980 981 /* tmp is now empty */ 982 space_avail = sizeof(tmp); 983 } 984 } 985 986 if (rv == APR_SUCCESS) { 987 if (space_avail < sizeof(tmp)) { /* gotta write out what we converted */ 988 rv = send_downstream(f, tmp, sizeof(tmp) - space_avail); 989 } 990 } 991 if (rv == APR_SUCCESS) { 992 if (cur_len == -1) { 993 rv = send_eos(f); 994 } 995 } 996 else { 997 log_xlate_error(f, rv); 998 } 999 1000 return rv; 1001} 1002 1003static int xlate_in_filter(ap_filter_t *f, apr_bucket_brigade *bb, 1004 ap_input_mode_t mode, apr_read_type_e block, 1005 apr_off_t readbytes) 1006{ 1007 apr_status_t rv; 1008 charset_req_t *reqinfo = ap_get_module_config(f->r->request_config, 1009 &charset_lite_module); 1010 charset_dir_t *dc = ap_get_module_config(f->r->per_dir_config, 1011 &charset_lite_module); 1012 charset_filter_ctx_t *ctx = f->ctx; 1013 apr_size_t buffer_size; 1014 int hit_eos; 1015 1016 if (!ctx) { 1017 /* this is SetInputFilter path; grab the preallocated context, 1018 * if any; note that if we decided not to do anything in an earlier 1019 * handler, we won't even have a reqinfo 1020 */ 1021 if (reqinfo) { 1022 ctx = f->ctx = reqinfo->input_ctx; 1023 reqinfo->input_ctx = NULL; /* prevent SNAFU if user coded us twice 1024 * in the filter chain; we can't have two 1025 * instances using the same context 1026 */ 1027 } 1028 if (!ctx) { /* no idea how to translate; don't do anything */ 1029 ctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(charset_filter_ctx_t)); 1030 ctx->dc = dc; 1031 ctx->noop = 1; 1032 } 1033 } 1034 1035 if (dc->debug >= DBGLVL_GORY) { 1036 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, 1037 "xlate_in_filter() - " 1038 "charset_source: %s charset_default: %s", 1039 dc && dc->charset_source ? dc->charset_source : "(none)", 1040 dc && dc->charset_default ? dc->charset_default : "(none)"); 1041 } 1042 1043 if (!ctx->ran) { /* filter never ran before */ 1044 chk_filter_chain(f); 1045 ctx->ran = 1; 1046 if (!ctx->noop && !ctx->is_sb 1047 && apr_table_get(f->r->headers_in, "Content-Length")) { 1048 /* A Content-Length header is present, but it won't be valid after 1049 * conversion because we're not converting between two single-byte 1050 * charsets. This will affect most CGI scripts and may affect 1051 * some modules. 1052 * Content-Length can't be unset here because that would break 1053 * being able to read the request body. 1054 * Processing of chunked request bodies is not impacted by this 1055 * filter since the the length was not declared anyway. 1056 */ 1057 if (dc->debug >= DBGLVL_PMC) { 1058 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, 1059 "Request body length may change, resulting in " 1060 "misprocessing by some modules or scripts"); 1061 } 1062 } 1063 } 1064 1065 if (ctx->noop) { 1066 return ap_get_brigade(f->next, bb, mode, block, readbytes); 1067 } 1068 1069 if (APR_BRIGADE_EMPTY(ctx->bb)) { 1070 if ((rv = ap_get_brigade(f->next, bb, mode, block, 1071 readbytes)) != APR_SUCCESS) { 1072 return rv; 1073 } 1074 } 1075 else { 1076 APR_BRIGADE_PREPEND(bb, ctx->bb); /* first use the leftovers */ 1077 } 1078 1079 buffer_size = INPUT_XLATE_BUF_SIZE; 1080 rv = xlate_brigade(ctx, bb, ctx->tmp, &buffer_size, &hit_eos); 1081 if (rv == APR_SUCCESS) { 1082 if (!hit_eos) { 1083 /* move anything leftover into our context for next time; 1084 * we don't currently "set aside" since the data came from 1085 * down below, but I suspect that for long-term we need to 1086 * do that 1087 */ 1088 APR_BRIGADE_CONCAT(ctx->bb, bb); 1089 } 1090 if (buffer_size < INPUT_XLATE_BUF_SIZE) { /* do we have output? */ 1091 apr_bucket *e; 1092 1093 e = apr_bucket_heap_create(ctx->tmp, 1094 INPUT_XLATE_BUF_SIZE - buffer_size, 1095 NULL, f->r->connection->bucket_alloc); 1096 /* make sure we insert at the head, because there may be 1097 * an eos bucket already there, and the eos bucket should 1098 * come after the data 1099 */ 1100 APR_BRIGADE_INSERT_HEAD(bb, e); 1101 } 1102 else { 1103 /* XXX need to get some more data... what if the last brigade 1104 * we got had only the first byte of a multibyte char? we need 1105 * to grab more data from the network instead of returning an 1106 * empty brigade 1107 */ 1108 } 1109 /* If we have any metadata at the head of ctx->bb, go ahead and move it 1110 * onto the end of bb to be returned to our caller. 1111 */ 1112 if (!APR_BRIGADE_EMPTY(ctx->bb)) { 1113 apr_bucket *b = APR_BRIGADE_FIRST(ctx->bb); 1114 while (b != APR_BRIGADE_SENTINEL(ctx->bb) 1115 && APR_BUCKET_IS_METADATA(b)) { 1116 APR_BUCKET_REMOVE(b); 1117 APR_BRIGADE_INSERT_TAIL(bb, b); 1118 b = APR_BRIGADE_FIRST(ctx->bb); 1119 } 1120 } 1121 } 1122 else { 1123 log_xlate_error(f, rv); 1124 } 1125 1126 return rv; 1127} 1128 1129static const command_rec cmds[] = 1130{ 1131 AP_INIT_TAKE1("CharsetSourceEnc", 1132 add_charset_source, 1133 NULL, 1134 OR_FILEINFO, 1135 "source (html,cgi,ssi) file charset"), 1136 AP_INIT_TAKE1("CharsetDefault", 1137 add_charset_default, 1138 NULL, 1139 OR_FILEINFO, 1140 "name of default charset"), 1141 AP_INIT_ITERATE("CharsetOptions", 1142 add_charset_options, 1143 NULL, 1144 OR_FILEINFO, 1145 "valid options: ImplicitAdd, NoImplicitAdd, TranslateAllMimeTypes, " 1146 "NoTranslateAllMimeTypes, DebugLevel=n"), 1147 {NULL} 1148}; 1149 1150static void charset_register_hooks(apr_pool_t *p) 1151{ 1152 ap_hook_fixups(find_code_page, NULL, NULL, APR_HOOK_MIDDLE); 1153 ap_hook_insert_filter(xlate_insert_filter, NULL, NULL, APR_HOOK_REALLY_LAST); 1154 ap_register_output_filter(XLATEOUT_FILTER_NAME, xlate_out_filter, NULL, 1155 AP_FTYPE_RESOURCE); 1156 ap_register_input_filter(XLATEIN_FILTER_NAME, xlate_in_filter, NULL, 1157 AP_FTYPE_RESOURCE); 1158} 1159 1160module AP_MODULE_DECLARE_DATA charset_lite_module = 1161{ 1162 STANDARD20_MODULE_STUFF, 1163 create_charset_dir_conf, 1164 merge_charset_dir_conf, 1165 NULL, 1166 NULL, 1167 cmds, 1168 charset_register_hooks 1169}; 1170 1171