1/* Copyright (c) 2003-11, WebThing Ltd 2 * Copyright (c) 2011-, The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one or more 5 * contributor license agreements. See the NOTICE file distributed with 6 * this work for additional information regarding copyright ownership. 7 * The ASF licenses this file to You under the Apache License, Version 2.0 8 * (the "License"); you may not use this file except in compliance with 9 * the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20/* GO_FASTER 21 You can #define GO_FASTER to disable trace logging. 22*/ 23 24#ifdef GO_FASTER 25#define VERBOSE(x) 26#define VERBOSEB(x) 27#else 28#define VERBOSE(x) if (verbose) x 29#define VERBOSEB(x) if (verbose) {x} 30#endif 31 32/* libxml2 */ 33#include <libxml/HTMLparser.h> 34 35#include "http_protocol.h" 36#include "http_config.h" 37#include "http_log.h" 38#include "apr_strings.h" 39#include "apr_hash.h" 40#include "apr_strmatch.h" 41#include "apr_lib.h" 42 43#include "apr_optional.h" 44#include "mod_xml2enc.h" 45#include "http_request.h" 46#include "ap_expr.h" 47 48/* globals set once at startup */ 49static ap_rxplus_t *old_expr; 50static ap_regex_t *seek_meta; 51static const apr_strmatch_pattern* seek_content; 52static apr_status_t (*xml2enc_charset)(request_rec*, xmlCharEncoding*, const char**) = NULL; 53static apr_status_t (*xml2enc_filter)(request_rec*, const char*, unsigned int) = NULL; 54 55module AP_MODULE_DECLARE_DATA proxy_html_module; 56 57#define M_HTML 0x01 58#define M_EVENTS 0x02 59#define M_CDATA 0x04 60#define M_REGEX 0x08 61#define M_ATSTART 0x10 62#define M_ATEND 0x20 63#define M_LAST 0x40 64#define M_NOTLAST 0x80 65#define M_INTERPOLATE_TO 0x100 66#define M_INTERPOLATE_FROM 0x200 67 68typedef struct { 69 const char *val; 70} tattr; 71typedef struct { 72 unsigned int start; 73 unsigned int end; 74} meta; 75typedef struct urlmap { 76 struct urlmap *next; 77 unsigned int flags; 78 unsigned int regflags; 79 union { 80 const char *c; 81 ap_regex_t *r; 82 } from; 83 const char *to; 84 ap_expr_info_t *cond; 85} urlmap; 86typedef struct { 87 urlmap *map; 88 const char *doctype; 89 const char *etag; 90 unsigned int flags; 91 size_t bufsz; 92 apr_hash_t *links; 93 apr_array_header_t *events; 94 const char *charset_out; 95 int extfix; 96 int metafix; 97 int strip_comments; 98 int interp; 99 int enabled; 100} proxy_html_conf; 101typedef struct { 102 ap_filter_t *f; 103 proxy_html_conf *cfg; 104 htmlParserCtxtPtr parser; 105 apr_bucket_brigade *bb; 106 char *buf; 107 size_t offset; 108 size_t avail; 109 const char *encoding; 110 urlmap *map; 111} saxctxt; 112 113 114#define NORM_LC 0x1 115#define NORM_MSSLASH 0x2 116#define NORM_RESET 0x4 117static htmlSAXHandler sax; 118 119typedef enum { ATTR_IGNORE, ATTR_URI, ATTR_EVENT } rewrite_t; 120 121static const char *const fpi_html = 122 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01//EN\">\n"; 123static const char *const fpi_html_legacy = 124 "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"; 125static const char *const fpi_xhtml = 126 "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">\n"; 127static const char *const fpi_xhtml_legacy = 128 "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\" \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"; 129static const char *const html_etag = ">"; 130static const char *const xhtml_etag = " />"; 131/*#define DEFAULT_DOCTYPE fpi_html */ 132static const char *const DEFAULT_DOCTYPE = ""; 133#define DEFAULT_ETAG html_etag 134 135static void normalise(unsigned int flags, char *str) 136{ 137 char *p; 138 if (flags & NORM_LC) 139 for (p = str; *p; ++p) 140 if (isupper(*p)) 141 *p = tolower(*p); 142 143 if (flags & NORM_MSSLASH) 144 for (p = ap_strchr(str, '\\'); p; p = ap_strchr(p+1, '\\')) 145 *p = '/'; 146 147} 148#define consume_buffer(ctx,inbuf,bytes,flag) \ 149 htmlParseChunk(ctx->parser, inbuf, bytes, flag) 150 151#define AP_fwrite(ctx,inbuf,bytes,flush) \ 152 ap_fwrite(ctx->f->next, ctx->bb, inbuf, bytes); 153 154/* This is always utf-8 on entry. We can convert charset within FLUSH */ 155#define FLUSH AP_fwrite(ctx, (chars+begin), (i-begin), 0); begin = i+1 156static void pcharacters(void *ctxt, const xmlChar *uchars, int length) 157{ 158 const char *chars = (const char*) uchars; 159 saxctxt *ctx = (saxctxt*) ctxt; 160 int i; 161 int begin; 162 for (begin=i=0; i<length; i++) { 163 switch (chars[i]) { 164 case '&' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "&"); break; 165 case '<' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, "<"); break; 166 case '>' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, ">"); break; 167 case '"' : FLUSH; ap_fputs(ctx->f->next, ctx->bb, """); break; 168 default : break; 169 } 170 } 171 FLUSH; 172} 173 174static void preserve(saxctxt *ctx, const size_t len) 175{ 176 char *newbuf; 177 if (len <= (ctx->avail - ctx->offset)) 178 return; 179 else while (len > (ctx->avail - ctx->offset)) 180 ctx->avail += ctx->cfg->bufsz; 181 182 newbuf = realloc(ctx->buf, ctx->avail); 183 if (newbuf != ctx->buf) { 184 if (ctx->buf) 185 apr_pool_cleanup_kill(ctx->f->r->pool, ctx->buf, 186 (int(*)(void*))free); 187 apr_pool_cleanup_register(ctx->f->r->pool, newbuf, 188 (int(*)(void*))free, apr_pool_cleanup_null); 189 ctx->buf = newbuf; 190 } 191} 192 193static void pappend(saxctxt *ctx, const char *buf, const size_t len) 194{ 195 preserve(ctx, len); 196 memcpy(ctx->buf+ctx->offset, buf, len); 197 ctx->offset += len; 198} 199 200static void dump_content(saxctxt *ctx) 201{ 202 urlmap *m; 203 char *found; 204 size_t s_from, s_to; 205 size_t match; 206 char c = 0; 207 int nmatch; 208 ap_regmatch_t pmatch[10]; 209 char *subs; 210 size_t len, offs; 211 urlmap *themap = ctx->map; 212#ifndef GO_FASTER 213 int verbose = APLOGrtrace1(ctx->f->r); 214#endif 215 216 pappend(ctx, &c, 1); /* append null byte */ 217 /* parse the text for URLs */ 218 for (m = themap; m; m = m->next) { 219 if (!(m->flags & M_CDATA)) 220 continue; 221 if (m->flags & M_REGEX) { 222 nmatch = 10; 223 offs = 0; 224 while (!ap_regexec(m->from.r, ctx->buf+offs, nmatch, pmatch, 0)) { 225 match = pmatch[0].rm_so; 226 s_from = pmatch[0].rm_eo - match; 227 subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, 228 nmatch, pmatch); 229 s_to = strlen(subs); 230 len = strlen(ctx->buf); 231 offs += match; 232 VERBOSEB( 233 const char *f = apr_pstrndup(ctx->f->r->pool, 234 ctx->buf + offs, s_from); 235 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r, 236 "C/RX: match at %s, substituting %s", f, subs); 237 ) 238 if (s_to > s_from) { 239 preserve(ctx, s_to - s_from); 240 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, 241 len + 1 - s_from - offs); 242 memcpy(ctx->buf+offs, subs, s_to); 243 } 244 else { 245 memcpy(ctx->buf + offs, subs, s_to); 246 memmove(ctx->buf+offs+s_to, ctx->buf+offs+s_from, 247 len + 1 - s_from - offs); 248 } 249 offs += s_to; 250 } 251 } 252 else { 253 s_from = strlen(m->from.c); 254 s_to = strlen(m->to); 255 for (found = strstr(ctx->buf, m->from.c); found; 256 found = strstr(ctx->buf+match+s_to, m->from.c)) { 257 match = found - ctx->buf; 258 if ((m->flags & M_ATSTART) && (match != 0)) 259 break; 260 len = strlen(ctx->buf); 261 if ((m->flags & M_ATEND) && (match < (len - s_from))) 262 continue; 263 VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, ctx->f->r, 264 "C: matched %s, substituting %s", 265 m->from.c, m->to)); 266 if (s_to > s_from) { 267 preserve(ctx, s_to - s_from); 268 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from, 269 len + 1 - s_from - match); 270 memcpy(ctx->buf+match, m->to, s_to); 271 } 272 else { 273 memcpy(ctx->buf+match, m->to, s_to); 274 memmove(ctx->buf+match+s_to, ctx->buf+match+s_from, 275 len + 1 - s_from - match); 276 } 277 } 278 } 279 } 280 AP_fwrite(ctx, ctx->buf, strlen(ctx->buf), 1); 281} 282static void pcdata(void *ctxt, const xmlChar *uchars, int length) 283{ 284 const char *chars = (const char*) uchars; 285 saxctxt *ctx = (saxctxt*) ctxt; 286 if (ctx->cfg->extfix) { 287 pappend(ctx, chars, length); 288 } 289 else { 290 /* not sure if this should force-flush 291 * (i.e. can one cdata section come in multiple calls?) 292 */ 293 AP_fwrite(ctx, chars, length, 0); 294 } 295} 296static void pcomment(void *ctxt, const xmlChar *uchars) 297{ 298 const char *chars = (const char*) uchars; 299 saxctxt *ctx = (saxctxt*) ctxt; 300 if (ctx->cfg->strip_comments) 301 return; 302 303 if (ctx->cfg->extfix) { 304 pappend(ctx, "<!--", 4); 305 pappend(ctx, chars, strlen(chars)); 306 pappend(ctx, "-->", 3); 307 } 308 else { 309 ap_fputs(ctx->f->next, ctx->bb, "<!--"); 310 AP_fwrite(ctx, chars, strlen(chars), 1); 311 ap_fputs(ctx->f->next, ctx->bb, "-->"); 312 } 313} 314static void pendElement(void *ctxt, const xmlChar *uname) 315{ 316 saxctxt *ctx = (saxctxt*) ctxt; 317 const char *name = (const char*) uname; 318 const htmlElemDesc* desc = htmlTagLookup(uname); 319 320 if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { 321 /* enforce html */ 322 if (!desc || desc->depr) 323 return; 324 325 } 326 else if ((ctx->cfg->doctype == fpi_html) 327 || (ctx->cfg->doctype == fpi_xhtml)) { 328 /* enforce html legacy */ 329 if (!desc) 330 return; 331 } 332 /* TODO - implement HTML "allowed here" using the stack */ 333 /* nah. Keeping the stack is too much overhead */ 334 335 if (ctx->offset > 0) { 336 dump_content(ctx); 337 ctx->offset = 0; /* having dumped it, we can re-use the memory */ 338 } 339 if (!desc || !desc->empty) { 340 ap_fprintf(ctx->f->next, ctx->bb, "</%s>", name); 341 } 342} 343 344static void pstartElement(void *ctxt, const xmlChar *uname, 345 const xmlChar** uattrs) 346{ 347 int required_attrs; 348 int num_match; 349 size_t offs, len; 350 char *subs; 351 rewrite_t is_uri; 352 const char** a; 353 urlmap *m; 354 size_t s_to, s_from, match; 355 char *found; 356 saxctxt *ctx = (saxctxt*) ctxt; 357 size_t nmatch; 358 ap_regmatch_t pmatch[10]; 359#ifndef GO_FASTER 360 int verbose = APLOGrtrace1(ctx->f->r); 361#endif 362 apr_array_header_t *linkattrs; 363 int i; 364 const char *name = (const char*) uname; 365 const char** attrs = (const char**) uattrs; 366 const htmlElemDesc* desc = htmlTagLookup(uname); 367 urlmap *themap = ctx->map; 368#ifdef HAVE_STACK 369 const void** descp; 370#endif 371 int enforce = 0; 372 if ((ctx->cfg->doctype == fpi_html) || (ctx->cfg->doctype == fpi_xhtml)) { 373 /* enforce html */ 374 enforce = 2; 375 if (!desc || desc->depr) 376 return; 377 378 } 379 else if ((ctx->cfg->doctype == fpi_html) 380 || (ctx->cfg->doctype == fpi_xhtml)) { 381 enforce = 1; 382 /* enforce html legacy */ 383 if (!desc) { 384 return; 385 } 386 } 387 if (!desc && enforce) { 388 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01416) 389 "Bogus HTML element %s dropped", name); 390 return; 391 } 392 if (desc && desc->depr && (enforce == 2)) { 393 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01417) 394 "Deprecated HTML element %s dropped", name); 395 return; 396 } 397#ifdef HAVE_STACK 398 descp = apr_array_push(ctx->stack); 399 *descp = desc; 400 /* TODO - implement HTML "allowed here" */ 401#endif 402 403 ap_fputc(ctx->f->next, ctx->bb, '<'); 404 ap_fputs(ctx->f->next, ctx->bb, name); 405 406 required_attrs = 0; 407 if ((enforce > 0) && (desc != NULL) && (desc->attrs_req != NULL)) 408 for (a = desc->attrs_req; *a; a++) 409 ++required_attrs; 410 411 if (attrs) { 412 linkattrs = apr_hash_get(ctx->cfg->links, name, APR_HASH_KEY_STRING); 413 for (a = attrs; *a; a += 2) { 414 if (desc && enforce > 0) { 415 switch (htmlAttrAllowed(desc, (xmlChar*)*a, 2-enforce)) { 416 case HTML_INVALID: 417 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01418) 418 "Bogus HTML attribute %s of %s dropped", 419 *a, name); 420 continue; 421 case HTML_DEPRECATED: 422 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01419) 423 "Deprecated HTML attribute %s of %s dropped", 424 *a, name); 425 continue; 426 case HTML_REQUIRED: 427 required_attrs--; /* cross off the number still needed */ 428 /* fallthrough - required implies valid */ 429 default: 430 break; 431 } 432 } 433 ctx->offset = 0; 434 if (a[1]) { 435 pappend(ctx, a[1], strlen(a[1])+1); 436 is_uri = ATTR_IGNORE; 437 if (linkattrs) { 438 tattr *attrs = (tattr*) linkattrs->elts; 439 for (i=0; i < linkattrs->nelts; ++i) { 440 if (!strcmp(*a, attrs[i].val)) { 441 is_uri = ATTR_URI; 442 break; 443 } 444 } 445 } 446 if ((is_uri == ATTR_IGNORE) && ctx->cfg->extfix 447 && (ctx->cfg->events != NULL)) { 448 for (i=0; i < ctx->cfg->events->nelts; ++i) { 449 tattr *attrs = (tattr*) ctx->cfg->events->elts; 450 if (!strcmp(*a, attrs[i].val)) { 451 is_uri = ATTR_EVENT; 452 break; 453 } 454 } 455 } 456 switch (is_uri) { 457 case ATTR_URI: 458 num_match = 0; 459 for (m = themap; m; m = m->next) { 460 if (!(m->flags & M_HTML)) 461 continue; 462 if (m->flags & M_REGEX) { 463 nmatch = 10; 464 if (!ap_regexec(m->from.r, ctx->buf, nmatch, 465 pmatch, 0)) { 466 ++num_match; 467 offs = match = pmatch[0].rm_so; 468 s_from = pmatch[0].rm_eo - match; 469 subs = ap_pregsub(ctx->f->r->pool, m->to, 470 ctx->buf, nmatch, pmatch); 471 VERBOSE({ 472 const char *f; 473 f = apr_pstrndup(ctx->f->r->pool, 474 ctx->buf + offs, s_from); 475 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, 476 ctx->f->r, 477 "H/RX: match at %s, substituting %s", 478 f, subs); 479 }) 480 s_to = strlen(subs); 481 len = strlen(ctx->buf); 482 if (s_to > s_from) { 483 preserve(ctx, s_to - s_from); 484 memmove(ctx->buf+offs+s_to, 485 ctx->buf+offs+s_from, 486 len + 1 - s_from - offs); 487 memcpy(ctx->buf+offs, subs, s_to); 488 } 489 else { 490 memcpy(ctx->buf + offs, subs, s_to); 491 memmove(ctx->buf+offs+s_to, 492 ctx->buf+offs+s_from, 493 len + 1 - s_from - offs); 494 } 495 } 496 } else { 497 s_from = strlen(m->from.c); 498 if (!strncasecmp(ctx->buf, m->from.c, s_from)) { 499 ++num_match; 500 s_to = strlen(m->to); 501 len = strlen(ctx->buf); 502 VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 503 0, ctx->f->r, 504 "H: matched %s, substituting %s", 505 m->from.c, m->to)); 506 if (s_to > s_from) { 507 preserve(ctx, s_to - s_from); 508 memmove(ctx->buf+s_to, ctx->buf+s_from, 509 len + 1 - s_from); 510 memcpy(ctx->buf, m->to, s_to); 511 } 512 else { /* it fits in the existing space */ 513 memcpy(ctx->buf, m->to, s_to); 514 memmove(ctx->buf+s_to, ctx->buf+s_from, 515 len + 1 - s_from); 516 } 517 break; 518 } 519 } 520 /* URIs only want one match unless overridden in the config */ 521 if ((num_match > 0) && !(m->flags & M_NOTLAST)) 522 break; 523 } 524 break; 525 case ATTR_EVENT: 526 for (m = themap; m; m = m->next) { 527 num_match = 0; /* reset here since we're working per-rule */ 528 if (!(m->flags & M_EVENTS)) 529 continue; 530 if (m->flags & M_REGEX) { 531 nmatch = 10; 532 offs = 0; 533 while (!ap_regexec(m->from.r, ctx->buf+offs, 534 nmatch, pmatch, 0)) { 535 match = pmatch[0].rm_so; 536 s_from = pmatch[0].rm_eo - match; 537 subs = ap_pregsub(ctx->f->r->pool, m->to, ctx->buf+offs, 538 nmatch, pmatch); 539 VERBOSE({ 540 const char *f; 541 f = apr_pstrndup(ctx->f->r->pool, 542 ctx->buf + offs, s_from); 543 ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 0, 544 ctx->f->r, 545 "E/RX: match at %s, substituting %s", 546 f, subs); 547 }) 548 s_to = strlen(subs); 549 offs += match; 550 len = strlen(ctx->buf); 551 if (s_to > s_from) { 552 preserve(ctx, s_to - s_from); 553 memmove(ctx->buf+offs+s_to, 554 ctx->buf+offs+s_from, 555 len + 1 - s_from - offs); 556 memcpy(ctx->buf+offs, subs, s_to); 557 } 558 else { 559 memcpy(ctx->buf + offs, subs, s_to); 560 memmove(ctx->buf+offs+s_to, 561 ctx->buf+offs+s_from, 562 len + 1 - s_from - offs); 563 } 564 offs += s_to; 565 ++num_match; 566 } 567 } 568 else { 569 found = strstr(ctx->buf, m->from.c); 570 if ((m->flags & M_ATSTART) && (found != ctx->buf)) 571 continue; 572 while (found) { 573 s_from = strlen(m->from.c); 574 s_to = strlen(m->to); 575 match = found - ctx->buf; 576 if ((s_from < strlen(found)) 577 && (m->flags & M_ATEND)) { 578 found = strstr(ctx->buf+match+s_from, 579 m->from.c); 580 continue; 581 } 582 else { 583 found = strstr(ctx->buf+match+s_to, 584 m->from.c); 585 } 586 VERBOSE(ap_log_rerror(APLOG_MARK, APLOG_TRACE3, 587 0, ctx->f->r, 588 "E: matched %s, substituting %s", 589 m->from.c, m->to)); 590 len = strlen(ctx->buf); 591 if (s_to > s_from) { 592 preserve(ctx, s_to - s_from); 593 memmove(ctx->buf+match+s_to, 594 ctx->buf+match+s_from, 595 len + 1 - s_from - match); 596 memcpy(ctx->buf+match, m->to, s_to); 597 } 598 else { 599 memcpy(ctx->buf+match, m->to, s_to); 600 memmove(ctx->buf+match+s_to, 601 ctx->buf+match+s_from, 602 len + 1 - s_from - match); 603 } 604 ++num_match; 605 } 606 } 607 if (num_match && (m->flags & M_LAST)) 608 break; 609 } 610 break; 611 case ATTR_IGNORE: 612 break; 613 } 614 } 615 if (!a[1]) 616 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], NULL); 617 else { 618 619 if (ctx->cfg->flags != 0) 620 normalise(ctx->cfg->flags, ctx->buf); 621 622 /* write the attribute, using pcharacters to html-escape 623 anything that needs it in the value. 624 */ 625 ap_fputstrs(ctx->f->next, ctx->bb, " ", a[0], "=\"", NULL); 626 pcharacters(ctx, (const xmlChar*)ctx->buf, strlen(ctx->buf)); 627 ap_fputc(ctx->f->next, ctx->bb, '"'); 628 } 629 } 630 } 631 ctx->offset = 0; 632 if (desc && desc->empty) 633 ap_fputs(ctx->f->next, ctx->bb, ctx->cfg->etag); 634 else 635 ap_fputc(ctx->f->next, ctx->bb, '>'); 636 637 if ((enforce > 0) && (required_attrs > 0)) { 638 /* if there are more required attributes than we found then complain */ 639 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, ctx->f->r, APLOGNO(01420) 640 "HTML element %s is missing %d required attributes", 641 name, required_attrs); 642 } 643} 644 645static meta *metafix(request_rec *r, const char *buf) 646{ 647 meta *ret = NULL; 648 size_t offs = 0; 649 const char *p; 650 const char *q; 651 char *header; 652 char *content; 653 ap_regmatch_t pmatch[2]; 654 char delim; 655 656 while (!ap_regexec(seek_meta, buf+offs, 2, pmatch, 0)) { 657 header = NULL; 658 content = NULL; 659 p = buf+offs+pmatch[1].rm_eo; 660 while (!apr_isalpha(*++p)); 661 for (q = p; apr_isalnum(*q) || (*q == '-'); ++q); 662 header = apr_pstrndup(r->pool, p, q-p); 663 if (strncasecmp(header, "Content-", 8)) { 664 /* find content=... string */ 665 p = apr_strmatch(seek_content, buf+offs+pmatch[0].rm_so, 666 pmatch[0].rm_eo - pmatch[0].rm_so); 667 /* if it doesn't contain "content", ignore, don't crash! */ 668 if (p != NULL) { 669 while (*p) { 670 p += 7; 671 while (apr_isspace(*p)) 672 ++p; 673 if (*p != '=') 674 continue; 675 while (*p && apr_isspace(*++p)); 676 if ((*p == '\'') || (*p == '"')) { 677 delim = *p++; 678 for (q = p; *q != delim; ++q); 679 } else { 680 for (q = p; *q && !apr_isspace(*q) && (*q != '>'); ++q); 681 } 682 content = apr_pstrndup(r->pool, p, q-p); 683 break; 684 } 685 } 686 } 687 else if (!strncasecmp(header, "Content-Type", 12)) { 688 ret = apr_palloc(r->pool, sizeof(meta)); 689 ret->start = pmatch[0].rm_so; 690 ret->end = pmatch[0].rm_eo; 691 } 692 if (header && content) { 693#ifndef GO_FASTER 694 ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r, 695 "Adding header [%s: %s] from HTML META", 696 header, content); 697#endif 698 apr_table_setn(r->headers_out, header, content); 699 } 700 offs += pmatch[0].rm_eo; 701 } 702 return ret; 703} 704 705static const char *interpolate_vars(request_rec *r, const char *str) 706{ 707 const char *start; 708 const char *end; 709 const char *delim; 710 const char *before; 711 const char *after; 712 const char *replacement; 713 const char *var; 714 for (;;) { 715 start = str; 716 if (start = ap_strstr_c(start, "${"), start == NULL) 717 break; 718 719 if (end = ap_strchr_c(start+2, '}'), end == NULL) 720 break; 721 722 delim = ap_strchr_c(start, '|'); 723 before = apr_pstrndup(r->pool, str, start-str); 724 after = end+1; 725 if (delim) { 726 var = apr_pstrndup(r->pool, start+2, delim-start-2); 727 } 728 else { 729 var = apr_pstrndup(r->pool, start+2, end-start-2); 730 } 731 replacement = apr_table_get(r->subprocess_env, var); 732 if (!replacement) { 733 if (delim) 734 replacement = apr_pstrndup(r->pool, delim+1, end-delim-1); 735 else 736 replacement = ""; 737 } 738 str = apr_pstrcat(r->pool, before, replacement, after, NULL); 739 ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, r, 740 "Interpolating %s => %s", var, replacement); 741 } 742 return str; 743} 744static void fixup_rules(saxctxt *ctx) 745{ 746 urlmap *newp; 747 urlmap *p; 748 urlmap *prev = NULL; 749 request_rec *r = ctx->f->r; 750 751 for (p = ctx->cfg->map; p; p = p->next) { 752 if (p->cond != NULL) { 753 const char *err; 754 int ok = ap_expr_exec(r, p->cond, &err); 755 if (err) { 756 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01421) 757 "Error evaluating expr: %s", err); 758 } 759 if (ok == 0) { 760 continue; /* condition is unsatisfied */ 761 } 762 } 763 764 newp = apr_pmemdup(r->pool, p, sizeof(urlmap)); 765 766 if (newp->flags & M_INTERPOLATE_FROM) { 767 newp->from.c = interpolate_vars(r, newp->from.c); 768 if (!newp->from.c || !*newp->from.c) 769 continue; /* don't use empty from-pattern */ 770 if (newp->flags & M_REGEX) { 771 newp->from.r = ap_pregcomp(r->pool, newp->from.c, 772 newp->regflags); 773 } 774 } 775 if (newp->flags & M_INTERPOLATE_TO) { 776 newp->to = interpolate_vars(r, newp->to); 777 } 778 /* evaluate p->cond; continue if unsatisfied */ 779 /* create new urlmap with memcpy and append to map */ 780 /* interpolate from if flagged to do so */ 781 /* interpolate to if flagged to do so */ 782 783 if (prev != NULL) 784 prev->next = newp; 785 else 786 ctx->map = newp; 787 prev = newp; 788 } 789 790 if (prev) 791 prev->next = NULL; 792} 793 794static saxctxt *check_filter_init (ap_filter_t *f) 795{ 796 saxctxt *fctx; 797 if (!f->ctx) { 798 proxy_html_conf *cfg; 799 const char *force; 800 const char *errmsg = NULL; 801 cfg = ap_get_module_config(f->r->per_dir_config, &proxy_html_module); 802 force = apr_table_get(f->r->subprocess_env, "PROXY_HTML_FORCE"); 803 804 if (!force) { 805 if (!f->r->proxyreq) { 806 errmsg = "Non-proxy request; not inserting proxy-html filter"; 807 } 808 else if (!f->r->content_type) { 809 errmsg = "No content-type; bailing out of proxy-html filter"; 810 } 811 else if (strncasecmp(f->r->content_type, "text/html", 9) && 812 strncasecmp(f->r->content_type, 813 "application/xhtml+xml", 21)) { 814 errmsg = "Non-HTML content; not inserting proxy-html filter"; 815 } 816 } 817 if (!cfg->links) { 818 errmsg = "No links configured: nothing for proxy-html filter to do"; 819 } 820 821 if (errmsg) { 822#ifndef GO_FASTER 823 ap_log_rerror(APLOG_MARK, APLOG_TRACE1, 0, f->r, "%s", errmsg); 824#endif 825 ap_remove_output_filter(f); 826 return NULL; 827 } 828 829 fctx = f->ctx = apr_pcalloc(f->r->pool, sizeof(saxctxt)); 830 fctx->f = f; 831 fctx->bb = apr_brigade_create(f->r->pool, 832 f->r->connection->bucket_alloc); 833 fctx->cfg = cfg; 834 apr_table_unset(f->r->headers_out, "Content-Length"); 835 836 if (cfg->interp) 837 fixup_rules(fctx); 838 else 839 fctx->map = cfg->map; 840 /* defer dealing with charset_out until after sniffing charset_in 841 * so we can support setting one to t'other. 842 */ 843 } 844 return f->ctx; 845} 846 847static apr_status_t proxy_html_filter(ap_filter_t *f, apr_bucket_brigade *bb) 848{ 849 apr_bucket* b; 850 meta *m = NULL; 851 xmlCharEncoding enc; 852 const char *buf = 0; 853 apr_size_t bytes = 0; 854#ifndef USE_OLD_LIBXML2 855 int xmlopts = XML_PARSE_RECOVER | XML_PARSE_NONET | 856 XML_PARSE_NOBLANKS | XML_PARSE_NOERROR | XML_PARSE_NOWARNING; 857#endif 858 859 saxctxt *ctxt = check_filter_init(f); 860 if (!ctxt) 861 return ap_pass_brigade(f->next, bb); 862 for (b = APR_BRIGADE_FIRST(bb); 863 b != APR_BRIGADE_SENTINEL(bb); 864 b = APR_BUCKET_NEXT(b)) { 865 if (APR_BUCKET_IS_METADATA(b)) { 866 if (APR_BUCKET_IS_EOS(b)) { 867 if (ctxt->parser != NULL) { 868 consume_buffer(ctxt, buf, 0, 1); 869 } 870 APR_BRIGADE_INSERT_TAIL(ctxt->bb, 871 apr_bucket_eos_create(ctxt->bb->bucket_alloc)); 872 ap_pass_brigade(ctxt->f->next, ctxt->bb); 873 } 874 else if (APR_BUCKET_IS_FLUSH(b)) { 875 /* pass on flush, except at start where it would cause 876 * headers to be sent before doc sniffing 877 */ 878 if (ctxt->parser != NULL) { 879 ap_fflush(ctxt->f->next, ctxt->bb); 880 } 881 } 882 } 883 else if (apr_bucket_read(b, &buf, &bytes, APR_BLOCK_READ) 884 == APR_SUCCESS) { 885 if (ctxt->parser == NULL) { 886 const char *cenc; 887 if (!xml2enc_charset || 888 (xml2enc_charset(f->r, &enc, &cenc) != APR_SUCCESS)) { 889 if (!xml2enc_charset) 890 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01422) 891 "No i18n support found. Install mod_xml2enc if required"); 892 enc = XML_CHAR_ENCODING_NONE; 893 ap_set_content_type(f->r, "text/html;charset=utf-8"); 894 } 895 else { 896 /* if we wanted a non-default charset_out, insert the 897 * xml2enc filter now that we've sniffed it 898 */ 899 if (ctxt->cfg->charset_out && xml2enc_filter) { 900 if (*ctxt->cfg->charset_out != '*') 901 cenc = ctxt->cfg->charset_out; 902 xml2enc_filter(f->r, cenc, ENCIO_OUTPUT); 903 ap_set_content_type(f->r, 904 apr_pstrcat(f->r->pool, 905 "text/html;charset=", 906 cenc, NULL)); 907 } 908 else /* Normal case, everything worked, utf-8 output */ 909 ap_set_content_type(f->r, "text/html;charset=utf-8"); 910 } 911 912 ap_fputs(f->next, ctxt->bb, ctxt->cfg->doctype); 913 ctxt->parser = htmlCreatePushParserCtxt(&sax, ctxt, buf, 914 4, 0, enc); 915 buf += 4; 916 bytes -= 4; 917 if (ctxt->parser == NULL) { 918 apr_status_t rv = ap_pass_brigade(f->next, bb); 919 ap_remove_output_filter(f); 920 return rv; 921 } 922 apr_pool_cleanup_register(f->r->pool, ctxt->parser, 923 (int(*)(void*))htmlFreeParserCtxt, 924 apr_pool_cleanup_null); 925#ifndef USE_OLD_LIBXML2 926 if (xmlopts = xmlCtxtUseOptions(ctxt->parser, xmlopts), xmlopts) 927 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, f->r, APLOGNO(01423) 928 "Unsupported parser opts %x", xmlopts); 929#endif 930 if (ctxt->cfg->metafix) 931 m = metafix(f->r, buf); 932 if (m) { 933 consume_buffer(ctxt, buf, m->start, 0); 934 consume_buffer(ctxt, buf+m->end, bytes-m->end, 0); 935 } 936 else { 937 consume_buffer(ctxt, buf, bytes, 0); 938 } 939 } 940 else { 941 consume_buffer(ctxt, buf, bytes, 0); 942 } 943 } 944 else { 945 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01424) 946 "Error in bucket read"); 947 } 948 } 949 /*ap_fflush(ctxt->f->next, ctxt->bb); // uncomment for debug */ 950 apr_brigade_cleanup(bb); 951 return APR_SUCCESS; 952} 953 954static void *proxy_html_config(apr_pool_t *pool, char *x) 955{ 956 proxy_html_conf *ret = apr_pcalloc(pool, sizeof(proxy_html_conf)); 957 ret->doctype = DEFAULT_DOCTYPE; 958 ret->etag = DEFAULT_ETAG; 959 ret->bufsz = 8192; 960 /* ret->interp = 1; */ 961 /* don't initialise links and events until they get set/used */ 962 return ret; 963} 964 965static void *proxy_html_merge(apr_pool_t *pool, void *BASE, void *ADD) 966{ 967 proxy_html_conf *base = (proxy_html_conf *) BASE; 968 proxy_html_conf *add = (proxy_html_conf *) ADD; 969 proxy_html_conf *conf = apr_palloc(pool, sizeof(proxy_html_conf)); 970 971 /* don't merge declarations - just use the most specific */ 972 conf->links = (add->links == NULL) ? base->links : add->links; 973 conf->events = (add->events == NULL) ? base->events : add->events; 974 975 conf->charset_out = (add->charset_out == NULL) 976 ? base->charset_out : add->charset_out; 977 978 if (add->map && base->map) { 979 urlmap *a; 980 conf->map = NULL; 981 for (a = base->map; a; a = a->next) { 982 urlmap *save = conf->map; 983 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)); 984 conf->map->next = save; 985 } 986 for (a = add->map; a; a = a->next) { 987 urlmap *save = conf->map; 988 conf->map = apr_pmemdup(pool, a, sizeof(urlmap)); 989 conf->map->next = save; 990 } 991 } 992 else 993 conf->map = add->map ? add->map : base->map; 994 995 conf->doctype = (add->doctype == DEFAULT_DOCTYPE) 996 ? base->doctype : add->doctype; 997 conf->etag = (add->etag == DEFAULT_ETAG) ? base->etag : add->etag; 998 conf->bufsz = add->bufsz; 999 if (add->flags & NORM_RESET) { 1000 conf->flags = add->flags ^ NORM_RESET; 1001 conf->metafix = add->metafix; 1002 conf->extfix = add->extfix; 1003 conf->interp = add->interp; 1004 conf->strip_comments = add->strip_comments; 1005 conf->enabled = add->enabled; 1006 } 1007 else { 1008 conf->flags = base->flags | add->flags; 1009 conf->metafix = base->metafix | add->metafix; 1010 conf->extfix = base->extfix | add->extfix; 1011 conf->interp = base->interp | add->interp; 1012 conf->strip_comments = base->strip_comments | add->strip_comments; 1013 conf->enabled = add->enabled | base->enabled; 1014 } 1015 return conf; 1016} 1017#define REGFLAG(n,s,c) ((s&&(ap_strchr_c((s),(c))!=NULL)) ? (n) : 0) 1018#define XREGFLAG(n,s,c) ((!s||(ap_strchr_c((s),(c))==NULL)) ? (n) : 0) 1019static const char *comp_urlmap(cmd_parms *cmd, urlmap *newmap, 1020 const char *from, const char *to, 1021 const char *flags, const char *cond) 1022{ 1023 const char *err = NULL; 1024 newmap->flags 1025 = XREGFLAG(M_HTML,flags,'h') 1026 | XREGFLAG(M_EVENTS,flags,'e') 1027 | XREGFLAG(M_CDATA,flags,'c') 1028 | REGFLAG(M_ATSTART,flags,'^') 1029 | REGFLAG(M_ATEND,flags,'$') 1030 | REGFLAG(M_REGEX,flags,'R') 1031 | REGFLAG(M_LAST,flags,'L') 1032 | REGFLAG(M_NOTLAST,flags,'l') 1033 | REGFLAG(M_INTERPOLATE_TO,flags,'V') 1034 | REGFLAG(M_INTERPOLATE_FROM,flags,'v'); 1035 1036 if ((newmap->flags & M_INTERPOLATE_FROM) || !(newmap->flags & M_REGEX)) { 1037 newmap->from.c = from; 1038 newmap->to = to; 1039 } 1040 else { 1041 newmap->regflags 1042 = REGFLAG(AP_REG_EXTENDED,flags,'x') 1043 | REGFLAG(AP_REG_ICASE,flags,'i') 1044 | REGFLAG(AP_REG_NOSUB,flags,'n') 1045 | REGFLAG(AP_REG_NEWLINE,flags,'s'); 1046 newmap->from.r = ap_pregcomp(cmd->pool, from, newmap->regflags); 1047 newmap->to = to; 1048 } 1049 if (cond != NULL) { 1050 /* back-compatibility: support old-style ENV expressions 1051 * by converting to ap_expr syntax. 1052 * 1053 * 1. var --> env(var) 1054 * 2. var=val --> env(var)=val 1055 * 3. !var --> !env(var) 1056 * 4. !var=val --> env(var)!=val 1057 */ 1058 char *newcond = NULL; 1059 if (ap_rxplus_exec(cmd->temp_pool, old_expr, cond, &newcond)) { 1060 /* we got a substitution. Check for the case (3) above 1061 * that the regexp gets wrong: a negation without a comparison. 1062 */ 1063 if ((cond[0] == '!') && !ap_strchr_c(cond, '=')) { 1064 memmove(newcond+1, newcond, strlen(newcond)-1); 1065 newcond[0] = '!'; 1066 } 1067 cond = newcond; 1068 } 1069 newmap->cond = ap_expr_parse_cmd(cmd, cond, 0, &err, NULL); 1070 } 1071 else { 1072 newmap->cond = NULL; 1073 } 1074 return err; 1075} 1076 1077static const char *set_urlmap(cmd_parms *cmd, void *CFG, const char *args) 1078{ 1079 proxy_html_conf *cfg = (proxy_html_conf *)CFG; 1080 urlmap *map; 1081 apr_pool_t *pool = cmd->pool; 1082 urlmap *newmap; 1083 const char *usage = 1084 "Usage: ProxyHTMLURLMap from-pattern to-pattern [flags] [cond]"; 1085 const char *from; 1086 const char *to; 1087 const char *flags; 1088 const char *cond = NULL; 1089 1090 if (from = ap_getword_conf(cmd->pool, &args), !from) 1091 return usage; 1092 if (to = ap_getword_conf(cmd->pool, &args), !to) 1093 return usage; 1094 flags = ap_getword_conf(cmd->pool, &args); 1095 if (flags && *flags) 1096 cond = ap_getword_conf(cmd->pool, &args); 1097 if (cond && !*cond) 1098 cond = NULL; 1099 1100 /* the args look OK, so let's use them */ 1101 newmap = apr_palloc(pool, sizeof(urlmap)); 1102 newmap->next = NULL; 1103 if (cfg->map) { 1104 for (map = cfg->map; map->next; map = map->next); 1105 map->next = newmap; 1106 } 1107 else 1108 cfg->map = newmap; 1109 1110 return comp_urlmap(cmd, newmap, from, to, flags, cond); 1111} 1112 1113static const char *set_doctype(cmd_parms *cmd, void *CFG, 1114 const char *t, const char *l) 1115{ 1116 proxy_html_conf *cfg = (proxy_html_conf *)CFG; 1117 if (!strcasecmp(t, "xhtml")) { 1118 cfg->etag = xhtml_etag; 1119 if (l && !strcasecmp(l, "legacy")) 1120 cfg->doctype = fpi_xhtml_legacy; 1121 else 1122 cfg->doctype = fpi_xhtml; 1123 } 1124 else if (!strcasecmp(t, "html")) { 1125 cfg->etag = html_etag; 1126 if (l && !strcasecmp(l, "legacy")) 1127 cfg->doctype = fpi_html_legacy; 1128 else 1129 cfg->doctype = fpi_html; 1130 } 1131 else { 1132 cfg->doctype = apr_pstrdup(cmd->pool, t); 1133 if (l && ((l[0] == 'x') || (l[0] == 'X'))) 1134 cfg->etag = xhtml_etag; 1135 else 1136 cfg->etag = html_etag; 1137 } 1138 return NULL; 1139} 1140 1141static const char *set_flags(cmd_parms *cmd, void *CFG, const char *arg) 1142{ 1143 proxy_html_conf *cfg = CFG; 1144 if (arg && *arg) { 1145 if (!strcasecmp(arg, "lowercase")) 1146 cfg->flags |= NORM_LC; 1147 else if (!strcasecmp(arg, "dospath")) 1148 cfg->flags |= NORM_MSSLASH; 1149 else if (!strcasecmp(arg, "reset")) 1150 cfg->flags |= NORM_RESET; 1151 } 1152 return NULL; 1153} 1154 1155static const char *set_events(cmd_parms *cmd, void *CFG, const char *arg) 1156{ 1157 tattr *attr; 1158 proxy_html_conf *cfg = CFG; 1159 if (cfg->events == NULL) 1160 cfg->events = apr_array_make(cmd->pool, 20, sizeof(tattr)); 1161 attr = apr_array_push(cfg->events); 1162 attr->val = arg; 1163 return NULL; 1164} 1165 1166static const char *set_links(cmd_parms *cmd, void *CFG, 1167 const char *elt, const char *att) 1168{ 1169 apr_array_header_t *attrs; 1170 tattr *attr; 1171 proxy_html_conf *cfg = CFG; 1172 1173 if (cfg->links == NULL) 1174 cfg->links = apr_hash_make(cmd->pool); 1175 1176 attrs = apr_hash_get(cfg->links, elt, APR_HASH_KEY_STRING); 1177 if (!attrs) { 1178 attrs = apr_array_make(cmd->pool, 2, sizeof(tattr*)); 1179 apr_hash_set(cfg->links, elt, APR_HASH_KEY_STRING, attrs); 1180 } 1181 attr = apr_array_push(attrs); 1182 attr->val = att; 1183 return NULL; 1184} 1185static const command_rec proxy_html_cmds[] = { 1186 AP_INIT_ITERATE("ProxyHTMLEvents", set_events, NULL, 1187 RSRC_CONF|ACCESS_CONF, 1188 "Strings to be treated as scripting events"), 1189 AP_INIT_ITERATE2("ProxyHTMLLinks", set_links, NULL, 1190 RSRC_CONF|ACCESS_CONF, "Declare HTML Attributes"), 1191 AP_INIT_RAW_ARGS("ProxyHTMLURLMap", set_urlmap, NULL, 1192 RSRC_CONF|ACCESS_CONF, "Map URL From To"), 1193 AP_INIT_TAKE12("ProxyHTMLDoctype", set_doctype, NULL, 1194 RSRC_CONF|ACCESS_CONF, "(HTML|XHTML) [Legacy]"), 1195 AP_INIT_ITERATE("ProxyHTMLFixups", set_flags, NULL, 1196 RSRC_CONF|ACCESS_CONF, "Options are lowercase, dospath"), 1197 AP_INIT_FLAG("ProxyHTMLMeta", ap_set_flag_slot, 1198 (void*)APR_OFFSETOF(proxy_html_conf, metafix), 1199 RSRC_CONF|ACCESS_CONF, "Fix META http-equiv elements"), 1200 AP_INIT_FLAG("ProxyHTMLInterp", ap_set_flag_slot, 1201 (void*)APR_OFFSETOF(proxy_html_conf, interp), 1202 RSRC_CONF|ACCESS_CONF, 1203 "Support interpolation and conditions in URLMaps"), 1204 AP_INIT_FLAG("ProxyHTMLExtended", ap_set_flag_slot, 1205 (void*)APR_OFFSETOF(proxy_html_conf, extfix), 1206 RSRC_CONF|ACCESS_CONF, "Map URLs in Javascript and CSS"), 1207 AP_INIT_FLAG("ProxyHTMLStripComments", ap_set_flag_slot, 1208 (void*)APR_OFFSETOF(proxy_html_conf, strip_comments), 1209 RSRC_CONF|ACCESS_CONF, "Strip out comments"), 1210 AP_INIT_TAKE1("ProxyHTMLBufSize", ap_set_int_slot, 1211 (void*)APR_OFFSETOF(proxy_html_conf, bufsz), 1212 RSRC_CONF|ACCESS_CONF, "Buffer size"), 1213 AP_INIT_TAKE1("ProxyHTMLCharsetOut", ap_set_string_slot, 1214 (void*)APR_OFFSETOF(proxy_html_conf, charset_out), 1215 RSRC_CONF|ACCESS_CONF, "Usage: ProxyHTMLCharsetOut charset"), 1216 AP_INIT_FLAG("ProxyHTMLEnable", ap_set_flag_slot, 1217 (void*)APR_OFFSETOF(proxy_html_conf, enabled), 1218 RSRC_CONF|ACCESS_CONF, 1219 "Enable proxy-html and xml2enc filters"), 1220 { NULL } 1221}; 1222static int mod_proxy_html(apr_pool_t *p, apr_pool_t *p1, apr_pool_t *p2) 1223{ 1224 seek_meta = ap_pregcomp(p, "<meta[^>]*(http-equiv)[^>]*>", 1225 AP_REG_EXTENDED|AP_REG_ICASE); 1226 seek_content = apr_strmatch_precompile(p, "content", 0); 1227 memset(&sax, 0, sizeof(htmlSAXHandler)); 1228 sax.startElement = pstartElement; 1229 sax.endElement = pendElement; 1230 sax.characters = pcharacters; 1231 sax.comment = pcomment; 1232 sax.cdataBlock = pcdata; 1233 xml2enc_charset = APR_RETRIEVE_OPTIONAL_FN(xml2enc_charset); 1234 xml2enc_filter = APR_RETRIEVE_OPTIONAL_FN(xml2enc_filter); 1235 if (!xml2enc_charset) { 1236 ap_log_perror(APLOG_MARK, APLOG_NOTICE, 0, p2, APLOGNO(01425) 1237 "I18n support in mod_proxy_html requires mod_xml2enc. " 1238 "Without it, non-ASCII characters in proxied pages are " 1239 "likely to display incorrectly."); 1240 } 1241 1242 /* old_expr only needs to last the life of the config phase */ 1243 old_expr = ap_rxplus_compile(p1, "s/^(!)?(\\w+)((=)(.+))?$/reqenv('$2')$1$4'$5'/"); 1244 return OK; 1245} 1246static void proxy_html_insert(request_rec *r) 1247{ 1248 proxy_html_conf *cfg; 1249 cfg = ap_get_module_config(r->per_dir_config, &proxy_html_module); 1250 if (cfg->enabled) { 1251 if (xml2enc_filter) 1252 xml2enc_filter(r, NULL, ENCIO_INPUT_CHECKS); 1253 ap_add_output_filter("proxy-html", NULL, r, r->connection); 1254 } 1255} 1256static void proxy_html_hooks(apr_pool_t *p) 1257{ 1258 static const char *aszSucc[] = { "mod_filter.c", NULL }; 1259 ap_register_output_filter_protocol("proxy-html", proxy_html_filter, 1260 NULL, AP_FTYPE_RESOURCE, 1261 AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH); 1262 /* move this to pre_config so old_expr is available to interpret 1263 * old-style conditions on URL maps. 1264 */ 1265 ap_hook_pre_config(mod_proxy_html, NULL, NULL, APR_HOOK_MIDDLE); 1266 ap_hook_insert_filter(proxy_html_insert, NULL, aszSucc, APR_HOOK_MIDDLE); 1267} 1268 1269AP_DECLARE_MODULE(proxy_html) = { 1270 STANDARD20_MODULE_STUFF, 1271 proxy_html_config, 1272 proxy_html_merge, 1273 NULL, 1274 NULL, 1275 proxy_html_cmds, 1276 proxy_html_hooks 1277}; 1278