1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_substitute.c: Perform content rewriting on the fly 19 */ 20 21#include "httpd.h" 22#include "http_config.h" 23#include "http_core.h" 24#include "http_log.h" 25#include "apr_general.h" 26#include "apr_strings.h" 27#include "apr_strmatch.h" 28#include "apr_lib.h" 29#include "util_filter.h" 30#include "util_varbuf.h" 31#include "apr_buckets.h" 32#include "http_request.h" 33#define APR_WANT_STRFUNC 34#include "apr_want.h" 35 36static const char substitute_filter_name[] = "SUBSTITUTE"; 37 38module AP_MODULE_DECLARE_DATA substitute_module; 39 40typedef struct subst_pattern_t { 41 const apr_strmatch_pattern *pattern; 42 const ap_regex_t *regexp; 43 const char *replacement; 44 apr_size_t replen; 45 apr_size_t patlen; 46 int flatten; 47} subst_pattern_t; 48 49typedef struct { 50 apr_array_header_t *patterns; 51} subst_dir_conf; 52 53typedef struct { 54 apr_bucket_brigade *linebb; 55 apr_bucket_brigade *linesbb; 56 apr_bucket_brigade *passbb; 57 apr_bucket_brigade *pattbb; 58 apr_pool_t *tpool; 59} substitute_module_ctx; 60 61static void *create_substitute_dcfg(apr_pool_t *p, char *d) 62{ 63 subst_dir_conf *dcfg = 64 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf)); 65 66 dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t)); 67 return dcfg; 68} 69 70static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv) 71{ 72 subst_dir_conf *a = 73 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf)); 74 subst_dir_conf *base = (subst_dir_conf *) basev; 75 subst_dir_conf *over = (subst_dir_conf *) overv; 76 77 a->patterns = apr_array_append(p, over->patterns, 78 base->patterns); 79 return a; 80} 81 82#define AP_MAX_BUCKETS 1000 83/* 84 * We want to limit the memory usage in a way that is predictable. Therefore 85 * we limit the resulting length of the line to this value. 86 */ 87#define AP_SUBST_MAX_LINE_LENGTH (128*MAX_STRING_LEN) 88 89#define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \ 90 apr_bucket_split(b, offset); \ 91 tmp_b = APR_BUCKET_NEXT(b); \ 92 apr_bucket_split(tmp_b, patlen); \ 93 b = APR_BUCKET_NEXT(tmp_b); \ 94 apr_bucket_delete(tmp_b); \ 95} while (0) 96 97static apr_status_t do_pattmatch(ap_filter_t *f, apr_bucket *inb, 98 apr_bucket_brigade *mybb, 99 apr_pool_t *pool) 100{ 101 int i; 102 int force_quick = 0; 103 ap_regmatch_t regm[AP_MAX_REG_MATCH]; 104 apr_size_t bytes; 105 apr_size_t len; 106 const char *buff; 107 struct ap_varbuf vb; 108 apr_bucket *b; 109 apr_bucket *tmp_b; 110 111 subst_dir_conf *cfg = 112 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config, 113 &substitute_module); 114 subst_pattern_t *script; 115 116 APR_BRIGADE_INSERT_TAIL(mybb, inb); 117 ap_varbuf_init(pool, &vb, 0); 118 119 script = (subst_pattern_t *) cfg->patterns->elts; 120 /* 121 * Simple optimization. If we only have one pattern, then 122 * we can safely avoid the overhead of flattening 123 */ 124 if (cfg->patterns->nelts == 1) { 125 force_quick = 1; 126 } 127 for (i = 0; i < cfg->patterns->nelts; i++) { 128 for (b = APR_BRIGADE_FIRST(mybb); 129 b != APR_BRIGADE_SENTINEL(mybb); 130 b = APR_BUCKET_NEXT(b)) { 131 if (APR_BUCKET_IS_METADATA(b)) { 132 /* 133 * we should NEVER see this, because we should never 134 * be passed any, but "handle" it just in case. 135 */ 136 continue; 137 } 138 if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ) 139 == APR_SUCCESS) { 140 int have_match = 0; 141 vb.strlen = 0; 142 if (script->pattern) { 143 const char *repl; 144 /* 145 * space_left counts how many bytes we have left until the 146 * line length reaches AP_SUBST_MAX_LINE_LENGTH. 147 */ 148 apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH; 149 apr_size_t repl_len = strlen(script->replacement); 150 while ((repl = apr_strmatch(script->pattern, buff, bytes))) 151 { 152 have_match = 1; 153 /* get offset into buff for pattern */ 154 len = (apr_size_t) (repl - buff); 155 if (script->flatten && !force_quick) { 156 /* 157 * We are flattening the buckets here, meaning 158 * that we don't do the fast bucket splits. 159 * Instead we copy over what the buckets would 160 * contain and use them. This is slow, since we 161 * are constanting allocing space and copying 162 * strings. 163 */ 164 if (vb.strlen + len + repl_len > AP_SUBST_MAX_LINE_LENGTH) 165 return APR_ENOMEM; 166 ap_varbuf_strmemcat(&vb, buff, len); 167 ap_varbuf_strmemcat(&vb, script->replacement, repl_len); 168 } 169 else { 170 /* 171 * The string before the match but after the 172 * previous match (if any) has length 'len'. 173 * Check if we still have space for this string and 174 * the replacement string. 175 */ 176 if (space_left < len + repl_len) 177 return APR_ENOMEM; 178 space_left -= len + repl_len; 179 /* 180 * We now split off the string before the match 181 * as its own bucket, then isolate the matched 182 * string and delete it. 183 */ 184 SEDRMPATBCKT(b, len, tmp_b, script->patlen); 185 /* 186 * Finally, we create a bucket that contains the 187 * replacement... 188 */ 189 tmp_b = apr_bucket_transient_create(script->replacement, 190 script->replen, 191 f->r->connection->bucket_alloc); 192 /* ... and insert it */ 193 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 194 } 195 /* now we need to adjust buff for all these changes */ 196 len += script->patlen; 197 bytes -= len; 198 buff += len; 199 } 200 if (have_match) { 201 if (script->flatten && !force_quick) { 202 /* XXX: we should check for AP_MAX_BUCKETS here and 203 * XXX: call ap_pass_brigade accordingly 204 */ 205 char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0, 206 buff, bytes, &len); 207 tmp_b = apr_bucket_pool_create(copy, len, pool, 208 f->r->connection->bucket_alloc); 209 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 210 apr_bucket_delete(b); 211 b = tmp_b; 212 } 213 else { 214 /* 215 * We want the behaviour to be predictable. 216 * Therefore we try to always error out if the 217 * line length is larger than the limit, 218 * regardless of the content of the line. So, 219 * let's check if the remaining non-matching 220 * string does not exceed the limit. 221 */ 222 if (space_left < b->length) 223 return APR_ENOMEM; 224 } 225 } 226 } 227 else if (script->regexp) { 228 int left = bytes; 229 const char *pos = buff; 230 char *repl; 231 apr_size_t space_left = AP_SUBST_MAX_LINE_LENGTH; 232 while (!ap_regexec_len(script->regexp, pos, left, 233 AP_MAX_REG_MATCH, regm, 0)) { 234 apr_status_t rv; 235 have_match = 1; 236 if (script->flatten && !force_quick) { 237 /* copy bytes before the match */ 238 if (regm[0].rm_so > 0) 239 ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so); 240 /* add replacement string */ 241 rv = ap_varbuf_regsub(&vb, script->replacement, pos, 242 AP_MAX_REG_MATCH, regm, 243 AP_SUBST_MAX_LINE_LENGTH - vb.strlen); 244 if (rv != APR_SUCCESS) 245 return rv; 246 } 247 else { 248 apr_size_t repl_len; 249 /* acount for string before the match */ 250 if (space_left <= regm[0].rm_so) 251 return APR_ENOMEM; 252 space_left -= regm[0].rm_so; 253 rv = ap_pregsub_ex(pool, &repl, 254 script->replacement, pos, 255 AP_MAX_REG_MATCH, regm, 256 space_left); 257 if (rv != APR_SUCCESS) 258 return rv; 259 repl_len = strlen(repl); 260 space_left -= repl_len; 261 len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so); 262 SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len); 263 tmp_b = apr_bucket_transient_create(repl, repl_len, 264 f->r->connection->bucket_alloc); 265 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 266 } 267 /* 268 * reset to past what we just did. pos now maps to b 269 * again 270 */ 271 pos += regm[0].rm_eo; 272 left -= regm[0].rm_eo; 273 } 274 if (have_match && script->flatten && !force_quick) { 275 char *copy; 276 /* Copy result plus the part after the last match into 277 * a bucket. 278 */ 279 copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left, 280 &len); 281 tmp_b = apr_bucket_pool_create(copy, len, pool, 282 f->r->connection->bucket_alloc); 283 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 284 apr_bucket_delete(b); 285 b = tmp_b; 286 } 287 } 288 else { 289 ap_assert(0); 290 continue; 291 } 292 } 293 } 294 script++; 295 } 296 ap_varbuf_free(&vb); 297 return APR_SUCCESS; 298} 299 300static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb) 301{ 302 apr_size_t bytes; 303 apr_size_t len; 304 apr_size_t fbytes; 305 const char *buff; 306 const char *nl = NULL; 307 char *bflat; 308 apr_bucket *b; 309 apr_bucket *tmp_b; 310 apr_bucket_brigade *tmp_bb = NULL; 311 apr_status_t rv; 312 313 substitute_module_ctx *ctx = f->ctx; 314 315 /* 316 * First time around? Create the saved bb that we used for each pass 317 * through. Note that we can also get here when we explicitly clear ctx, 318 * for error handling 319 */ 320 if (!ctx) { 321 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx)); 322 /* 323 * Create all the temporary brigades we need and reuse them to avoid 324 * creating them over and over again from r->pool which would cost a 325 * lot of memory in some cases. 326 */ 327 ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 328 ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 329 ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 330 /* 331 * Everything to be passed to the next filter goes in 332 * here, our pass brigade. 333 */ 334 ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 335 /* Create our temporary pool only once */ 336 apr_pool_create(&(ctx->tpool), f->r->pool); 337 apr_table_unset(f->r->headers_out, "Content-Length"); 338 } 339 340 /* 341 * Shortcircuit processing 342 */ 343 if (APR_BRIGADE_EMPTY(bb)) 344 return APR_SUCCESS; 345 346 /* 347 * Here's the concept: 348 * Read in the data and look for newlines. Once we 349 * find a full "line", add it to our working brigade. 350 * If we've finished reading the brigade and we have 351 * any left over data (not a "full" line), store that 352 * for the next pass. 353 * 354 * Note: anything stored in ctx->linebb for sure does not have 355 * a newline char, so we don't concat that bb with the 356 * new bb, since we would spending time searching for the newline 357 * in data we know it doesn't exist. So instead, we simply scan 358 * our current bb and, if we see a newline, prepend ctx->linebb 359 * to the front of it. This makes the code much less straight- 360 * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb) 361 * and just scan for newlines and not bother with needing to know 362 * when ctx->linebb needs to be reset) but also faster. We'll take 363 * the speed. 364 * 365 * Note: apr_brigade_split_line would be nice here, but we 366 * really can't use it since we need more control and we want 367 * to re-use already read bucket data. 368 * 369 * See mod_include if still confused :) 370 */ 371 372 while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) { 373 if (APR_BUCKET_IS_EOS(b)) { 374 /* 375 * if we see the EOS, then we need to pass along everything we 376 * have. But if the ctx->linebb isn't empty, then we need to add 377 * that to the end of what we'll be passing. 378 */ 379 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 380 rv = apr_brigade_pflatten(ctx->linebb, &bflat, 381 &fbytes, ctx->tpool); 382 if (rv != APR_SUCCESS) 383 goto err; 384 if (fbytes > AP_SUBST_MAX_LINE_LENGTH) { 385 rv = APR_ENOMEM; 386 goto err; 387 } 388 tmp_b = apr_bucket_transient_create(bflat, fbytes, 389 f->r->connection->bucket_alloc); 390 rv = do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool); 391 if (rv != APR_SUCCESS) 392 goto err; 393 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb); 394 } 395 apr_brigade_cleanup(ctx->linebb); 396 APR_BUCKET_REMOVE(b); 397 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 398 } 399 /* 400 * No need to handle FLUSH buckets separately as we call 401 * ap_pass_brigade anyway at the end of the loop. 402 */ 403 else if (APR_BUCKET_IS_METADATA(b)) { 404 APR_BUCKET_REMOVE(b); 405 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 406 } 407 else { 408 /* 409 * We have actual "data" so read in as much as we can and start 410 * scanning and splitting from our read buffer 411 */ 412 rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ); 413 if (rv != APR_SUCCESS || bytes == 0) { 414 apr_bucket_delete(b); 415 } 416 else { 417 int num = 0; 418 while (bytes > 0) { 419 nl = memchr(buff, APR_ASCII_LF, bytes); 420 if (nl) { 421 len = (apr_size_t) (nl - buff) + 1; 422 /* split *after* the newline */ 423 apr_bucket_split(b, len); 424 /* 425 * We've likely read more data, so bypass rereading 426 * bucket data and continue scanning through this 427 * buffer 428 */ 429 bytes -= len; 430 buff += len; 431 /* 432 * we need b to be updated for future potential 433 * splitting 434 */ 435 tmp_b = APR_BUCKET_NEXT(b); 436 APR_BUCKET_REMOVE(b); 437 /* 438 * Hey, we found a newline! Don't forget the old 439 * stuff that needs to be added to the front. So we 440 * add the split bucket to the end, flatten the whole 441 * bb, morph the whole shebang into a bucket which is 442 * then added to the tail of the newline bb. 443 */ 444 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 445 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b); 446 rv = apr_brigade_pflatten(ctx->linebb, &bflat, 447 &fbytes, ctx->tpool); 448 if (rv != APR_SUCCESS) 449 goto err; 450 if (fbytes > AP_SUBST_MAX_LINE_LENGTH) { 451 /* Avoid pflattening further lines, we will 452 * abort later on anyway. 453 */ 454 rv = APR_ENOMEM; 455 goto err; 456 } 457 b = apr_bucket_transient_create(bflat, fbytes, 458 f->r->connection->bucket_alloc); 459 apr_brigade_cleanup(ctx->linebb); 460 } 461 rv = do_pattmatch(f, b, ctx->pattbb, ctx->tpool); 462 if (rv != APR_SUCCESS) 463 goto err; 464 /* 465 * Count how many buckets we have in ctx->passbb 466 * so far. Yes, this is correct we count ctx->passbb 467 * and not ctx->pattbb as we do not reset num on every 468 * iteration. 469 */ 470 for (b = APR_BRIGADE_FIRST(ctx->pattbb); 471 b != APR_BRIGADE_SENTINEL(ctx->pattbb); 472 b = APR_BUCKET_NEXT(b)) { 473 num++; 474 } 475 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb); 476 /* 477 * If the number of buckets in ctx->passbb reaches an 478 * "insane" level, we consume much memory for all the 479 * buckets as such. So lets flush them down the chain 480 * in this case and thus clear ctx->passbb. This frees 481 * the buckets memory for further processing. 482 * Usually this condition should not become true, but 483 * it is a safety measure for edge cases. 484 */ 485 if (num > AP_MAX_BUCKETS) { 486 b = apr_bucket_flush_create( 487 f->r->connection->bucket_alloc); 488 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 489 rv = ap_pass_brigade(f->next, ctx->passbb); 490 apr_brigade_cleanup(ctx->passbb); 491 num = 0; 492 apr_pool_clear(ctx->tpool); 493 if (rv != APR_SUCCESS) 494 goto err; 495 } 496 b = tmp_b; 497 } 498 else { 499 /* 500 * no newline in whatever is left of this buffer so 501 * tuck data away and get next bucket 502 */ 503 APR_BUCKET_REMOVE(b); 504 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b); 505 bytes = 0; 506 } 507 } 508 } 509 } 510 if (!APR_BRIGADE_EMPTY(ctx->passbb)) { 511 rv = ap_pass_brigade(f->next, ctx->passbb); 512 apr_brigade_cleanup(ctx->passbb); 513 if (rv != APR_SUCCESS) 514 goto err; 515 } 516 apr_pool_clear(ctx->tpool); 517 } 518 519 /* Anything left we want to save/setaside for the next go-around */ 520 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 521 /* 522 * Provide ap_save_brigade with an existing empty brigade 523 * (ctx->linesbb) to avoid creating a new one. 524 */ 525 ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool); 526 tmp_bb = ctx->linebb; 527 ctx->linebb = ctx->linesbb; 528 ctx->linesbb = tmp_bb; 529 } 530 531 return APR_SUCCESS; 532err: 533 if (rv == APR_ENOMEM) 534 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01328) "Line too long, URI %s", 535 f->r->uri); 536 apr_pool_clear(ctx->tpool); 537 return rv; 538} 539 540static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line) 541{ 542 char *from = NULL; 543 char *to = NULL; 544 char *flags = NULL; 545 char *ourline; 546 char delim; 547 subst_pattern_t *nscript; 548 int is_pattern = 0; 549 int ignore_case = 0; 550 int flatten = 1; 551 ap_regex_t *r = NULL; 552 553 if (apr_tolower(*line) != 's') { 554 return "Bad Substitute format, must be an s/// pattern"; 555 } 556 ourline = apr_pstrdup(cmd->pool, line); 557 delim = *++ourline; 558 if (delim) 559 from = ++ourline; 560 if (from) { 561 if (*ourline != delim) { 562 while (*++ourline && *ourline != delim); 563 } 564 if (*ourline) { 565 *ourline = '\0'; 566 to = ++ourline; 567 } 568 } 569 if (to) { 570 if (*ourline != delim) { 571 while (*++ourline && *ourline != delim); 572 } 573 if (*ourline) { 574 *ourline = '\0'; 575 flags = ++ourline; 576 } 577 } 578 579 if (!delim || !from || !*from || !to) { 580 return "Bad Substitute format, must be a complete s/// pattern"; 581 } 582 583 if (flags) { 584 while (*flags) { 585 delim = apr_tolower(*flags); /* re-use */ 586 if (delim == 'i') 587 ignore_case = 1; 588 else if (delim == 'n') 589 is_pattern = 1; 590 else if (delim == 'f') 591 flatten = 1; 592 else if (delim == 'q') 593 flatten = 0; 594 else 595 return "Bad Substitute flag, only s///[infq] are supported"; 596 flags++; 597 } 598 } 599 600 /* first see if we can compile the regex */ 601 if (!is_pattern) { 602 r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED | 603 (ignore_case ? AP_REG_ICASE : 0)); 604 if (!r) 605 return "Substitute could not compile regex"; 606 } 607 nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns); 608 /* init the new entries */ 609 nscript->pattern = NULL; 610 nscript->regexp = NULL; 611 nscript->replacement = NULL; 612 nscript->patlen = 0; 613 614 if (is_pattern) { 615 nscript->patlen = strlen(from); 616 nscript->pattern = apr_strmatch_precompile(cmd->pool, from, 617 !ignore_case); 618 } 619 else { 620 nscript->regexp = r; 621 } 622 623 nscript->replacement = to; 624 nscript->replen = strlen(to); 625 nscript->flatten = flatten; 626 627 return NULL; 628} 629 630#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH 631static void register_hooks(apr_pool_t *pool) 632{ 633 ap_register_output_filter(substitute_filter_name, substitute_filter, 634 NULL, AP_FTYPE_RESOURCE); 635} 636 637static const command_rec substitute_cmds[] = { 638 AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_ALL, 639 "Pattern to filter the response content (s/foo/bar/[inf])"), 640 {NULL} 641}; 642 643AP_DECLARE_MODULE(substitute) = { 644 STANDARD20_MODULE_STUFF, 645 create_substitute_dcfg, /* dir config creater */ 646 merge_substitute_dcfg, /* dir merger --- default is to override */ 647 NULL, /* server config */ 648 NULL, /* merge server config */ 649 substitute_cmds, /* command table */ 650 register_hooks /* register hooks */ 651}; 652