1/* Licensed to the Apache Software Foundation (ASF) under one or more 2 * contributor license agreements. See the NOTICE file distributed with 3 * this work for additional information regarding copyright ownership. 4 * The ASF licenses this file to You under the Apache License, Version 2.0 5 * (the "License"); you may not use this file except in compliance with 6 * the License. You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * mod_substitute.c: Perform content rewriting on the fly 19 */ 20 21#include "httpd.h" 22#include "http_config.h" 23#include "http_core.h" 24#include "apr_general.h" 25#include "apr_strings.h" 26#include "apr_strmatch.h" 27#include "apr_lib.h" 28#include "util_filter.h" 29#include "apr_buckets.h" 30#include "http_request.h" 31#define APR_WANT_STRFUNC 32#include "apr_want.h" 33 34static const char substitute_filter_name[] = "SUBSTITUTE"; 35 36module AP_MODULE_DECLARE_DATA substitute_module; 37 38typedef struct subst_pattern_t { 39 const apr_strmatch_pattern *pattern; 40 const ap_regex_t *regexp; 41 const char *replacement; 42 apr_size_t replen; 43 apr_size_t patlen; 44 int flatten; 45} subst_pattern_t; 46 47typedef struct { 48 apr_array_header_t *patterns; 49} subst_dir_conf; 50 51typedef struct { 52 apr_bucket_brigade *linebb; 53 apr_bucket_brigade *linesbb; 54 apr_bucket_brigade *passbb; 55 apr_bucket_brigade *pattbb; 56 apr_pool_t *tpool; 57} substitute_module_ctx; 58 59static void *create_substitute_dcfg(apr_pool_t *p, char *d) 60{ 61 subst_dir_conf *dcfg = 62 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf)); 63 64 dcfg->patterns = apr_array_make(p, 10, sizeof(subst_pattern_t)); 65 return dcfg; 66} 67 68static void *merge_substitute_dcfg(apr_pool_t *p, void *basev, void *overv) 69{ 70 subst_dir_conf *a = 71 (subst_dir_conf *) apr_pcalloc(p, sizeof(subst_dir_conf)); 72 subst_dir_conf *base = (subst_dir_conf *) basev; 73 subst_dir_conf *over = (subst_dir_conf *) overv; 74 75 a->patterns = apr_array_append(p, over->patterns, 76 base->patterns); 77 return a; 78} 79 80#define AP_MAX_BUCKETS 1000 81 82#define SEDSCAT(s1, s2, pool, buff, blen, repl) do { \ 83 if (!s1) { \ 84 s1 = apr_pstrmemdup(pool, buff, blen); \ 85 } \ 86 else { \ 87 s2 = apr_pstrmemdup(pool, buff, blen); \ 88 s1 = apr_pstrcat(pool, s1, s2, NULL); \ 89 } \ 90 s1 = apr_pstrcat(pool, s1, repl, NULL); \ 91} while (0) 92 93#define SEDRMPATBCKT(b, offset, tmp_b, patlen) do { \ 94 apr_bucket_split(b, offset); \ 95 tmp_b = APR_BUCKET_NEXT(b); \ 96 apr_bucket_split(tmp_b, patlen); \ 97 b = APR_BUCKET_NEXT(tmp_b); \ 98 apr_bucket_delete(tmp_b); \ 99} while (0) 100 101static void do_pattmatch(ap_filter_t *f, apr_bucket *inb, 102 apr_bucket_brigade *mybb, 103 apr_pool_t *tmp_pool) 104{ 105 int i; 106 int force_quick = 0; 107 ap_regmatch_t regm[AP_MAX_REG_MATCH]; 108 apr_size_t bytes; 109 apr_size_t len; 110 apr_size_t fbytes; 111 const char *buff; 112 const char *repl; 113 char *scratch; 114 char *p; 115 char *s1; 116 char *s2; 117 apr_bucket *b; 118 apr_bucket *tmp_b; 119 apr_pool_t *tpool; 120 121 subst_dir_conf *cfg = 122 (subst_dir_conf *) ap_get_module_config(f->r->per_dir_config, 123 &substitute_module); 124 subst_pattern_t *script; 125 126 APR_BRIGADE_INSERT_TAIL(mybb, inb); 127 128 script = (subst_pattern_t *) cfg->patterns->elts; 129 apr_pool_create(&tpool, tmp_pool); 130 scratch = NULL; 131 fbytes = 0; 132 /* 133 * Simple optimization. If we only have one pattern, then 134 * we can safely avoid the overhead of flattening 135 */ 136 if (cfg->patterns->nelts == 1) { 137 force_quick = 1; 138 } 139 for (i = 0; i < cfg->patterns->nelts; i++) { 140 for (b = APR_BRIGADE_FIRST(mybb); 141 b != APR_BRIGADE_SENTINEL(mybb); 142 b = APR_BUCKET_NEXT(b)) { 143 if (APR_BUCKET_IS_METADATA(b)) { 144 /* 145 * we should NEVER see this, because we should never 146 * be passed any, but "handle" it just in case. 147 */ 148 continue; 149 } 150 if (apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ) 151 == APR_SUCCESS) { 152 s1 = NULL; 153 if (script->pattern) { 154 while ((repl = apr_strmatch(script->pattern, buff, bytes))) 155 { 156 /* get offset into buff for pattern */ 157 len = (apr_size_t) (repl - buff); 158 if (script->flatten && !force_quick) { 159 /* 160 * We are flattening the buckets here, meaning 161 * that we don't do the fast bucket splits. 162 * Instead we copy over what the buckets would 163 * contain and use them. This is slow, since we 164 * are constanting allocing space and copying 165 * strings. 166 */ 167 SEDSCAT(s1, s2, tmp_pool, buff, len, 168 script->replacement); 169 } 170 else { 171 /* 172 * We now split off the stuff before the regex 173 * as its own bucket, then isolate the pattern 174 * and delete it. 175 */ 176 SEDRMPATBCKT(b, len, tmp_b, script->patlen); 177 /* 178 * Finally, we create a bucket that contains the 179 * replacement... 180 */ 181 tmp_b = apr_bucket_transient_create(script->replacement, 182 script->replen, 183 f->r->connection->bucket_alloc); 184 /* ... and insert it */ 185 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 186 } 187 /* now we need to adjust buff for all these changes */ 188 len += script->patlen; 189 bytes -= len; 190 buff += len; 191 } 192 if (script->flatten && s1 && !force_quick) { 193 /* 194 * we've finished looking at the bucket, so remove the 195 * old one and add in our new one 196 */ 197 s2 = apr_pstrmemdup(tmp_pool, buff, bytes); 198 s1 = apr_pstrcat(tmp_pool, s1, s2, NULL); 199 tmp_b = apr_bucket_transient_create(s1, strlen(s1), 200 f->r->connection->bucket_alloc); 201 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 202 apr_bucket_delete(b); 203 b = tmp_b; 204 } 205 206 } 207 else if (script->regexp) { 208 /* 209 * we need a null terminated string here :(. To hopefully 210 * save time and memory, we don't alloc for each run 211 * through, but only if we need to have a larger chunk 212 * to save the string to. So we keep track of how much 213 * we've allocated and only re-alloc when we need it. 214 * NOTE: this screams for a macro. 215 */ 216 if (!scratch || (bytes + 1 > fbytes)) { 217 fbytes = bytes + 1; 218 scratch = apr_palloc(tpool, fbytes); 219 } 220 /* reset pointer to the scratch space */ 221 p = scratch; 222 memcpy(p, buff, bytes); 223 p[bytes] = '\0'; 224 while (!ap_regexec(script->regexp, p, 225 AP_MAX_REG_MATCH, regm, 0)) { 226 /* first, grab the replacement string */ 227 repl = ap_pregsub(tmp_pool, script->replacement, p, 228 AP_MAX_REG_MATCH, regm); 229 if (script->flatten && !force_quick) { 230 SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so, repl); 231 } 232 else { 233 len = (apr_size_t) (regm[0].rm_eo - regm[0].rm_so); 234 SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len); 235 tmp_b = apr_bucket_transient_create(repl, 236 strlen(repl), 237 f->r->connection->bucket_alloc); 238 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 239 } 240 /* 241 * reset to past what we just did. buff now maps to b 242 * again 243 */ 244 p += regm[0].rm_eo; 245 } 246 if (script->flatten && s1 && !force_quick) { 247 s1 = apr_pstrcat(tmp_pool, s1, p, NULL); 248 tmp_b = apr_bucket_transient_create(s1, strlen(s1), 249 f->r->connection->bucket_alloc); 250 APR_BUCKET_INSERT_BEFORE(b, tmp_b); 251 apr_bucket_delete(b); 252 b = tmp_b; 253 } 254 255 } 256 else { 257 /* huh? */ 258 continue; 259 } 260 } 261 } 262 script++; 263 } 264 265 apr_pool_destroy(tpool); 266 267 return; 268} 269 270static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb) 271{ 272 apr_size_t bytes; 273 apr_size_t len; 274 apr_size_t fbytes; 275 const char *buff; 276 const char *nl = NULL; 277 char *bflat; 278 apr_bucket *b; 279 apr_bucket *tmp_b; 280 apr_bucket_brigade *tmp_bb = NULL; 281 apr_status_t rv; 282 283 substitute_module_ctx *ctx = f->ctx; 284 285 /* 286 * First time around? Create the saved bb that we used for each pass 287 * through. Note that we can also get here when we explicitly clear ctx, 288 * for error handling 289 */ 290 if (!ctx) { 291 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(*ctx)); 292 /* 293 * Create all the temporary brigades we need and reuse them to avoid 294 * creating them over and over again from r->pool which would cost a 295 * lot of memory in some cases. 296 */ 297 ctx->linebb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 298 ctx->linesbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 299 ctx->pattbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 300 /* 301 * Everything to be passed to the next filter goes in 302 * here, our pass brigade. 303 */ 304 ctx->passbb = apr_brigade_create(f->r->pool, f->c->bucket_alloc); 305 /* Create our temporary pool only once */ 306 apr_pool_create(&(ctx->tpool), f->r->pool); 307 apr_table_unset(f->r->headers_out, "Content-Length"); 308 } 309 310 /* 311 * Shortcircuit processing 312 */ 313 if (APR_BRIGADE_EMPTY(bb)) 314 return APR_SUCCESS; 315 316 /* 317 * Here's the concept: 318 * Read in the data and look for newlines. Once we 319 * find a full "line", add it to our working brigade. 320 * If we've finished reading the brigade and we have 321 * any left over data (not a "full" line), store that 322 * for the next pass. 323 * 324 * Note: anything stored in ctx->linebb for sure does not have 325 * a newline char, so we don't concat that bb with the 326 * new bb, since we would spending time searching for the newline 327 * in data we know it doesn't exist. So instead, we simply scan 328 * our current bb and, if we see a newline, prepend ctx->linebb 329 * to the front of it. This makes the code much less straight- 330 * forward (otherwise we could APR_BRIGADE_CONCAT(ctx->linebb, bb) 331 * and just scan for newlines and not bother with needing to know 332 * when ctx->linebb needs to be reset) but also faster. We'll take 333 * the speed. 334 * 335 * Note: apr_brigade_split_line would be nice here, but we 336 * really can't use it since we need more control and we want 337 * to re-use already read bucket data. 338 * 339 * See mod_include if still confused :) 340 */ 341 342 while ((b = APR_BRIGADE_FIRST(bb)) && (b != APR_BRIGADE_SENTINEL(bb))) { 343 if (APR_BUCKET_IS_EOS(b)) { 344 /* 345 * if we see the EOS, then we need to pass along everything we 346 * have. But if the ctx->linebb isn't empty, then we need to add 347 * that to the end of what we'll be passing. 348 */ 349 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 350 rv = apr_brigade_pflatten(ctx->linebb, &bflat, 351 &fbytes, ctx->tpool); 352 tmp_b = apr_bucket_transient_create(bflat, fbytes, 353 f->r->connection->bucket_alloc); 354 do_pattmatch(f, tmp_b, ctx->pattbb, ctx->tpool); 355 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb); 356 } 357 apr_brigade_cleanup(ctx->linebb); 358 APR_BUCKET_REMOVE(b); 359 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 360 } 361 /* 362 * No need to handle FLUSH buckets separately as we call 363 * ap_pass_brigade anyway at the end of the loop. 364 */ 365 else if (APR_BUCKET_IS_METADATA(b)) { 366 APR_BUCKET_REMOVE(b); 367 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 368 } 369 else { 370 /* 371 * We have actual "data" so read in as much as we can and start 372 * scanning and splitting from our read buffer 373 */ 374 rv = apr_bucket_read(b, &buff, &bytes, APR_BLOCK_READ); 375 if (rv != APR_SUCCESS || bytes == 0) { 376 apr_bucket_delete(b); 377 } 378 else { 379 int num = 0; 380 while (bytes > 0) { 381 nl = memchr(buff, APR_ASCII_LF, bytes); 382 if (nl) { 383 len = (apr_size_t) (nl - buff) + 1; 384 /* split *after* the newline */ 385 apr_bucket_split(b, len); 386 /* 387 * We've likely read more data, so bypass rereading 388 * bucket data and continue scanning through this 389 * buffer 390 */ 391 bytes -= len; 392 buff += len; 393 /* 394 * we need b to be updated for future potential 395 * splitting 396 */ 397 tmp_b = APR_BUCKET_NEXT(b); 398 APR_BUCKET_REMOVE(b); 399 /* 400 * Hey, we found a newline! Don't forget the old 401 * stuff that needs to be added to the front. So we 402 * add the split bucket to the end, flatten the whole 403 * bb, morph the whole shebang into a bucket which is 404 * then added to the tail of the newline bb. 405 */ 406 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 407 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b); 408 rv = apr_brigade_pflatten(ctx->linebb, &bflat, 409 &fbytes, ctx->tpool); 410 b = apr_bucket_transient_create(bflat, fbytes, 411 f->r->connection->bucket_alloc); 412 apr_brigade_cleanup(ctx->linebb); 413 } 414 do_pattmatch(f, b, ctx->pattbb, ctx->tpool); 415 /* 416 * Count how many buckets we have in ctx->passbb 417 * so far. Yes, this is correct we count ctx->passbb 418 * and not ctx->pattbb as we do not reset num on every 419 * iteration. 420 */ 421 for (b = APR_BRIGADE_FIRST(ctx->pattbb); 422 b != APR_BRIGADE_SENTINEL(ctx->pattbb); 423 b = APR_BUCKET_NEXT(b)) { 424 num++; 425 } 426 APR_BRIGADE_CONCAT(ctx->passbb, ctx->pattbb); 427 /* 428 * If the number of buckets in ctx->passbb reaches an 429 * "insane" level, we consume much memory for all the 430 * buckets as such. So lets flush them down the chain 431 * in this case and thus clear ctx->passbb. This frees 432 * the buckets memory for further processing. 433 * Usually this condition should not become true, but 434 * it is a safety measure for edge cases. 435 */ 436 if (num > AP_MAX_BUCKETS) { 437 b = apr_bucket_flush_create( 438 f->r->connection->bucket_alloc); 439 APR_BRIGADE_INSERT_TAIL(ctx->passbb, b); 440 rv = ap_pass_brigade(f->next, ctx->passbb); 441 apr_brigade_cleanup(ctx->passbb); 442 num = 0; 443 apr_pool_clear(ctx->tpool); 444 if (rv != APR_SUCCESS) 445 return rv; 446 } 447 b = tmp_b; 448 } 449 else { 450 /* 451 * no newline in whatever is left of this buffer so 452 * tuck data away and get next bucket 453 */ 454 APR_BUCKET_REMOVE(b); 455 APR_BRIGADE_INSERT_TAIL(ctx->linebb, b); 456 bytes = 0; 457 } 458 } 459 } 460 } 461 if (!APR_BRIGADE_EMPTY(ctx->passbb)) { 462 rv = ap_pass_brigade(f->next, ctx->passbb); 463 apr_brigade_cleanup(ctx->passbb); 464 if (rv != APR_SUCCESS) { 465 apr_pool_clear(ctx->tpool); 466 return rv; 467 } 468 } 469 apr_pool_clear(ctx->tpool); 470 } 471 472 /* Anything left we want to save/setaside for the next go-around */ 473 if (!APR_BRIGADE_EMPTY(ctx->linebb)) { 474 /* 475 * Provide ap_save_brigade with an existing empty brigade 476 * (ctx->linesbb) to avoid creating a new one. 477 */ 478 ap_save_brigade(f, &(ctx->linesbb), &(ctx->linebb), f->r->pool); 479 tmp_bb = ctx->linebb; 480 ctx->linebb = ctx->linesbb; 481 ctx->linesbb = tmp_bb; 482 } 483 484 return APR_SUCCESS; 485} 486 487static const char *set_pattern(cmd_parms *cmd, void *cfg, const char *line) 488{ 489 char *from = NULL; 490 char *to = NULL; 491 char *flags = NULL; 492 char *ourline; 493 char delim; 494 subst_pattern_t *nscript; 495 int is_pattern = 0; 496 int ignore_case = 0; 497 int flatten = 1; 498 ap_regex_t *r = NULL; 499 500 if (apr_tolower(*line) != 's') { 501 return "Bad Substitute format, must be an s/// pattern"; 502 } 503 ourline = apr_pstrdup(cmd->pool, line); 504 delim = *++ourline; 505 if (delim) 506 from = ++ourline; 507 if (from) { 508 if (*ourline != delim) { 509 while (*++ourline && *ourline != delim); 510 } 511 if (*ourline) { 512 *ourline = '\0'; 513 to = ++ourline; 514 } 515 } 516 if (to) { 517 if (*ourline != delim) { 518 while (*++ourline && *ourline != delim); 519 } 520 if (*ourline) { 521 *ourline = '\0'; 522 flags = ++ourline; 523 } 524 } 525 526 if (!delim || !from || !*from || !to) { 527 return "Bad Substitute format, must be a complete s/// pattern"; 528 } 529 530 if (flags) { 531 while (*flags) { 532 delim = apr_tolower(*flags); /* re-use */ 533 if (delim == 'i') 534 ignore_case = 1; 535 else if (delim == 'n') 536 is_pattern = 1; 537 else if (delim == 'f') 538 flatten = 1; 539 else if (delim == 'q') 540 flatten = 0; 541 else 542 return "Bad Substitute flag, only s///[infq] are supported"; 543 flags++; 544 } 545 } 546 547 /* first see if we can compile the regex */ 548 if (!is_pattern) { 549 r = ap_pregcomp(cmd->pool, from, AP_REG_EXTENDED | 550 (ignore_case ? AP_REG_ICASE : 0)); 551 if (!r) 552 return "Substitute could not compile regex"; 553 } 554 nscript = apr_array_push(((subst_dir_conf *) cfg)->patterns); 555 /* init the new entries */ 556 nscript->pattern = NULL; 557 nscript->regexp = NULL; 558 nscript->replacement = NULL; 559 nscript->patlen = 0; 560 561 if (is_pattern) { 562 nscript->patlen = strlen(from); 563 nscript->pattern = apr_strmatch_precompile(cmd->pool, from, 564 !ignore_case); 565 } 566 else { 567 nscript->regexp = r; 568 } 569 570 nscript->replacement = to; 571 nscript->replen = strlen(to); 572 nscript->flatten = flatten; 573 574 return NULL; 575} 576 577#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH 578static void register_hooks(apr_pool_t *pool) 579{ 580 ap_register_output_filter(substitute_filter_name, substitute_filter, 581 NULL, AP_FTYPE_RESOURCE); 582} 583 584static const command_rec substitute_cmds[] = { 585 AP_INIT_TAKE1("Substitute", set_pattern, NULL, OR_ALL, 586 "Pattern to filter the response content (s/foo/bar/[inf])"), 587 {NULL} 588}; 589 590module AP_MODULE_DECLARE_DATA substitute_module = { 591 STANDARD20_MODULE_STUFF, 592 create_substitute_dcfg, /* dir config creater */ 593 merge_substitute_dcfg, /* dir merger --- default is to override */ 594 NULL, /* server config */ 595 NULL, /* merge server config */ 596 substitute_cmds, /* command table */ 597 register_hooks /* register hooks */ 598}; 599