1/* Copyright (c) 2007-11, WebThing Ltd 2 * Copyright (c) 2011-, The Apache Software Foundation 3 * 4 * Licensed to the Apache Software Foundation (ASF) under one or more 5 * contributor license agreements. See the NOTICE file distributed with 6 * this work for additional information regarding copyright ownership. 7 * The ASF licenses this file to You under the Apache License, Version 2.0 8 * (the "License"); you may not use this file except in compliance with 9 * the License. You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 */ 19 20#if defined(WIN32) 21#define XML2ENC_DECLARE_EXPORT 22#endif 23 24#include <ctype.h> 25 26/* libxml2 */ 27#include <libxml/encoding.h> 28 29#include "http_protocol.h" 30#include "http_config.h" 31#include "http_log.h" 32#include "apr_strings.h" 33#include "apr_xlate.h" 34 35#include "apr_optional.h" 36#include "mod_xml2enc.h" 37 38module AP_MODULE_DECLARE_DATA xml2enc_module; 39 40#define BUFLEN 8192 41#define BUF_MIN 4096 42#define APR_BRIGADE_DO(b,bb) for (b = APR_BRIGADE_FIRST(bb); \ 43 b != APR_BRIGADE_SENTINEL(bb); \ 44 b = APR_BUCKET_NEXT(b)) 45 46#define ENC_INITIALISED 0x100 47#define ENC_SEEN_EOS 0x200 48#define ENC_SKIPTO ENCIO_SKIPTO 49 50#define HAVE_ENCODING(enc) \ 51 (((enc)!=XML_CHAR_ENCODING_NONE)&&((enc)!=XML_CHAR_ENCODING_ERROR)) 52 53/* 54 * XXX: Check all those ap_assert()s ans replace those that should not happen 55 * XXX: with AP_DEBUG_ASSERT and those that may happen with proper error 56 * XXX: handling. 57 */ 58typedef struct { 59 xmlCharEncoding xml2enc; 60 char* buf; 61 apr_size_t bytes; 62 apr_xlate_t* convset; 63 unsigned int flags; 64 apr_off_t bblen; 65 apr_bucket_brigade* bbnext; 66 apr_bucket_brigade* bbsave; 67 const char* encoding; 68} xml2ctx; 69 70typedef struct { 71 const char* default_charset; 72 xmlCharEncoding default_encoding; 73 apr_array_header_t* skipto; 74} xml2cfg; 75 76typedef struct { 77 const char* val; 78} tattr; 79 80static ap_regex_t* seek_meta_ctype; 81static ap_regex_t* seek_charset; 82 83static apr_status_t xml2enc_filter(request_rec* r, const char* enc, 84 unsigned int mode) 85{ 86 /* set up a ready-initialised ctx to convert to enc, and insert filter */ 87 apr_xlate_t* convset; 88 apr_status_t rv; 89 unsigned int flags = (mode ^ ENCIO); 90 if ((mode & ENCIO) == ENCIO_OUTPUT) { 91 rv = apr_xlate_open(&convset, enc, "UTF-8", r->pool); 92 flags |= ENC_INITIALISED; 93 } 94 else if ((mode & ENCIO) == ENCIO_INPUT) { 95 rv = apr_xlate_open(&convset, "UTF-8", enc, r->pool); 96 flags |= ENC_INITIALISED; 97 } 98 else if ((mode & ENCIO) == ENCIO_INPUT_CHECKS) { 99 convset = NULL; 100 rv = APR_SUCCESS; /* we'll initialise later by sniffing */ 101 } 102 else { 103 rv = APR_EGENERAL; 104 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01426) 105 "xml2enc: bad mode %x", mode); 106 } 107 if (rv == APR_SUCCESS) { 108 xml2ctx* ctx = apr_pcalloc(r->pool, sizeof(xml2ctx)); 109 ctx->flags = flags; 110 if (flags & ENC_INITIALISED) { 111 ctx->convset = convset; 112 ctx->bblen = BUFLEN; 113 ctx->buf = apr_palloc(r->pool, (apr_size_t)ctx->bblen); 114 } 115 ap_add_output_filter("xml2enc", ctx, r, r->connection); 116 } 117 else { 118 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, r, APLOGNO(01427) 119 "xml2enc: Charset %s not supported.", enc) ; 120 } 121 return rv; 122} 123 124/* This needs to operate only when we're using htmlParser */ 125/* Different modules may apply different rules here. Ho, hum. */ 126static void fix_skipto(request_rec* r, xml2ctx* ctx) 127{ 128 apr_status_t rv; 129 xml2cfg* cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module); 130 if ((cfg->skipto != NULL) && (ctx->flags | ENC_SKIPTO)) { 131 int found = 0; 132 char* p = ap_strchr(ctx->buf, '<'); 133 tattr* starts = (tattr*) cfg->skipto->elts; 134 while (!found && p && *p) { 135 int i; 136 for (i = 0; i < cfg->skipto->nelts; ++i) { 137 if (!strncasecmp(p+1, starts[i].val, strlen(starts[i].val))) { 138 /* found a starting element. Strip all that comes before. */ 139 apr_bucket* b; 140 apr_bucket* bstart; 141 rv = apr_brigade_partition(ctx->bbsave, (p-ctx->buf), 142 &bstart); 143 ap_assert(rv == APR_SUCCESS); 144 while (b = APR_BRIGADE_FIRST(ctx->bbsave), b != bstart) { 145 APR_BUCKET_REMOVE(b); 146 apr_bucket_destroy(b); 147 } 148 ctx->bytes -= (p-ctx->buf); 149 ctx->buf = p ; 150 found = 1; 151 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01428) 152 "Skipped to first <%s> element", 153 starts[i].val) ; 154 break; 155 } 156 } 157 p = ap_strchr(p+1, '<'); 158 } 159 if (p == NULL) { 160 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01429) 161 "Failed to find start of recognised HTML!"); 162 } 163 } 164} 165static void sniff_encoding(request_rec* r, xml2ctx* ctx) 166{ 167 xml2cfg* cfg = NULL; /* initialise to shut compiler warnings up */ 168 char* p ; 169 apr_bucket* cutb; 170 apr_bucket* cute; 171 apr_bucket* b; 172 ap_regmatch_t match[2] ; 173 apr_status_t rv; 174 const char* ctype = r->content_type; 175 176 if (ctype) { 177 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01430) 178 "Content-Type is %s", ctype) ; 179 180 /* If we've got it in the HTTP headers, there's nothing to do */ 181 if (ctype && (p = ap_strcasestr(ctype, "charset=") , p != NULL)) { 182 p += 8 ; 183 if (ctx->encoding = apr_pstrndup(r->pool, p, strcspn(p, " ;") ), 184 ctx->encoding) { 185 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01431) 186 "Got charset %s from HTTP headers", ctx->encoding) ; 187 ctx->xml2enc = xmlParseCharEncoding(ctx->encoding); 188 } 189 } 190 } 191 192 /* to sniff, first we look for BOM */ 193 if (ctx->xml2enc == XML_CHAR_ENCODING_NONE) { 194 ctx->xml2enc = xmlDetectCharEncoding((const xmlChar*)ctx->buf, 195 ctx->bytes); 196 if (HAVE_ENCODING(ctx->xml2enc)) { 197 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01432) 198 "Got charset from XML rules.") ; 199 ctx->encoding = xmlGetCharEncodingName(ctx->xml2enc); 200 } 201 } 202 203 /* If none of the above, look for a META-thingey */ 204 /* also we're probably about to invalidate it, so we remove it. */ 205 if (ap_regexec(seek_meta_ctype, ctx->buf, 1, match, 0) == 0 ) { 206 /* get markers on the start and end of the match */ 207 rv = apr_brigade_partition(ctx->bbsave, match[0].rm_eo, &cute); 208 ap_assert(rv == APR_SUCCESS); 209 rv = apr_brigade_partition(ctx->bbsave, match[0].rm_so, &cutb); 210 ap_assert(rv == APR_SUCCESS); 211 /* now set length of useful buf for start-of-data hooks */ 212 ctx->bytes = match[0].rm_so; 213 if (ctx->encoding == NULL) { 214 p = apr_pstrndup(r->pool, ctx->buf + match[0].rm_so, 215 match[0].rm_eo - match[0].rm_so) ; 216 if (ap_regexec(seek_charset, p, 2, match, 0) == 0) { 217 if (ctx->encoding = apr_pstrndup(r->pool, p+match[1].rm_so, 218 match[1].rm_eo - match[1].rm_so), 219 ctx->encoding) { 220 ctx->xml2enc = xmlParseCharEncoding(ctx->encoding); 221 if (HAVE_ENCODING(ctx->xml2enc)) 222 ap_log_rerror(APLOG_MARK, APLOG_INFO, 0, r, APLOGNO(01433) 223 "Got charset %s from HTML META", ctx->encoding) ; 224 } 225 } 226 } 227 228 /* cut out the <meta> we're invalidating */ 229 while (cutb != cute) { 230 b = APR_BUCKET_NEXT(cutb); 231 APR_BUCKET_REMOVE(cutb); 232 apr_bucket_destroy(cutb); 233 cutb = b; 234 } 235 /* and leave a string */ 236 ctx->buf[ctx->bytes] = 0; 237 } 238 239 /* either it's set to something we found or it's still the default */ 240 /* Aaargh! libxml2 has undocumented <META-crap> support. So this fails 241 * if metafix is not active. Have to make it conditional. 242 * 243 * No, that means no-metafix breaks things. Deal immediately with 244 * this particular instance of metafix. 245 */ 246 if (!HAVE_ENCODING(ctx->xml2enc)) { 247 cfg = ap_get_module_config(r->per_dir_config, &xml2enc_module); 248 if (!ctx->encoding) { 249 ctx->encoding = cfg->default_charset?cfg->default_charset:"ISO-8859-1"; 250 } 251 /* Unsupported charset. Can we get (iconv) support through apr_xlate? */ 252 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, r, APLOGNO(01434) 253 "Charset %s not supported by libxml2; trying apr_xlate", 254 ctx->encoding); 255 if (apr_xlate_open(&ctx->convset, "UTF-8", ctx->encoding, r->pool) 256 == APR_SUCCESS) { 257 ctx->xml2enc = XML_CHAR_ENCODING_UTF8 ; 258 } else { 259 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r, APLOGNO(01435) 260 "Charset %s not supported. Consider aliasing it?", 261 ctx->encoding) ; 262 } 263 } 264 265 if (!HAVE_ENCODING(ctx->xml2enc)) { 266 /* Use configuration default as a last resort */ 267 ap_log_rerror(APLOG_MARK, APLOG_WARNING, 0, r, APLOGNO(01436) 268 "No usable charset information; using configuration default"); 269 ctx->xml2enc = (cfg->default_encoding == XML_CHAR_ENCODING_NONE) 270 ? XML_CHAR_ENCODING_8859_1 : cfg->default_encoding ; 271 } 272 if (ctype && ctx->encoding) { 273 if (ap_regexec(seek_charset, ctype, 2, match, 0)) { 274 r->content_type = apr_pstrcat(r->pool, ctype, ";charset=utf-8", 275 NULL); 276 } else { 277 char* str = apr_palloc(r->pool, strlen(r->content_type) + 13 278 - (match[0].rm_eo - match[0].rm_so) + 1); 279 memcpy(str, r->content_type, match[1].rm_so); 280 memcpy(str + match[1].rm_so, "utf-8", 5); 281 strcpy(str + match[1].rm_so + 5, r->content_type+match[1].rm_eo); 282 r->content_type = str; 283 } 284 } 285} 286 287static apr_status_t xml2enc_filter_init(ap_filter_t* f) 288{ 289 xml2ctx* ctx; 290 if (!f->ctx) { 291 xml2cfg* cfg = ap_get_module_config(f->r->per_dir_config, 292 &xml2enc_module); 293 f->ctx = ctx = apr_pcalloc(f->r->pool, sizeof(xml2ctx)); 294 ctx->xml2enc = XML_CHAR_ENCODING_NONE; 295 if (cfg->skipto != NULL) { 296 ctx->flags |= ENC_SKIPTO; 297 } 298 } 299 return APR_SUCCESS; 300} 301static apr_status_t xml2enc_ffunc(ap_filter_t* f, apr_bucket_brigade* bb) 302{ 303 xml2ctx* ctx = f->ctx; 304 apr_status_t rv; 305 apr_bucket* b; 306 apr_bucket* bstart; 307 apr_size_t insz = 0; 308 char *ctype; 309 char *p; 310 311 if (!ctx || !f->r->content_type) { 312 /* log error about configuring this */ 313 ap_remove_output_filter(f); 314 return ap_pass_brigade(f->next, bb) ; 315 } 316 317 ctype = apr_pstrdup(f->r->pool, f->r->content_type); 318 for (p = ctype; *p; ++p) 319 if (isupper(*p)) 320 *p = tolower(*p); 321 322 /* only act if starts-with "text/" or contains "xml" */ 323 if (strncmp(ctype, "text/", 5) && !strstr(ctype, "xml")) { 324 ap_remove_output_filter(f); 325 return ap_pass_brigade(f->next, bb) ; 326 } 327 328 if (ctx->bbsave == NULL) { 329 ctx->bbsave = apr_brigade_create(f->r->pool, 330 f->r->connection->bucket_alloc); 331 } 332 /* append to any data left over from last time */ 333 APR_BRIGADE_CONCAT(ctx->bbsave, bb); 334 335 if (!(ctx->flags & ENC_INITIALISED)) { 336 /* some kind of initialisation required */ 337 /* Turn all this off when post-processing */ 338 339 /* if we don't have enough data to sniff but more's to come, wait */ 340 apr_brigade_length(ctx->bbsave, 0, &ctx->bblen); 341 if ((ctx->bblen < BUF_MIN) && (ctx->bblen != -1)) { 342 APR_BRIGADE_DO(b, ctx->bbsave) { 343 if (APR_BUCKET_IS_EOS(b)) { 344 ctx->flags |= ENC_SEEN_EOS; 345 break; 346 } 347 } 348 if (!(ctx->flags & ENC_SEEN_EOS)) { 349 /* not enough data to sniff. Wait for more */ 350 APR_BRIGADE_DO(b, ctx->bbsave) { 351 rv = apr_bucket_setaside(b, f->r->pool); 352 ap_assert(rv == APR_SUCCESS); 353 } 354 return APR_SUCCESS; 355 } 356 } 357 if (ctx->bblen == -1) { 358 ctx->bblen = BUFLEN-1; 359 } 360 361 /* flatten it into a NULL-terminated string */ 362 ctx->buf = apr_palloc(f->r->pool, (apr_size_t)(ctx->bblen+1)); 363 ctx->bytes = (apr_size_t)ctx->bblen; 364 rv = apr_brigade_flatten(ctx->bbsave, ctx->buf, &ctx->bytes); 365 ap_assert(rv == APR_SUCCESS); 366 ctx->buf[ctx->bytes] = 0; 367 sniff_encoding(f->r, ctx); 368 369 /* FIXME: hook here for rewriting start-of-data? */ 370 /* nah, we only have one action here - call it inline */ 371 fix_skipto(f->r, ctx); 372 373 /* we might change the Content-Length, so let's force its re-calculation */ 374 apr_table_unset(f->r->headers_out, "Content-Length"); 375 376 /* consume the data we just sniffed */ 377 /* we need to omit any <meta> we just invalidated */ 378 ctx->flags |= ENC_INITIALISED; 379 ap_set_module_config(f->r->request_config, &xml2enc_module, ctx); 380 } 381 if (ctx->bbnext == NULL) { 382 ctx->bbnext = apr_brigade_create(f->r->pool, 383 f->r->connection->bucket_alloc); 384 } 385 386 if (!ctx->convset) { 387 rv = ap_pass_brigade(f->next, ctx->bbsave); 388 apr_brigade_cleanup(ctx->bbsave); 389 ap_remove_output_filter(f); 390 return rv; 391 } 392 /* move the data back to bb */ 393 APR_BRIGADE_CONCAT(bb, ctx->bbsave); 394 395 while (b = APR_BRIGADE_FIRST(bb), b != APR_BRIGADE_SENTINEL(bb)) { 396 ctx->bytes = 0; 397 if (APR_BUCKET_IS_METADATA(b)) { 398 APR_BUCKET_REMOVE(b); 399 if (APR_BUCKET_IS_EOS(b)) { 400 /* send remaining data */ 401 APR_BRIGADE_INSERT_TAIL(ctx->bbnext, b); 402 return ap_fflush(f->next, ctx->bbnext); 403 } else if (APR_BUCKET_IS_FLUSH(b)) { 404 ap_fflush(f->next, ctx->bbnext); 405 } 406 apr_bucket_destroy(b); 407 } 408 else { /* data bucket */ 409 char* buf; 410 apr_size_t bytes = 0; 411 char fixbuf[BUFLEN]; 412 apr_bucket* bdestroy = NULL; 413 if (insz > 0) { /* we have dangling data. Flatten it. */ 414 buf = fixbuf; 415 bytes = BUFLEN; 416 rv = apr_brigade_flatten(bb, buf, &bytes); 417 ap_assert(rv == APR_SUCCESS); 418 if (bytes == insz) { 419 /* this is only what we've already tried to convert. 420 * The brigade is exhausted. 421 * Save remaining data for next time round 422 */ 423 424 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01437) 425 "xml2enc: Setting aside %" APR_SIZE_T_FMT 426 " unconverted bytes", bytes); 427 rv = ap_fflush(f->next, ctx->bbnext); 428 APR_BRIGADE_CONCAT(ctx->bbsave, bb); 429 APR_BRIGADE_DO(b, ctx->bbsave) { 430 ap_assert(apr_bucket_setaside(b, f->r->pool) 431 == APR_SUCCESS); 432 } 433 return rv; 434 } 435 /* remove the data we've just read */ 436 rv = apr_brigade_partition(bb, bytes, &bstart); 437 while (b = APR_BRIGADE_FIRST(bb), b != bstart) { 438 APR_BUCKET_REMOVE(b); 439 apr_bucket_destroy(b); 440 } 441 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01438) 442 "xml2enc: consuming %" APR_SIZE_T_FMT 443 " bytes flattened", bytes); 444 } 445 else { 446 rv = apr_bucket_read(b, (const char**)&buf, &bytes, 447 APR_BLOCK_READ); 448 APR_BUCKET_REMOVE(b); 449 bdestroy = b; /* can't destroy until finished with the data */ 450 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01439) 451 "xml2enc: consuming %" APR_SIZE_T_FMT 452 " bytes from bucket", bytes); 453 } 454 /* OK, we've got some input we can use in [buf,bytes] */ 455 if (rv == APR_SUCCESS) { 456 apr_size_t consumed; 457 xml2enc_run_preprocess(f, &buf, &bytes); 458 consumed = insz = bytes; 459 while (insz > 0) { 460 apr_status_t rv2; 461 if (ctx->bytes == ctx->bblen) { 462 /* nothing was converted last time! 463 * break out of this loop! 464 */ 465 b = apr_bucket_transient_create(buf+(bytes - insz), insz, 466 bb->bucket_alloc); 467 APR_BRIGADE_INSERT_HEAD(bb, b); 468 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01440) 469 "xml2enc: reinserting %" APR_SIZE_T_FMT 470 " unconsumed bytes from bucket", insz); 471 break; 472 } 473 ctx->bytes = (apr_size_t)ctx->bblen; 474 rv = apr_xlate_conv_buffer(ctx->convset, buf+(bytes - insz), 475 &insz, ctx->buf, &ctx->bytes); 476 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01441) 477 "xml2enc: converted %" APR_SIZE_T_FMT 478 "/%" APR_OFF_T_FMT " bytes", consumed - insz, 479 ctx->bblen - ctx->bytes); 480 consumed = insz; 481 rv2 = ap_fwrite(f->next, ctx->bbnext, ctx->buf, 482 (apr_size_t)ctx->bblen - ctx->bytes); 483 if (rv2 != APR_SUCCESS) { 484 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv2, f->r, APLOGNO(01442) 485 "ap_fwrite failed"); 486 return rv2; 487 } 488 switch (rv) { 489 case APR_SUCCESS: 490 continue; 491 case APR_EINCOMPLETE: 492 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, 0, f->r, APLOGNO(01443) 493 "INCOMPLETE"); 494 continue; /* If outbuf too small, go round again. 495 * If it was inbuf, we'll break out when 496 * we test ctx->bytes == ctx->bblen 497 */ 498 case APR_EINVAL: /* try skipping one bad byte */ 499 ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, f->r, APLOGNO(01444) 500 "Skipping invalid byte(s) in input stream!"); 501 --insz; 502 continue; 503 default: 504 /* Erk! What's this? 505 * Bail out, flush, and hope to eat the buf raw 506 */ 507 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01445) 508 "Failed to convert input; trying it raw") ; 509 ctx->convset = NULL; 510 rv = ap_fflush(f->next, ctx->bbnext); 511 if (rv != APR_SUCCESS) 512 ap_log_rerror(APLOG_MARK, APLOG_DEBUG, rv, f->r, APLOGNO(01446) 513 "ap_fflush failed"); 514 else 515 rv = ap_pass_brigade(f->next, ctx->bbnext); 516 } 517 } 518 } else { 519 ap_log_rerror(APLOG_MARK, APLOG_ERR, rv, f->r, APLOGNO(01447) 520 "xml2enc: error reading data") ; 521 } 522 if (bdestroy) 523 apr_bucket_destroy(bdestroy); 524 if (rv != APR_SUCCESS) 525 return rv; 526 } 527 } 528 return APR_SUCCESS; 529} 530static apr_status_t xml2enc_charset(request_rec* r, xmlCharEncoding* encp, 531 const char** encoding) 532{ 533 xml2ctx* ctx = ap_get_module_config(r->request_config, &xml2enc_module); 534 if (!ctx || !(ctx->flags & ENC_INITIALISED)) { 535 return APR_EAGAIN; 536 } 537 *encp = ctx->xml2enc; 538 *encoding = ctx->encoding; 539 return HAVE_ENCODING(ctx->xml2enc) ? APR_SUCCESS : APR_EGENERAL; 540} 541 542#define PROTO_FLAGS AP_FILTER_PROTO_CHANGE|AP_FILTER_PROTO_CHANGE_LENGTH 543static void xml2enc_hooks(apr_pool_t* pool) 544{ 545 ap_register_output_filter_protocol("xml2enc", xml2enc_ffunc, 546 xml2enc_filter_init, 547 AP_FTYPE_RESOURCE, PROTO_FLAGS); 548 APR_REGISTER_OPTIONAL_FN(xml2enc_filter); 549 APR_REGISTER_OPTIONAL_FN(xml2enc_charset); 550 seek_meta_ctype = ap_pregcomp(pool, 551 "(<meta[^>]*http-equiv[ \t\r\n='\"]*content-type[^>]*>)", 552 AP_REG_EXTENDED|AP_REG_ICASE) ; 553 seek_charset = ap_pregcomp(pool, "charset=([A-Za-z0-9_-]+)", 554 AP_REG_EXTENDED|AP_REG_ICASE) ; 555} 556static const char* set_alias(cmd_parms* cmd, void* CFG, 557 const char* charset, const char* alias) 558{ 559 const char* errmsg = ap_check_cmd_context(cmd, GLOBAL_ONLY); 560 if (errmsg != NULL) 561 return errmsg ; 562 else if (xmlAddEncodingAlias(charset, alias) == 0) 563 return NULL; 564 else 565 return "Error setting charset alias"; 566} 567 568static const char* set_default(cmd_parms* cmd, void* CFG, const char* charset) 569{ 570 xml2cfg* cfg = CFG; 571 cfg->default_charset = charset; 572 cfg->default_encoding = xmlParseCharEncoding(charset); 573 switch(cfg->default_encoding) { 574 case XML_CHAR_ENCODING_NONE: 575 return "Default charset not found"; 576 case XML_CHAR_ENCODING_ERROR: 577 return "Invalid or unsupported default charset"; 578 default: 579 return NULL; 580 } 581} 582static const char* set_skipto(cmd_parms* cmd, void* CFG, const char* arg) 583{ 584 tattr* attr; 585 xml2cfg* cfg = CFG; 586 if (cfg->skipto == NULL) 587 cfg->skipto = apr_array_make(cmd->pool, 4, sizeof(tattr)); 588 attr = apr_array_push(cfg->skipto) ; 589 attr->val = arg; 590 return NULL; 591} 592 593static const command_rec xml2enc_cmds[] = { 594 AP_INIT_TAKE1("xml2EncDefault", set_default, NULL, OR_ALL, 595 "Usage: xml2EncDefault charset"), 596 AP_INIT_ITERATE2("xml2EncAlias", set_alias, NULL, RSRC_CONF, 597 "EncodingAlias charset alias [more aliases]"), 598 AP_INIT_ITERATE("xml2StartParse", set_skipto, NULL, OR_ALL, 599 "Ignore anything in front of the first of these elements"), 600 { NULL } 601}; 602static void* xml2enc_config(apr_pool_t* pool, char* x) 603{ 604 xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg)); 605 ret->default_encoding = XML_CHAR_ENCODING_NONE ; 606 return ret; 607} 608 609static void* xml2enc_merge(apr_pool_t* pool, void* BASE, void* ADD) 610{ 611 xml2cfg* base = BASE; 612 xml2cfg* add = ADD; 613 xml2cfg* ret = apr_pcalloc(pool, sizeof(xml2cfg)); 614 ret->default_encoding = (add->default_encoding == XML_CHAR_ENCODING_NONE) 615 ? base->default_encoding : add->default_encoding ; 616 ret->default_charset = add->default_charset 617 ? add->default_charset : base->default_charset; 618 ret->skipto = add->skipto ? add->skipto : base->skipto; 619 return ret; 620} 621 622AP_DECLARE_MODULE(xml2enc) = { 623 STANDARD20_MODULE_STUFF, 624 xml2enc_config, 625 xml2enc_merge, 626 NULL, 627 NULL, 628 xml2enc_cmds, 629 xml2enc_hooks 630}; 631 632APR_IMPLEMENT_OPTIONAL_HOOK_RUN_ALL(xml2enc, XML2ENC, int, preprocess, 633 (ap_filter_t *f, char** bufp, apr_size_t* bytesp), 634 (f, bufp, bytesp), OK, DECLINED) 635