/*      $NetBSD$        */

/*++
/* NAME
/*      dict_regexp 3
/* SUMMARY
/*      dictionary manager interface to REGEXP regular expression library
/* SYNOPSIS
/*      #include <dict_regexp.h>
/*
/*      DICT    *dict_regexp_open(name, dummy, dict_flags)
/*      const char *name;
/*      int     dummy;
/*      int     dict_flags;
/* DESCRIPTION
/*      dict_regexp_open() opens the named file and compiles the contained
/*      regular expressions. The result object can be used to match strings
/*      against the table.
/*
/*      The second argument is not used by this implementation.
/*      The code in this file tests two bits in \fIdict_flags\fR:
/*      DICT_FLAG_FOLD_MUL (the lookup key is folded to lower case
/*      before matching) and DICT_FLAG_NO_REGSUB (rules whose
/*      replacement text contains $number are rejected when the table
/*      is loaded).
/*
/*      A lookup returns the replacement text of the first rule that
/*      matches, or a null pointer when no rule matches.
/* SEE ALSO
/*      dict(3) generic dictionary manager
/*      regexp_table(5) format of Postfix regular expression tables
/* AUTHOR(S)
/*      LaMont Jones
/*      lamont@hp.com
/*
/*      Based on PCRE dictionary contributed by Andrew McNamara
/*      andrewm@connect.com.au
/*      connect.com.au Pty. Ltd.
/*      Level 3, 213 Miller St
/*      North Sydney, NSW, Australia
/*
/*      Heavily rewritten by Wietse Venema
/*      IBM T.J. Watson Research
/*      P.O. Box 704
/*      Yorktown Heights, NY 10598, USA
/*--*/

/* System library. */

#include "sys_defs.h"

/* Everything up to the final #endif is compiled only with POSIX regex. */

#ifdef HAS_POSIX_REGEXP

#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <ctype.h>
#include <regex.h>
#ifdef STRCASECMP_IN_STRINGS_H
#include <strings.h>
#endif

/* Utility library. */

#include "mymalloc.h"
#include "msg.h"
#include "safe.h"
#include "vstream.h"
#include "vstring.h"
#include "stringops.h"
#include "readlline.h"
#include "dict.h"
#include "dict_regexp.h"
#include "mac_parse.h"

 /*
  * Support for IF/ENDIF based on an idea by Bert Driehuis.
  * 
  * Each compiled rule carries one of these operation codes; the lookup and
  * close routines dispatch on it.
  */
#define DICT_REGEXP_OP_MATCH    1       /* Match this regexp */
#define DICT_REGEXP_OP_IF       2       /* Increase if/endif nesting on match */
#define DICT_REGEXP_OP_ENDIF    3       /* Decrease if/endif nesting on match */

/*
 * Regular expression before compiling.
75 */ 76typedef struct { 77 char *regexp; /* regular expression */ 78 int options; /* regcomp() options */ 79 int match; /* positive or negative match */ 80} DICT_REGEXP_PATTERN; 81 82 /* 83 * Compiled generic rule, and subclasses that derive from it. 84 */ 85typedef struct DICT_REGEXP_RULE { 86 int op; /* DICT_REGEXP_OP_MATCH/IF/ENDIF */ 87 int nesting; /* Level of search nesting */ 88 int lineno; /* source file line number */ 89 struct DICT_REGEXP_RULE *next; /* next rule in dict */ 90} DICT_REGEXP_RULE; 91 92typedef struct { 93 DICT_REGEXP_RULE rule; /* generic part */ 94 regex_t *first_exp; /* compiled primary pattern */ 95 int first_match; /* positive or negative match */ 96 regex_t *second_exp; /* compiled secondary pattern */ 97 int second_match; /* positive or negative match */ 98 char *replacement; /* replacement text */ 99 size_t max_sub; /* largest $number in replacement */ 100} DICT_REGEXP_MATCH_RULE; 101 102typedef struct { 103 DICT_REGEXP_RULE rule; /* generic members */ 104 regex_t *expr; /* the condition */ 105 int match; /* positive or negative match */ 106} DICT_REGEXP_IF_RULE; 107 108 /* 109 * Regexp map. 110 */ 111typedef struct { 112 DICT dict; /* generic members */ 113 regmatch_t *pmatch; /* matched substring info */ 114 DICT_REGEXP_RULE *head; /* first rule */ 115 VSTRING *expansion_buf; /* lookup result */ 116} DICT_REGEXP; 117 118 /* 119 * Macros to make dense code more readable. 120 */ 121#define NULL_SUBSTITUTIONS (0) 122#define NULL_MATCH_RESULT ((regmatch_t *) 0) 123 124 /* 125 * Context for $number expansion callback. 126 */ 127typedef struct { 128 DICT_REGEXP *dict_regexp; /* the dictionary handle */ 129 DICT_REGEXP_MATCH_RULE *match_rule; /* the rule we matched */ 130 const char *lookup_string; /* matched text */ 131} DICT_REGEXP_EXPAND_CONTEXT; 132 133 /* 134 * Context for $number pre-scan callback. 
135 */ 136typedef struct { 137 const char *mapname; /* name of regexp map */ 138 int lineno; /* where in file */ 139 size_t max_sub; /* largest $number seen */ 140 char *literal; /* constant result, $$ -> $ */ 141} DICT_REGEXP_PRESCAN_CONTEXT; 142 143 /* 144 * Compatibility. 145 */ 146#ifndef MAC_PARSE_OK 147#define MAC_PARSE_OK 0 148#endif 149 150/* dict_regexp_expand - replace $number with substring from matched text */ 151 152static int dict_regexp_expand(int type, VSTRING *buf, char *ptr) 153{ 154 DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr; 155 DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule; 156 DICT_REGEXP *dict_regexp = ctxt->dict_regexp; 157 regmatch_t *pmatch; 158 size_t n; 159 160 /* 161 * Replace $number by the corresponding substring from the matched text. 162 * We pre-scanned the replacement text at compile time, so any out of 163 * range $number means that something impossible has happened. 164 */ 165 if (type == MAC_PARSE_VARNAME) { 166 n = atoi(vstring_str(buf)); 167 if (n < 1 || n > match_rule->max_sub) 168 msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"", 169 dict_regexp->dict.name, match_rule->rule.lineno, 170 vstring_str(buf)); 171 pmatch = dict_regexp->pmatch + n; 172 if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo) 173 return (MAC_PARSE_UNDEF); /* empty or not matched */ 174 vstring_strncat(dict_regexp->expansion_buf, 175 ctxt->lookup_string + pmatch->rm_so, 176 pmatch->rm_eo - pmatch->rm_so); 177 return (MAC_PARSE_OK); 178 } 179 180 /* 181 * Straight text - duplicate with no substitution. 
182 */ 183 else { 184 vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf)); 185 return (MAC_PARSE_OK); 186 } 187} 188 189/* dict_regexp_regerror - report regexp compile/execute error */ 190 191static void dict_regexp_regerror(const char *mapname, int lineno, int error, 192 const regex_t *expr) 193{ 194 char errbuf[256]; 195 196 (void) regerror(error, expr, errbuf, sizeof(errbuf)); 197 msg_warn("regexp map %s, line %d: %s", mapname, lineno, errbuf); 198} 199 200 /* 201 * Inlined to reduce function call overhead in the time-critical loop. 202 */ 203#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \ 204 ((err) = regexec((expr), (str), (nsub), (pmatch), 0), \ 205 ((err) == REG_NOMATCH ? !(match) : \ 206 (err) == 0 ? (match) : \ 207 (dict_regexp_regerror((map), (line), (err), (expr)), 0))) 208 209/* dict_regexp_lookup - match string and perform optional substitution */ 210 211static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string) 212{ 213 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 214 DICT_REGEXP_RULE *rule; 215 DICT_REGEXP_IF_RULE *if_rule; 216 DICT_REGEXP_MATCH_RULE *match_rule; 217 DICT_REGEXP_EXPAND_CONTEXT expand_context; 218 int error; 219 int nesting = 0; 220 221 dict_errno = 0; 222 223 if (msg_verbose) 224 msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string); 225 226 /* 227 * Optionally fold the key. 228 */ 229 if (dict->flags & DICT_FLAG_FOLD_MUL) { 230 if (dict->fold_buf == 0) 231 dict->fold_buf = vstring_alloc(10); 232 vstring_strcpy(dict->fold_buf, lookup_string); 233 lookup_string = lowercase(vstring_str(dict->fold_buf)); 234 } 235 for (rule = dict_regexp->head; rule; rule = rule->next) { 236 237 /* 238 * Skip rules inside failed IF/ENDIF. 239 */ 240 if (nesting < rule->nesting) 241 continue; 242 243 switch (rule->op) { 244 245 /* 246 * Search for the first matching primary expression. Limit the 247 * overhead for substring substitution to the bare minimum. 
248 */ 249 case DICT_REGEXP_OP_MATCH: 250 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 251 if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 252 match_rule->first_exp, 253 match_rule->first_match, 254 lookup_string, 255 match_rule->max_sub > 0 ? 256 match_rule->max_sub + 1 : 0, 257 dict_regexp->pmatch)) 258 continue; 259 if (match_rule->second_exp 260 && !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 261 match_rule->second_exp, 262 match_rule->second_match, 263 lookup_string, 264 NULL_SUBSTITUTIONS, 265 NULL_MATCH_RESULT)) 266 continue; 267 268 /* 269 * Skip $number substitutions when the replacement text contains 270 * no $number strings, as learned during the compile time 271 * pre-scan. The pre-scan already replaced $$ by $. 272 */ 273 if (match_rule->max_sub == 0) 274 return (match_rule->replacement); 275 276 /* 277 * Perform $number substitutions on the replacement text. We 278 * pre-scanned the replacement text at compile time. Any macro 279 * expansion errors at this point mean something impossible has 280 * happened. 281 */ 282 if (!dict_regexp->expansion_buf) 283 dict_regexp->expansion_buf = vstring_alloc(10); 284 VSTRING_RESET(dict_regexp->expansion_buf); 285 expand_context.lookup_string = lookup_string; 286 expand_context.match_rule = match_rule; 287 expand_context.dict_regexp = dict_regexp; 288 289 if (mac_parse(match_rule->replacement, dict_regexp_expand, 290 (char *) &expand_context) & MAC_PARSE_ERROR) 291 msg_panic("regexp map %s, line %d: bad replacement syntax", 292 dict->name, rule->lineno); 293 VSTRING_TERMINATE(dict_regexp->expansion_buf); 294 return (vstring_str(dict_regexp->expansion_buf)); 295 296 /* 297 * Conditional. 298 */ 299 case DICT_REGEXP_OP_IF: 300 if_rule = (DICT_REGEXP_IF_RULE *) rule; 301 if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno, 302 if_rule->expr, if_rule->match, lookup_string, 303 NULL_SUBSTITUTIONS, NULL_MATCH_RESULT)) 304 nesting++; 305 continue; 306 307 /* 308 * ENDIF after successful IF. 
309 */ 310 case DICT_REGEXP_OP_ENDIF: 311 nesting--; 312 continue; 313 314 default: 315 msg_panic("dict_regexp_lookup: impossible operation %d", rule->op); 316 } 317 } 318 return (0); 319} 320 321/* dict_regexp_close - close regexp dictionary */ 322 323static void dict_regexp_close(DICT *dict) 324{ 325 DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict; 326 DICT_REGEXP_RULE *rule; 327 DICT_REGEXP_RULE *next; 328 DICT_REGEXP_MATCH_RULE *match_rule; 329 DICT_REGEXP_IF_RULE *if_rule; 330 331 for (rule = dict_regexp->head; rule; rule = next) { 332 next = rule->next; 333 switch (rule->op) { 334 case DICT_REGEXP_OP_MATCH: 335 match_rule = (DICT_REGEXP_MATCH_RULE *) rule; 336 if (match_rule->first_exp) { 337 regfree(match_rule->first_exp); 338 myfree((char *) match_rule->first_exp); 339 } 340 if (match_rule->second_exp) { 341 regfree(match_rule->second_exp); 342 myfree((char *) match_rule->second_exp); 343 } 344 if (match_rule->replacement) 345 myfree((char *) match_rule->replacement); 346 break; 347 case DICT_REGEXP_OP_IF: 348 if_rule = (DICT_REGEXP_IF_RULE *) rule; 349 if (if_rule->expr) { 350 regfree(if_rule->expr); 351 myfree((char *) if_rule->expr); 352 } 353 break; 354 case DICT_REGEXP_OP_ENDIF: 355 break; 356 default: 357 msg_panic("dict_regexp_close: unknown operation %d", rule->op); 358 } 359 myfree((char *) rule); 360 } 361 if (dict_regexp->pmatch) 362 myfree((char *) dict_regexp->pmatch); 363 if (dict_regexp->expansion_buf) 364 vstring_free(dict_regexp->expansion_buf); 365 if (dict->fold_buf) 366 vstring_free(dict->fold_buf); 367 dict_free(dict); 368} 369 370/* dict_regexp_get_pat - extract one pattern with options from rule */ 371 372static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp, 373 DICT_REGEXP_PATTERN *pat) 374{ 375 char *p = *bufp; 376 char re_delim; 377 378 /* 379 * Process negation operators. 
380 */ 381 pat->match = 1; 382 while (*p == '!') { 383 pat->match = !pat->match; 384 p++; 385 } 386 387 /* 388 * Grr...aceful handling of whitespace after '!'. 389 */ 390 while (*p && ISSPACE(*p)) 391 p++; 392 if (*p == 0) { 393 msg_warn("regexp map %s, line %d: no regexp: skipping this rule", 394 mapname, lineno); 395 return (0); 396 } 397 398 /* 399 * Search for the closing delimiter, handling backslash escape. 400 */ 401 re_delim = *p++; 402 pat->regexp = p; 403 while (*p) { 404 if (*p == '\\') { 405 if (p[1]) 406 p++; 407 else 408 break; 409 } else if (*p == re_delim) { 410 break; 411 } 412 ++p; 413 } 414 if (!*p) { 415 msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": " 416 "skipping this rule", mapname, lineno, re_delim); 417 return (0); 418 } 419 *p++ = 0; /* null terminate */ 420 421 /* 422 * Search for options. 423 */ 424 pat->options = REG_EXTENDED | REG_ICASE; 425 while (*p && !ISSPACE(*p) && *p != '!') { 426 switch (*p) { 427 case 'i': 428 pat->options ^= REG_ICASE; 429 break; 430 case 'm': 431 pat->options ^= REG_NEWLINE; 432 break; 433 case 'x': 434 pat->options ^= REG_EXTENDED; 435 break; 436 default: 437 msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": " 438 "skipping this rule", mapname, lineno, *p); 439 return (0); 440 } 441 ++p; 442 } 443 *bufp = p; 444 return (1); 445} 446 447/* dict_regexp_get_pats - get the primary and second patterns and flags */ 448 449static int dict_regexp_get_pats(const char *mapname, int lineno, char **p, 450 DICT_REGEXP_PATTERN *first_pat, 451 DICT_REGEXP_PATTERN *second_pat) 452{ 453 454 /* 455 * Get the primary and optional secondary patterns and their flags. 456 */ 457 if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0) 458 return (0); 459 if (**p == '!') { 460#if 0 461 static int bitrot_warned = 0; 462 463 if (bitrot_warned == 0) { 464 msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away," 465 " use \"if !/pattern2/ ... /pattern1/ ... 
endif\" instead", 466 mapname, lineno); 467 bitrot_warned = 1; 468 } 469#endif 470 if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0) 471 return (0); 472 } else { 473 second_pat->regexp = 0; 474 } 475 return (1); 476} 477 478/* dict_regexp_prescan - find largest $number in replacement text */ 479 480static int dict_regexp_prescan(int type, VSTRING *buf, char *context) 481{ 482 DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context; 483 size_t n; 484 485 /* 486 * Keep a copy of literal text (with $$ already replaced by $) if and 487 * only if the replacement text contains no $number expression. This way 488 * we can avoid having to scan the replacement text at lookup time. 489 */ 490 if (type == MAC_PARSE_VARNAME) { 491 if (ctxt->literal) { 492 myfree(ctxt->literal); 493 ctxt->literal = 0; 494 } 495 if (!alldig(vstring_str(buf))) { 496 msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"", 497 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 498 return (MAC_PARSE_ERROR); 499 } 500 n = atoi(vstring_str(buf)); 501 if (n < 1) { 502 msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"", 503 ctxt->mapname, ctxt->lineno, vstring_str(buf)); 504 return (MAC_PARSE_ERROR); 505 } 506 if (n > ctxt->max_sub) 507 ctxt->max_sub = n; 508 } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) { 509 if (ctxt->literal) 510 msg_panic("regexp map %s, line %d: multiple literals but no $number", 511 ctxt->mapname, ctxt->lineno); 512 ctxt->literal = mystrdup(vstring_str(buf)); 513 } 514 return (MAC_PARSE_OK); 515} 516 517/* dict_regexp_compile_pat - compile one pattern */ 518 519static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno, 520 DICT_REGEXP_PATTERN *pat) 521{ 522 int error; 523 regex_t *expr; 524 525 expr = (regex_t *) mymalloc(sizeof(*expr)); 526 error = regcomp(expr, pat->regexp, pat->options); 527 if (error != 0) { 528 dict_regexp_regerror(mapname, lineno, error, expr); 529 myfree((char *) 
expr); 530 return (0); 531 } 532 return (expr); 533} 534 535/* dict_regexp_rule_alloc - fill in a generic rule structure */ 536 537static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting, 538 int lineno, 539 size_t size) 540{ 541 DICT_REGEXP_RULE *rule; 542 543 rule = (DICT_REGEXP_RULE *) mymalloc(size); 544 rule->op = op; 545 rule->nesting = nesting; 546 rule->lineno = lineno; 547 rule->next = 0; 548 549 return (rule); 550} 551 552/* dict_regexp_parseline - parse one rule */ 553 554static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno, 555 char *line, int nesting, 556 int dict_flags) 557{ 558 char *p; 559 560 p = line; 561 562 /* 563 * An ordinary rule takes one or two patterns and replacement text. 564 */ 565 if (!ISALNUM(*p)) { 566 DICT_REGEXP_PATTERN first_pat; 567 DICT_REGEXP_PATTERN second_pat; 568 DICT_REGEXP_PRESCAN_CONTEXT prescan_context; 569 regex_t *first_exp = 0; 570 regex_t *second_exp; 571 DICT_REGEXP_MATCH_RULE *match_rule; 572 573 /* 574 * Get the primary and the optional secondary patterns. 575 */ 576 if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat)) 577 return (0); 578 579 /* 580 * Get the replacement text. 581 */ 582 while (*p && ISSPACE(*p)) 583 ++p; 584 if (!*p) { 585 msg_warn("regexp map %s, line %d: using empty replacement string", 586 mapname, lineno); 587 } 588 589 /* 590 * Find the highest-numbered $number in the replacement text. We can 591 * speed up pattern matching 1) by passing hints to the regexp 592 * compiler, setting the REG_NOSUB flag when the replacement text 593 * contains no $number string; 2) by passing hints to the regexp 594 * execution code, limiting the amount of text that is made available 595 * for substitution. 596 */ 597 prescan_context.mapname = mapname; 598 prescan_context.lineno = lineno; 599 prescan_context.max_sub = 0; 600 prescan_context.literal = 0; 601 602 /* 603 * The optimizer will eliminate code duplication and/or dead code. 
604 */ 605#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \ 606 if (first_exp) { \ 607 regfree(first_exp); \ 608 myfree((char *) first_exp); \ 609 } \ 610 if (prescan_context.literal) \ 611 myfree(prescan_context.literal); \ 612 return (rval); \ 613 } while (0) 614 615 if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context) 616 & MAC_PARSE_ERROR) { 617 msg_warn("regexp map %s, line %d: bad replacement syntax: " 618 "skipping this rule", mapname, lineno); 619 CREATE_MATCHOP_ERROR_RETURN(0); 620 } 621 622 /* 623 * Compile the primary and the optional secondary pattern. Speed up 624 * execution when no matched text needs to be substituted into the 625 * result string, or when the highest numbered substring is less than 626 * the total number of () subpatterns. 627 */ 628 if (prescan_context.max_sub == 0) 629 first_pat.options |= REG_NOSUB; 630 if (prescan_context.max_sub > 0 && first_pat.match == 0) { 631 msg_warn("regexp map %s, line %d: $number found in negative match " 632 "replacement text: skipping this rule", mapname, lineno); 633 CREATE_MATCHOP_ERROR_RETURN(0); 634 } 635 if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) { 636 msg_warn("regexp map %s, line %d: " 637 "regular expression substitution is not allowed: " 638 "skipping this rule", mapname, lineno); 639 CREATE_MATCHOP_ERROR_RETURN(0); 640 } 641 if ((first_exp = dict_regexp_compile_pat(mapname, lineno, 642 &first_pat)) == 0) 643 CREATE_MATCHOP_ERROR_RETURN(0); 644 if (prescan_context.max_sub > first_exp->re_nsub) { 645 msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": " 646 "skipping this rule", mapname, lineno, 647 (int) prescan_context.max_sub); 648 CREATE_MATCHOP_ERROR_RETURN(0); 649 } 650 if (second_pat.regexp != 0) { 651 second_pat.options |= REG_NOSUB; 652 if ((second_exp = dict_regexp_compile_pat(mapname, lineno, 653 &second_pat)) == 0) 654 CREATE_MATCHOP_ERROR_RETURN(0); 655 } else { 656 second_exp = 0; 657 } 658 match_rule = 
(DICT_REGEXP_MATCH_RULE *) 659 dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno, 660 sizeof(DICT_REGEXP_MATCH_RULE)); 661 match_rule->first_exp = first_exp; 662 match_rule->first_match = first_pat.match; 663 match_rule->max_sub = prescan_context.max_sub; 664 match_rule->second_exp = second_exp; 665 match_rule->second_match = second_pat.match; 666 if (prescan_context.literal) 667 match_rule->replacement = prescan_context.literal; 668 else 669 match_rule->replacement = mystrdup(p); 670 return ((DICT_REGEXP_RULE *) match_rule); 671 } 672 673 /* 674 * The IF operator takes one pattern but no replacement text. 675 */ 676 else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) { 677 DICT_REGEXP_PATTERN pattern; 678 regex_t *expr; 679 DICT_REGEXP_IF_RULE *if_rule; 680 681 p += 2; 682 while (*p && ISSPACE(*p)) 683 p++; 684 if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern)) 685 return (0); 686 while (*p && ISSPACE(*p)) 687 ++p; 688 if (*p) { 689 msg_warn("regexp map %s, line %d: ignoring extra text after" 690 " IF statement: \"%s\"", mapname, lineno, p); 691 msg_warn("regexp map %s, line %d: do not prepend whitespace" 692 " to statements between IF and ENDIF", mapname, lineno); 693 } 694 if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0) 695 return (0); 696 if_rule = (DICT_REGEXP_IF_RULE *) 697 dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno, 698 sizeof(DICT_REGEXP_IF_RULE)); 699 if_rule->expr = expr; 700 if_rule->match = pattern.match; 701 return ((DICT_REGEXP_RULE *) if_rule); 702 } 703 704 /* 705 * The ENDIF operator takes no patterns and no replacement text. 
706 */ 707 else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) { 708 DICT_REGEXP_RULE *rule; 709 710 p += 5; 711 if (nesting == 0) { 712 msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF", 713 mapname, lineno); 714 return (0); 715 } 716 while (*p && ISSPACE(*p)) 717 ++p; 718 if (*p) 719 msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF", 720 mapname, lineno); 721 rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno, 722 sizeof(DICT_REGEXP_RULE)); 723 return (rule); 724 } 725 726 /* 727 * Unrecognized input. 728 */ 729 else { 730 msg_warn("regexp map %s, line %d: ignoring unrecognized request", 731 mapname, lineno); 732 return (0); 733 } 734} 735 736/* dict_regexp_open - load and compile a file containing regular expressions */ 737 738DICT *dict_regexp_open(const char *mapname, int unused_flags, int dict_flags) 739{ 740 DICT_REGEXP *dict_regexp; 741 VSTREAM *map_fp; 742 VSTRING *line_buffer; 743 DICT_REGEXP_RULE *rule; 744 DICT_REGEXP_RULE *last_rule = 0; 745 int lineno = 0; 746 size_t max_sub = 0; 747 int nesting = 0; 748 char *p; 749 750 line_buffer = vstring_alloc(100); 751 752 dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname, 753 sizeof(*dict_regexp)); 754 dict_regexp->dict.lookup = dict_regexp_lookup; 755 dict_regexp->dict.close = dict_regexp_close; 756 dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN; 757 if (dict_flags & DICT_FLAG_FOLD_MUL) 758 dict_regexp->dict.fold_buf = vstring_alloc(10); 759 dict_regexp->head = 0; 760 dict_regexp->pmatch = 0; 761 dict_regexp->expansion_buf = 0; 762 763 /* 764 * Parse the regexp table. 
765 */ 766 if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0) 767 msg_fatal("open %s: %m", mapname); 768 769 while (readlline(line_buffer, map_fp, &lineno)) { 770 p = vstring_str(line_buffer); 771 trimblanks(p, 0)[0] = 0; 772 if (*p == 0) 773 continue; 774 rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags); 775 if (rule == 0) 776 continue; 777 if (rule->op == DICT_REGEXP_OP_MATCH) { 778 if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub) 779 max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub; 780 } else if (rule->op == DICT_REGEXP_OP_IF) { 781 nesting++; 782 } else if (rule->op == DICT_REGEXP_OP_ENDIF) { 783 nesting--; 784 } 785 if (last_rule == 0) 786 dict_regexp->head = rule; 787 else 788 last_rule->next = rule; 789 last_rule = rule; 790 } 791 792 if (nesting) 793 msg_warn("regexp map %s, line %d: more IFs than ENDIFs", 794 mapname, lineno); 795 796 /* 797 * Allocate space for only as many matched substrings as used in the 798 * replacement text. 799 */ 800 if (max_sub > 0) 801 dict_regexp->pmatch = 802 (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1)); 803 804 /* 805 * Clean up. 806 */ 807 vstring_free(line_buffer); 808 vstream_fclose(map_fp); 809 810 return (DICT_DEBUG (&dict_regexp->dict)); 811} 812 813#endif 814