1/*	$NetBSD$	*/
2
3/*++
4/* NAME
5/*	dict_regexp 3
6/* SUMMARY
7/*	dictionary manager interface to REGEXP regular expression library
8/* SYNOPSIS
9/*	#include <dict_regexp.h>
10/*
11/*	DICT	*dict_regexp_open(name, dummy, dict_flags)
12/*	const char *name;
13/*	int	dummy;
14/*	int	dict_flags;
15/* DESCRIPTION
16/*	dict_regexp_open() opens the named file and compiles the contained
17/*	regular expressions. The result object can be used to match strings
18/*	against the table.
19/* SEE ALSO
20/*	dict(3) generic dictionary manager
21/*	regexp_table(5) format of Postfix regular expression tables
22/* AUTHOR(S)
23/*	LaMont Jones
24/*	lamont@hp.com
25/*
26/*	Based on PCRE dictionary contributed by Andrew McNamara
27/*	andrewm@connect.com.au
28/*	connect.com.au Pty. Ltd.
29/*	Level 3, 213 Miller St
30/*	North Sydney, NSW, Australia
31/*
32/*	Heavily rewritten by Wietse Venema
33/*	IBM T.J. Watson Research
34/*	P.O. Box 704
35/*	Yorktown Heights, NY 10598, USA
36/*--*/
37
38/* System library. */
39
40#include "sys_defs.h"
41
42#ifdef HAS_POSIX_REGEXP
43
44#include <stdlib.h>
45#include <unistd.h>
46#include <string.h>
47#include <ctype.h>
48#include <regex.h>
49#ifdef STRCASECMP_IN_STRINGS_H
50#include <strings.h>
51#endif
52
53/* Utility library. */
54
55#include "mymalloc.h"
56#include "msg.h"
57#include "safe.h"
58#include "vstream.h"
59#include "vstring.h"
60#include "stringops.h"
61#include "readlline.h"
62#include "dict.h"
63#include "dict_regexp.h"
64#include "mac_parse.h"
65
66 /*
67  * Support for IF/ENDIF based on an idea by Bert Driehuis.
68  */
#define DICT_REGEXP_OP_MATCH	1	/* pattern rule with replacement text */
#define DICT_REGEXP_OP_IF	2	/* opens a conditional block */
#define DICT_REGEXP_OP_ENDIF	3	/* closes a conditional block */
72
73 /*
74  * Regular expression before compiling.
75  */
typedef struct {
    char   *regexp;			/* pattern text between the delimiters */
    int     options;			/* flag bits passed to regcomp() */
    int     match;			/* 1 = must match, 0 = must not match */
} DICT_REGEXP_PATTERN;
81
82 /*
83  * Compiled generic rule, and subclasses that derive from it.
84  */
typedef struct DICT_REGEXP_RULE {
    int     op;				/* one of DICT_REGEXP_OP_MATCH/IF/ENDIF */
    int     nesting;			/* IF/ENDIF depth at which rule was read */
    int     lineno;			/* map file line number, for messages */
    struct DICT_REGEXP_RULE *next;	/* following rule in the list, or 0 */
} DICT_REGEXP_RULE;
91
92typedef struct {
93    DICT_REGEXP_RULE rule;		/* generic part */
94    regex_t *first_exp;			/* compiled primary pattern */
95    int     first_match;		/* positive or negative match */
96    regex_t *second_exp;		/* compiled secondary pattern */
97    int     second_match;		/* positive or negative match */
98    char   *replacement;		/* replacement text */
99    size_t  max_sub;			/* largest $number in replacement */
100} DICT_REGEXP_MATCH_RULE;
101
102typedef struct {
103    DICT_REGEXP_RULE rule;		/* generic members */
104    regex_t *expr;			/* the condition */
105    int     match;			/* positive or negative match */
106} DICT_REGEXP_IF_RULE;
107
108 /*
109  * Regexp map.
110  */
111typedef struct {
112    DICT    dict;			/* generic members */
113    regmatch_t *pmatch;			/* matched substring info */
114    DICT_REGEXP_RULE *head;		/* first rule */
115    VSTRING *expansion_buf;		/* lookup result */
116} DICT_REGEXP;
117
118 /*
119  * Macros to make dense code more readable.
120  */
#define NULL_SUBSTITUTIONS	(0)			/* no substrings wanted */
#define NULL_MATCH_RESULT	((regmatch_t *) 0)	/* no result array */
123
124 /*
125  * Context for $number expansion callback.
126  */
127typedef struct {
128    DICT_REGEXP *dict_regexp;		/* the dictionary handle */
129    DICT_REGEXP_MATCH_RULE *match_rule;	/* the rule we matched */
130    const char *lookup_string;		/* matched text */
131} DICT_REGEXP_EXPAND_CONTEXT;
132
133 /*
134  * Context for $number pre-scan callback.
135  */
typedef struct {
    const char *mapname;		/* map name, for warnings */
    int     lineno;			/* map file line number, for warnings */
    size_t  max_sub;			/* highest $number seen so far */
    char   *literal;			/* copy of $number-free result, or 0 */
} DICT_REGEXP_PRESCAN_CONTEXT;
142
143 /*
144  * Compatibility.
145  */
#if !defined(MAC_PARSE_OK)
#define MAC_PARSE_OK 0			/* fallback when mac_parse.h lacks it */
#endif
149
150/* dict_regexp_expand - replace $number with substring from matched text */
151
152static int dict_regexp_expand(int type, VSTRING *buf, char *ptr)
153{
154    DICT_REGEXP_EXPAND_CONTEXT *ctxt = (DICT_REGEXP_EXPAND_CONTEXT *) ptr;
155    DICT_REGEXP_MATCH_RULE *match_rule = ctxt->match_rule;
156    DICT_REGEXP *dict_regexp = ctxt->dict_regexp;
157    regmatch_t *pmatch;
158    size_t  n;
159
160    /*
161     * Replace $number by the corresponding substring from the matched text.
162     * We pre-scanned the replacement text at compile time, so any out of
163     * range $number means that something impossible has happened.
164     */
165    if (type == MAC_PARSE_VARNAME) {
166	n = atoi(vstring_str(buf));
167	if (n < 1 || n > match_rule->max_sub)
168	    msg_panic("regexp map %s, line %d: out of range replacement index \"%s\"",
169		      dict_regexp->dict.name, match_rule->rule.lineno,
170		      vstring_str(buf));
171	pmatch = dict_regexp->pmatch + n;
172	if (pmatch->rm_so < 0 || pmatch->rm_so == pmatch->rm_eo)
173	    return (MAC_PARSE_UNDEF);		/* empty or not matched */
174	vstring_strncat(dict_regexp->expansion_buf,
175			ctxt->lookup_string + pmatch->rm_so,
176			pmatch->rm_eo - pmatch->rm_so);
177	return (MAC_PARSE_OK);
178    }
179
180    /*
181     * Straight text - duplicate with no substitution.
182     */
183    else {
184	vstring_strcat(dict_regexp->expansion_buf, vstring_str(buf));
185	return (MAC_PARSE_OK);
186    }
187}
188
189/* dict_regexp_regerror - report regexp compile/execute error */
190
static void dict_regexp_regerror(const char *mapname, int lineno, int error,
				         const regex_t *expr)
{
    char    text[256];

    /*
     * Turn the regcomp()/regexec() error code into readable text and log it
     * as a warning, tagged with the map name and line number.
     */
    (void) regerror(error, expr, text, sizeof text);
    msg_warn("regexp map %s, line %d: %s", mapname, lineno, text);
}
199
200 /*
201  * Inlined to reduce function call overhead in the time-critical loop.
202  */
#define DICT_REGEXP_REGEXEC(err, map, line, expr, match, str, nsub, pmatch) \
    (((err) = regexec((expr), (str), (nsub), (pmatch), 0)) == 0 ? (match) : \
     (err) == REG_NOMATCH ? !(match) : \
     (dict_regexp_regerror((map), (line), (err), (expr)), 0))
208
209/* dict_regexp_lookup - match string and perform optional substitution */
210
211static const char *dict_regexp_lookup(DICT *dict, const char *lookup_string)
212{
213    DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
214    DICT_REGEXP_RULE *rule;
215    DICT_REGEXP_IF_RULE *if_rule;
216    DICT_REGEXP_MATCH_RULE *match_rule;
217    DICT_REGEXP_EXPAND_CONTEXT expand_context;
218    int     error;
219    int     nesting = 0;
220
221    dict_errno = 0;
222
223    if (msg_verbose)
224	msg_info("dict_regexp_lookup: %s: %s", dict->name, lookup_string);
225
226    /*
227     * Optionally fold the key.
228     */
229    if (dict->flags & DICT_FLAG_FOLD_MUL) {
230	if (dict->fold_buf == 0)
231	    dict->fold_buf = vstring_alloc(10);
232	vstring_strcpy(dict->fold_buf, lookup_string);
233	lookup_string = lowercase(vstring_str(dict->fold_buf));
234    }
235    for (rule = dict_regexp->head; rule; rule = rule->next) {
236
237	/*
238	 * Skip rules inside failed IF/ENDIF.
239	 */
240	if (nesting < rule->nesting)
241	    continue;
242
243	switch (rule->op) {
244
245	    /*
246	     * Search for the first matching primary expression. Limit the
247	     * overhead for substring substitution to the bare minimum.
248	     */
249	case DICT_REGEXP_OP_MATCH:
250	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
251	    if (!DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
252				     match_rule->first_exp,
253				     match_rule->first_match,
254				     lookup_string,
255				     match_rule->max_sub > 0 ?
256				     match_rule->max_sub + 1 : 0,
257				     dict_regexp->pmatch))
258		continue;
259	    if (match_rule->second_exp
260		&& !DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
261					match_rule->second_exp,
262					match_rule->second_match,
263					lookup_string,
264					NULL_SUBSTITUTIONS,
265					NULL_MATCH_RESULT))
266		continue;
267
268	    /*
269	     * Skip $number substitutions when the replacement text contains
270	     * no $number strings, as learned during the compile time
271	     * pre-scan. The pre-scan already replaced $$ by $.
272	     */
273	    if (match_rule->max_sub == 0)
274		return (match_rule->replacement);
275
276	    /*
277	     * Perform $number substitutions on the replacement text. We
278	     * pre-scanned the replacement text at compile time. Any macro
279	     * expansion errors at this point mean something impossible has
280	     * happened.
281	     */
282	    if (!dict_regexp->expansion_buf)
283		dict_regexp->expansion_buf = vstring_alloc(10);
284	    VSTRING_RESET(dict_regexp->expansion_buf);
285	    expand_context.lookup_string = lookup_string;
286	    expand_context.match_rule = match_rule;
287	    expand_context.dict_regexp = dict_regexp;
288
289	    if (mac_parse(match_rule->replacement, dict_regexp_expand,
290			  (char *) &expand_context) & MAC_PARSE_ERROR)
291		msg_panic("regexp map %s, line %d: bad replacement syntax",
292			  dict->name, rule->lineno);
293	    VSTRING_TERMINATE(dict_regexp->expansion_buf);
294	    return (vstring_str(dict_regexp->expansion_buf));
295
296	    /*
297	     * Conditional.
298	     */
299	case DICT_REGEXP_OP_IF:
300	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
301	    if (DICT_REGEXP_REGEXEC(error, dict->name, rule->lineno,
302			       if_rule->expr, if_rule->match, lookup_string,
303				    NULL_SUBSTITUTIONS, NULL_MATCH_RESULT))
304		nesting++;
305	    continue;
306
307	    /*
308	     * ENDIF after successful IF.
309	     */
310	case DICT_REGEXP_OP_ENDIF:
311	    nesting--;
312	    continue;
313
314	default:
315	    msg_panic("dict_regexp_lookup: impossible operation %d", rule->op);
316	}
317    }
318    return (0);
319}
320
321/* dict_regexp_close - close regexp dictionary */
322
323static void dict_regexp_close(DICT *dict)
324{
325    DICT_REGEXP *dict_regexp = (DICT_REGEXP *) dict;
326    DICT_REGEXP_RULE *rule;
327    DICT_REGEXP_RULE *next;
328    DICT_REGEXP_MATCH_RULE *match_rule;
329    DICT_REGEXP_IF_RULE *if_rule;
330
331    for (rule = dict_regexp->head; rule; rule = next) {
332	next = rule->next;
333	switch (rule->op) {
334	case DICT_REGEXP_OP_MATCH:
335	    match_rule = (DICT_REGEXP_MATCH_RULE *) rule;
336	    if (match_rule->first_exp) {
337		regfree(match_rule->first_exp);
338		myfree((char *) match_rule->first_exp);
339	    }
340	    if (match_rule->second_exp) {
341		regfree(match_rule->second_exp);
342		myfree((char *) match_rule->second_exp);
343	    }
344	    if (match_rule->replacement)
345		myfree((char *) match_rule->replacement);
346	    break;
347	case DICT_REGEXP_OP_IF:
348	    if_rule = (DICT_REGEXP_IF_RULE *) rule;
349	    if (if_rule->expr) {
350		regfree(if_rule->expr);
351		myfree((char *) if_rule->expr);
352	    }
353	    break;
354	case DICT_REGEXP_OP_ENDIF:
355	    break;
356	default:
357	    msg_panic("dict_regexp_close: unknown operation %d", rule->op);
358	}
359	myfree((char *) rule);
360    }
361    if (dict_regexp->pmatch)
362	myfree((char *) dict_regexp->pmatch);
363    if (dict_regexp->expansion_buf)
364	vstring_free(dict_regexp->expansion_buf);
365    if (dict->fold_buf)
366	vstring_free(dict->fold_buf);
367    dict_free(dict);
368}
369
370/* dict_regexp_get_pat - extract one pattern with options from rule */
371
372static int dict_regexp_get_pat(const char *mapname, int lineno, char **bufp,
373			               DICT_REGEXP_PATTERN *pat)
374{
375    char   *p = *bufp;
376    char    re_delim;
377
378    /*
379     * Process negation operators.
380     */
381    pat->match = 1;
382    while (*p == '!') {
383	pat->match = !pat->match;
384	p++;
385    }
386
387    /*
388     * Grr...aceful handling of whitespace after '!'.
389     */
390    while (*p && ISSPACE(*p))
391	p++;
392    if (*p == 0) {
393	msg_warn("regexp map %s, line %d: no regexp: skipping this rule",
394		 mapname, lineno);
395	return (0);
396    }
397
398    /*
399     * Search for the closing delimiter, handling backslash escape.
400     */
401    re_delim = *p++;
402    pat->regexp = p;
403    while (*p) {
404	if (*p == '\\') {
405	    if (p[1])
406		p++;
407	    else
408		break;
409	} else if (*p == re_delim) {
410	    break;
411	}
412	++p;
413    }
414    if (!*p) {
415	msg_warn("regexp map %s, line %d: no closing regexp delimiter \"%c\": "
416		 "skipping this rule", mapname, lineno, re_delim);
417	return (0);
418    }
419    *p++ = 0;					/* null terminate */
420
421    /*
422     * Search for options.
423     */
424    pat->options = REG_EXTENDED | REG_ICASE;
425    while (*p && !ISSPACE(*p) && *p != '!') {
426	switch (*p) {
427	case 'i':
428	    pat->options ^= REG_ICASE;
429	    break;
430	case 'm':
431	    pat->options ^= REG_NEWLINE;
432	    break;
433	case 'x':
434	    pat->options ^= REG_EXTENDED;
435	    break;
436	default:
437	    msg_warn("regexp map %s, line %d: unknown regexp option \"%c\": "
438		     "skipping this rule", mapname, lineno, *p);
439	    return (0);
440	}
441	++p;
442    }
443    *bufp = p;
444    return (1);
445}
446
447/* dict_regexp_get_pats - get the primary and second patterns and flags */
448
449static int dict_regexp_get_pats(const char *mapname, int lineno, char **p,
450				        DICT_REGEXP_PATTERN *first_pat,
451				        DICT_REGEXP_PATTERN *second_pat)
452{
453
454    /*
455     * Get the primary and optional secondary patterns and their flags.
456     */
457    if (dict_regexp_get_pat(mapname, lineno, p, first_pat) == 0)
458	return (0);
459    if (**p == '!') {
460#if 0
461	static int bitrot_warned = 0;
462
463	if (bitrot_warned == 0) {
464	    msg_warn("regexp file %s, line %d: /pattern1/!/pattern2/ goes away,"
465		 " use \"if !/pattern2/ ... /pattern1/ ... endif\" instead",
466		     mapname, lineno);
467	    bitrot_warned = 1;
468	}
469#endif
470	if (dict_regexp_get_pat(mapname, lineno, p, second_pat) == 0)
471	    return (0);
472    } else {
473	second_pat->regexp = 0;
474    }
475    return (1);
476}
477
478/* dict_regexp_prescan - find largest $number in replacement text */
479
480static int dict_regexp_prescan(int type, VSTRING *buf, char *context)
481{
482    DICT_REGEXP_PRESCAN_CONTEXT *ctxt = (DICT_REGEXP_PRESCAN_CONTEXT *) context;
483    size_t  n;
484
485    /*
486     * Keep a copy of literal text (with $$ already replaced by $) if and
487     * only if the replacement text contains no $number expression. This way
488     * we can avoid having to scan the replacement text at lookup time.
489     */
490    if (type == MAC_PARSE_VARNAME) {
491	if (ctxt->literal) {
492	    myfree(ctxt->literal);
493	    ctxt->literal = 0;
494	}
495	if (!alldig(vstring_str(buf))) {
496	    msg_warn("regexp map %s, line %d: non-numeric replacement index \"%s\"",
497		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
498	    return (MAC_PARSE_ERROR);
499	}
500	n = atoi(vstring_str(buf));
501	if (n < 1) {
502	    msg_warn("regexp map %s, line %d: out-of-range replacement index \"%s\"",
503		     ctxt->mapname, ctxt->lineno, vstring_str(buf));
504	    return (MAC_PARSE_ERROR);
505	}
506	if (n > ctxt->max_sub)
507	    ctxt->max_sub = n;
508    } else if (type == MAC_PARSE_LITERAL && ctxt->max_sub == 0) {
509	if (ctxt->literal)
510	    msg_panic("regexp map %s, line %d: multiple literals but no $number",
511		      ctxt->mapname, ctxt->lineno);
512	ctxt->literal = mystrdup(vstring_str(buf));
513    }
514    return (MAC_PARSE_OK);
515}
516
517/* dict_regexp_compile_pat - compile one pattern */
518
519static regex_t *dict_regexp_compile_pat(const char *mapname, int lineno,
520					        DICT_REGEXP_PATTERN *pat)
521{
522    int     error;
523    regex_t *expr;
524
525    expr = (regex_t *) mymalloc(sizeof(*expr));
526    error = regcomp(expr, pat->regexp, pat->options);
527    if (error != 0) {
528	dict_regexp_regerror(mapname, lineno, error, expr);
529	myfree((char *) expr);
530	return (0);
531    }
532    return (expr);
533}
534
535/* dict_regexp_rule_alloc - fill in a generic rule structure */
536
537static DICT_REGEXP_RULE *dict_regexp_rule_alloc(int op, int nesting,
538						        int lineno,
539						        size_t size)
540{
541    DICT_REGEXP_RULE *rule;
542
543    rule = (DICT_REGEXP_RULE *) mymalloc(size);
544    rule->op = op;
545    rule->nesting = nesting;
546    rule->lineno = lineno;
547    rule->next = 0;
548
549    return (rule);
550}
551
552/* dict_regexp_parseline - parse one rule */
553
554static DICT_REGEXP_RULE *dict_regexp_parseline(const char *mapname, int lineno,
555					            char *line, int nesting,
556					               int dict_flags)
557{
558    char   *p;
559
560    p = line;
561
562    /*
563     * An ordinary rule takes one or two patterns and replacement text.
564     */
565    if (!ISALNUM(*p)) {
566	DICT_REGEXP_PATTERN first_pat;
567	DICT_REGEXP_PATTERN second_pat;
568	DICT_REGEXP_PRESCAN_CONTEXT prescan_context;
569	regex_t *first_exp = 0;
570	regex_t *second_exp;
571	DICT_REGEXP_MATCH_RULE *match_rule;
572
573	/*
574	 * Get the primary and the optional secondary patterns.
575	 */
576	if (!dict_regexp_get_pats(mapname, lineno, &p, &first_pat, &second_pat))
577	    return (0);
578
579	/*
580	 * Get the replacement text.
581	 */
582	while (*p && ISSPACE(*p))
583	    ++p;
584	if (!*p) {
585	    msg_warn("regexp map %s, line %d: using empty replacement string",
586		     mapname, lineno);
587	}
588
589	/*
590	 * Find the highest-numbered $number in the replacement text. We can
591	 * speed up pattern matching 1) by passing hints to the regexp
592	 * compiler, setting the REG_NOSUB flag when the replacement text
593	 * contains no $number string; 2) by passing hints to the regexp
594	 * execution code, limiting the amount of text that is made available
595	 * for substitution.
596	 */
597	prescan_context.mapname = mapname;
598	prescan_context.lineno = lineno;
599	prescan_context.max_sub = 0;
600	prescan_context.literal = 0;
601
602	/*
603	 * The optimizer will eliminate code duplication and/or dead code.
604	 */
605#define CREATE_MATCHOP_ERROR_RETURN(rval) do { \
606	if (first_exp) { \
607	    regfree(first_exp); \
608	    myfree((char *) first_exp); \
609	} \
610	if (prescan_context.literal) \
611	    myfree(prescan_context.literal); \
612	return (rval); \
613    } while (0)
614
615	if (mac_parse(p, dict_regexp_prescan, (char *) &prescan_context)
616	    & MAC_PARSE_ERROR) {
617	    msg_warn("regexp map %s, line %d: bad replacement syntax: "
618		     "skipping this rule", mapname, lineno);
619	    CREATE_MATCHOP_ERROR_RETURN(0);
620	}
621
622	/*
623	 * Compile the primary and the optional secondary pattern. Speed up
624	 * execution when no matched text needs to be substituted into the
625	 * result string, or when the highest numbered substring is less than
626	 * the total number of () subpatterns.
627	 */
628	if (prescan_context.max_sub == 0)
629	    first_pat.options |= REG_NOSUB;
630	if (prescan_context.max_sub > 0 && first_pat.match == 0) {
631	    msg_warn("regexp map %s, line %d: $number found in negative match "
632		   "replacement text: skipping this rule", mapname, lineno);
633	    CREATE_MATCHOP_ERROR_RETURN(0);
634	}
635	if (prescan_context.max_sub > 0 && (dict_flags & DICT_FLAG_NO_REGSUB)) {
636	    msg_warn("regexp map %s, line %d: "
637		     "regular expression substitution is not allowed: "
638		     "skipping this rule", mapname, lineno);
639	    CREATE_MATCHOP_ERROR_RETURN(0);
640	}
641	if ((first_exp = dict_regexp_compile_pat(mapname, lineno,
642						 &first_pat)) == 0)
643	    CREATE_MATCHOP_ERROR_RETURN(0);
644	if (prescan_context.max_sub > first_exp->re_nsub) {
645	    msg_warn("regexp map %s, line %d: out of range replacement index \"%d\": "
646		     "skipping this rule", mapname, lineno,
647		     (int) prescan_context.max_sub);
648	    CREATE_MATCHOP_ERROR_RETURN(0);
649	}
650	if (second_pat.regexp != 0) {
651	    second_pat.options |= REG_NOSUB;
652	    if ((second_exp = dict_regexp_compile_pat(mapname, lineno,
653						      &second_pat)) == 0)
654		CREATE_MATCHOP_ERROR_RETURN(0);
655	} else {
656	    second_exp = 0;
657	}
658	match_rule = (DICT_REGEXP_MATCH_RULE *)
659	    dict_regexp_rule_alloc(DICT_REGEXP_OP_MATCH, nesting, lineno,
660				   sizeof(DICT_REGEXP_MATCH_RULE));
661	match_rule->first_exp = first_exp;
662	match_rule->first_match = first_pat.match;
663	match_rule->max_sub = prescan_context.max_sub;
664	match_rule->second_exp = second_exp;
665	match_rule->second_match = second_pat.match;
666	if (prescan_context.literal)
667	    match_rule->replacement = prescan_context.literal;
668	else
669	    match_rule->replacement = mystrdup(p);
670	return ((DICT_REGEXP_RULE *) match_rule);
671    }
672
673    /*
674     * The IF operator takes one pattern but no replacement text.
675     */
676    else if (strncasecmp(p, "IF", 2) == 0 && !ISALNUM(p[2])) {
677	DICT_REGEXP_PATTERN pattern;
678	regex_t *expr;
679	DICT_REGEXP_IF_RULE *if_rule;
680
681	p += 2;
682	while (*p && ISSPACE(*p))
683	    p++;
684	if (!dict_regexp_get_pat(mapname, lineno, &p, &pattern))
685	    return (0);
686	while (*p && ISSPACE(*p))
687	    ++p;
688	if (*p) {
689	    msg_warn("regexp map %s, line %d: ignoring extra text after"
690		     " IF statement: \"%s\"", mapname, lineno, p);
691	    msg_warn("regexp map %s, line %d: do not prepend whitespace"
692		     " to statements between IF and ENDIF", mapname, lineno);
693	}
694	if ((expr = dict_regexp_compile_pat(mapname, lineno, &pattern)) == 0)
695	    return (0);
696	if_rule = (DICT_REGEXP_IF_RULE *)
697	    dict_regexp_rule_alloc(DICT_REGEXP_OP_IF, nesting, lineno,
698				   sizeof(DICT_REGEXP_IF_RULE));
699	if_rule->expr = expr;
700	if_rule->match = pattern.match;
701	return ((DICT_REGEXP_RULE *) if_rule);
702    }
703
704    /*
705     * The ENDIF operator takes no patterns and no replacement text.
706     */
707    else if (strncasecmp(p, "ENDIF", 5) == 0 && !ISALNUM(p[5])) {
708	DICT_REGEXP_RULE *rule;
709
710	p += 5;
711	if (nesting == 0) {
712	    msg_warn("regexp map %s, line %d: ignoring ENDIF without matching IF",
713		     mapname, lineno);
714	    return (0);
715	}
716	while (*p && ISSPACE(*p))
717	    ++p;
718	if (*p)
719	    msg_warn("regexp map %s, line %d: ignoring extra text after ENDIF",
720		     mapname, lineno);
721	rule = dict_regexp_rule_alloc(DICT_REGEXP_OP_ENDIF, nesting, lineno,
722				      sizeof(DICT_REGEXP_RULE));
723	return (rule);
724    }
725
726    /*
727     * Unrecognized input.
728     */
729    else {
730	msg_warn("regexp map %s, line %d: ignoring unrecognized request",
731		 mapname, lineno);
732	return (0);
733    }
734}
735
736/* dict_regexp_open - load and compile a file containing regular expressions */
737
738DICT   *dict_regexp_open(const char *mapname, int unused_flags, int dict_flags)
739{
740    DICT_REGEXP *dict_regexp;
741    VSTREAM *map_fp;
742    VSTRING *line_buffer;
743    DICT_REGEXP_RULE *rule;
744    DICT_REGEXP_RULE *last_rule = 0;
745    int     lineno = 0;
746    size_t  max_sub = 0;
747    int     nesting = 0;
748    char   *p;
749
750    line_buffer = vstring_alloc(100);
751
752    dict_regexp = (DICT_REGEXP *) dict_alloc(DICT_TYPE_REGEXP, mapname,
753					     sizeof(*dict_regexp));
754    dict_regexp->dict.lookup = dict_regexp_lookup;
755    dict_regexp->dict.close = dict_regexp_close;
756    dict_regexp->dict.flags = dict_flags | DICT_FLAG_PATTERN;
757    if (dict_flags & DICT_FLAG_FOLD_MUL)
758	dict_regexp->dict.fold_buf = vstring_alloc(10);
759    dict_regexp->head = 0;
760    dict_regexp->pmatch = 0;
761    dict_regexp->expansion_buf = 0;
762
763    /*
764     * Parse the regexp table.
765     */
766    if ((map_fp = vstream_fopen(mapname, O_RDONLY, 0)) == 0)
767	msg_fatal("open %s: %m", mapname);
768
769    while (readlline(line_buffer, map_fp, &lineno)) {
770	p = vstring_str(line_buffer);
771	trimblanks(p, 0)[0] = 0;
772	if (*p == 0)
773	    continue;
774	rule = dict_regexp_parseline(mapname, lineno, p, nesting, dict_flags);
775	if (rule == 0)
776	    continue;
777	if (rule->op == DICT_REGEXP_OP_MATCH) {
778	    if (((DICT_REGEXP_MATCH_RULE *) rule)->max_sub > max_sub)
779		max_sub = ((DICT_REGEXP_MATCH_RULE *) rule)->max_sub;
780	} else if (rule->op == DICT_REGEXP_OP_IF) {
781	    nesting++;
782	} else if (rule->op == DICT_REGEXP_OP_ENDIF) {
783	    nesting--;
784	}
785	if (last_rule == 0)
786	    dict_regexp->head = rule;
787	else
788	    last_rule->next = rule;
789	last_rule = rule;
790    }
791
792    if (nesting)
793	msg_warn("regexp map %s, line %d: more IFs than ENDIFs",
794		 mapname, lineno);
795
796    /*
797     * Allocate space for only as many matched substrings as used in the
798     * replacement text.
799     */
800    if (max_sub > 0)
801	dict_regexp->pmatch =
802	    (regmatch_t *) mymalloc(sizeof(regmatch_t) * (max_sub + 1));
803
804    /*
805     * Clean up.
806     */
807    vstring_free(line_buffer);
808    vstream_fclose(map_fp);
809
810    return (DICT_DEBUG (&dict_regexp->dict));
811}
812
813#endif
814