119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
1319304Speterstatic const char sccsid[] = "@(#)ex_subst.c	10.37 (Berkeley) 9/15/96";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
1819304Speter#include <sys/time.h>
1919304Speter
2019304Speter#include <bitstring.h>
2119304Speter#include <ctype.h>
2219304Speter#include <errno.h>
2319304Speter#include <limits.h>
2419304Speter#include <stdio.h>
2519304Speter#include <stdlib.h>
2619304Speter#include <string.h>
2719304Speter#include <unistd.h>
2819304Speter
2919304Speter#include "../common/common.h"
3019304Speter#include "../vi/vi.h"
3119304Speter
3219304Speter#define	SUB_FIRST	0x01		/* The 'r' flag isn't reasonable. */
3319304Speter#define	SUB_MUSTSETR	0x02		/* The 'r' flag is required. */
3419304Speter
3519304Speterstatic int re_conv __P((SCR *, char **, size_t *, int *));
3619304Speterstatic int re_cscope_conv __P((SCR *, char **, size_t *, int *));
3719304Speterstatic int re_sub __P((SCR *,
3819304Speter		char *, char **, size_t *, size_t *, regmatch_t [10]));
3919304Speterstatic int re_tag_conv __P((SCR *, char **, size_t *, int *));
4019304Speterstatic int s __P((SCR *, EXCMD *, char *, regex_t *, u_int));
4119304Speter
4219304Speter/*
4319304Speter * ex_s --
4419304Speter *	[line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]]
4519304Speter *
4619304Speter *	Substitute on lines matching a pattern.
4719304Speter *
4819304Speter * PUBLIC: int ex_s __P((SCR *, EXCMD *));
4919304Speter */
5019304Speterint
5119304Speterex_s(sp, cmdp)
5219304Speter	SCR *sp;
5319304Speter	EXCMD *cmdp;
5419304Speter{
5519304Speter	regex_t *re;
5619304Speter	size_t blen, len;
5719304Speter	u_int flags;
5819304Speter	int delim;
5919304Speter	char *bp, *ptrn, *rep, *p, *t;
6019304Speter
6119304Speter	/*
6219304Speter	 * Skip leading white space.
6319304Speter	 *
6419304Speter	 * !!!
6519304Speter	 * Historic vi allowed any non-alphanumeric to serve as the
6619304Speter	 * substitution command delimiter.
6719304Speter	 *
6819304Speter	 * !!!
6919304Speter	 * If the arguments are empty, it's the same as &, i.e. we
7019304Speter	 * repeat the last substitution.
7119304Speter	 */
7219304Speter	if (cmdp->argc == 0)
7319304Speter		goto subagain;
7419304Speter	for (p = cmdp->argv[0]->bp,
7519304Speter	    len = cmdp->argv[0]->len; len > 0; --len, ++p) {
7619304Speter		if (!isblank(*p))
7719304Speter			break;
7819304Speter	}
7919304Speter	if (len == 0)
8019304Spetersubagain:	return (ex_subagain(sp, cmdp));
8119304Speter
8219304Speter	delim = *p++;
8319304Speter	if (isalnum(delim) || delim == '\\')
8419304Speter		return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR));
8519304Speter
8619304Speter	/*
8719304Speter	 * !!!
8819304Speter	 * The full-blown substitute command reset the remembered
8919304Speter	 * state of the 'c' and 'g' suffices.
9019304Speter	 */
9119304Speter	sp->c_suffix = sp->g_suffix = 0;
9219304Speter
9319304Speter	/*
9419304Speter	 * Get the pattern string, toss escaping characters.
9519304Speter	 *
9619304Speter	 * !!!
9719304Speter	 * Historic vi accepted any of the following forms:
9819304Speter	 *
9919304Speter	 *	:s/abc/def/		change "abc" to "def"
10019304Speter	 *	:s/abc/def		change "abc" to "def"
10119304Speter	 *	:s/abc/			delete "abc"
10219304Speter	 *	:s/abc			delete "abc"
10319304Speter	 *
10419304Speter	 * QUOTING NOTE:
10519304Speter	 *
10619304Speter	 * Only toss an escaping character if it escapes a delimiter.
10719304Speter	 * This means that "s/A/\\\\f" replaces "A" with "\\f".  It
10819304Speter	 * would be nice to be more regular, i.e. for each layer of
10919304Speter	 * escaping a single escaping character is removed, but that's
11019304Speter	 * not how the historic vi worked.
11119304Speter	 */
11219304Speter	for (ptrn = t = p;;) {
11319304Speter		if (p[0] == '\0' || p[0] == delim) {
11419304Speter			if (p[0] == delim)
11519304Speter				++p;
11619304Speter			/*
11719304Speter			 * !!!
11819304Speter			 * Nul terminate the pattern string -- it's passed
11919304Speter			 * to regcomp which doesn't understand anything else.
12019304Speter			 */
12119304Speter			*t = '\0';
12219304Speter			break;
12319304Speter		}
12419304Speter		if (p[0] == '\\')
12519304Speter			if (p[1] == delim)
12619304Speter				++p;
12719304Speter			else if (p[1] == '\\')
12819304Speter				*t++ = *p++;
12919304Speter		*t++ = *p++;
13019304Speter	}
13119304Speter
13219304Speter	/*
13319304Speter	 * If the pattern string is empty, use the last RE (not just the
13419304Speter	 * last substitution RE).
13519304Speter	 */
13619304Speter	if (*ptrn == '\0') {
13719304Speter		if (sp->re == NULL) {
13819304Speter			ex_emsg(sp, NULL, EXM_NOPREVRE);
13919304Speter			return (1);
14019304Speter		}
14119304Speter
14219304Speter		/* Re-compile the RE if necessary. */
14319304Speter		if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
14419304Speter		    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
14519304Speter			return (1);
14619304Speter		flags = 0;
14719304Speter	} else {
14819304Speter		/*
14919304Speter		 * !!!
15019304Speter		 * Compile the RE.  Historic practice is that substitutes set
15119304Speter		 * the search direction as well as both substitute and search
15219304Speter		 * RE's.  We compile the RE twice, as we don't want to bother
15319304Speter		 * ref counting the pattern string and (opaque) structure.
15419304Speter		 */
15519304Speter		if (re_compile(sp, ptrn, t - ptrn,
15619304Speter		    &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH))
15719304Speter			return (1);
15819304Speter		if (re_compile(sp, ptrn, t - ptrn,
15919304Speter		    &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST))
16019304Speter			return (1);
16119304Speter
16219304Speter		flags = SUB_FIRST;
16319304Speter		sp->searchdir = FORWARD;
16419304Speter	}
16519304Speter	re = &sp->re_c;
16619304Speter
16719304Speter	/*
16819304Speter	 * Get the replacement string.
16919304Speter	 *
17019304Speter	 * The special character & (\& if O_MAGIC not set) matches the
17119304Speter	 * entire RE.  No handling of & is required here, it's done by
17219304Speter	 * re_sub().
17319304Speter	 *
17419304Speter	 * The special character ~ (\~ if O_MAGIC not set) inserts the
17519304Speter	 * previous replacement string into this replacement string.
17619304Speter	 * Count ~'s to figure out how much space we need.  We could
17719304Speter	 * special case nonexistent last patterns or whether or not
17819304Speter	 * O_MAGIC is set, but it's probably not worth the effort.
17919304Speter	 *
18019304Speter	 * QUOTING NOTE:
18119304Speter	 *
18219304Speter	 * Only toss an escaping character if it escapes a delimiter or
18319304Speter	 * if O_MAGIC is set and it escapes a tilde.
18419304Speter	 *
18519304Speter	 * !!!
18619304Speter	 * If the entire replacement pattern is "%", then use the last
18719304Speter	 * replacement pattern.  This semantic was added to vi in System
18819304Speter	 * V and then percolated elsewhere, presumably around the time
18919304Speter	 * that it was added to their version of ed(1).
19019304Speter	 */
19119304Speter	if (p[0] == '\0' || p[0] == delim) {
19219304Speter		if (p[0] == delim)
19319304Speter			++p;
19419304Speter		if (sp->repl != NULL)
19519304Speter			free(sp->repl);
19619304Speter		sp->repl = NULL;
19719304Speter		sp->repl_len = 0;
19819304Speter	} else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim))
19919304Speter		p += p[1] == delim ? 2 : 1;
20019304Speter	else {
20119304Speter		for (rep = p, len = 0;
20219304Speter		    p[0] != '\0' && p[0] != delim; ++p, ++len)
20319304Speter			if (p[0] == '~')
20419304Speter				len += sp->repl_len;
20519304Speter		GET_SPACE_RET(sp, bp, blen, len);
20619304Speter		for (t = bp, len = 0, p = rep;;) {
20719304Speter			if (p[0] == '\0' || p[0] == delim) {
20819304Speter				if (p[0] == delim)
20919304Speter					++p;
21019304Speter				break;
21119304Speter			}
21219304Speter			if (p[0] == '\\') {
21319304Speter				if (p[1] == delim)
21419304Speter					++p;
21519304Speter				else if (p[1] == '\\') {
21619304Speter					*t++ = *p++;
21719304Speter					++len;
21819304Speter				} else if (p[1] == '~') {
21919304Speter					++p;
22019304Speter					if (!O_ISSET(sp, O_MAGIC))
22119304Speter						goto tilde;
22219304Speter				}
22319304Speter			} else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) {
22419304Spetertilde:				++p;
22519304Speter				memcpy(t, sp->repl, sp->repl_len);
22619304Speter				t += sp->repl_len;
22719304Speter				len += sp->repl_len;
22819304Speter				continue;
22919304Speter			}
23019304Speter			*t++ = *p++;
23119304Speter			++len;
23219304Speter		}
23319304Speter		if ((sp->repl_len = len) != 0) {
23419304Speter			if (sp->repl != NULL)
23519304Speter				free(sp->repl);
23619304Speter			if ((sp->repl = malloc(len)) == NULL) {
23719304Speter				msgq(sp, M_SYSERR, NULL);
23819304Speter				FREE_SPACE(sp, bp, blen);
23919304Speter				return (1);
24019304Speter			}
24119304Speter			memcpy(sp->repl, bp, len);
24219304Speter		}
24319304Speter		FREE_SPACE(sp, bp, blen);
24419304Speter	}
24519304Speter	return (s(sp, cmdp, p, re, flags));
24619304Speter}
24719304Speter
24819304Speter/*
24919304Speter * ex_subagain --
25019304Speter *	[line [,line]] & [cgr] [count] [#lp]]
25119304Speter *
25219304Speter *	Substitute using the last substitute RE and replacement pattern.
25319304Speter *
25419304Speter * PUBLIC: int ex_subagain __P((SCR *, EXCMD *));
25519304Speter */
25619304Speterint
25719304Speterex_subagain(sp, cmdp)
25819304Speter	SCR *sp;
25919304Speter	EXCMD *cmdp;
26019304Speter{
26119304Speter	if (sp->subre == NULL) {
26219304Speter		ex_emsg(sp, NULL, EXM_NOPREVRE);
26319304Speter		return (1);
26419304Speter	}
26519304Speter	if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp,
26619304Speter	    sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST))
26719304Speter		return (1);
26819304Speter	return (s(sp,
26919304Speter	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0));
27019304Speter}
27119304Speter
27219304Speter/*
27319304Speter * ex_subtilde --
27419304Speter *	[line [,line]] ~ [cgr] [count] [#lp]]
27519304Speter *
27619304Speter *	Substitute using the last RE and last substitute replacement pattern.
27719304Speter *
27819304Speter * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *));
27919304Speter */
28019304Speterint
28119304Speterex_subtilde(sp, cmdp)
28219304Speter	SCR *sp;
28319304Speter	EXCMD *cmdp;
28419304Speter{
28519304Speter	if (sp->re == NULL) {
28619304Speter		ex_emsg(sp, NULL, EXM_NOPREVRE);
28719304Speter		return (1);
28819304Speter	}
28919304Speter	if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
29019304Speter	    sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH))
29119304Speter		return (1);
29219304Speter	return (s(sp,
29319304Speter	    cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0));
29419304Speter}
29519304Speter
29619304Speter/*
29719304Speter * s --
29819304Speter * Do the substitution.  This stuff is *really* tricky.  There are lots of
29919304Speter * special cases, and general nastiness.  Don't mess with it unless you're
30019304Speter * pretty confident.
30119304Speter *
30219304Speter * The nasty part of the substitution is what happens when the replacement
30319304Speter * string contains newlines.  It's a bit tricky -- consider the information
30419304Speter * that has to be retained for "s/f\(o\)o/^M\1^M\1/".  The solution here is
30519304Speter * to build a set of newline offsets which we use to break the line up later,
30619304Speter * when the replacement is done.  Don't change it unless you're *damned*
30719304Speter * confident.
30819304Speter */
/*
 * NEEDNEWLINE --
 *	Make room for one more entry in the screen's newline offset
 *	array.  When the array is full (newl_len == newl_cnt) it is
 *	grown by 25 entries.  If the array pointer is NULL after the
 *	REALLOC, both the capacity and the entry count are zeroed, so
 *	that no later loop over newl_cnt indexes the missing array, and
 *	the enclosing function returns 1.
 *
 *	The expansion is a bare brace block containing a return
 *	statement: it may only be used inside a function returning int,
 *	and the argument is evaluated more than once.
 */
#define	NEEDNEWLINE(sp) {						\
	if ((sp)->newl_len == (sp)->newl_cnt) {				\
		(sp)->newl_len += 25;					\
		REALLOC(sp, (sp)->newl, size_t *,			\
		    (sp)->newl_len * sizeof(size_t));			\
		if ((sp)->newl == NULL) {				\
			(sp)->newl_len = 0;				\
			(sp)->newl_cnt = 0;				\
			return (1);					\
		}							\
	}								\
}
32019304Speter
/*
 * BUILD --
 *	Append len bytes starting at l to the build buffer.  Works on
 *	the caller's variables lb (buffer), lbclen (bytes in use) and
 *	lblen (bytes allocated).  If the buffer is too small it is grown
 *	by the larger of the needed total and 256 bytes; if lb is NULL
 *	after the REALLOC, lbclen is zeroed and the enclosing function
 *	returns 1.
 *
 *	The expansion is a bare brace block, not do/while(0), because
 *	some invocations are written without a trailing semicolon.  The
 *	len argument is evaluated more than once.
 */
#define	BUILD(sp, l, len) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
	}								\
	memcpy(lb + lbclen, (l), (len));				\
	lbclen += (len);						\
}
33319304Speter
/*
 * NEEDSP --
 *	Make sure the build buffer has room for len more bytes, without
 *	copying anything.  Works on the caller's variables lb, lbclen
 *	and lblen, growing the buffer by the larger of the needed total
 *	and 256 bytes.  When the buffer is reallocated, pnt is reset to
 *	lb + lbclen, the first unused byte of the new buffer; otherwise
 *	pnt is left alone.  If lb is NULL after the REALLOC, lbclen is
 *	zeroed and the enclosing function returns 1.
 *
 *	The expansion is a bare brace block containing a return
 *	statement, and the len argument is evaluated more than once.
 */
#define	NEEDSP(sp, len, pnt) {						\
	if (lblen < lbclen + (len)) {					\
		lblen += MAX(lbclen + (len), 256);			\
		REALLOC(sp, lb, char *, lblen);				\
		if (lb == NULL) {					\
			lbclen = 0;					\
			return (1);					\
		}							\
		(pnt) = lb + lbclen;					\
	}								\
}
34519304Speter
34619304Speterstatic int
34719304Speters(sp, cmdp, s, re, flags)
34819304Speter	SCR *sp;
34919304Speter	EXCMD *cmdp;
35019304Speter	char *s;
35119304Speter	regex_t *re;
35219304Speter	u_int flags;
35319304Speter{
35419304Speter	EVENT ev;
35519304Speter	MARK from, to;
35619304Speter	TEXTH tiq;
35719304Speter	recno_t elno, lno, slno;
35819304Speter	regmatch_t match[10];
35919304Speter	size_t blen, cnt, last, lbclen, lblen, len, llen;
36019304Speter	size_t offset, saved_offset, scno;
36119304Speter	int cflag, lflag, nflag, pflag, rflag;
36219304Speter	int didsub, do_eol_match, eflags, empty_ok, eval;
36319304Speter	int linechanged, matched, quit, rval;
36419304Speter	char *bp, *lb;
36519304Speter
36619304Speter	NEEDFILE(sp, cmdp);
36719304Speter
36819304Speter	slno = sp->lno;
36919304Speter	scno = sp->cno;
37019304Speter
37119304Speter	/*
37219304Speter	 * !!!
37319304Speter	 * Historically, the 'g' and 'c' suffices were always toggled as flags,
37419304Speter	 * so ":s/A/B/" was the same as ":s/A/B/ccgg".  If O_EDCOMPATIBLE was
37519304Speter	 * not set, they were initialized to 0 for all substitute commands.  If
37619304Speter	 * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user
37719304Speter	 * specified substitute/replacement patterns (see ex_s()).
37819304Speter	 */
37919304Speter	if (!O_ISSET(sp, O_EDCOMPATIBLE))
38019304Speter		sp->c_suffix = sp->g_suffix = 0;
38119304Speter
38219304Speter	/*
38319304Speter	 * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but
38419304Speter	 * it only displayed the last change.  I'd disallow them, but they are
38519304Speter	 * useful in combination with the [v]global commands.  In the current
38619304Speter	 * model the problem is combining them with the 'c' flag -- the screen
38719304Speter	 * would have to flip back and forth between the confirm screen and the
38819304Speter	 * ex print screen, which would be pretty awful.  We do display all
38919304Speter	 * changes, though, for what that's worth.
39019304Speter	 *
39119304Speter	 * !!!
39219304Speter	 * Historic vi was fairly strict about the order of "options", the
39319304Speter	 * count, and "flags".  I'm somewhat fuzzy on the difference between
39419304Speter	 * options and flags, anyway, so this is a simpler approach, and we
	 * just take them in whatever order the user gives them.  (The ex
39619304Speter	 * usage statement doesn't reflect this.)
39719304Speter	 */
39819304Speter	cflag = lflag = nflag = pflag = rflag = 0;
39919304Speter	if (s == NULL)
40019304Speter		goto noargs;
40119304Speter	for (lno = OOBLNO; *s != '\0'; ++s)
40219304Speter		switch (*s) {
40319304Speter		case ' ':
40419304Speter		case '\t':
40519304Speter			continue;
40619304Speter		case '+':
40719304Speter			++cmdp->flagoff;
40819304Speter			break;
40919304Speter		case '-':
41019304Speter			--cmdp->flagoff;
41119304Speter			break;
41219304Speter		case '0': case '1': case '2': case '3': case '4':
41319304Speter		case '5': case '6': case '7': case '8': case '9':
41419304Speter			if (lno != OOBLNO)
41519304Speter				goto usage;
41619304Speter			errno = 0;
41719304Speter			lno = strtoul(s, &s, 10);
41819304Speter			if (*s == '\0')		/* Loop increment correction. */
41919304Speter				--s;
42019304Speter			if (errno == ERANGE) {
42119304Speter				if (lno == LONG_MAX)
42219304Speter					msgq(sp, M_ERR, "153|Count overflow");
42319304Speter				else if (lno == LONG_MIN)
42419304Speter					msgq(sp, M_ERR, "154|Count underflow");
42519304Speter				else
42619304Speter					msgq(sp, M_SYSERR, NULL);
42719304Speter				return (1);
42819304Speter			}
42919304Speter			/*
43019304Speter			 * In historic vi, the count was inclusive from the
43119304Speter			 * second address.
43219304Speter			 */
43319304Speter			cmdp->addr1.lno = cmdp->addr2.lno;
43419304Speter			cmdp->addr2.lno += lno - 1;
43519304Speter			if (!db_exist(sp, cmdp->addr2.lno) &&
43619304Speter			    db_last(sp, &cmdp->addr2.lno))
43719304Speter				return (1);
43819304Speter			break;
43919304Speter		case '#':
44019304Speter			nflag = 1;
44119304Speter			break;
44219304Speter		case 'c':
44319304Speter			sp->c_suffix = !sp->c_suffix;
44419304Speter
44519304Speter			/* Ex text structure initialization. */
44619304Speter			if (F_ISSET(sp, SC_EX)) {
44719304Speter				memset(&tiq, 0, sizeof(TEXTH));
44819304Speter				CIRCLEQ_INIT(&tiq);
44919304Speter			}
45019304Speter			break;
45119304Speter		case 'g':
45219304Speter			sp->g_suffix = !sp->g_suffix;
45319304Speter			break;
45419304Speter		case 'l':
45519304Speter			lflag = 1;
45619304Speter			break;
45719304Speter		case 'p':
45819304Speter			pflag = 1;
45919304Speter			break;
46019304Speter		case 'r':
46119304Speter			if (LF_ISSET(SUB_FIRST)) {
46219304Speter				msgq(sp, M_ERR,
46319304Speter		    "155|Regular expression specified; r flag meaningless");
46419304Speter				return (1);
46519304Speter			}
46619304Speter			if (!F_ISSET(sp, SC_RE_SEARCH)) {
46719304Speter				ex_emsg(sp, NULL, EXM_NOPREVRE);
46819304Speter				return (1);
46919304Speter			}
47019304Speter			rflag = 1;
47119304Speter			re = &sp->re_c;
47219304Speter			break;
47319304Speter		default:
47419304Speter			goto usage;
47519304Speter		}
47619304Speter
47719304Speter	if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) {
47819304Speterusage:		ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE);
47919304Speter		return (1);
48019304Speter	}
48119304Speter
48219304Speternoargs:	if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) {
48319304Speter		msgq(sp, M_ERR,
48419304Speter"156|The #, l and p flags may not be combined with the c flag in vi mode");
48519304Speter		return (1);
48619304Speter	}
48719304Speter
48819304Speter	/*
48919304Speter	 * bp:		if interactive, line cache
49019304Speter	 * blen:	if interactive, line cache length
49119304Speter	 * lb:		build buffer pointer.
49219304Speter	 * lbclen:	current length of built buffer.
49319304Speter	 * lblen;	length of build buffer.
49419304Speter	 */
49519304Speter	bp = lb = NULL;
49619304Speter	blen = lbclen = lblen = 0;
49719304Speter
49819304Speter	/* For each line... */
49919304Speter	for (matched = quit = 0, lno = cmdp->addr1.lno,
50019304Speter	    elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) {
50119304Speter
50219304Speter		/* Someone's unhappy, time to stop. */
50319304Speter		if (INTERRUPTED(sp))
50419304Speter			break;
50519304Speter
50619304Speter		/* Get the line. */
50719304Speter		if (db_get(sp, lno, DBG_FATAL, &s, &llen))
50819304Speter			goto err;
50919304Speter
51019304Speter		/*
51119304Speter		 * Make a local copy if doing confirmation -- when calling
51219304Speter		 * the confirm routine we're likely to lose the cached copy.
51319304Speter		 */
51419304Speter		if (sp->c_suffix) {
51519304Speter			if (bp == NULL) {
51619304Speter				GET_SPACE_RET(sp, bp, blen, llen);
51719304Speter			} else
51819304Speter				ADD_SPACE_RET(sp, bp, blen, llen);
51919304Speter			memcpy(bp, s, llen);
52019304Speter			s = bp;
52119304Speter		}
52219304Speter
52319304Speter		/* Start searching from the beginning. */
52419304Speter		offset = 0;
52519304Speter		len = llen;
52619304Speter
52719304Speter		/* Reset the build buffer offset. */
52819304Speter		lbclen = 0;
52919304Speter
53019304Speter		/* Reset empty match flag. */
53119304Speter		empty_ok = 1;
53219304Speter
53319304Speter		/*
53419304Speter		 * We don't want to have to do a setline if the line didn't
53519304Speter		 * change -- keep track of whether or not this line changed.
53619304Speter		 * If doing confirmations, don't want to keep setting the
53719304Speter		 * line if change is refused -- keep track of substitutions.
53819304Speter		 */
53919304Speter		didsub = linechanged = 0;
54019304Speter
54119304Speter		/* New line, do an EOL match. */
54219304Speter		do_eol_match = 1;
54319304Speter
54419304Speter		/* It's not nul terminated, but we pretend it is. */
54519304Speter		eflags = REG_STARTEND;
54619304Speter
54719304Speter		/*
54819304Speter		 * The search area is from s + offset to the EOL.
54919304Speter		 *
55019304Speter		 * Generally, match[0].rm_so is the offset of the start
55119304Speter		 * of the match from the start of the search, and offset
55219304Speter		 * is the offset of the start of the last search.
55319304Speter		 */
55419304Speternextmatch:	match[0].rm_so = 0;
55519304Speter		match[0].rm_eo = len;
55619304Speter
55719304Speter		/* Get the next match. */
55819304Speter		eval = regexec(re, (char *)s + offset, 10, match, eflags);
55919304Speter
56019304Speter		/*
56119304Speter		 * There wasn't a match or if there was an error, deal with
56219304Speter		 * it.  If there was a previous match in this line, resolve
56319304Speter		 * the changes into the database.  Otherwise, just move on.
56419304Speter		 */
56519304Speter		if (eval == REG_NOMATCH)
56619304Speter			goto endmatch;
56719304Speter		if (eval != 0) {
56819304Speter			re_error(sp, eval, re);
56919304Speter			goto err;
57019304Speter		}
57119304Speter		matched = 1;
57219304Speter
57319304Speter		/* Only the first search can match an anchored expression. */
57419304Speter		eflags |= REG_NOTBOL;
57519304Speter
57619304Speter		/*
57719304Speter		 * !!!
57819304Speter		 * It's possible to match 0-length strings -- for example, the
57919304Speter		 * command s;a*;X;, when matched against the string "aabb" will
58019304Speter		 * result in "XbXbX", i.e. the matches are "aa", the space
58119304Speter		 * between the b's and the space between the b's and the end of
58219304Speter		 * the string.  There is a similar space between the beginning
58319304Speter		 * of the string and the a's.  The rule that we use (because vi
58419304Speter		 * historically used it) is that any 0-length match, occurring
58519304Speter		 * immediately after a match, is ignored.  Otherwise, the above
58619304Speter		 * example would have resulted in "XXbXbX".  Another example is
58719304Speter		 * incorrectly using " *" to replace groups of spaces with one
58819304Speter		 * space.
58919304Speter		 *
59019304Speter		 * The way we do this is that if we just had a successful match,
59119304Speter		 * the starting offset does not skip characters, and the match
59219304Speter		 * is empty, ignore the match and move forward.  If there's no
59319304Speter		 * more characters in the string, we were attempting to match
59419304Speter		 * after the last character, so quit.
59519304Speter		 */
59619304Speter		if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) {
59719304Speter			empty_ok = 1;
59819304Speter			if (len == 0)
59919304Speter				goto endmatch;
60019304Speter			BUILD(sp, s + offset, 1)
60119304Speter			++offset;
60219304Speter			--len;
60319304Speter			goto nextmatch;
60419304Speter		}
60519304Speter
60619304Speter		/* Confirm change. */
60719304Speter		if (sp->c_suffix) {
60819304Speter			/*
60919304Speter			 * Set the cursor position for confirmation.  Note,
61019304Speter			 * if we matched on a '$', the cursor may be past
61119304Speter			 * the end of line.
61219304Speter			 */
61319304Speter			from.lno = to.lno = lno;
61419304Speter			from.cno = match[0].rm_so + offset;
61519304Speter			to.cno = match[0].rm_eo + offset;
61619304Speter			/*
61719304Speter			 * Both ex and vi have to correct for a change before
61819304Speter			 * the first character in the line.
61919304Speter			 */
62019304Speter			if (llen == 0)
62119304Speter				from.cno = to.cno = 0;
62219304Speter			if (F_ISSET(sp, SC_VI)) {
62319304Speter				/*
62419304Speter				 * Only vi has to correct for a change after
62519304Speter				 * the last character in the line.
62619304Speter				 *
62719304Speter				 * XXX
62819304Speter				 * It would be nice to change the vi code so
62919304Speter				 * that we could display a cursor past EOL.
63019304Speter				 */
63119304Speter				if (to.cno >= llen)
63219304Speter					to.cno = llen - 1;
63319304Speter				if (from.cno >= llen)
63419304Speter					from.cno = llen - 1;
63519304Speter
63619304Speter				sp->lno = from.lno;
63719304Speter				sp->cno = from.cno;
63819304Speter				if (vs_refresh(sp, 1))
63919304Speter					goto err;
64019304Speter
64119304Speter				vs_update(sp, msg_cat(sp,
64219304Speter				    "169|Confirm change? [n]", NULL), NULL);
64319304Speter
64419304Speter				if (v_event_get(sp, &ev, 0, 0))
64519304Speter					goto err;
64619304Speter				switch (ev.e_event) {
64719304Speter				case E_CHARACTER:
64819304Speter					break;
64919304Speter				case E_EOF:
65019304Speter				case E_ERR:
65119304Speter				case E_INTERRUPT:
65219304Speter					goto lquit;
65319304Speter				default:
65419304Speter					v_event_err(sp, &ev);
65519304Speter					goto lquit;
65619304Speter				}
65719304Speter			} else {
65819304Speter				if (ex_print(sp, cmdp, &from, &to, 0) ||
65919304Speter				    ex_scprint(sp, &from, &to))
66019304Speter					goto lquit;
66119304Speter				if (ex_txt(sp, &tiq, 0, TXT_CR))
66219304Speter					goto err;
66319304Speter				ev.e_c = tiq.cqh_first->lb[0];
66419304Speter			}
66519304Speter
66619304Speter			switch (ev.e_c) {
66719304Speter			case CH_YES:
66819304Speter				break;
66919304Speter			default:
67019304Speter			case CH_NO:
67119304Speter				didsub = 0;
67219304Speter				BUILD(sp, s +offset, match[0].rm_eo);
67319304Speter				goto skip;
67419304Speter			case CH_QUIT:
67519304Speter				/* Set the quit/interrupted flags. */
67619304Speterlquit:				quit = 1;
67719304Speter				F_SET(sp->gp, G_INTERRUPTED);
67819304Speter
67919304Speter				/*
68019304Speter				 * Resolve any changes, then return to (and
68119304Speter				 * exit from) the main loop.
68219304Speter				 */
68319304Speter				goto endmatch;
68419304Speter			}
68519304Speter		}
68619304Speter
68719304Speter		/*
68819304Speter		 * Set the cursor to the last position changed, converting
68919304Speter		 * from 1-based to 0-based.
69019304Speter		 */
69119304Speter		sp->lno = lno;
69219304Speter		sp->cno = match[0].rm_so;
69319304Speter
69419304Speter		/* Copy the bytes before the match into the build buffer. */
69519304Speter		BUILD(sp, s + offset, match[0].rm_so);
69619304Speter
69719304Speter		/* Substitute the matching bytes. */
69819304Speter		didsub = 1;
69919304Speter		if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match))
70019304Speter			goto err;
70119304Speter
70219304Speter		/* Set the change flag so we know this line was modified. */
70319304Speter		linechanged = 1;
70419304Speter
70519304Speter		/* Move past the matched bytes. */
70619304Speterskip:		offset += match[0].rm_eo;
70719304Speter		len -= match[0].rm_eo;
70819304Speter
70919304Speter		/* A match cannot be followed by an empty pattern. */
71019304Speter		empty_ok = 0;
71119304Speter
71219304Speter		/*
71319304Speter		 * If doing a global change with confirmation, we have to
71419304Speter		 * update the screen.  The basic idea is to store the line
71519304Speter		 * so the screen update routines can find it, and restart.
71619304Speter		 */
71719304Speter		if (didsub && sp->c_suffix && sp->g_suffix) {
71819304Speter			/*
71919304Speter			 * The new search offset will be the end of the
72019304Speter			 * modified line.
72119304Speter			 */
72219304Speter			saved_offset = lbclen;
72319304Speter
72419304Speter			/* Copy the rest of the line. */
72519304Speter			if (len)
72619304Speter				BUILD(sp, s + offset, len)
72719304Speter
72819304Speter			/* Set the new offset. */
72919304Speter			offset = saved_offset;
73019304Speter
73119304Speter			/* Store inserted lines, adjusting the build buffer. */
73219304Speter			last = 0;
73319304Speter			if (sp->newl_cnt) {
73419304Speter				for (cnt = 0;
73519304Speter				    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
73619304Speter					if (db_insert(sp, lno,
73719304Speter					    lb + last, sp->newl[cnt] - last))
73819304Speter						goto err;
73919304Speter					last = sp->newl[cnt] + 1;
74019304Speter					++sp->rptlines[L_ADDED];
74119304Speter				}
74219304Speter				lbclen -= last;
74319304Speter				offset -= last;
74419304Speter				sp->newl_cnt = 0;
74519304Speter			}
74619304Speter
74719304Speter			/* Store and retrieve the line. */
74819304Speter			if (db_set(sp, lno, lb + last, lbclen))
74919304Speter				goto err;
75019304Speter			if (db_get(sp, lno, DBG_FATAL, &s, &llen))
75119304Speter				goto err;
75219304Speter			ADD_SPACE_RET(sp, bp, blen, llen)
75319304Speter			memcpy(bp, s, llen);
75419304Speter			s = bp;
75519304Speter			len = llen - offset;
75619304Speter
75719304Speter			/* Restart the build. */
75819304Speter			lbclen = 0;
75919304Speter			BUILD(sp, s, offset);
76019304Speter
76119304Speter			/*
76219304Speter			 * If we haven't already done the after-the-string
76319304Speter			 * match, do one.  Set REG_NOTEOL so the '$' pattern
76419304Speter			 * only matches once.
76519304Speter			 */
76619304Speter			if (!do_eol_match)
76719304Speter				goto endmatch;
76819304Speter			if (offset == len) {
76919304Speter				do_eol_match = 0;
77019304Speter				eflags |= REG_NOTEOL;
77119304Speter			}
77219304Speter			goto nextmatch;
77319304Speter		}
77419304Speter
77519304Speter		/*
77619304Speter		 * If it's a global:
77719304Speter		 *
77819304Speter		 * If at the end of the string, do a test for the after
77919304Speter		 * the string match.  Set REG_NOTEOL so the '$' pattern
78019304Speter		 * only matches once.
78119304Speter		 */
78219304Speter		if (sp->g_suffix && do_eol_match) {
78319304Speter			if (len == 0) {
78419304Speter				do_eol_match = 0;
78519304Speter				eflags |= REG_NOTEOL;
78619304Speter			}
78719304Speter			goto nextmatch;
78819304Speter		}
78919304Speter
79019304Speterendmatch:	if (!linechanged)
79119304Speter			continue;
79219304Speter
79319304Speter		/* Copy any remaining bytes into the build buffer. */
79419304Speter		if (len)
79519304Speter			BUILD(sp, s + offset, len)
79619304Speter
79719304Speter		/* Store inserted lines, adjusting the build buffer. */
79819304Speter		last = 0;
79919304Speter		if (sp->newl_cnt) {
80019304Speter			for (cnt = 0;
80119304Speter			    cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) {
80219304Speter				if (db_insert(sp,
80319304Speter				    lno, lb + last, sp->newl[cnt] - last))
80419304Speter					goto err;
80519304Speter				last = sp->newl[cnt] + 1;
80619304Speter				++sp->rptlines[L_ADDED];
80719304Speter			}
80819304Speter			lbclen -= last;
80919304Speter			sp->newl_cnt = 0;
81019304Speter		}
81119304Speter
81219304Speter		/* Store the changed line. */
81319304Speter		if (db_set(sp, lno, lb + last, lbclen))
81419304Speter			goto err;
81519304Speter
81619304Speter		/* Update changed line counter. */
81719304Speter		if (sp->rptlchange != lno) {
81819304Speter			sp->rptlchange = lno;
81919304Speter			++sp->rptlines[L_CHANGED];
82019304Speter		}
82119304Speter
82219304Speter		/*
82319304Speter		 * !!!
82419304Speter		 * Display as necessary.  Historic practice is to only
82519304Speter		 * display the last line of a line split into multiple
82619304Speter		 * lines.
82719304Speter		 */
82819304Speter		if (lflag || nflag || pflag) {
82919304Speter			from.lno = to.lno = lno;
83019304Speter			from.cno = to.cno = 0;
83119304Speter			if (lflag)
83219304Speter				(void)ex_print(sp, cmdp, &from, &to, E_C_LIST);
83319304Speter			if (nflag)
83419304Speter				(void)ex_print(sp, cmdp, &from, &to, E_C_HASH);
83519304Speter			if (pflag)
83619304Speter				(void)ex_print(sp, cmdp, &from, &to, E_C_PRINT);
83719304Speter		}
83819304Speter	}
83919304Speter
84019304Speter	/*
84119304Speter	 * !!!
84219304Speter	 * Historically, vi attempted to leave the cursor at the same place if
84319304Speter	 * the substitution was done at the current cursor position.  Otherwise
84419304Speter	 * it moved it to the first non-blank of the last line changed.  There
84519304Speter	 * were some problems: for example, :s/$/foo/ with the cursor on the
84619304Speter	 * last character of the line left the cursor on the last character, or
84719304Speter	 * the & command with multiple occurrences of the matching string in the
84819304Speter	 * line usually left the cursor in a fairly random position.
84919304Speter	 *
85019304Speter	 * We try to do the same thing, with the exception that if the user is
85119304Speter	 * doing substitution with confirmation, we move to the last line about
85219304Speter	 * which the user was consulted, as opposed to the last line that they
85319304Speter	 * actually changed.  This prevents a screen flash if the user doesn't
85419304Speter	 * change many of the possible lines.
85519304Speter	 */
85619304Speter	if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) {
85719304Speter		sp->cno = 0;
85819304Speter		(void)nonblank(sp, sp->lno, &sp->cno);
85919304Speter	}
86019304Speter
86119304Speter	/*
86219304Speter	 * If not in a global command, and nothing matched, say so.
86319304Speter	 * Else, if none of the lines displayed, put something up.
86419304Speter	 */
86519304Speter	rval = 0;
86619304Speter	if (!matched) {
86719304Speter		if (!F_ISSET(sp, SC_EX_GLOBAL)) {
86819304Speter			msgq(sp, M_ERR, "157|No match found");
86919304Speter			goto err;
87019304Speter		}
87119304Speter	} else if (!lflag && !nflag && !pflag)
87219304Speter		F_SET(cmdp, E_AUTOPRINT);
87319304Speter
87419304Speter	if (0) {
87519304Spetererr:		rval = 1;
87619304Speter	}
87719304Speter
87819304Speter	if (bp != NULL)
87919304Speter		FREE_SPACE(sp, bp, blen);
88019304Speter	if (lb != NULL)
88119304Speter		free(lb);
88219304Speter	return (rval);
88319304Speter}
88419304Speter
88519304Speter/*
88619304Speter * re_compile --
88719304Speter *	Compile the RE.
88819304Speter *
88919304Speter * PUBLIC: int re_compile __P((SCR *,
89019304Speter * PUBLIC:     char *, size_t, char **, size_t *, regex_t *, u_int));
89119304Speter */
89219304Speterint
89319304Speterre_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags)
89419304Speter	SCR *sp;
89519304Speter	char *ptrn, **ptrnp;
89619304Speter	size_t plen, *lenp;
89719304Speter	regex_t *rep;
89819304Speter	u_int flags;
89919304Speter{
90019304Speter	size_t len;
90119304Speter	int reflags, replaced, rval;
90219304Speter	char *p;
90319304Speter
90419304Speter	/* Set RE flags. */
90519304Speter	reflags = 0;
90619304Speter	if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) {
90719304Speter		if (O_ISSET(sp, O_EXTENDED))
90819304Speter			reflags |= REG_EXTENDED;
90919304Speter		if (O_ISSET(sp, O_IGNORECASE))
91019304Speter			reflags |= REG_ICASE;
91119304Speter		if (O_ISSET(sp, O_ICLOWER)) {
91219304Speter			for (p = ptrn, len = plen; len > 0; ++p, --len)
91319304Speter				if (isupper(*p))
91419304Speter					break;
91519304Speter			if (len == 0)
91619304Speter				reflags |= REG_ICASE;
91719304Speter		}
91819304Speter	}
91919304Speter
92019304Speter	/* If we're replacing a saved value, clear the old one. */
92119304Speter	if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) {
92219304Speter		regfree(&sp->re_c);
92319304Speter		F_CLR(sp, SC_RE_SEARCH);
92419304Speter	}
92519304Speter	if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) {
92619304Speter		regfree(&sp->subre_c);
92719304Speter		F_CLR(sp, SC_RE_SUBST);
92819304Speter	}
92919304Speter
93019304Speter	/*
93119304Speter	 * If we're saving the string, it's a pattern we haven't seen before,
93219304Speter	 * so convert the vi-style RE's to POSIX 1003.2 RE's.  Save a copy for
93319304Speter	 * later recompilation.   Free any previously saved value.
93419304Speter	 */
93519304Speter	if (ptrnp != NULL) {
93619304Speter		if (LF_ISSET(RE_C_CSCOPE)) {
93719304Speter			if (re_cscope_conv(sp, &ptrn, &plen, &replaced))
93819304Speter				return (1);
93919304Speter			/*
94019304Speter			 * XXX
94119304Speter			 * Currently, the match-any-<blank> expression used in
94219304Speter			 * re_cscope_conv() requires extended RE's.  This may
94319304Speter			 * not be right or safe.
94419304Speter			 */
94519304Speter			reflags |= REG_EXTENDED;
94619304Speter		} else if (LF_ISSET(RE_C_TAG)) {
94719304Speter			if (re_tag_conv(sp, &ptrn, &plen, &replaced))
94819304Speter				return (1);
94919304Speter		} else
95019304Speter			if (re_conv(sp, &ptrn, &plen, &replaced))
95119304Speter				return (1);
95219304Speter
95319304Speter		/* Discard previous pattern. */
95419304Speter		if (*ptrnp != NULL) {
95519304Speter			free(*ptrnp);
95619304Speter			*ptrnp = NULL;
95719304Speter		}
95819304Speter		if (lenp != NULL)
95919304Speter			*lenp = plen;
96019304Speter
96119304Speter		/*
96219304Speter		 * Copy the string into allocated memory.
96319304Speter		 *
96419304Speter		 * XXX
96519304Speter		 * Regcomp isn't 8-bit clean, so the pattern is nul-terminated
96619304Speter		 * for now.  There's just no other solution.
96719304Speter		 */
96819304Speter		MALLOC(sp, *ptrnp, char *, plen + 1);
96919304Speter		if (*ptrnp != NULL) {
97019304Speter			memcpy(*ptrnp, ptrn, plen);
97119304Speter			(*ptrnp)[plen] = '\0';
97219304Speter		}
97319304Speter
97419304Speter		/* Free up conversion-routine-allocated memory. */
97519304Speter		if (replaced)
97619304Speter			FREE_SPACE(sp, ptrn, 0);
97719304Speter
97819304Speter		if (*ptrnp == NULL)
97919304Speter			return (1);
98019304Speter
98119304Speter		ptrn = *ptrnp;
98219304Speter	}
98319304Speter
98419304Speter	/*
98519304Speter	 * XXX
98619304Speter	 * Regcomp isn't 8-bit clean, so we just lost if the pattern
98719304Speter	 * contained a nul.  Bummer!
98819304Speter	 */
98919304Speter	if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) {
99019304Speter		if (!LF_ISSET(RE_C_SILENT))
99119304Speter			re_error(sp, rval, rep);
99219304Speter		return (1);
99319304Speter	}
99419304Speter
99519304Speter	if (LF_ISSET(RE_C_SEARCH))
99619304Speter		F_SET(sp, SC_RE_SEARCH);
99719304Speter	if (LF_ISSET(RE_C_SUBST))
99819304Speter		F_SET(sp, SC_RE_SUBST);
99919304Speter
100019304Speter	return (0);
100119304Speter}
100219304Speter
100319304Speter/*
100419304Speter * re_conv --
100519304Speter *	Convert vi's regular expressions into something that the
100619304Speter *	the POSIX 1003.2 RE functions can handle.
100719304Speter *
100819304Speter * There are three conversions we make to make vi's RE's (specifically
100919304Speter * the global, search, and substitute patterns) work with POSIX RE's.
101019304Speter *
101119304Speter * 1: If O_MAGIC is not set, strip backslashes from the magic character
101219304Speter *    set (.[*~) that have them, and add them to the ones that don't.
101319304Speter * 2: If O_MAGIC is not set, the string "\~" is replaced with the text
101419304Speter *    from the last substitute command's replacement string.  If O_MAGIC
101519304Speter *    is set, it's the string "~".
101619304Speter * 3: The pattern \<ptrn\> does "word" searches, convert it to use the
101719304Speter *    new RE escapes.
101819304Speter *
101919304Speter * !!!/XXX
102019304Speter * This doesn't exactly match the historic behavior of vi because we do
102119304Speter * the ~ substitution before calling the RE engine, so magic characters
102219304Speter * in the replacement string will be expanded by the RE engine, and they
102319304Speter * weren't historically.  It's a bug.
102419304Speter */
102519304Speterstatic int
102619304Speterre_conv(sp, ptrnp, plenp, replacedp)
102719304Speter	SCR *sp;
102819304Speter	char **ptrnp;
102919304Speter	size_t *plenp;
103019304Speter	int *replacedp;
103119304Speter{
103219304Speter	size_t blen, len, needlen;
103319304Speter	int magic;
103419304Speter	char *bp, *p, *t;
103519304Speter
103619304Speter	/*
103719304Speter	 * First pass through, we figure out how much space we'll need.
103819304Speter	 * We do it in two passes, on the grounds that most of the time
103919304Speter	 * the user is doing a search and won't have magic characters.
104019304Speter	 * That way we can skip most of the memory allocation and copies.
104119304Speter	 */
104219304Speter	magic = 0;
104319304Speter	for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len)
104419304Speter		switch (*p) {
104519304Speter		case '\\':
104619304Speter			if (len > 1) {
104719304Speter				--len;
104819304Speter				switch (*++p) {
104919304Speter				case '<':
105019304Speter					magic = 1;
105119304Speter					needlen += sizeof(RE_WSTART);
105219304Speter					break;
105319304Speter				case '>':
105419304Speter					magic = 1;
105519304Speter					needlen += sizeof(RE_WSTOP);
105619304Speter					break;
105719304Speter				case '~':
105819304Speter					if (!O_ISSET(sp, O_MAGIC)) {
105919304Speter						magic = 1;
106019304Speter						needlen += sp->repl_len;
106119304Speter					}
106219304Speter					break;
106319304Speter				case '.':
106419304Speter				case '[':
106519304Speter				case '*':
106619304Speter					if (!O_ISSET(sp, O_MAGIC)) {
106719304Speter						magic = 1;
106819304Speter						needlen += 1;
106919304Speter					}
107019304Speter					break;
107119304Speter				default:
107219304Speter					needlen += 2;
107319304Speter				}
107419304Speter			} else
107519304Speter				needlen += 1;
107619304Speter			break;
107719304Speter		case '~':
107819304Speter			if (O_ISSET(sp, O_MAGIC)) {
107919304Speter				magic = 1;
108019304Speter				needlen += sp->repl_len;
108119304Speter			}
108219304Speter			break;
108319304Speter		case '.':
108419304Speter		case '[':
108519304Speter		case '*':
108619304Speter			if (!O_ISSET(sp, O_MAGIC)) {
108719304Speter				magic = 1;
108819304Speter				needlen += 2;
108919304Speter			}
109019304Speter			break;
109119304Speter		default:
109219304Speter			needlen += 1;
109319304Speter			break;
109419304Speter		}
109519304Speter
109619304Speter	if (!magic) {
109719304Speter		*replacedp = 0;
109819304Speter		return (0);
109919304Speter	}
110019304Speter
110119304Speter	/* Get enough memory to hold the final pattern. */
110219304Speter	*replacedp = 1;
110319304Speter	GET_SPACE_RET(sp, bp, blen, needlen);
110419304Speter
110519304Speter	for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len)
110619304Speter		switch (*p) {
110719304Speter		case '\\':
110819304Speter			if (len > 1) {
110919304Speter				--len;
111019304Speter				switch (*++p) {
111119304Speter				case '<':
111219304Speter					memcpy(t,
111319304Speter					    RE_WSTART, sizeof(RE_WSTART) - 1);
111419304Speter					t += sizeof(RE_WSTART) - 1;
111519304Speter					break;
111619304Speter				case '>':
111719304Speter					memcpy(t,
111819304Speter					    RE_WSTOP, sizeof(RE_WSTOP) - 1);
111919304Speter					t += sizeof(RE_WSTOP) - 1;
112019304Speter					break;
112119304Speter				case '~':
112219304Speter					if (O_ISSET(sp, O_MAGIC))
112319304Speter						*t++ = '~';
112419304Speter					else {
112519304Speter						memcpy(t,
112619304Speter						    sp->repl, sp->repl_len);
112719304Speter						t += sp->repl_len;
112819304Speter					}
112919304Speter					break;
113019304Speter				case '.':
113119304Speter				case '[':
113219304Speter				case '*':
113319304Speter					if (O_ISSET(sp, O_MAGIC))
113419304Speter						*t++ = '\\';
113519304Speter					*t++ = *p;
113619304Speter					break;
113719304Speter				default:
113819304Speter					*t++ = '\\';
113919304Speter					*t++ = *p;
114019304Speter				}
114119304Speter			} else
114219304Speter				*t++ = '\\';
114319304Speter			break;
114419304Speter		case '~':
114519304Speter			if (O_ISSET(sp, O_MAGIC)) {
114619304Speter				memcpy(t, sp->repl, sp->repl_len);
114719304Speter				t += sp->repl_len;
114819304Speter			} else
114919304Speter				*t++ = '~';
115019304Speter			break;
115119304Speter		case '.':
115219304Speter		case '[':
115319304Speter		case '*':
115419304Speter			if (!O_ISSET(sp, O_MAGIC))
115519304Speter				*t++ = '\\';
115619304Speter			*t++ = *p;
115719304Speter			break;
115819304Speter		default:
115919304Speter			*t++ = *p;
116019304Speter			break;
116119304Speter		}
116219304Speter
116319304Speter	*ptrnp = bp;
116419304Speter	*plenp = t - bp;
116519304Speter	return (0);
116619304Speter}
116719304Speter
116819304Speter/*
116919304Speter * re_tag_conv --
117019304Speter *	Convert a tags search path into something that the POSIX
117119304Speter *	1003.2 RE functions can handle.
117219304Speter */
117319304Speterstatic int
117419304Speterre_tag_conv(sp, ptrnp, plenp, replacedp)
117519304Speter	SCR *sp;
117619304Speter	char **ptrnp;
117719304Speter	size_t *plenp;
117819304Speter	int *replacedp;
117919304Speter{
118019304Speter	size_t blen, len;
118119304Speter	int lastdollar;
118219304Speter	char *bp, *p, *t;
118319304Speter
118419304Speter	len = *plenp;
118519304Speter
118619304Speter	/* Max memory usage is 2 times the length of the string. */
118719304Speter	*replacedp = 1;
118819304Speter	GET_SPACE_RET(sp, bp, blen, len * 2);
118919304Speter
119019304Speter	p = *ptrnp;
119119304Speter	t = bp;
119219304Speter
119319304Speter	/* If the last character is a '/' or '?', we just strip it. */
119419304Speter	if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?'))
119519304Speter		--len;
119619304Speter
119719304Speter	/* If the next-to-last or last character is a '$', it's magic. */
119819304Speter	if (len > 0 && p[len - 1] == '$') {
119919304Speter		--len;
120019304Speter		lastdollar = 1;
120119304Speter	} else
120219304Speter		lastdollar = 0;
120319304Speter
120419304Speter	/* If the first character is a '/' or '?', we just strip it. */
120519304Speter	if (len > 0 && (p[0] == '/' || p[0] == '?')) {
120619304Speter		++p;
120719304Speter		--len;
120819304Speter	}
120919304Speter
121019304Speter	/* If the first or second character is a '^', it's magic. */
121119304Speter	if (p[0] == '^') {
121219304Speter		*t++ = *p++;
121319304Speter		--len;
121419304Speter	}
121519304Speter
121619304Speter	/*
121719304Speter	 * Escape every other magic character we can find, meanwhile stripping
121819304Speter	 * the backslashes ctags inserts when escaping the search delimiter
121919304Speter	 * characters.
122019304Speter	 */
122119304Speter	for (; len > 0; --len) {
122219304Speter		if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) {
122319304Speter			++p;
122419304Speter			--len;
122519304Speter		} else if (strchr("^.[]$*", p[0]))
122619304Speter			*t++ = '\\';
122719304Speter		*t++ = *p++;
122819304Speter	}
122919304Speter	if (lastdollar)
123019304Speter		*t++ = '$';
123119304Speter
123219304Speter	*ptrnp = bp;
123319304Speter	*plenp = t - bp;
123419304Speter	return (0);
123519304Speter}
123619304Speter
123719304Speter/*
123819304Speter * re_cscope_conv --
123919304Speter *	 Convert a cscope search path into something that the POSIX
124019304Speter *      1003.2 RE functions can handle.
124119304Speter */
124219304Speterstatic int
124319304Speterre_cscope_conv(sp, ptrnp, plenp, replacedp)
124419304Speter	SCR *sp;
124519304Speter	char **ptrnp;
124619304Speter	size_t *plenp;
124719304Speter	int *replacedp;
124819304Speter{
124919304Speter	size_t blen, len, nspaces;
125019304Speter	char *bp, *p, *t;
125119304Speter
125219304Speter	/*
125319304Speter	 * Each space in the source line printed by cscope represents an
125419304Speter	 * arbitrary sequence of spaces, tabs, and comments.
125519304Speter	 */
125619304Speter#define	CSCOPE_RE_SPACE		"([ \t]|/\\*([^*]|\\*/)*\\*/)*"
125719304Speter	for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len)
125819304Speter		if (*p == ' ')
125919304Speter			++nspaces;
126019304Speter
126119304Speter	/*
126219304Speter	 * Allocate plenty of space:
126319304Speter	 *	the string, plus potential escaping characters;
126419304Speter	 *	nspaces + 2 copies of CSCOPE_RE_SPACE;
126519304Speter	 *	^, $, nul terminator characters.
126619304Speter	 */
126719304Speter	*replacedp = 1;
126819304Speter	len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3;
126919304Speter	GET_SPACE_RET(sp, bp, blen, len);
127019304Speter
127119304Speter	p = *ptrnp;
127219304Speter	t = bp;
127319304Speter
127419304Speter	*t++ = '^';
127519304Speter	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
127619304Speter	t += sizeof(CSCOPE_RE_SPACE) - 1;
127719304Speter
127819304Speter	for (len = *plenp; len > 0; ++p, --len)
127919304Speter		if (*p == ' ') {
128019304Speter			memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
128119304Speter			t += sizeof(CSCOPE_RE_SPACE) - 1;
128219304Speter		} else {
128319304Speter			if (strchr("\\^.[]$*+?()|{}", *p))
128419304Speter				*t++ = '\\';
128519304Speter			*t++ = *p;
128619304Speter		}
128719304Speter
128819304Speter	memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1);
128919304Speter	t += sizeof(CSCOPE_RE_SPACE) - 1;
129019304Speter	*t++ = '$';
129119304Speter
129219304Speter	*ptrnp = bp;
129319304Speter	*plenp = t - bp;
129419304Speter	return (0);
129519304Speter}
129619304Speter
129719304Speter/*
129819304Speter * re_error --
129919304Speter *	Report a regular expression error.
130019304Speter *
130119304Speter * PUBLIC: void re_error __P((SCR *, int, regex_t *));
130219304Speter */
130319304Spetervoid
130419304Speterre_error(sp, errcode, preg)
130519304Speter	SCR *sp;
130619304Speter	int errcode;
130719304Speter	regex_t *preg;
130819304Speter{
130919304Speter	size_t s;
131019304Speter	char *oe;
131119304Speter
131219304Speter	s = regerror(errcode, preg, "", 0);
131319304Speter	if ((oe = malloc(s)) == NULL)
131419304Speter		msgq(sp, M_SYSERR, NULL);
131519304Speter	else {
131619304Speter		(void)regerror(errcode, preg, oe, s);
131719304Speter		msgq(sp, M_ERR, "RE error: %s", oe);
131819304Speter		free(oe);
131919304Speter	}
132019304Speter}
132119304Speter
132219304Speter/*
132319304Speter * re_sub --
132419304Speter * 	Do the substitution for a regular expression.
132519304Speter */
132619304Speterstatic int
132719304Speterre_sub(sp, ip, lbp, lbclenp, lblenp, match)
132819304Speter	SCR *sp;
132919304Speter	char *ip;			/* Input line. */
133019304Speter	char **lbp;
133119304Speter	size_t *lbclenp, *lblenp;
133219304Speter	regmatch_t match[10];
133319304Speter{
133419304Speter	enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv;
133519304Speter	size_t lbclen, lblen;		/* Local copies. */
133619304Speter	size_t mlen;			/* Match length. */
133719304Speter	size_t rpl;			/* Remaining replacement length. */
133819304Speter	char *rp;			/* Replacement pointer. */
133919304Speter	int ch;
134019304Speter	int no;				/* Match replacement offset. */
134119304Speter	char *p, *t;			/* Buffer pointers. */
134219304Speter	char *lb;			/* Local copies. */
134319304Speter
134419304Speter	lb = *lbp;			/* Get local copies. */
134519304Speter	lbclen = *lbclenp;
134619304Speter	lblen = *lblenp;
134719304Speter
134819304Speter	/*
134919304Speter	 * QUOTING NOTE:
135019304Speter	 *
135119304Speter	 * There are some special sequences that vi provides in the
135219304Speter	 * replacement patterns.
135319304Speter	 *	 & string the RE matched (\& if nomagic set)
135419304Speter	 *	\# n-th regular subexpression
135519304Speter	 *	\E end \U, \L conversion
135619304Speter	 *	\e end \U, \L conversion
135719304Speter	 *	\l convert the next character to lower-case
135819304Speter	 *	\L convert to lower-case, until \E, \e, or end of replacement
135919304Speter	 *	\u convert the next character to upper-case
136019304Speter	 *	\U convert to upper-case, until \E, \e, or end of replacement
136119304Speter	 *
136219304Speter	 * Otherwise, since this is the lowest level of replacement, discard
136319304Speter	 * all escaping characters.  This (hopefully) matches historic practice.
136419304Speter	 */
136519304Speter#define	OUTCH(ch, nltrans) {						\
136619304Speter	CHAR_T __ch = (ch);						\
136719304Speter	u_int __value = KEY_VAL(sp, __ch);				\
136819304Speter	if (nltrans && (__value == K_CR || __value == K_NL)) {		\
136919304Speter		NEEDNEWLINE(sp);					\
137019304Speter		sp->newl[sp->newl_cnt++] = lbclen;			\
137119304Speter	} else if (conv != C_NOTSET) {					\
137219304Speter		switch (conv) {						\
137319304Speter		case C_ONELOWER:					\
137419304Speter			conv = C_NOTSET;				\
137519304Speter			/* FALLTHROUGH */				\
137619304Speter		case C_LOWER:						\
137719304Speter			if (isupper(__ch))				\
137819304Speter				__ch = tolower(__ch);			\
137919304Speter			break;						\
138019304Speter		case C_ONEUPPER:					\
138119304Speter			conv = C_NOTSET;				\
138219304Speter			/* FALLTHROUGH */				\
138319304Speter		case C_UPPER:						\
138419304Speter			if (islower(__ch))				\
138519304Speter				__ch = toupper(__ch);			\
138619304Speter			break;						\
138719304Speter		default:						\
138819304Speter			abort();					\
138919304Speter		}							\
139019304Speter	}								\
139119304Speter	NEEDSP(sp, 1, p);						\
139219304Speter	*p++ = __ch;							\
139319304Speter	++lbclen;							\
139419304Speter}
139519304Speter	conv = C_NOTSET;
139619304Speter	for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) {
139719304Speter		switch (ch = *rp++) {
139819304Speter		case '&':
139919304Speter			if (O_ISSET(sp, O_MAGIC)) {
140019304Speter				no = 0;
140119304Speter				goto subzero;
140219304Speter			}
140319304Speter			break;
140419304Speter		case '\\':
140519304Speter			if (rpl == 0)
140619304Speter				break;
140719304Speter			--rpl;
140819304Speter			switch (ch = *rp) {
140919304Speter			case '&':
141019304Speter				++rp;
141119304Speter				if (!O_ISSET(sp, O_MAGIC)) {
141219304Speter					no = 0;
141319304Speter					goto subzero;
141419304Speter				}
141519304Speter				break;
141619304Speter			case '0': case '1': case '2': case '3': case '4':
141719304Speter			case '5': case '6': case '7': case '8': case '9':
141819304Speter				no = *rp++ - '0';
141919304Spetersubzero:			if (match[no].rm_so == -1 ||
142019304Speter			    	    match[no].rm_eo == -1)
142119304Speter					break;
142219304Speter				mlen = match[no].rm_eo - match[no].rm_so;
142319304Speter				for (t = ip + match[no].rm_so; mlen--; ++t)
142419304Speter					OUTCH(*t, 0);
142519304Speter				continue;
142619304Speter			case 'e':
142719304Speter			case 'E':
142819304Speter				++rp;
142919304Speter				conv = C_NOTSET;
143019304Speter				continue;
143119304Speter			case 'l':
143219304Speter				++rp;
143319304Speter				conv = C_ONELOWER;
143419304Speter				continue;
143519304Speter			case 'L':
143619304Speter				++rp;
143719304Speter				conv = C_LOWER;
143819304Speter				continue;
143919304Speter			case 'u':
144019304Speter				++rp;
144119304Speter				conv = C_ONEUPPER;
144219304Speter				continue;
144319304Speter			case 'U':
144419304Speter				++rp;
144519304Speter				conv = C_UPPER;
144619304Speter				continue;
144719304Speter			default:
144819304Speter				++rp;
144919304Speter				break;
145019304Speter			}
145119304Speter		}
145219304Speter		OUTCH(ch, 1);
145319304Speter	}
145419304Speter
145519304Speter	*lbp = lb;			/* Update caller's information. */
145619304Speter	*lbclenp = lbclen;
145719304Speter	*lblenp = lblen;
145819304Speter	return (0);
145919304Speter}
1460