1/*	$OpenBSD: sub.c,v 1.18 2016/10/11 06:54:05 martijn Exp $	*/
2/*	$NetBSD: sub.c,v 1.4 1995/03/21 09:04:50 cgd Exp $	*/
3
4/* sub.c: This file contains the substitution routines for the ed
5   line editor */
6/*-
7 * Copyright (c) 1993 Andrew Moore, Talke Studio.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <limits.h>
33#include <regex.h>
34#include <signal.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
39#include "ed.h"
40
41static char *extract_subst_template(void);
42static int substitute_matching_text(regex_t *, line_t *, int, int);
43static int apply_subst_template(char *, regmatch_t *, int, int);
44
45static char *rhbuf;		/* rhs substitution buffer */
46static int rhbufsz;		/* rhs substitution buffer size */
47static int rhbufi;		/* rhs substitution buffer index */
48
49/* extract_subst_tail: extract substitution tail from the command buffer */
50int
51extract_subst_tail(int *flagp, int *np)
52{
53	char delimiter;
54
55	*flagp = *np = 0;
56	if ((delimiter = *ibufp) == '\n') {
57		rhbufi = 0;
58		*flagp = GPR;
59		return 0;
60	} else if (extract_subst_template() == NULL)
61		return  ERR;
62	else if (*ibufp == '\n') {
63		*flagp = GPR;
64		return 0;
65	} else if (*ibufp == delimiter)
66		ibufp++;
67	if ('1' <= *ibufp && *ibufp <= '9') {
68		STRTOI(*np, ibufp);
69		return 0;
70	} else if (*ibufp == 'g') {
71		ibufp++;
72		*flagp = GSG;
73		return 0;
74	}
75	return 0;
76}
77
78
79/* extract_subst_template: return pointer to copy of substitution template
80   in the command buffer */
81static char *
82extract_subst_template(void)
83{
84	int n = 0;
85	int i = 0;
86	char c;
87	char delimiter = *ibufp++;
88
89	if (*ibufp == '%' && *(ibufp + 1) == delimiter) {
90		ibufp++;
91		if (!rhbuf)
92			seterrmsg("no previous substitution");
93		return rhbuf;
94	}
95	while (*ibufp != delimiter) {
96		REALLOC(rhbuf, rhbufsz, i + 2, NULL);
97		if ((c = rhbuf[i++] = *ibufp++) == '\n' && *ibufp == '\0') {
98			i--, ibufp--;
99			break;
100		} else if (c != '\\')
101			;
102		else if ((rhbuf[i++] = *ibufp++) != '\n')
103			;
104		else if (!isglobal) {
105			while ((n = get_tty_line()) == 0 ||
106			    (n > 0 && ibuf[n - 1] != '\n'))
107				clearerr(stdin);
108			if (n < 0)
109				return NULL;
110		}
111	}
112	REALLOC(rhbuf, rhbufsz, i + 1, NULL);
113	rhbuf[rhbufi = i] = '\0';
114	return  rhbuf;
115}
116
117
/*
 * Output buffer shared by substitute_matching_text() and
 * apply_subst_template(); search_and_replace() reads the result from it.
 */
static char *rbuf = NULL;	/* substituted line text */
static int rbufsz = 0;		/* size tracked for rbuf by REALLOC */
120
121/* search_and_replace: for each line in a range, change text matching a pattern
122   according to a substitution template; return status  */
123int
124search_and_replace(regex_t *pat, int gflag, int kth)
125{
126	undo_t *up;
127	char *txt;
128	char *eot;
129	int lc;
130	int xa = current_addr;
131	int nsubs = 0;
132	line_t *lp;
133	int len;
134
135	current_addr = first_addr - 1;
136	for (lc = 0; lc <= second_addr - first_addr; lc++) {
137		lp = get_addressed_line_node(++current_addr);
138		if ((len = substitute_matching_text(pat, lp, gflag, kth)) < 0)
139			return ERR;
140		else if (len) {
141			up = NULL;
142			if (delete_lines(current_addr, current_addr) < 0)
143				return ERR;
144			txt = rbuf;
145			eot = rbuf + len;
146			SPL1();
147			do {
148				if ((txt = put_sbuf_line(txt)) == NULL) {
149					SPL0();
150					return ERR;
151				} else if (up)
152					up->t = get_addressed_line_node(current_addr);
153				else if ((up = push_undo_stack(UADD,
154				    current_addr, current_addr)) == NULL) {
155					SPL0();
156					return ERR;
157				}
158			} while (txt != eot);
159			SPL0();
160			nsubs++;
161			xa = current_addr;
162		}
163	}
164	current_addr = xa;
165	if  (nsubs == 0 && !(gflag & GLB)) {
166		seterrmsg("no match");
167		return ERR;
168	} else if ((gflag & (GPR | GLS | GNP)) &&
169	    display_lines(current_addr, current_addr, gflag) < 0)
170		return ERR;
171	return 0;
172}
173
174
175/* substitute_matching_text: replace text matched by a pattern according to
176   a substitution template; return length of rbuf if changed, 0 if unchanged, or
177   ERR on error */
178static int
179substitute_matching_text(regex_t *pat, line_t *lp, int gflag, int kth)
180{
181	int off = 0;
182	int changed = 0;
183	int matchno = 0;
184	int i = 0;
185	int nempty = -1;
186	regmatch_t rm[SE_MAX];
187	char *txt;
188	char *eot, *eom;
189
190	if ((eom = txt = get_sbuf_line(lp)) == NULL)
191		return ERR;
192	if (isbinary)
193		NUL_TO_NEWLINE(txt, lp->len);
194	eot = txt + lp->len;
195	if (!regexec(pat, txt, SE_MAX, rm, 0)) {
196		do {
197/* Don't do a 0-length match directly after a non-0-length */
198			if (rm[0].rm_eo == nempty) {
199				rm[0].rm_so++;
200				rm[0].rm_eo = lp->len;
201				continue;
202			}
203			if (!kth || kth == ++matchno) {
204				changed = 1;
205				i = rm[0].rm_so - (eom - txt);
206				REALLOC(rbuf, rbufsz, off + i, ERR);
207				if (isbinary)
208					NEWLINE_TO_NUL(eom,
209					    rm[0].rm_eo - (eom - txt));
210				memcpy(rbuf + off, eom, i);
211				off += i;
212				if ((off = apply_subst_template(txt, rm, off,
213				    pat->re_nsub)) < 0)
214					return ERR;
215				eom = txt + rm[0].rm_eo;
216				if (kth)
217					break;
218			}
219			if (rm[0].rm_so == rm[0].rm_eo)
220				rm[0].rm_so = rm[0].rm_eo + 1;
221			else
222				nempty = rm[0].rm_so = rm[0].rm_eo;
223			rm[0].rm_eo = lp->len;
224		} while (rm[0].rm_so < lp->len && (gflag & GSG || kth) &&
225		    !regexec(pat, txt, SE_MAX, rm, REG_STARTEND | REG_NOTBOL));
226		i = eot - eom;
227		REALLOC(rbuf, rbufsz, off + i + 2, ERR);
228		if (isbinary)
229			NEWLINE_TO_NUL(eom, i);
230		memcpy(rbuf + off, eom, i);
231		memcpy(rbuf + off + i, "\n", 2);
232	}
233	return changed ? off + i + 1 : 0;
234}
235
236
237/* apply_subst_template: modify text according to a substitution template;
238   return offset to end of modified text */
239static int
240apply_subst_template(char *boln, regmatch_t *rm, int off, int re_nsub)
241{
242	int j = 0;
243	int k = 0;
244	int n;
245	char *sub = rhbuf;
246
247	for (; sub - rhbuf < rhbufi; sub++)
248		if (*sub == '&') {
249			j = rm[0].rm_so;
250			k = rm[0].rm_eo;
251			REALLOC(rbuf, rbufsz, off + k - j, ERR);
252			while (j < k)
253				rbuf[off++] = boln[j++];
254		} else if (*sub == '\\' && '1' <= *++sub && *sub <= '9' &&
255		    (n = *sub - '0') <= re_nsub) {
256			j = rm[n].rm_so;
257			k = rm[n].rm_eo;
258			REALLOC(rbuf, rbufsz, off + k - j, ERR);
259			while (j < k)
260				rbuf[off++] = boln[j++];
261		} else {
262			REALLOC(rbuf, rbufsz, off + 1, ERR);
263			rbuf[off++] = *sub;
264		}
265	REALLOC(rbuf, rbufsz, off + 1, ERR);
266	rbuf[off] = '\0';
267	return off;
268}
269