1/*	$OpenBSD: process.c,v 1.35 2022/01/12 15:13:36 martijn Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Diomidis Spinellis.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Diomidis Spinellis of Imperial College, University of London.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <sys/types.h>
37#include <sys/stat.h>
38#include <sys/uio.h>
39
40#include <ctype.h>
41#include <errno.h>
42#include <fcntl.h>
43#include <limits.h>
44#include <regex.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49
50#include "defs.h"
51#include "extern.h"
52
/*
 * Working buffers: HS is the hold space, PS the pattern space, and SS the
 * scratch "substitute space" that substitute() builds its result in before
 * swapping it with PS.  The short macros below are aliases for the fields
 * of PS and HS that the command interpreter touches most often.
 */
static SPACE HS, PS, SS;
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	psanl		PS.append_newline
#define	hs		HS.space
#define	hsl		HS.len

/* Helpers private to this file. */
static inline int	 applies(struct s_command *);
static void		 flush_appends(void);
static void		 lputs(char *, size_t);
static inline int	 regexec_e(regex_t *, const char *, int, int, size_t,
			     size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Queue of pending 'a' texts and 'r' files. */
static size_t appendx;		/* Number of queued entries (next free slot). */
size_t appendnum;		/* Allocated size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/* Last RE used; regexec_e() reuses it for an empty RE. */
static regex_t *defpreg;
size_t maxnsub;			/* regexec_e() asks regexec() for maxnsub + 1 slots. */
regmatch_t *match;		/* Match offsets filled in by regexec_e(). */

/*
 * Write the pattern space to outfile, followed by a newline when the
 * pattern space is flagged as needing one (psanl).
 */
#define OUT() do {\
	fwrite(ps, 1, psl, outfile);\
	if (psanl) fputc('\n', outfile);\
} while (0)
84
/*
 * process --
 *	Main execution loop.  For every input line that mf_fgets() loads into
 *	the pattern space, walk the compiled command list starting at `prog'
 *	and execute each command whose addresses select the line.  At the end
 *	of a cycle the pattern space is written out (unless nflag is set or
 *	the pattern space was deleted) and queued appends are flushed.
 *
 *	Labels:
 *	  top      - rerun the script from the start without reading input
 *		     (used by 'D').
 *	  redirect - continue execution at a new `cp' (block entry, branches).
 *	  new      - end of cycle: output, flush appends, read the next line.
 */
void
process(void)
{
	struct s_command *cp;
	SPACE tspace;
	size_t len, oldpsl;
	char *p;

	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
		pd = 0;
top:
		cp = prog;
redirect:
		while (cp != NULL) {
			/* Skip commands whose address(es) do not select this line. */
			if (!applies(cp)) {
				cp = cp->next;
				continue;
			}
			switch (cp->code) {
			case '{':
				/* Enter the block: continue with its command list. */
				cp = cp->u.c;
				goto redirect;
			case 'a':
				/* Queue the text; it is emitted by flush_appends(). */
				if (appendx >= appendnum) {
					/* Queue full: double its capacity. */
					appends = xreallocarray(appends,
					    appendnum,
					    2 * sizeof(struct s_appends));
					appendnum *= 2;
				}
				appends[appendx].type = AP_STRING;
				appends[appendx].s = cp->t;
				appends[appendx].len = strlen(cp->t);
				appendx++;
				break;
			case 'b':
				/*
				 * Unconditional branch.  A NULL target ends the
				 * while loop, i.e. falls through to end of cycle.
				 */
				cp = cp->u.c;
				goto redirect;
			case 'c':
				/*
				 * Drop the pattern space.  The replacement text is
				 * printed once: immediately when there is no second
				 * address, otherwise only on the last line of the
				 * range (or the last input line).
				 */
				pd = 1;
				psl = 0;
				if (cp->a2 == NULL || lastaddr || lastline())
					(void)fprintf(outfile, "%s", cp->t);
				break;
			case 'd':
				/* Delete the pattern space and start the next cycle. */
				pd = 1;
				goto new;
			case 'D':
				if (pd)
					goto new;
				if (psl == 0 ||
				    (p = memchr(ps, '\n', psl)) == NULL) {
					/* No embedded newline: behave like 'd'. */
					pd = 1;
					goto new;
				} else {
					/*
					 * Remove everything through the first
					 * newline and rerun the script on what is
					 * left, without reading new input.
					 */
					psl -= (p + 1) - ps;
					memmove(ps, p + 1, psl);
					goto top;
				}
			case 'g':
				/* Replace the pattern space with the hold space. */
				cspace(&PS, hs, hsl, REPLACE);
				break;
			case 'G':
				/* Append a newline and the hold space to the pattern space. */
				cspace(&PS, "\n", 1, 0);
				cspace(&PS, hs, hsl, 0);
				break;
			case 'h':
				/* Replace the hold space with the pattern space. */
				cspace(&HS, ps, psl, REPLACE);
				break;
			case 'H':
				/* Append a newline and the pattern space to the hold space. */
				cspace(&HS, "\n", 1, 0);
				cspace(&HS, ps, psl, 0);
				break;
			case 'i':
				/* Insert: the text is written right away. */
				(void)fprintf(outfile, "%s", cp->t);
				break;
			case 'l':
				/* List the pattern space in unambiguous form. */
				lputs(ps, psl);
				break;
			case 'n':
				/*
				 * Finish the current line as at end of cycle, then
				 * replace the pattern space with the next input
				 * line and keep executing from the next command.
				 * With no more input the program exits here.
				 */
				if (!nflag && !pd)
					OUT();
				flush_appends();
				if (!mf_fgets(&PS, REPLACE))
					exit(0);
				pd = 0;
				break;
			case 'N':
				/*
				 * Append a newline plus the next input line to the
				 * pattern space.  With no more input the program
				 * exits immediately, without printing the pattern
				 * space.
				 */
				flush_appends();
				cspace(&PS, "\n", 1, 0);
				if (!mf_fgets(&PS, 0))
					exit(0);
				break;
			case 'p':
				if (pd)
					break;
				OUT();
				break;
			case 'P':
				if (pd)
					break;
				if ((p = memchr(ps, '\n', psl)) != NULL) {
					/*
					 * Print only up to the first newline by
					 * temporarily shortening the length; the
					 * newline flag is forced on so the partial
					 * output is terminated (it is not restored
					 * afterwards).
					 */
					oldpsl = psl;
					psl = p - ps;
					psanl = 1;
					OUT();
					psl = oldpsl;
				} else {
					OUT();
				}
				break;
			case 'q':
				/* Quit: finish this cycle's output, then exit. */
				if (!nflag && !pd)
					OUT();
				flush_appends();
				finish_file();
				exit(0);
			case 'r':
				/* Queue the file name; the file is read at flush time. */
				if (appendx >= appendnum) {
					/* Queue full: double its capacity. */
					appends = xreallocarray(appends,
					    appendnum,
					    2 * sizeof(struct s_appends));
					appendnum *= 2;
				}
				appends[appendx].type = AP_FILE;
				appends[appendx].s = cp->t;
				appends[appendx].len = strlen(cp->t);
				appendx++;
				break;
			case 's':
				/* Remember success for a later 't' command. */
				sdone |= substitute(cp);
				break;
			case 't':
				/*
				 * Branch if a substitution succeeded since sdone
				 * was last cleared (here or in flush_appends()).
				 */
				if (sdone) {
					sdone = 0;
					cp = cp->u.c;
					goto redirect;
				}
				break;
			case 'w':
				if (pd)
					break;
				/* The file is opened on first use (fd still -1). */
				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
				    DEFFILEMODE)) == -1)
					error(FATAL, "%s: %s",
					    cp->t, strerror(errno));
				/* Pattern space followed by a newline. */
				if ((size_t)write(cp->u.fd, ps, psl) != psl ||
				    write(cp->u.fd, "\n", 1) != 1)
					error(FATAL, "%s: %s",
					    cp->t, strerror(errno));
				break;
			case 'x':
				/*
				 * Exchange pattern and hold spaces.  Give the hold
				 * space a buffer first if it never had one.  The
				 * newline flag stays with the pattern space rather
				 * than travelling with the buffer contents.
				 */
				if (hs == NULL)
					cspace(&HS, "", 0, REPLACE);
				tspace = PS;
				PS = HS;
				psanl = tspace.append_newline;
				HS = tspace;
				break;
			case 'y':
				/* Transliterate each byte through the lookup table. */
				if (pd || psl == 0)
					break;
				for (p = ps, len = psl; len--; ++p)
					*p = cp->u.y[(unsigned char)*p];
				break;
			case ':':
			case '}':
				/* Labels and block ends do nothing at run time. */
				break;
			case '=':
				/* Print the current input line number. */
				(void)fprintf(outfile, "%lu\n", linenum);
			}
			cp = cp->next;
		} /* for all cp */

new:		if (!nflag && !pd)
			OUT();
		flush_appends();
	} /* for all lines */
}
264
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * A regular-expression address is tested against the pattern space, a
 * line-number address against linenum, and any other address type is
 * treated as "last line".  The argument is evaluated more than once, so
 * it must not have side effects.  The whole expansion is parenthesized so
 * the macro can be combined safely with other operators (e.g. !MATCH(a)).
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
272
273/*
274 * Return TRUE if the command applies to the current line.  Sets the inrange
275 * flag to process ranges.  Interprets the non-select (``!'') flag.
276 */
277static inline int
278applies(struct s_command *cp)
279{
280	int r;
281
282	lastaddr = 0;
283	if (cp->a1 == NULL && cp->a2 == NULL)
284		r = 1;
285	else if (cp->a2)
286		if (cp->inrange) {
287			if (MATCH(cp->a2)) {
288				cp->inrange = 0;
289				lastaddr = 1;
290			}
291			r = 1;
292		} else if (MATCH(cp->a1)) {
293			/*
294			 * If the second address is a number less than or
295			 * equal to the line number first selected, only
296			 * one line shall be selected.
297			 *	-- POSIX 1003.2
298			 */
299			if (cp->a2->type == AT_LINE &&
300			    linenum >= cp->a2->u.l)
301				lastaddr = 1;
302			else
303				cp->inrange = 1;
304			r = 1;
305		} else
306			r = 0;
307	else
308		r = MATCH(cp->a1);
309	return (cp->nonsel ? !r : r);
310}
311
312/*
313 * Reset all inrange markers.
314 */
315void
316resetstate(void)
317{
318	struct s_command *cp;
319
320	free(HS.back);
321	memset(&HS, 0, sizeof(HS));
322
323	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
324		if (cp->a2)
325			cp->inrange = 0;
326}
327
/*
 * substitute --
 *	Do substitutions in the pattern space.  Currently, we build a
 *	copy of the new pattern space in the substitute space structure
 *	and then swap them.
 *
 *	cp is the 's' command to run; its u.s member supplies the regular
 *	expression (NULL meaning "reuse the last RE"), the replacement
 *	template, the occurrence count n and the 'p'/'w' flags.
 *
 *	Returns 1 if the pattern space was changed, 0 if the RE did not
 *	match or the requested occurrence was not found.
 */
static int
substitute(struct s_command *cp)
{
	SPACE tspace;
	regex_t *re;
	regoff_t slen;
	int n, lastempty;
	regoff_t le = 0;	/* Offset in ps where the previous match ended. */
	char *s;

	s = ps;
	re = cp->u.s->re;
	if (re == NULL) {
		/*
		 * Empty RE: the previously used RE will be applied, so make
		 * sure it defines every back-reference the replacement uses.
		 */
		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
			linenum = cp->u.s->linenum;
			error(COMPILE, "\\%d not defined in the RE",
			    cp->u.s->maxbref);
		}
	}
	if (!regexec_e(re, ps, 0, 0, 0, psl))
		return (0);

	SS.len = 0;				/* Clean substitute space. */
	slen = psl;
	/*
	 * n selects the occurrence to replace.  While n > 1 matches are
	 * copied through unchanged and n is counted down; at n == 1 the
	 * match is replaced and n becomes -1, ending the loop.  When n is 0
	 * it is never changed, so every match is replaced.
	 */
	n = cp->u.s->n;
	lastempty = 1;

	do {
		/* Copy the leading retained string. */
		if (n <= 1 && (match[0].rm_so > le))
			cspace(&SS, s, match[0].rm_so - le, APPEND);

		/* Skip zero-length matches right after other matches. */
		if (lastempty || (match[0].rm_so - le) ||
		    match[0].rm_so != match[0].rm_eo) {
			if (n <= 1) {
				/* Want this match: append replacement. */
				regsub(&SS, ps, cp->u.s->new);
				if (n == 1)
					n = -1;
			} else {
				/* Want a later match: append original. */
				if (match[0].rm_eo - le)
					cspace(&SS, s, match[0].rm_eo - le,
					    APPEND);
				n--;
			}
		}

		/* Move past this match. */
		s = ps + match[0].rm_eo;
		slen = psl - match[0].rm_eo;
		le = match[0].rm_eo;

		/*
		 * After a zero-length match, advance one byte,
		 * and at the end of the line, terminate.
		 */
		if (match[0].rm_so == match[0].rm_eo) {
			/* slen == -1 is the "stop searching" sentinel. */
			if (*s == '\0' || *s == '\n')
				slen = -1;
			else
				slen--;
			if (*s != '\0') {
				/* Carry the skipped byte over to the result. */
				cspace(&SS, s++, 1, APPEND);
				le++;
			}
			lastempty = 1;
		} else
			lastempty = 0;

		/* Resume the search at offset le; we are no longer at BOL. */
	} while (n >= 0 && slen >= 0 &&
	    regexec_e(re, ps, REG_NOTBOL, 0, le, psl));

	/* Did not find the requested number of matches. */
	if (n > 0)
		return (0);

	/* Copy the trailing retained string. */
	if (slen > 0)
		cspace(&SS, s, slen, APPEND);

	/*
	 * Swap the substitute space and the pattern space, and make sure
	 * that any leftover pointers into stdio memory get lost.
	 */
	tspace = PS;
	PS = SS;
	psanl = tspace.append_newline;
	SS = tspace;
	SS.space = SS.back;

	/* Handle the 'p' flag. */
	if (cp->u.s->p)
		OUT();

	/* Handle the 'w' flag. */
	if (cp->u.s->wfile && !pd) {
		/* The file is opened on first use (wfd still -1). */
		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
		if ((size_t)write(cp->u.s->wfd, ps, psl) != psl ||
		    write(cp->u.s->wfd, "\n", 1) != 1)
			error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno));
	}
	return (1);
}
441
442/*
443 * Flush append requests.  Always called before reading a line,
444 * therefore it also resets the substitution done (sdone) flag.
445 */
446static void
447flush_appends(void)
448{
449	FILE *f;
450	size_t count, idx;
451	char buf[8 * 1024];
452
453	for (idx = 0; idx < appendx; idx++)
454		switch (appends[idx].type) {
455		case AP_STRING:
456			fwrite(appends[idx].s, sizeof(char), appends[idx].len,
457			    outfile);
458			break;
459		case AP_FILE:
460			/*
461			 * Read files probably shouldn't be cached.  Since
462			 * it's not an error to read a non-existent file,
463			 * it's possible that another program is interacting
464			 * with the sed script through the file system.  It
465			 * would be truly bizarre, but possible.  It's probably
466			 * not that big a performance win, anyhow.
467			 */
468			if ((f = fopen(appends[idx].s, "r")) == NULL)
469				break;
470			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
471				(void)fwrite(buf, sizeof(char), count, outfile);
472			(void)fclose(f);
473			break;
474		}
475	if (ferror(outfile))
476		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
477	appendx = sdone = 0;
478}
479
480static void
481lputs(char *s, size_t len)
482{
483	int count;
484	extern int termwidth;
485	const char *escapes;
486	char *p;
487
488	for (count = 0; len > 0; len--, s++) {
489		if (count >= termwidth) {
490			(void)fprintf(outfile, "\\\n");
491			count = 0;
492		}
493		if (isascii((unsigned char)*s) && isprint((unsigned char)*s)
494		    && *s != '\\') {
495			(void)fputc(*s, outfile);
496			count++;
497		} else if (*s == '\n') {
498			(void)fputc('$', outfile);
499			(void)fputc('\n', outfile);
500			count = 0;
501		} else {
502			escapes = "\\\a\b\f\r\t\v";
503			(void)fputc('\\', outfile);
504			if ((p = strchr(escapes, *s)) && *s != '\0') {
505				(void)fputc("\\abfrtv"[p - escapes], outfile);
506				count += 2;
507			} else {
508				(void)fprintf(outfile, "%03o", *(u_char *)s);
509				count += 4;
510			}
511		}
512	}
513	(void)fputc('$', outfile);
514	(void)fputc('\n', outfile);
515	if (ferror(outfile))
516		error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO));
517}
518
519static inline int
520regexec_e(regex_t *preg, const char *string, int eflags,
521    int nomatch, size_t start, size_t stop)
522{
523	int eval;
524
525	if (preg == NULL) {
526		if (defpreg == NULL)
527			error(FATAL, "first RE may not be empty");
528	} else
529		defpreg = preg;
530
531	/* Set anchors */
532	match[0].rm_so = start;
533	match[0].rm_eo = stop;
534
535	eval = regexec(defpreg, string,
536	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
537	switch (eval) {
538	case 0:
539		return (1);
540	case REG_NOMATCH:
541		return (0);
542	}
543	error(FATAL, "RE error: %s", strregerror(eval, defpreg));
544}
545
546/*
547 * regsub - perform substitutions after a regexp match
548 * Based on a routine by Henry Spencer
549 */
550static void
551regsub(SPACE *sp, char *string, char *src)
552{
553	int len, no;
554	char c, *dst;
555
556#define	NEEDSP(reqlen)							\
557	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
558		size_t newlen = sp->blen + (reqlen) + 1024;		\
559		sp->space = sp->back = xrealloc(sp->back, newlen);	\
560		sp->blen = newlen;					\
561		dst = sp->space + sp->len;				\
562	}
563
564	dst = sp->space + sp->len;
565	while ((c = *src++) != '\0') {
566		if (c == '&')
567			no = 0;
568		else if (c == '\\' && isdigit((unsigned char)*src))
569			no = *src++ - '0';
570		else
571			no = -1;
572		if (no < 0) {		/* Ordinary character. */
573			if (c == '\\' && (*src == '\\' || *src == '&'))
574				c = *src++;
575			NEEDSP(1);
576			*dst++ = c;
577			++sp->len;
578		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
579			len = match[no].rm_eo - match[no].rm_so;
580			NEEDSP(len);
581			memmove(dst, string + match[no].rm_so, len);
582			dst += len;
583			sp->len += len;
584		}
585	}
586	NEEDSP(1);
587	*dst = '\0';
588}
589
590/*
591 * aspace --
592 *	Append the source space to the destination space, allocating new
593 *	space as necessary.
594 */
595void
596cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
597{
598	size_t tlen;
599
600	/* Make sure SPACE has enough memory and ramp up quickly. */
601	tlen = sp->len + len + 1;
602	if (tlen > sp->blen) {
603		size_t newlen = tlen + 1024;
604		sp->space = sp->back = xrealloc(sp->back, newlen);
605		sp->blen = newlen;
606	}
607
608	if (spflag == REPLACE)
609		sp->len = 0;
610
611	memmove(sp->space + sp->len, p, len);
612
613	sp->space[sp->len += len] = '\0';
614}
615
616/*
617 * Close all cached opened files and report any errors
618 */
619void
620cfclose(struct s_command *cp, struct s_command *end)
621{
622
623	for (; cp != end; cp = cp->next)
624		switch (cp->code) {
625		case 's':
626			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
627				error(FATAL,
628				    "%s: %s", cp->u.s->wfile, strerror(errno));
629			cp->u.s->wfd = -1;
630			break;
631		case 'w':
632			if (cp->u.fd != -1 && close(cp->u.fd))
633				error(FATAL, "%s: %s", cp->t, strerror(errno));
634			cp->u.fd = -1;
635			break;
636		case '{':
637			cfclose(cp->u.c, cp->next);
638			break;
639		}
640}
641