1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#ifndef lint
38static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
39#endif
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/uio.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <wchar.h>
57#include <wctype.h>
58
59#include "defs.h"
60#include "extern.h"
61
/*
 * Work buffers: HS is the hold space, PS the pattern space, SS the
 * scratch space substitute() builds its result in, and YS the scratch
 * space do_tr() builds multibyte translations in.
 */
static SPACE HS, PS, SS, YS;
/* Shorthand for the pattern-space and hold-space fields used everywhere. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len

static __inline int	 applies(struct s_command *);
static void		 do_tr(struct s_tr *);
static void		 flush_appends(void);
static void		 lputs(char *, size_t);
static __inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Array of queued 'a' and 'r' requests. */
static int appendx;		/* Index of next free slot in appends. */
int appendnum;			/* Allocated size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
/*
 * Regular expression most recently handed to regexec_e(); it is reused
 * whenever a command supplies a NULL (empty) RE.
 */
static regex_t *defpreg;
size_t maxnsub;			/* regexec_e() asks for maxnsub + 1 matches. */
regmatch_t *match;		/* Match offsets filled in by regexec_e(). */

/* Write the pattern space followed by a newline to outfile. */
#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
89
/*
 * process --
 *	Main interpreter loop.  For every input line obtained from mf_fgets()
 *	the compiled script (prog) is walked command by command; each command
 *	whose addresses select the line (see applies()) is executed.  At the
 *	end of the script the pattern space is written out unless -n (nflag)
 *	is in effect or the pattern space was deleted, and queued appends are
 *	flushed.
 *
 *	Labels used by the commands:
 *	  top      - restart the script on the current pattern space
 *	  redirect - continue executing at the command now held in cp
 *	  new      - finish this cycle (optional output, flush appends)
 */
void
process(void)
{
	struct s_command *cp;
	SPACE tspace;
	size_t oldpsl = 0;
	char *p;

	p = NULL;

	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
		pd = 0;
top:
		cp = prog;
redirect:
		while (cp != NULL) {
			if (!applies(cp)) {
				cp = cp->next;
				continue;
			}
			switch (cp->code) {
			case '{':
				/* Enter the command group. */
				cp = cp->u.c;
				goto redirect;
			case 'a':
				/*
				 * Queue the text; flush_appends() writes it
				 * later.  The queue is doubled when full.
				 */
				if (appendx >= appendnum)
					if ((appends = realloc(appends,
					    sizeof(struct s_appends) *
					    (appendnum *= 2))) == NULL)
						err(1, "realloc");
				appends[appendx].type = AP_STRING;
				appends[appendx].s = cp->t;
				appends[appendx].len = strlen(cp->t);
				appendx++;
				break;
			case 'b':
				/* Unconditional branch. */
				cp = cp->u.c;
				goto redirect;
			case 'c':
				/*
				 * Delete the pattern space.  The text is
				 * printed for a command without a range, at
				 * the last address of a range, or on the last
				 * input line.
				 */
				pd = 1;
				psl = 0;
				if (cp->a2 == NULL || lastaddr || lastline())
					(void)fprintf(outfile, "%s", cp->t);
				break;
			case 'd':
				/* Delete the pattern space, end the cycle. */
				pd = 1;
				goto new;
			case 'D':
				/*
				 * With no newline in the pattern space this
				 * acts like 'd'.  Otherwise drop everything
				 * up to and including the first newline and
				 * rerun the script without reading input.
				 */
				if (pd)
					goto new;
				if (psl == 0 ||
				    (p = memchr(ps, '\n', psl)) == NULL) {
					pd = 1;
					goto new;
				} else {
					psl -= (p + 1) - ps;
					memmove(ps, p + 1, psl);
					goto top;
				}
			case 'g':
				/* Pattern space = hold space. */
				cspace(&PS, hs, hsl, REPLACE);
				break;
			case 'G':
				/* Pattern space += newline + hold space. */
				cspace(&PS, "\n", 1, APPEND);
				cspace(&PS, hs, hsl, APPEND);
				break;
			case 'h':
				/* Hold space = pattern space. */
				cspace(&HS, ps, psl, REPLACE);
				break;
			case 'H':
				/* Hold space += newline + pattern space. */
				cspace(&HS, "\n", 1, APPEND);
				cspace(&HS, ps, psl, APPEND);
				break;
			case 'i':
				/* Insert: the text is written immediately. */
				(void)fprintf(outfile, "%s", cp->t);
				break;
			case 'l':
				/* Dump the pattern space unambiguously. */
				lputs(ps, psl);
				break;
			case 'n':
				/*
				 * Output the pattern space (unless suppressed)
				 * and replace it with the next input line;
				 * exit when mf_fgets() reports no more input.
				 */
				if (!nflag && !pd)
					OUT();
				flush_appends();
				if (!mf_fgets(&PS, REPLACE))
					exit(0);
				pd = 0;
				break;
			case 'N':
				/*
				 * Append a newline and the next input line to
				 * the pattern space; exit without printing
				 * when mf_fgets() reports no more input.
				 */
				flush_appends();
				cspace(&PS, "\n", 1, APPEND);
				if (!mf_fgets(&PS, APPEND))
					exit(0);
				break;
			case 'p':
				if (pd)
					break;
				OUT();
				break;
			case 'P':
				/*
				 * Print up to the first newline by shrinking
				 * psl for the duration of OUT().
				 */
				if (pd)
					break;
				if ((p = memchr(ps, '\n', psl)) != NULL) {
					oldpsl = psl;
					psl = p - ps;
				}
				OUT();
				if (p != NULL)
					psl = oldpsl;
				break;
			case 'q':
				/* Finish the current cycle's output, then quit. */
				if (!nflag && !pd)
					OUT();
				flush_appends();
				exit(0);
			case 'r':
				/*
				 * Queue the file name; flush_appends() copies
				 * the file's contents to the output later.
				 */
				if (appendx >= appendnum)
					if ((appends = realloc(appends,
					    sizeof(struct s_appends) *
					    (appendnum *= 2))) == NULL)
						err(1, "realloc");
				appends[appendx].type = AP_FILE;
				appends[appendx].s = cp->t;
				appends[appendx].len = strlen(cp->t);
				appendx++;
				break;
			case 's':
				sdone |= substitute(cp);
				break;
			case 't':
				/*
				 * Branch if a substitution has been made
				 * since the last input line was read or the
				 * last 't' was taken.
				 */
				if (sdone) {
					sdone = 0;
					cp = cp->u.c;
					goto redirect;
				}
				break;
			case 'w':
				/*
				 * The file is opened on first use (fd == -1)
				 * and the descriptor cached in the command.
				 */
				if (pd)
					break;
				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
				    DEFFILEMODE)) == -1)
					err(1, "%s", cp->t);
				if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
				    write(cp->u.fd, "\n", 1) != 1)
					err(1, "%s", cp->t);
				break;
			case 'x':
				/*
				 * If the hold space is null, make it empty
				 * but not null.  Otherwise the pattern space
				 * will become null after the swap, which is
				 * an abnormal condition.
				 */
				if (hs == NULL)
					cspace(&HS, "", 0, REPLACE);
				tspace = PS;
				PS = HS;
				HS = tspace;
				break;
			case 'y':
				if (pd || psl == 0)
					break;
				do_tr(cp->u.y);
				break;
			case ':':
			case '}':
				/* Labels and group ends do nothing. */
				break;
			case '=':
				/* Last case: control simply leaves the switch. */
				(void)fprintf(outfile, "%lu\n", linenum);
			}
			cp = cp->next;
		} /* for all cp */

new:		if (!nflag && !pd)
			OUT();
		flush_appends();
	} /* for all lines */
}
268
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).
 *
 * An AT_RE address is matched against the pattern space, an AT_LINE
 * address is compared with the current line number, and every other
 * address type is treated as a "last line" test.  Note that this
 * includes AT_RELLINE, so callers must handle relative addresses
 * themselves rather than pass them here.
 */
#define	MATCH(a)							\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
276
277/*
278 * Return TRUE if the command applies to the current line.  Sets the start
279 * line for process ranges.  Interprets the non-select (``!'') flag.
280 */
281static __inline int
282applies(struct s_command *cp)
283{
284	int r;
285
286	lastaddr = 0;
287	if (cp->a1 == NULL && cp->a2 == NULL)
288		r = 1;
289	else if (cp->a2)
290		if (cp->startline > 0) {
291			if (MATCH(cp->a2)) {
292				cp->startline = 0;
293				lastaddr = 1;
294				r = 1;
295			} else if (linenum - cp->startline <= cp->a2->u.l)
296				r = 1;
297			else if ((cp->a2->type == AT_LINE &&
298				   linenum > cp->a2->u.l) ||
299				   (cp->a2->type == AT_RELLINE &&
300				   linenum - cp->startline > cp->a2->u.l)) {
301				/*
302				 * We missed the 2nd address due to a branch,
303				 * so just close the range and return false.
304				 */
305				cp->startline = 0;
306				r = 0;
307			} else
308				r = 1;
309		} else if (MATCH(cp->a1)) {
310			/*
311			 * If the second address is a number less than or
312			 * equal to the line number first selected, only
313			 * one line shall be selected.
314			 *	-- POSIX 1003.2
315			 * Likewise if the relative second line address is zero.
316			 */
317			if ((cp->a2->type == AT_LINE &&
318			    linenum >= cp->a2->u.l) ||
319			    (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
320				lastaddr = 1;
321			else {
322				cp->startline = linenum;
323			}
324			r = 1;
325		} else
326			r = 0;
327	else
328		r = MATCH(cp->a1);
329	return (cp->nonsel ? ! r : r);
330}
331
332/*
333 * Reset the sed processor to its initial state.
334 */
335void
336resetstate(void)
337{
338	struct s_command *cp;
339
340	/*
341	 * Reset all in-range markers.
342	 */
343	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
344		if (cp->a2)
345			cp->startline = 0;
346
347	/*
348	 * Clear out the hold space.
349	 */
350	cspace(&HS, "", 0, REPLACE);
351}
352
353/*
354 * substitute --
355 *	Do substitutions in the pattern space.  Currently, we build a
356 *	copy of the new pattern space in the substitute space structure
357 *	and then swap them.
358 */
359static int
360substitute(struct s_command *cp)
361{
362	SPACE tspace;
363	regex_t *re;
364	regoff_t re_off, slen;
365	int lastempty, n;
366	char *s;
367
368	s = ps;
369	re = cp->u.s->re;
370	if (re == NULL) {
371		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
372			linenum = cp->u.s->linenum;
373			errx(1, "%lu: %s: \\%u not defined in the RE",
374					linenum, fname, cp->u.s->maxbref);
375		}
376	}
377	if (!regexec_e(re, s, 0, 0, psl))
378		return (0);
379
380	SS.len = 0;				/* Clean substitute space. */
381	slen = psl;
382	n = cp->u.s->n;
383	lastempty = 1;
384
385	switch (n) {
386	case 0:					/* Global */
387		do {
388			if (lastempty || match[0].rm_so != match[0].rm_eo) {
389				/* Locate start of replaced string. */
390				re_off = match[0].rm_so;
391				/* Copy leading retained string. */
392				cspace(&SS, s, re_off, APPEND);
393				/* Add in regular expression. */
394				regsub(&SS, s, cp->u.s->new);
395			}
396
397			/* Move past this match. */
398			if (match[0].rm_so != match[0].rm_eo) {
399				s += match[0].rm_eo;
400				slen -= match[0].rm_eo;
401				lastempty = 0;
402			} else {
403				if (match[0].rm_so < slen)
404					cspace(&SS, s + match[0].rm_so, 1,
405					    APPEND);
406				s += match[0].rm_so + 1;
407				slen -= match[0].rm_so + 1;
408				lastempty = 1;
409			}
410		} while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
411		/* Copy trailing retained string. */
412		if (slen > 0)
413			cspace(&SS, s, slen, APPEND);
414		break;
415	default:				/* Nth occurrence */
416		while (--n) {
417			if (match[0].rm_eo == match[0].rm_so)
418				match[0].rm_eo = match[0].rm_so + 1;
419			s += match[0].rm_eo;
420			slen -= match[0].rm_eo;
421			if (slen < 0)
422				return (0);
423			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
424				return (0);
425		}
426		/* FALLTHROUGH */
427	case 1:					/* 1st occurrence */
428		/* Locate start of replaced string. */
429		re_off = match[0].rm_so + (s - ps);
430		/* Copy leading retained string. */
431		cspace(&SS, ps, re_off, APPEND);
432		/* Add in regular expression. */
433		regsub(&SS, s, cp->u.s->new);
434		/* Copy trailing retained string. */
435		s += match[0].rm_eo;
436		slen -= match[0].rm_eo;
437		cspace(&SS, s, slen, APPEND);
438		break;
439	}
440
441	/*
442	 * Swap the substitute space and the pattern space, and make sure
443	 * that any leftover pointers into stdio memory get lost.
444	 */
445	tspace = PS;
446	PS = SS;
447	SS = tspace;
448	SS.space = SS.back;
449
450	/* Handle the 'p' flag. */
451	if (cp->u.s->p)
452		OUT();
453
454	/* Handle the 'w' flag. */
455	if (cp->u.s->wfile && !pd) {
456		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
457		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
458			err(1, "%s", cp->u.s->wfile);
459		if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
460		    write(cp->u.s->wfd, "\n", 1) != 1)
461			err(1, "%s", cp->u.s->wfile);
462	}
463	return (1);
464}
465
466/*
467 * do_tr --
468 *	Perform translation ('y' command) in the pattern space.
469 */
470static void
471do_tr(struct s_tr *y)
472{
473	SPACE tmp;
474	char c, *p;
475	size_t clen, left;
476	int i;
477
478	if (MB_CUR_MAX == 1) {
479		/*
480		 * Single-byte encoding: perform in-place translation
481		 * of the pattern space.
482		 */
483		for (p = ps; p < &ps[psl]; p++)
484			*p = y->bytetab[(u_char)*p];
485	} else {
486		/*
487		 * Multi-byte encoding: perform translation into the
488		 * translation space, then swap the translation and
489		 * pattern spaces.
490		 */
491		/* Clean translation space. */
492		YS.len = 0;
493		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
494			if ((c = y->bytetab[(u_char)*p]) != '\0') {
495				cspace(&YS, &c, 1, APPEND);
496				clen = 1;
497				continue;
498			}
499			for (i = 0; i < y->nmultis; i++)
500				if (left >= y->multis[i].fromlen &&
501				    memcmp(p, y->multis[i].from,
502				    y->multis[i].fromlen) == 0)
503					break;
504			if (i < y->nmultis) {
505				cspace(&YS, y->multis[i].to,
506				    y->multis[i].tolen, APPEND);
507				clen = y->multis[i].fromlen;
508			} else {
509				cspace(&YS, p, 1, APPEND);
510				clen = 1;
511			}
512		}
513		/* Swap the translation space and the pattern space. */
514		tmp = PS;
515		PS = YS;
516		YS = tmp;
517		YS.space = YS.back;
518	}
519}
520
521/*
522 * Flush append requests.  Always called before reading a line,
523 * therefore it also resets the substitution done (sdone) flag.
524 */
525static void
526flush_appends(void)
527{
528	FILE *f;
529	int count, i;
530	char buf[8 * 1024];
531
532	for (i = 0; i < appendx; i++)
533		switch (appends[i].type) {
534		case AP_STRING:
535			fwrite(appends[i].s, sizeof(char), appends[i].len,
536			    outfile);
537			break;
538		case AP_FILE:
539			/*
540			 * Read files probably shouldn't be cached.  Since
541			 * it's not an error to read a non-existent file,
542			 * it's possible that another program is interacting
543			 * with the sed script through the filesystem.  It
544			 * would be truly bizarre, but possible.  It's probably
545			 * not that big a performance win, anyhow.
546			 */
547			if ((f = fopen(appends[i].s, "r")) == NULL)
548				break;
549			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
550				(void)fwrite(buf, sizeof(char), count, outfile);
551			(void)fclose(f);
552			break;
553		}
554	if (ferror(outfile))
555		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
556	appendx = sdone = 0;
557}
558
559static void
560lputs(char *s, size_t len)
561{
562	static const char escapes[] = "\\\a\b\f\r\t\v";
563	int c, col, width;
564	const char *p;
565	struct winsize win;
566	static int termwidth = -1;
567	size_t clen, i;
568	wchar_t wc;
569	mbstate_t mbs;
570
571	if (outfile != stdout)
572		termwidth = 60;
573	if (termwidth == -1) {
574		if ((p = getenv("COLUMNS")) && *p != '\0')
575			termwidth = atoi(p);
576		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
577		    win.ws_col > 0)
578			termwidth = win.ws_col;
579		else
580			termwidth = 60;
581	}
582	if (termwidth <= 0)
583		termwidth = 1;
584
585	memset(&mbs, 0, sizeof(mbs));
586	col = 0;
587	while (len != 0) {
588		clen = mbrtowc(&wc, s, len, &mbs);
589		if (clen == 0)
590			clen = 1;
591		if (clen == (size_t)-1 || clen == (size_t)-2) {
592			wc = (unsigned char)*s;
593			clen = 1;
594			memset(&mbs, 0, sizeof(mbs));
595		}
596		if (wc == '\n') {
597			if (col + 1 >= termwidth)
598				fprintf(outfile, "\\\n");
599			fputc('$', outfile);
600			fputc('\n', outfile);
601			col = 0;
602		} else if (iswprint(wc)) {
603			width = wcwidth(wc);
604			if (col + width >= termwidth) {
605				fprintf(outfile, "\\\n");
606				col = 0;
607			}
608			fwrite(s, 1, clen, outfile);
609			col += width;
610		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
611		    (p = strchr(escapes, c)) != NULL) {
612			if (col + 2 >= termwidth) {
613				fprintf(outfile, "\\\n");
614				col = 0;
615			}
616			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
617			col += 2;
618		} else {
619			if (col + 4 * clen >= (unsigned)termwidth) {
620				fprintf(outfile, "\\\n");
621				col = 0;
622			}
623			for (i = 0; i < clen; i++)
624				fprintf(outfile, "\\%03o",
625				    (int)(unsigned char)s[i]);
626			col += 4 * clen;
627		}
628		s += clen;
629		len -= clen;
630	}
631	if (col + 1 >= termwidth)
632		fprintf(outfile, "\\\n");
633	(void)fputc('$', outfile);
634	(void)fputc('\n', outfile);
635	if (ferror(outfile))
636		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
637}
638
639static __inline int
640regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
641	size_t slen)
642{
643	int eval;
644
645	if (preg == NULL) {
646		if (defpreg == NULL)
647			errx(1, "first RE may not be empty");
648	} else
649		defpreg = preg;
650
651	/* Set anchors */
652	match[0].rm_so = 0;
653	match[0].rm_eo = slen;
654
655	eval = regexec(defpreg, string,
656	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
657	switch(eval) {
658	case 0:
659		return (1);
660	case REG_NOMATCH:
661		return (0);
662	}
663	errx(1, "RE error: %s", strregerror(eval, defpreg));
664	/* NOTREACHED */
665}
666
667/*
668 * regsub - perform substitutions after a regexp match
669 * Based on a routine by Henry Spencer
670 */
671static void
672regsub(SPACE *sp, char *string, char *src)
673{
674	int len, no;
675	char c, *dst;
676
677#define	NEEDSP(reqlen)							\
678	/* XXX What is the +1 for? */					\
679	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
680		sp->blen += (reqlen) + 1024;				\
681		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
682		    == NULL)						\
683			err(1, "realloc");				\
684		dst = sp->space + sp->len;				\
685	}
686
687	dst = sp->space + sp->len;
688	while ((c = *src++) != '\0') {
689		if (c == '&')
690			no = 0;
691		else if (c == '\\' && isdigit((unsigned char)*src))
692			no = *src++ - '0';
693		else
694			no = -1;
695		if (no < 0) {		/* Ordinary character. */
696			if (c == '\\' && (*src == '\\' || *src == '&'))
697				c = *src++;
698			NEEDSP(1);
699			*dst++ = c;
700			++sp->len;
701		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
702			len = match[no].rm_eo - match[no].rm_so;
703			NEEDSP(len);
704			memmove(dst, string + match[no].rm_so, len);
705			dst += len;
706			sp->len += len;
707		}
708	}
709	NEEDSP(1);
710	*dst = '\0';
711}
712
713/*
714 * cspace --
715 *	Concatenate space: append the source space to the destination space,
716 *	allocating new space as necessary.
717 */
718void
719cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
720{
721	size_t tlen;
722
723	/* Make sure SPACE has enough memory and ramp up quickly. */
724	tlen = sp->len + len + 1;
725	if (tlen > sp->blen) {
726		sp->blen = tlen + 1024;
727		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
728		    NULL)
729			err(1, "realloc");
730	}
731
732	if (spflag == REPLACE)
733		sp->len = 0;
734
735	memmove(sp->space + sp->len, p, len);
736
737	sp->space[sp->len += len] = '\0';
738}
739
740/*
741 * Close all cached opened files and report any errors
742 */
743void
744cfclose(struct s_command *cp, struct s_command *end)
745{
746
747	for (; cp != end; cp = cp->next)
748		switch(cp->code) {
749		case 's':
750			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
751				err(1, "%s", cp->u.s->wfile);
752			cp->u.s->wfd = -1;
753			break;
754		case 'w':
755			if (cp->u.fd != -1 && close(cp->u.fd))
756				err(1, "%s", cp->t);
757			cp->u.fd = -1;
758			break;
759		case '{':
760			cfclose(cp->u.c, cp->next);
761			break;
762		}
763}
764