process.c revision 302228
1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: stable/10/usr.bin/sed/process.c 302228 2016-06-27 20:54:02Z pfg $");
36
37#ifndef lint
38static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
39#endif
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/uio.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <wchar.h>
57#include <wctype.h>
58
59#include "defs.h"
60#include "extern.h"
61
/*
 * Working buffers shared by the whole file:
 *	HS - hold space (target of 'h'/'H', source of 'g'/'G', swapped by 'x')
 *	PS - pattern space (the text the script operates on)
 *	SS - scratch space in which substitute() builds its result
 *	YS - scratch space in which do_tr() builds multi-byte translations
 */
static SPACE HS, PS, SS, YS;
/* Shorthand for the pattern-space and hold-space fields used below. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len

static inline int	 applies(struct s_command *);
static void		 do_tr(struct s_tr *);
static void		 flush_appends(void);
static void		 lputs(char *, size_t);
static int		 regexec_e(regex_t *, const char *, int, int, size_t,
			     size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Array of queued 'a'/'r' requests. */
static int appendx;		/* Index of the next free appends slot. */
int appendnum;			/* Allocated size of the appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/* Most recent non-NULL RE given to regexec_e(). */
static regex_t *defpreg;
/* regexec_e() asks regexec() for maxnsub + 1 entries of match[]. */
size_t maxnsub;
/*
 * Match offsets written by regexec(); match[0] also carries the
 * REG_STARTEND search bounds on input.
 * NOTE(review): presumably allocated elsewhere with maxnsub + 1 entries.
 */
regmatch_t *match;

/* Write the pattern space followed by a newline to outfile. */
#define OUT() do {fwrite(ps, 1, psl, outfile); fputc('\n', outfile);} while (0)
90
91void
92process(void)
93{
94	struct s_command *cp;
95	SPACE tspace;
96	size_t oldpsl = 0;
97	char *p;
98
99	p = NULL;
100
101	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
102		pd = 0;
103top:
104		cp = prog;
105redirect:
106		while (cp != NULL) {
107			if (!applies(cp)) {
108				cp = cp->next;
109				continue;
110			}
111			switch (cp->code) {
112			case '{':
113				cp = cp->u.c;
114				goto redirect;
115			case 'a':
116				if (appendx >= appendnum)
117					if ((appends = realloc(appends,
118					    sizeof(struct s_appends) *
119					    (appendnum *= 2))) == NULL)
120						err(1, "realloc");
121				appends[appendx].type = AP_STRING;
122				appends[appendx].s = cp->t;
123				appends[appendx].len = strlen(cp->t);
124				appendx++;
125				break;
126			case 'b':
127				cp = cp->u.c;
128				goto redirect;
129			case 'c':
130				pd = 1;
131				psl = 0;
132				if (cp->a2 == NULL || lastaddr || lastline())
133					(void)fprintf(outfile, "%s", cp->t);
134				break;
135			case 'd':
136				pd = 1;
137				goto new;
138			case 'D':
139				if (pd)
140					goto new;
141				if (psl == 0 ||
142				    (p = memchr(ps, '\n', psl)) == NULL) {
143					pd = 1;
144					goto new;
145				} else {
146					psl -= (p + 1) - ps;
147					memmove(ps, p + 1, psl);
148					goto top;
149				}
150			case 'g':
151				cspace(&PS, hs, hsl, REPLACE);
152				break;
153			case 'G':
154				cspace(&PS, "\n", 1, APPEND);
155				cspace(&PS, hs, hsl, APPEND);
156				break;
157			case 'h':
158				cspace(&HS, ps, psl, REPLACE);
159				break;
160			case 'H':
161				cspace(&HS, "\n", 1, APPEND);
162				cspace(&HS, ps, psl, APPEND);
163				break;
164			case 'i':
165				(void)fprintf(outfile, "%s", cp->t);
166				break;
167			case 'l':
168				lputs(ps, psl);
169				break;
170			case 'n':
171				if (!nflag && !pd)
172					OUT();
173				flush_appends();
174				if (!mf_fgets(&PS, REPLACE))
175					exit(0);
176				pd = 0;
177				break;
178			case 'N':
179				flush_appends();
180				cspace(&PS, "\n", 1, APPEND);
181				if (!mf_fgets(&PS, APPEND))
182					exit(0);
183				break;
184			case 'p':
185				if (pd)
186					break;
187				OUT();
188				break;
189			case 'P':
190				if (pd)
191					break;
192				if ((p = memchr(ps, '\n', psl)) != NULL) {
193					oldpsl = psl;
194					psl = p - ps;
195				}
196				OUT();
197				if (p != NULL)
198					psl = oldpsl;
199				break;
200			case 'q':
201				if (!nflag && !pd)
202					OUT();
203				flush_appends();
204				exit(0);
205			case 'r':
206				if (appendx >= appendnum)
207					if ((appends = realloc(appends,
208					    sizeof(struct s_appends) *
209					    (appendnum *= 2))) == NULL)
210						err(1, "realloc");
211				appends[appendx].type = AP_FILE;
212				appends[appendx].s = cp->t;
213				appends[appendx].len = strlen(cp->t);
214				appendx++;
215				break;
216			case 's':
217				sdone |= substitute(cp);
218				break;
219			case 't':
220				if (sdone) {
221					sdone = 0;
222					cp = cp->u.c;
223					goto redirect;
224				}
225				break;
226			case 'w':
227				if (pd)
228					break;
229				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
230				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
231				    DEFFILEMODE)) == -1)
232					err(1, "%s", cp->t);
233				if (write(cp->u.fd, ps, psl) != (ssize_t)psl ||
234				    write(cp->u.fd, "\n", 1) != 1)
235					err(1, "%s", cp->t);
236				break;
237			case 'x':
238				/*
239				 * If the hold space is null, make it empty
240				 * but not null.  Otherwise the pattern space
241				 * will become null after the swap, which is
242				 * an abnormal condition.
243				 */
244				if (hs == NULL)
245					cspace(&HS, "", 0, REPLACE);
246				tspace = PS;
247				PS = HS;
248				HS = tspace;
249				break;
250			case 'y':
251				if (pd || psl == 0)
252					break;
253				do_tr(cp->u.y);
254				break;
255			case ':':
256			case '}':
257				break;
258			case '=':
259				(void)fprintf(outfile, "%lu\n", linenum);
260			}
261			cp = cp->next;
262		} /* for all cp */
263
264new:		if (!nflag && !pd)
265			OUT();
266		flush_appends();
267	} /* for all lines */
268}
269
270/*
271 * TRUE if the address passed matches the current program state
272 * (lastline, linenumber, ps).
273 */
274#define	MATCH(a)							\
275	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) :	\
276	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
277
278/*
279 * Return TRUE if the command applies to the current line.  Sets the start
280 * line for process ranges.  Interprets the non-select (``!'') flag.
281 */
282static inline int
283applies(struct s_command *cp)
284{
285	int r;
286
287	lastaddr = 0;
288	if (cp->a1 == NULL && cp->a2 == NULL)
289		r = 1;
290	else if (cp->a2)
291		if (cp->startline > 0) {
292                        switch (cp->a2->type) {
293                        case AT_RELLINE:
294                                if (linenum - cp->startline <= cp->a2->u.l)
295                                        r = 1;
296                                else {
297				        cp->startline = 0;
298				        r = 0;
299                                }
300                                break;
301                        default:
302                                if (MATCH(cp->a2)) {
303                                        cp->startline = 0;
304                                        lastaddr = 1;
305                                        r = 1;
306                                } else if (cp->a2->type == AT_LINE &&
307                                            linenum > cp->a2->u.l) {
308                                        /*
309                                         * We missed the 2nd address due to a
310                                         * branch, so just close the range and
311                                         * return false.
312                                         */
313                                        cp->startline = 0;
314                                        r = 0;
315                                } else
316                                        r = 1;
317                        }
318		} else if (cp->a1 && MATCH(cp->a1)) {
319			/*
320			 * If the second address is a number less than or
321			 * equal to the line number first selected, only
322			 * one line shall be selected.
323			 *	-- POSIX 1003.2
324			 * Likewise if the relative second line address is zero.
325			 */
326			if ((cp->a2->type == AT_LINE &&
327			    linenum >= cp->a2->u.l) ||
328			    (cp->a2->type == AT_RELLINE && cp->a2->u.l == 0))
329				lastaddr = 1;
330			else {
331				cp->startline = linenum;
332			}
333			r = 1;
334		} else
335			r = 0;
336	else
337		r = MATCH(cp->a1);
338	return (cp->nonsel ? ! r : r);
339}
340
341/*
342 * Reset the sed processor to its initial state.
343 */
344void
345resetstate(void)
346{
347	struct s_command *cp;
348
349	/*
350	 * Reset all in-range markers.
351	 */
352	for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next)
353		if (cp->a2)
354			cp->startline = 0;
355
356	/*
357	 * Clear out the hold space.
358	 */
359	cspace(&HS, "", 0, REPLACE);
360}
361
362/*
363 * substitute --
364 *	Do substitutions in the pattern space.  Currently, we build a
365 *	copy of the new pattern space in the substitute space structure
366 *	and then swap them.
367 */
368static int
369substitute(struct s_command *cp)
370{
371	SPACE tspace;
372	regex_t *re;
373	regoff_t slen;
374	int lastempty, n;
375	size_t le = 0;
376	char *s;
377
378	s = ps;
379	re = cp->u.s->re;
380	if (re == NULL) {
381		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
382			linenum = cp->u.s->linenum;
383			errx(1, "%lu: %s: \\%u not defined in the RE",
384					linenum, fname, cp->u.s->maxbref);
385		}
386	}
387	if (!regexec_e(re, s, 0, 0, 0, psl))
388		return (0);
389
390	SS.len = 0;				/* Clean substitute space. */
391	slen = psl;
392	n = cp->u.s->n;
393	lastempty = 1;
394
395	do {
396		/* Copy the leading retained string. */
397		if (n <= 1 && match[0].rm_so - le)
398			cspace(&SS, s, match[0].rm_so - le, APPEND);
399
400		/* Skip zero-length matches right after other matches. */
401		if (lastempty || (match[0].rm_so - le) ||
402		    match[0].rm_so != match[0].rm_eo) {
403			if (n <= 1) {
404				/* Want this match: append replacement. */
405				regsub(&SS, ps, cp->u.s->new);
406				if (n == 1)
407					n = -1;
408			} else {
409				/* Want a later match: append original. */
410				if (match[0].rm_eo - le)
411					cspace(&SS, s, match[0].rm_eo - le,
412					    APPEND);
413				n--;
414			}
415		}
416
417		/* Move past this match. */
418		s += (match[0].rm_eo - le);
419		slen -= (match[0].rm_eo - le);
420		le = match[0].rm_eo;
421
422		/*
423		 * After a zero-length match, advance one byte,
424		 * and at the end of the line, terminate.
425		 */
426		if (match[0].rm_so == match[0].rm_eo) {
427			if (*s == '\0' || *s == '\n')
428				slen = -1;
429			else
430				slen--;
431			if (*s != '\0') {
432			 	cspace(&SS, s++, 1, APPEND);
433				le++;
434			}
435			lastempty = 1;
436		} else
437			lastempty = 0;
438
439	} while (n >= 0 && slen >= 0 && regexec_e(re, ps, 0, 0, le, psl));
440
441	/* Did not find the requested number of matches. */
442	if (n > 1)
443		return (0);
444
445	/* Copy the trailing retained string. */
446	if (slen > 0)
447		cspace(&SS, s, slen, APPEND);
448
449	/*
450	 * Swap the substitute space and the pattern space, and make sure
451	 * that any leftover pointers into stdio memory get lost.
452	 */
453	tspace = PS;
454	PS = SS;
455	SS = tspace;
456	SS.space = SS.back;
457
458	/* Handle the 'p' flag. */
459	if (cp->u.s->p)
460		OUT();
461
462	/* Handle the 'w' flag. */
463	if (cp->u.s->wfile && !pd) {
464		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
465		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
466			err(1, "%s", cp->u.s->wfile);
467		if (write(cp->u.s->wfd, ps, psl) != (ssize_t)psl ||
468		    write(cp->u.s->wfd, "\n", 1) != 1)
469			err(1, "%s", cp->u.s->wfile);
470	}
471	return (1);
472}
473
474/*
475 * do_tr --
476 *	Perform translation ('y' command) in the pattern space.
477 */
478static void
479do_tr(struct s_tr *y)
480{
481	SPACE tmp;
482	char c, *p;
483	size_t clen, left;
484	int i;
485
486	if (MB_CUR_MAX == 1) {
487		/*
488		 * Single-byte encoding: perform in-place translation
489		 * of the pattern space.
490		 */
491		for (p = ps; p < &ps[psl]; p++)
492			*p = y->bytetab[(u_char)*p];
493	} else {
494		/*
495		 * Multi-byte encoding: perform translation into the
496		 * translation space, then swap the translation and
497		 * pattern spaces.
498		 */
499		/* Clean translation space. */
500		YS.len = 0;
501		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
502			if ((c = y->bytetab[(u_char)*p]) != '\0') {
503				cspace(&YS, &c, 1, APPEND);
504				clen = 1;
505				continue;
506			}
507			for (i = 0; i < y->nmultis; i++)
508				if (left >= y->multis[i].fromlen &&
509				    memcmp(p, y->multis[i].from,
510				    y->multis[i].fromlen) == 0)
511					break;
512			if (i < y->nmultis) {
513				cspace(&YS, y->multis[i].to,
514				    y->multis[i].tolen, APPEND);
515				clen = y->multis[i].fromlen;
516			} else {
517				cspace(&YS, p, 1, APPEND);
518				clen = 1;
519			}
520		}
521		/* Swap the translation space and the pattern space. */
522		tmp = PS;
523		PS = YS;
524		YS = tmp;
525		YS.space = YS.back;
526	}
527}
528
529/*
530 * Flush append requests.  Always called before reading a line,
531 * therefore it also resets the substitution done (sdone) flag.
532 */
533static void
534flush_appends(void)
535{
536	FILE *f;
537	int count, i;
538	char buf[8 * 1024];
539
540	for (i = 0; i < appendx; i++)
541		switch (appends[i].type) {
542		case AP_STRING:
543			fwrite(appends[i].s, sizeof(char), appends[i].len,
544			    outfile);
545			break;
546		case AP_FILE:
547			/*
548			 * Read files probably shouldn't be cached.  Since
549			 * it's not an error to read a non-existent file,
550			 * it's possible that another program is interacting
551			 * with the sed script through the filesystem.  It
552			 * would be truly bizarre, but possible.  It's probably
553			 * not that big a performance win, anyhow.
554			 */
555			if ((f = fopen(appends[i].s, "r")) == NULL)
556				break;
557			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
558				(void)fwrite(buf, sizeof(char), count, outfile);
559			(void)fclose(f);
560			break;
561		}
562	if (ferror(outfile))
563		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
564	appendx = sdone = 0;
565}
566
567static void
568lputs(char *s, size_t len)
569{
570	static const char escapes[] = "\\\a\b\f\r\t\v";
571	int c, col, width;
572	const char *p;
573	struct winsize win;
574	static int termwidth = -1;
575	size_t clen, i;
576	wchar_t wc;
577	mbstate_t mbs;
578
579	if (outfile != stdout)
580		termwidth = 60;
581	if (termwidth == -1) {
582		if ((p = getenv("COLUMNS")) && *p != '\0')
583			termwidth = atoi(p);
584		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
585		    win.ws_col > 0)
586			termwidth = win.ws_col;
587		else
588			termwidth = 60;
589	}
590	if (termwidth <= 0)
591		termwidth = 1;
592
593	memset(&mbs, 0, sizeof(mbs));
594	col = 0;
595	while (len != 0) {
596		clen = mbrtowc(&wc, s, len, &mbs);
597		if (clen == 0)
598			clen = 1;
599		if (clen == (size_t)-1 || clen == (size_t)-2) {
600			wc = (unsigned char)*s;
601			clen = 1;
602			memset(&mbs, 0, sizeof(mbs));
603		}
604		if (wc == '\n') {
605			if (col + 1 >= termwidth)
606				fprintf(outfile, "\\\n");
607			fputc('$', outfile);
608			fputc('\n', outfile);
609			col = 0;
610		} else if (iswprint(wc)) {
611			width = wcwidth(wc);
612			if (col + width >= termwidth) {
613				fprintf(outfile, "\\\n");
614				col = 0;
615			}
616			fwrite(s, 1, clen, outfile);
617			col += width;
618		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
619		    (p = strchr(escapes, c)) != NULL) {
620			if (col + 2 >= termwidth) {
621				fprintf(outfile, "\\\n");
622				col = 0;
623			}
624			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
625			col += 2;
626		} else {
627			if (col + 4 * clen >= (unsigned)termwidth) {
628				fprintf(outfile, "\\\n");
629				col = 0;
630			}
631			for (i = 0; i < clen; i++)
632				fprintf(outfile, "\\%03o",
633				    (int)(unsigned char)s[i]);
634			col += 4 * clen;
635		}
636		s += clen;
637		len -= clen;
638	}
639	if (col + 1 >= termwidth)
640		fprintf(outfile, "\\\n");
641	(void)fputc('$', outfile);
642	(void)fputc('\n', outfile);
643	if (ferror(outfile))
644		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
645}
646
647static int
648regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
649	size_t start, size_t stop)
650{
651	int eval;
652
653	if (preg == NULL) {
654		if (defpreg == NULL)
655			errx(1, "first RE may not be empty");
656	} else
657		defpreg = preg;
658
659	/* Set anchors */
660	match[0].rm_so = start;
661	match[0].rm_eo = stop;
662
663	eval = regexec(defpreg, string,
664	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
665	switch(eval) {
666	case 0:
667		return (1);
668	case REG_NOMATCH:
669		return (0);
670	}
671	errx(1, "RE error: %s", strregerror(eval, defpreg));
672	/* NOTREACHED */
673}
674
675/*
676 * regsub - perform substitutions after a regexp match
677 * Based on a routine by Henry Spencer
678 */
679static void
680regsub(SPACE *sp, char *string, char *src)
681{
682	int len, no;
683	char c, *dst;
684
685#define	NEEDSP(reqlen)							\
686	/* XXX What is the +1 for? */					\
687	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
688		sp->blen += (reqlen) + 1024;				\
689		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
690		    == NULL)						\
691			err(1, "realloc");				\
692		dst = sp->space + sp->len;				\
693	}
694
695	dst = sp->space + sp->len;
696	while ((c = *src++) != '\0') {
697		if (c == '&')
698			no = 0;
699		else if (c == '\\' && isdigit((unsigned char)*src))
700			no = *src++ - '0';
701		else
702			no = -1;
703		if (no < 0) {		/* Ordinary character. */
704			if (c == '\\' && (*src == '\\' || *src == '&'))
705				c = *src++;
706			NEEDSP(1);
707			*dst++ = c;
708			++sp->len;
709		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
710			len = match[no].rm_eo - match[no].rm_so;
711			NEEDSP(len);
712			memmove(dst, string + match[no].rm_so, len);
713			dst += len;
714			sp->len += len;
715		}
716	}
717	NEEDSP(1);
718	*dst = '\0';
719}
720
721/*
722 * cspace --
723 *	Concatenate space: append the source space to the destination space,
724 *	allocating new space as necessary.
725 */
726void
727cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
728{
729	size_t tlen;
730
731	/* Make sure SPACE has enough memory and ramp up quickly. */
732	tlen = sp->len + len + 1;
733	if (tlen > sp->blen) {
734		sp->blen = tlen + 1024;
735		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
736		    NULL)
737			err(1, "realloc");
738	}
739
740	if (spflag == REPLACE)
741		sp->len = 0;
742
743	memmove(sp->space + sp->len, p, len);
744
745	sp->space[sp->len += len] = '\0';
746}
747
748/*
749 * Close all cached opened files and report any errors
750 */
751void
752cfclose(struct s_command *cp, struct s_command *end)
753{
754
755	for (; cp != end; cp = cp->next)
756		switch(cp->code) {
757		case 's':
758			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
759				err(1, "%s", cp->u.s->wfile);
760			cp->u.s->wfd = -1;
761			break;
762		case 'w':
763			if (cp->u.fd != -1 && close(cp->u.fd))
764				err(1, "%s", cp->t);
765			cp->u.fd = -1;
766			break;
767		case '{':
768			cfclose(cp->u.c, cp->next);
769			break;
770		}
771}
772