1/*-
2 * Copyright (c) 1992 Diomidis Spinellis.
3 * Copyright (c) 1992, 1993, 1994
4 *	The Regents of the University of California.  All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Diomidis Spinellis of Imperial College, University of London.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: src/usr.bin/sed/process.c,v 1.39 2005/04/09 14:31:41 stefanf Exp $");
36
37#ifndef lint
38static const char sccsid[] = "@(#)process.c	8.6 (Berkeley) 4/20/94";
39#endif
40
41#include <sys/types.h>
42#include <sys/stat.h>
43#include <sys/ioctl.h>
44#include <sys/uio.h>
45
46#include <ctype.h>
47#include <err.h>
48#include <errno.h>
49#include <fcntl.h>
50#include <limits.h>
51#include <regex.h>
52#include <stdio.h>
53#include <stdlib.h>
54#include <string.h>
55#include <unistd.h>
56#include <wchar.h>
57#include <wctype.h>
58
59#include "defs.h"
60#include "extern.h"
61
/*
 * Working buffers.  PS is the pattern space the commands operate on and HS
 * is the space the g/G/h/H/x commands copy to and from.  SS and YS are
 * scratch spaces: substitute() and do_tr() build their result there and
 * then swap it with PS.
 */
static SPACE HS, PS, SS, YS;
/* Shorthands for the PS and HS fields used throughout this file. */
#define	pd		PS.deleted
#define	ps		PS.space
#define	psl		PS.len
#define	hs		HS.space
#define	hsl		HS.len

/* File-local helpers, defined below. */
static __inline int	 applies(struct s_command *);
static void		 do_tr(struct s_tr *);
static void		 flush_appends(void);
static void		 lputs(char *, size_t);
static __inline int	 regexec_e(regex_t *, const char *, int, int, size_t);
static void		 regsub(SPACE *, char *, char *);
static int		 substitute(struct s_command *);

struct s_appends *appends;	/* Array of pointers to strings to append. */
static int appendx;		/* Index into appends array. */
int appendnum;			/* Size of appends array. */

static int lastaddr;		/* Set by applies if last address of a range. */
static int sdone;		/* If any substitutes since last line input. */
				/*
				 * Most recently used RE; regexec_e() falls
				 * back to it when handed a NULL RE.
				 */
static regex_t *defpreg;
size_t maxnsub;			/* regexec_e() asks for maxnsub + 1 matches. */
regmatch_t *match;		/* Match offsets filled in by regexec_e(). */

/*
 * Write the first psl bytes at s, followed by a newline, to outfile.
 * The expansion is a brace block; call sites in this file omit the
 * trailing semicolon.
 */
#define OUT(s) { fwrite(s, sizeof(u_char), psl, outfile); fputc('\n', outfile); }
89
90void
91process(void)
92{
93	struct s_command *cp;
94	SPACE tspace;
95	size_t oldpsl = 0;
96	char *p;
97
98	p = NULL;
99
100	for (linenum = 0; mf_fgets(&PS, REPLACE);) {
101		pd = 0;
102top:
103		cp = prog;
104redirect:
105		while (cp != NULL) {
106			if (!applies(cp)) {
107				cp = cp->next;
108				continue;
109			}
110			switch (cp->code) {
111			case '{':
112				cp = cp->u.c;
113				goto redirect;
114			case 'a':
115				if (appendx >= appendnum)
116					if ((appends = realloc(appends,
117					    sizeof(struct s_appends) *
118					    (appendnum *= 2))) == NULL)
119						err(1, "realloc");
120				appends[appendx].type = AP_STRING;
121				appends[appendx].s = cp->t;
122				appends[appendx].len = strlen(cp->t);
123				appendx++;
124				break;
125			case 'b':
126				cp = cp->u.c;
127				goto redirect;
128			case 'c':
129				pd = 1;
130				psl = 0;
131				if (cp->a2 == NULL || lastaddr)
132					(void)fprintf(outfile, "%s", cp->t);
133				break;
134			case 'd':
135				pd = 1;
136				goto new;
137			case 'D':
138				if (pd)
139					goto new;
140				if (psl == 0 ||
141				    (p = memchr(ps, '\n', psl)) == NULL) {
142					pd = 1;
143					goto new;
144				} else {
145					psl -= (p + 1) - ps;
146					memmove(ps, p + 1, psl);
147					goto top;
148				}
149			case 'g':
150				cspace(&PS, hs, hsl, REPLACE);
151				break;
152			case 'G':
153				cspace(&PS, "\n", 1, 0);
154				cspace(&PS, hs, hsl, 0);
155				break;
156			case 'h':
157				cspace(&HS, ps, psl, REPLACE);
158				break;
159			case 'H':
160				cspace(&HS, "\n", 1, 0);
161				cspace(&HS, ps, psl, 0);
162				break;
163			case 'i':
164				(void)fprintf(outfile, "%s", cp->t);
165				break;
166			case 'l':
167				lputs(ps, psl);
168				break;
169			case 'n':
170				if (!nflag && !pd)
171					OUT(ps)
172				flush_appends();
173				if (!mf_fgets(&PS, REPLACE))
174					exit(0);
175				pd = 0;
176				break;
177			case 'N':
178				flush_appends();
179				cspace(&PS, "\n", 1, 0);
180				if (!mf_fgets(&PS, 0))
181					exit(0);
182				break;
183			case 'p':
184				if (pd)
185					break;
186				OUT(ps)
187				break;
188			case 'P':
189				if (pd)
190					break;
191				if (psl != 0 &&
192				    (p = memchr(ps, '\n', psl)) != NULL) {
193					oldpsl = psl;
194					psl = p - ps;
195				}
196				OUT(ps)
197				if (p != NULL)
198					psl = oldpsl;
199				break;
200			case 'q':
201				if (!nflag && !pd)
202					OUT(ps)
203				flush_appends();
204				lseek(STDIN_FILENO, ftell(stdin), SEEK_SET);
205				exit(0);
206			case 'r':
207				if (appendx >= appendnum)
208					if ((appends = realloc(appends,
209					    sizeof(struct s_appends) *
210					    (appendnum *= 2))) == NULL)
211						err(1, "realloc");
212				appends[appendx].type = AP_FILE;
213				appends[appendx].s = cp->t;
214				appends[appendx].len = strlen(cp->t);
215				appendx++;
216				break;
217			case 's':
218				sdone |= substitute(cp);
219				break;
220			case 't':
221				if (sdone) {
222					sdone = 0;
223					cp = cp->u.c;
224					goto redirect;
225				}
226				break;
227			case 'w':
228				if (pd)
229					break;
230				if (cp->u.fd == -1 && (cp->u.fd = open(cp->t,
231				    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC,
232				    DEFFILEMODE)) == -1)
233					err(1, "%s", cp->t);
234				if (write(cp->u.fd, ps, psl) != psl ||
235				    write(cp->u.fd, "\n", 1) != 1)
236					err(1, "%s", cp->t);
237				break;
238			case 'x':
239				if (hs == NULL)
240					cspace(&HS, "", 0, REPLACE);
241				tspace = PS;
242				PS = HS;
243				HS = tspace;
244				break;
245			case 'y':
246				if (pd || psl == 0)
247					break;
248				do_tr(cp->u.y);
249				break;
250			case ':':
251			case '}':
252				break;
253			case '=':
254				(void)fprintf(outfile, "%lu\n", linenum);
255			}
256			cp = cp->next;
257		} /* for all cp */
258
259new:		if (!nflag && !pd)
260			OUT(ps)
261		flush_appends();
262	} /* for all lines */
263}
264
/*
 * TRUE if the address passed matches the current program state
 * (lastline, linenumber, ps).  The expansion is fully parenthesized so
 * the macro can be used safely inside larger expressions such as
 * !MATCH(a) or MATCH(a) && x.
 */
#define	MATCH(a)						\
	((a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, psl) :	\
	    (a)->type == AT_LINE ? linenum == (a)->u.l : lastline())
272
273/*
274 * Return TRUE if the command applies to the current line.  Sets the inrange
275 * flag to process ranges.  Interprets the non-select (``!'') flag.
276 */
277static __inline int
278applies(struct s_command *cp)
279{
280	int r;
281
282	lastaddr = 0;
283	if (cp->a1 == NULL && cp->a2 == NULL)
284		r = 1;
285	else if (cp->a2)
286		if (cp->inrange) {
287			if (MATCH(cp->a2)) {
288				cp->inrange = 0;
289				lastaddr = 1;
290			}
291			r = 1;
292		} else if (MATCH(cp->a1)) {
293			/*
294			 * If the second address is a number less than or
295			 * equal to the line number first selected, only
296			 * one line shall be selected.
297			 *	-- POSIX 1003.2
298			 */
299			if (cp->a2->type == AT_LINE &&
300			    linenum >= cp->a2->u.l)
301				lastaddr = 1;
302			else
303				cp->inrange = 1;
304			r = 1;
305		} else
306			r = 0;
307	else
308		r = MATCH(cp->a1);
309	return (cp->nonsel ? ! r : r);
310}
311
312/*
313 * substitute --
314 *	Do substitutions in the pattern space.  Currently, we build a
315 *	copy of the new pattern space in the substitute space structure
316 *	and then swap them.
317 */
318static int
319substitute(struct s_command *cp)
320{
321	SPACE tspace;
322	regex_t *re;
323	regoff_t re_off, slen;
324	int lastempty, n;
325	char *s;
326
327	s = ps;
328	re = cp->u.s->re;
329	if (re == NULL) {
330		if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) {
331			linenum = cp->u.s->linenum;
332			errx(1, "%lu: %s: \\%d not defined in the RE",
333					linenum, fname, cp->u.s->maxbref);
334		}
335	}
336	if (!regexec_e(re, s, 0, 0, psl))
337		return (0);
338
339	SS.len = 0;				/* Clean substitute space. */
340	slen = psl;
341	n = cp->u.s->n;
342	lastempty = 1;
343
344	switch (n) {
345	case 0:					/* Global */
346		do {
347			if (lastempty || match[0].rm_so != match[0].rm_eo) {
348				/* Locate start of replaced string. */
349				re_off = match[0].rm_so;
350				/* Copy leading retained string. */
351				cspace(&SS, s, re_off, APPEND);
352				/* Add in regular expression. */
353				regsub(&SS, s, cp->u.s->new);
354			}
355
356			/* Move past this match. */
357			if (match[0].rm_so != match[0].rm_eo) {
358				s += match[0].rm_eo;
359				slen -= match[0].rm_eo;
360				lastempty = 0;
361			} else {
362				if (match[0].rm_so < slen)
363					cspace(&SS, s + match[0].rm_so, 1,
364					    APPEND);
365				s += match[0].rm_so + 1;
366				slen -= match[0].rm_so + 1;
367				lastempty = 1;
368			}
369		} while (slen >= 0 && regexec_e(re, s, REG_NOTBOL, 0, slen));
370		/* Copy trailing retained string. */
371		if (slen > 0)
372			cspace(&SS, s, slen, APPEND);
373		break;
374	default:				/* Nth occurrence */
375		while (--n) {
376			if (match[0].rm_eo == match[0].rm_so)
377				match[0].rm_eo = match[0].rm_so + 1;
378			s += match[0].rm_eo;
379			slen -= match[0].rm_eo;
380			if (slen < 0)
381				return (0);
382			if (!regexec_e(re, s, REG_NOTBOL, 0, slen))
383				return (0);
384		}
385		/* FALLTHROUGH */
386	case 1:					/* 1st occurrence */
387		/* Locate start of replaced string. */
388		re_off = match[0].rm_so + (s - ps);
389		/* Copy leading retained string. */
390		cspace(&SS, ps, re_off, APPEND);
391		/* Add in regular expression. */
392		regsub(&SS, s, cp->u.s->new);
393		/* Copy trailing retained string. */
394		s += match[0].rm_eo;
395		slen -= match[0].rm_eo;
396		cspace(&SS, s, slen, APPEND);
397		break;
398	}
399
400	/*
401	 * Swap the substitute space and the pattern space, and make sure
402	 * that any leftover pointers into stdio memory get lost.
403	 */
404	tspace = PS;
405	PS = SS;
406	SS = tspace;
407	SS.space = SS.back;
408
409	/* Handle the 'p' flag. */
410	if (cp->u.s->p)
411		OUT(ps)
412
413	/* Handle the 'w' flag. */
414	if (cp->u.s->wfile && !pd) {
415		if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile,
416		    O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1)
417			err(1, "%s", cp->u.s->wfile);
418		if (write(cp->u.s->wfd, ps, psl) != psl ||
419		    write(cp->u.s->wfd, "\n", 1) != 1)
420			err(1, "%s", cp->u.s->wfile);
421	}
422	return (1);
423}
424
425/*
426 * do_tr --
427 *	Perform translation ('y' command) in the pattern space.
428 */
429static void
430do_tr(struct s_tr *y)
431{
432	SPACE tmp;
433	char c, *p;
434	size_t clen, left;
435	int i;
436
437	if (MB_CUR_MAX == 1) {
438		/*
439		 * Single-byte encoding: perform in-place translation
440		 * of the pattern space.
441		 */
442		for (p = ps; p < &ps[psl]; p++)
443			*p = y->bytetab[(u_char)*p];
444	} else {
445		/*
446		 * Multi-byte encoding: perform translation into the
447		 * translation space, then swap the translation and
448		 * pattern spaces.
449		 */
450		/* Clean translation space. */
451		YS.len = 0;
452		for (p = ps, left = psl; left > 0; p += clen, left -= clen) {
453			if ((c = y->bytetab[(u_char)*p]) != '\0') {
454				cspace(&YS, &c, 1, APPEND);
455				clen = 1;
456				continue;
457			}
458			for (i = 0; i < y->nmultis; i++)
459				if (left >= y->multis[i].fromlen &&
460				    memcmp(p, y->multis[i].from,
461				    y->multis[i].fromlen) == 0)
462					break;
463			if (i < y->nmultis) {
464				cspace(&YS, y->multis[i].to,
465				    y->multis[i].tolen, APPEND);
466				clen = y->multis[i].fromlen;
467			} else {
468				cspace(&YS, p, 1, APPEND);
469				clen = 1;
470			}
471		}
472		/* Swap the translation space and the pattern space. */
473		tmp = PS;
474		PS = YS;
475		YS = tmp;
476		YS.space = YS.back;
477	}
478}
479
480/*
481 * Flush append requests.  Always called before reading a line,
482 * therefore it also resets the substitution done (sdone) flag.
483 */
484static void
485flush_appends(void)
486{
487	FILE *f;
488	int count, i;
489	char buf[8 * 1024];
490
491	for (i = 0; i < appendx; i++)
492		switch (appends[i].type) {
493		case AP_STRING:
494			fwrite(appends[i].s, sizeof(char), appends[i].len,
495			    outfile);
496			break;
497		case AP_FILE:
498			/*
499			 * Read files probably shouldn't be cached.  Since
500			 * it's not an error to read a non-existent file,
501			 * it's possible that another program is interacting
502			 * with the sed script through the filesystem.  It
503			 * would be truly bizarre, but possible.  It's probably
504			 * not that big a performance win, anyhow.
505			 */
506			if ((f = fopen(appends[i].s, "r")) == NULL)
507				break;
508			while ((count = fread(buf, sizeof(char), sizeof(buf), f)))
509				(void)fwrite(buf, sizeof(char), count, outfile);
510			(void)fclose(f);
511			break;
512		}
513	if (ferror(outfile))
514		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
515	appendx = sdone = 0;
516}
517
518static void
519lputs(char *s, size_t len)
520{
521	static const char escapes[] = "\\\a\b\f\r\t\v";
522	int c, col, width;
523	char *p;
524	struct winsize win;
525	static int termwidth = -1;
526	size_t clen, i;
527	wchar_t wc;
528	mbstate_t mbs;
529
530	if (outfile != stdout)
531		termwidth = 60;
532	if (termwidth == -1) {
533		if ((p = getenv("COLUMNS")) && *p != '\0')
534			termwidth = atoi(p);
535		else if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &win) == 0 &&
536		    win.ws_col > 0)
537			termwidth = win.ws_col;
538		else
539			termwidth = 60;
540	}
541
542	memset(&mbs, 0, sizeof(mbs));
543	col = 0;
544	while (len != 0) {
545		clen = mbrtowc(&wc, s, len, &mbs);
546		if (clen == 0)
547			clen = 1;
548		if (clen == (size_t)-1 || clen == (size_t)-2) {
549			wc = (unsigned char)*s;
550			clen = 1;
551			memset(&mbs, 0, sizeof(mbs));
552		}
553		if (wc == '\n') {
554			if (col + 1 >= termwidth)
555				fprintf(outfile, "\\\n");
556			fputc('$', outfile);
557			fputc('\n', outfile);
558			col = 0;
559		} else if (iswprint(wc)) {
560			width = wcwidth(wc);
561			if (col + width >= termwidth) {
562				fprintf(outfile, "\\\n");
563				col = 0;
564			}
565			fwrite(s, 1, clen, outfile);
566			col += width;
567		} else if (wc != L'\0' && (c = wctob(wc)) != EOF &&
568		    (p = strchr(escapes, c)) != NULL) {
569			if (col + 2 >= termwidth) {
570				fprintf(outfile, "\\\n");
571				col = 0;
572			}
573			fprintf(outfile, "\\%c", "\\abfrtv"[p - escapes]);
574			col += 2;
575		} else {
576			if (col + 4 * clen >= termwidth) {
577				fprintf(outfile, "\\\n");
578				col = 0;
579			}
580			for (i = 0; i < clen; i++)
581				fprintf(outfile, "\\%03o",
582				    (int)(unsigned char)s[i]);
583			col += 4 * clen;
584		}
585		s += clen;
586		len -= clen;
587	}
588	if (col + 1 >= termwidth)
589		fprintf(outfile, "\\\n");
590	(void)fputc('$', outfile);
591	(void)fputc('\n', outfile);
592	if (ferror(outfile))
593		errx(1, "%s: %s", outfname, strerror(errno ? errno : EIO));
594}
595
596static __inline int
597regexec_e(regex_t *preg, const char *string, int eflags, int nomatch,
598	size_t slen)
599{
600	int eval;
601
602	if (preg == NULL) {
603		if (defpreg == NULL)
604			errx(1, "first RE may not be empty");
605	} else
606		defpreg = preg;
607
608	/* Set anchors */
609	match[0].rm_so = 0;
610	match[0].rm_eo = slen;
611
612	eval = regexec(defpreg, string,
613	    nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND);
614	switch(eval) {
615	case 0:
616		return (1);
617	case REG_NOMATCH:
618		return (0);
619	}
620	errx(1, "RE error: %s", strregerror(eval, defpreg));
621	/* NOTREACHED */
622}
623
624/*
625 * regsub - perform substitutions after a regexp match
626 * Based on a routine by Henry Spencer
627 */
628static void
629regsub(SPACE *sp, char *string, char *src)
630{
631	int len, no;
632	char c, *dst;
633
634#define	NEEDSP(reqlen)							\
635	/* XXX What is the +1 for? */					\
636	if (sp->len + (reqlen) + 1 >= sp->blen) {			\
637		sp->blen += (reqlen) + 1024;				\
638		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) \
639		    == NULL)						\
640			err(1, "realloc");				\
641		dst = sp->space + sp->len;				\
642	}
643
644	dst = sp->space + sp->len;
645	while ((c = *src++) != '\0') {
646		if (c == '&')
647			no = 0;
648		else if (c == '\\' && isdigit((unsigned char)*src))
649			no = *src++ - '0';
650		else
651			no = -1;
652		if (no < 0) {		/* Ordinary character. */
653			if (c == '\\' && (*src == '\\' || *src == '&'))
654				c = *src++;
655			NEEDSP(1);
656			*dst++ = c;
657			++sp->len;
658		} else if (match[no].rm_so != -1 && match[no].rm_eo != -1) {
659			len = match[no].rm_eo - match[no].rm_so;
660			NEEDSP(len);
661			memmove(dst, string + match[no].rm_so, len);
662			dst += len;
663			sp->len += len;
664		}
665	}
666	NEEDSP(1);
667	*dst = '\0';
668}
669
670/*
671 * aspace --
672 *	Append the source space to the destination space, allocating new
673 *	space as necessary.
674 */
675void
676cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag)
677{
678	size_t tlen;
679
680	/* Make sure SPACE has enough memory and ramp up quickly. */
681	tlen = sp->len + len + 1;
682	if (tlen > sp->blen) {
683		sp->blen = tlen + 1024;
684		if ((sp->space = sp->back = realloc(sp->back, sp->blen)) ==
685		    NULL)
686			err(1, "realloc");
687	}
688
689	if (spflag == REPLACE)
690		sp->len = 0;
691
692	memmove(sp->space + sp->len, p, len);
693
694	sp->space[sp->len += len] = '\0';
695}
696
697/*
698 * Close all cached opened files and report any errors
699 */
700void
701cfclose(struct s_command *cp, struct s_command *end)
702{
703
704	for (; cp != end; cp = cp->next)
705		switch(cp->code) {
706		case 's':
707			if (cp->u.s->wfd != -1 && close(cp->u.s->wfd))
708				err(1, "%s", cp->u.s->wfile);
709			cp->u.s->wfd = -1;
710			break;
711		case 'w':
712			if (cp->u.fd != -1 && close(cp->u.fd))
713				err(1, "%s", cp->t);
714			cp->u.fd = -1;
715			break;
716		case '{':
717			cfclose(cp->u.c, cp->next);
718			break;
719		}
720}
721