1/* $OpenBSD: process.c,v 1.35 2022/01/12 15:13:36 martijn Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Diomidis Spinellis. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Diomidis Spinellis of Imperial College, University of London. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#include <sys/types.h> 37#include <sys/stat.h> 38#include <sys/uio.h> 39 40#include <ctype.h> 41#include <errno.h> 42#include <fcntl.h> 43#include <limits.h> 44#include <regex.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include <unistd.h> 49 50#include "defs.h" 51#include "extern.h" 52 53static SPACE HS, PS, SS; 54#define pd PS.deleted 55#define ps PS.space 56#define psl PS.len 57#define psanl PS.append_newline 58#define hs HS.space 59#define hsl HS.len 60 61static inline int applies(struct s_command *); 62static void flush_appends(void); 63static void lputs(char *, size_t); 64static inline int regexec_e(regex_t *, const char *, int, int, size_t, 65 size_t); 66static void regsub(SPACE *, char *, char *); 67static int substitute(struct s_command *); 68 69struct s_appends *appends; /* Array of pointers to strings to append. */ 70static size_t appendx; /* Index into appends array. */ 71size_t appendnum; /* Size of appends array. */ 72 73static int lastaddr; /* Set by applies if last address of a range. */ 74static int sdone; /* If any substitutes since last line input. */ 75 /* Iov structure for 'w' commands. */ 76static regex_t *defpreg; 77size_t maxnsub; 78regmatch_t *match; 79 80#define OUT() do {\ 81 fwrite(ps, 1, psl, outfile);\ 82 if (psanl) fputc('\n', outfile);\ 83} while (0) 84 85void 86process(void) 87{ 88 struct s_command *cp; 89 SPACE tspace; 90 size_t len, oldpsl; 91 char *p; 92 93 for (linenum = 0; mf_fgets(&PS, REPLACE);) { 94 pd = 0; 95top: 96 cp = prog; 97redirect: 98 while (cp != NULL) { 99 if (!applies(cp)) { 100 cp = cp->next; 101 continue; 102 } 103 switch (cp->code) { 104 case '{': 105 cp = cp->u.c; 106 goto redirect; 107 case 'a': 108 if (appendx >= appendnum) { 109 appends = xreallocarray(appends, 110 appendnum, 111 2 * sizeof(struct s_appends)); 112 appendnum *= 2; 113 } 114 appends[appendx].type = AP_STRING; 115 appends[appendx].s = cp->t; 116 appends[appendx].len = strlen(cp->t); 117 appendx++; 118 break; 119 case 'b': 120 cp = cp->u.c; 121 goto redirect; 122 case 'c': 123 pd = 1; 124 psl = 0; 125 if (cp->a2 == NULL || lastaddr || lastline()) 126 (void)fprintf(outfile, "%s", cp->t); 127 break; 128 case 'd': 129 pd = 1; 130 goto new; 131 case 'D': 132 if (pd) 133 goto new; 134 if (psl == 0 || 135 (p = memchr(ps, '\n', psl)) == NULL) { 136 pd = 1; 137 goto new; 138 } else { 139 psl -= (p + 1) - ps; 140 memmove(ps, p + 1, psl); 141 goto top; 142 } 143 case 'g': 144 cspace(&PS, hs, hsl, REPLACE); 145 break; 146 case 'G': 147 cspace(&PS, "\n", 1, 0); 148 cspace(&PS, hs, hsl, 0); 149 break; 150 case 'h': 151 cspace(&HS, ps, psl, REPLACE); 152 break; 153 case 'H': 154 cspace(&HS, "\n", 1, 0); 155 cspace(&HS, ps, psl, 0); 156 break; 157 case 'i': 158 (void)fprintf(outfile, "%s", cp->t); 159 break; 160 case 'l': 161 lputs(ps, psl); 162 break; 163 case 'n': 164 if (!nflag && !pd) 165 OUT(); 166 flush_appends(); 167 if (!mf_fgets(&PS, REPLACE)) 168 exit(0); 169 pd = 0; 170 break; 171 case 'N': 172 flush_appends(); 173 cspace(&PS, "\n", 1, 0); 174 if (!mf_fgets(&PS, 0)) 175 exit(0); 176 break; 177 case 'p': 178 if (pd) 179 break; 180 OUT(); 181 break; 182 case 'P': 183 if (pd) 184 break; 185 if ((p = memchr(ps, '\n', psl)) != NULL) { 186 oldpsl = psl; 187 psl = p - ps; 188 psanl = 1; 189 OUT(); 190 psl = oldpsl; 191 } else { 192 OUT(); 193 } 194 break; 195 case 'q': 196 if (!nflag && !pd) 197 OUT(); 198 flush_appends(); 199 finish_file(); 200 exit(0); 201 case 'r': 202 if (appendx >= appendnum) { 203 appends = xreallocarray(appends, 204 appendnum, 205 2 * sizeof(struct s_appends)); 206 appendnum *= 2; 207 } 208 appends[appendx].type = AP_FILE; 209 appends[appendx].s = cp->t; 210 appends[appendx].len = strlen(cp->t); 211 appendx++; 212 break; 213 case 's': 214 sdone |= substitute(cp); 215 break; 216 case 't': 217 if (sdone) { 218 sdone = 0; 219 cp = cp->u.c; 220 goto redirect; 221 } 222 break; 223 case 'w': 224 if (pd) 225 break; 226 if (cp->u.fd == -1 && (cp->u.fd = open(cp->t, 227 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 228 DEFFILEMODE)) == -1) 229 error(FATAL, "%s: %s", 230 cp->t, strerror(errno)); 231 if ((size_t)write(cp->u.fd, ps, psl) != psl || 232 write(cp->u.fd, "\n", 1) != 1) 233 error(FATAL, "%s: %s", 234 cp->t, strerror(errno)); 235 break; 236 case 'x': 237 if (hs == NULL) 238 cspace(&HS, "", 0, REPLACE); 239 tspace = PS; 240 PS = HS; 241 psanl = tspace.append_newline; 242 HS = tspace; 243 break; 244 case 'y': 245 if (pd || psl == 0) 246 break; 247 for (p = ps, len = psl; len--; ++p) 248 *p = cp->u.y[(unsigned char)*p]; 249 break; 250 case ':': 251 case '}': 252 break; 253 case '=': 254 (void)fprintf(outfile, "%lu\n", linenum); 255 } 256 cp = cp->next; 257 } /* for all cp */ 258 259new: if (!nflag && !pd) 260 OUT(); 261 flush_appends(); 262 } /* for all lines */ 263} 264 265/* 266 * TRUE if the address passed matches the current program state 267 * (lastline, linenumber, ps). 268 */ 269#define MATCH(a) \ 270 (a)->type == AT_RE ? regexec_e((a)->u.r, ps, 0, 1, 0, psl) : \ 271 (a)->type == AT_LINE ? linenum == (a)->u.l : lastline() 272 273/* 274 * Return TRUE if the command applies to the current line. Sets the inrange 275 * flag to process ranges. Interprets the non-select (``!'') flag. 276 */ 277static inline int 278applies(struct s_command *cp) 279{ 280 int r; 281 282 lastaddr = 0; 283 if (cp->a1 == NULL && cp->a2 == NULL) 284 r = 1; 285 else if (cp->a2) 286 if (cp->inrange) { 287 if (MATCH(cp->a2)) { 288 cp->inrange = 0; 289 lastaddr = 1; 290 } 291 r = 1; 292 } else if (MATCH(cp->a1)) { 293 /* 294 * If the second address is a number less than or 295 * equal to the line number first selected, only 296 * one line shall be selected. 297 * -- POSIX 1003.2 298 */ 299 if (cp->a2->type == AT_LINE && 300 linenum >= cp->a2->u.l) 301 lastaddr = 1; 302 else 303 cp->inrange = 1; 304 r = 1; 305 } else 306 r = 0; 307 else 308 r = MATCH(cp->a1); 309 return (cp->nonsel ? !r : r); 310} 311 312/* 313 * Reset all inrange markers. 314 */ 315void 316resetstate(void) 317{ 318 struct s_command *cp; 319 320 free(HS.back); 321 memset(&HS, 0, sizeof(HS)); 322 323 for (cp = prog; cp; cp = cp->code == '{' ? cp->u.c : cp->next) 324 if (cp->a2) 325 cp->inrange = 0; 326} 327 328/* 329 * substitute -- 330 * Do substitutions in the pattern space. Currently, we build a 331 * copy of the new pattern space in the substitute space structure 332 * and then swap them. 333 */ 334static int 335substitute(struct s_command *cp) 336{ 337 SPACE tspace; 338 regex_t *re; 339 regoff_t slen; 340 int n, lastempty; 341 regoff_t le = 0; 342 char *s; 343 344 s = ps; 345 re = cp->u.s->re; 346 if (re == NULL) { 347 if (defpreg != NULL && cp->u.s->maxbref > defpreg->re_nsub) { 348 linenum = cp->u.s->linenum; 349 error(COMPILE, "\\%d not defined in the RE", 350 cp->u.s->maxbref); 351 } 352 } 353 if (!regexec_e(re, ps, 0, 0, 0, psl)) 354 return (0); 355 356 SS.len = 0; /* Clean substitute space. */ 357 slen = psl; 358 n = cp->u.s->n; 359 lastempty = 1; 360 361 do { 362 /* Copy the leading retained string. */ 363 if (n <= 1 && (match[0].rm_so > le)) 364 cspace(&SS, s, match[0].rm_so - le, APPEND); 365 366 /* Skip zero-length matches right after other matches. */ 367 if (lastempty || (match[0].rm_so - le) || 368 match[0].rm_so != match[0].rm_eo) { 369 if (n <= 1) { 370 /* Want this match: append replacement. */ 371 regsub(&SS, ps, cp->u.s->new); 372 if (n == 1) 373 n = -1; 374 } else { 375 /* Want a later match: append original. */ 376 if (match[0].rm_eo - le) 377 cspace(&SS, s, match[0].rm_eo - le, 378 APPEND); 379 n--; 380 } 381 } 382 383 /* Move past this match. */ 384 s = ps + match[0].rm_eo; 385 slen = psl - match[0].rm_eo; 386 le = match[0].rm_eo; 387 388 /* 389 * After a zero-length match, advance one byte, 390 * and at the end of the line, terminate. 391 */ 392 if (match[0].rm_so == match[0].rm_eo) { 393 if (*s == '\0' || *s == '\n') 394 slen = -1; 395 else 396 slen--; 397 if (*s != '\0') { 398 cspace(&SS, s++, 1, APPEND); 399 le++; 400 } 401 lastempty = 1; 402 } else 403 lastempty = 0; 404 405 } while (n >= 0 && slen >= 0 && 406 regexec_e(re, ps, REG_NOTBOL, 0, le, psl)); 407 408 /* Did not find the requested number of matches. */ 409 if (n > 0) 410 return (0); 411 412 /* Copy the trailing retained string. */ 413 if (slen > 0) 414 cspace(&SS, s, slen, APPEND); 415 416 /* 417 * Swap the substitute space and the pattern space, and make sure 418 * that any leftover pointers into stdio memory get lost. 419 */ 420 tspace = PS; 421 PS = SS; 422 psanl = tspace.append_newline; 423 SS = tspace; 424 SS.space = SS.back; 425 426 /* Handle the 'p' flag. */ 427 if (cp->u.s->p) 428 OUT(); 429 430 /* Handle the 'w' flag. */ 431 if (cp->u.s->wfile && !pd) { 432 if (cp->u.s->wfd == -1 && (cp->u.s->wfd = open(cp->u.s->wfile, 433 O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, DEFFILEMODE)) == -1) 434 error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 435 if ((size_t)write(cp->u.s->wfd, ps, psl) != psl || 436 write(cp->u.s->wfd, "\n", 1) != 1) 437 error(FATAL, "%s: %s", cp->u.s->wfile, strerror(errno)); 438 } 439 return (1); 440} 441 442/* 443 * Flush append requests. Always called before reading a line, 444 * therefore it also resets the substitution done (sdone) flag. 445 */ 446static void 447flush_appends(void) 448{ 449 FILE *f; 450 size_t count, idx; 451 char buf[8 * 1024]; 452 453 for (idx = 0; idx < appendx; idx++) 454 switch (appends[idx].type) { 455 case AP_STRING: 456 fwrite(appends[idx].s, sizeof(char), appends[idx].len, 457 outfile); 458 break; 459 case AP_FILE: 460 /* 461 * Read files probably shouldn't be cached. Since 462 * it's not an error to read a non-existent file, 463 * it's possible that another program is interacting 464 * with the sed script through the file system. It 465 * would be truly bizarre, but possible. It's probably 466 * not that big a performance win, anyhow. 467 */ 468 if ((f = fopen(appends[idx].s, "r")) == NULL) 469 break; 470 while ((count = fread(buf, sizeof(char), sizeof(buf), f))) 471 (void)fwrite(buf, sizeof(char), count, outfile); 472 (void)fclose(f); 473 break; 474 } 475 if (ferror(outfile)) 476 error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO)); 477 appendx = sdone = 0; 478} 479 480static void 481lputs(char *s, size_t len) 482{ 483 int count; 484 extern int termwidth; 485 const char *escapes; 486 char *p; 487 488 for (count = 0; len > 0; len--, s++) { 489 if (count >= termwidth) { 490 (void)fprintf(outfile, "\\\n"); 491 count = 0; 492 } 493 if (isascii((unsigned char)*s) && isprint((unsigned char)*s) 494 && *s != '\\') { 495 (void)fputc(*s, outfile); 496 count++; 497 } else if (*s == '\n') { 498 (void)fputc('$', outfile); 499 (void)fputc('\n', outfile); 500 count = 0; 501 } else { 502 escapes = "\\\a\b\f\r\t\v"; 503 (void)fputc('\\', outfile); 504 if ((p = strchr(escapes, *s)) && *s != '\0') { 505 (void)fputc("\\abfrtv"[p - escapes], outfile); 506 count += 2; 507 } else { 508 (void)fprintf(outfile, "%03o", *(u_char *)s); 509 count += 4; 510 } 511 } 512 } 513 (void)fputc('$', outfile); 514 (void)fputc('\n', outfile); 515 if (ferror(outfile)) 516 error(FATAL, "%s: %s", outfname, strerror(errno ? errno : EIO)); 517} 518 519static inline int 520regexec_e(regex_t *preg, const char *string, int eflags, 521 int nomatch, size_t start, size_t stop) 522{ 523 int eval; 524 525 if (preg == NULL) { 526 if (defpreg == NULL) 527 error(FATAL, "first RE may not be empty"); 528 } else 529 defpreg = preg; 530 531 /* Set anchors */ 532 match[0].rm_so = start; 533 match[0].rm_eo = stop; 534 535 eval = regexec(defpreg, string, 536 nomatch ? 0 : maxnsub + 1, match, eflags | REG_STARTEND); 537 switch (eval) { 538 case 0: 539 return (1); 540 case REG_NOMATCH: 541 return (0); 542 } 543 error(FATAL, "RE error: %s", strregerror(eval, defpreg)); 544} 545 546/* 547 * regsub - perform substitutions after a regexp match 548 * Based on a routine by Henry Spencer 549 */ 550static void 551regsub(SPACE *sp, char *string, char *src) 552{ 553 int len, no; 554 char c, *dst; 555 556#define NEEDSP(reqlen) \ 557 if (sp->len + (reqlen) + 1 >= sp->blen) { \ 558 size_t newlen = sp->blen + (reqlen) + 1024; \ 559 sp->space = sp->back = xrealloc(sp->back, newlen); \ 560 sp->blen = newlen; \ 561 dst = sp->space + sp->len; \ 562 } 563 564 dst = sp->space + sp->len; 565 while ((c = *src++) != '\0') { 566 if (c == '&') 567 no = 0; 568 else if (c == '\\' && isdigit((unsigned char)*src)) 569 no = *src++ - '0'; 570 else 571 no = -1; 572 if (no < 0) { /* Ordinary character. */ 573 if (c == '\\' && (*src == '\\' || *src == '&')) 574 c = *src++; 575 NEEDSP(1); 576 *dst++ = c; 577 ++sp->len; 578 } else if (match[no].rm_so != -1 && match[no].rm_eo != -1) { 579 len = match[no].rm_eo - match[no].rm_so; 580 NEEDSP(len); 581 memmove(dst, string + match[no].rm_so, len); 582 dst += len; 583 sp->len += len; 584 } 585 } 586 NEEDSP(1); 587 *dst = '\0'; 588} 589 590/* 591 * aspace -- 592 * Append the source space to the destination space, allocating new 593 * space as necessary. 594 */ 595void 596cspace(SPACE *sp, const char *p, size_t len, enum e_spflag spflag) 597{ 598 size_t tlen; 599 600 /* Make sure SPACE has enough memory and ramp up quickly. */ 601 tlen = sp->len + len + 1; 602 if (tlen > sp->blen) { 603 size_t newlen = tlen + 1024; 604 sp->space = sp->back = xrealloc(sp->back, newlen); 605 sp->blen = newlen; 606 } 607 608 if (spflag == REPLACE) 609 sp->len = 0; 610 611 memmove(sp->space + sp->len, p, len); 612 613 sp->space[sp->len += len] = '\0'; 614} 615 616/* 617 * Close all cached opened files and report any errors 618 */ 619void 620cfclose(struct s_command *cp, struct s_command *end) 621{ 622 623 for (; cp != end; cp = cp->next) 624 switch (cp->code) { 625 case 's': 626 if (cp->u.s->wfd != -1 && close(cp->u.s->wfd)) 627 error(FATAL, 628 "%s: %s", cp->u.s->wfile, strerror(errno)); 629 cp->u.s->wfd = -1; 630 break; 631 case 'w': 632 if (cp->u.fd != -1 && close(cp->u.fd)) 633 error(FATAL, "%s: %s", cp->t, strerror(errno)); 634 cp->u.fd = -1; 635 break; 636 case '{': 637 cfclose(cp->u.c, cp->next); 638 break; 639 } 640} 641