11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1992 Diomidis Spinellis. 31590Srgrimes * Copyright (c) 1992, 1993 41590Srgrimes * The Regents of the University of California. All rights reserved. 51590Srgrimes * 61590Srgrimes * This code is derived from software contributed to Berkeley by 71590Srgrimes * Diomidis Spinellis of Imperial College, University of London. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 3487766Smarkm#include <sys/cdefs.h> 3587766Smarkm__FBSDID("$FreeBSD: releng/10.2/usr.bin/sed/compile.c 276099 2014-12-23 02:46:00Z pfg $"); 3687766Smarkm 371590Srgrimes#ifndef lint 3887766Smarkmstatic const char sccsid[] = "@(#)compile.c 8.1 (Berkeley) 6/6/93"; 3928066Scharnier#endif 401590Srgrimes 411590Srgrimes#include <sys/types.h> 421590Srgrimes#include <sys/stat.h> 431590Srgrimes 441590Srgrimes#include <ctype.h> 4528066Scharnier#include <err.h> 46132145Stjr#include <errno.h> 471590Srgrimes#include <fcntl.h> 481590Srgrimes#include <limits.h> 491590Srgrimes#include <regex.h> 501590Srgrimes#include <stdio.h> 511590Srgrimes#include <stdlib.h> 521590Srgrimes#include <string.h> 53132145Stjr#include <wchar.h> 541590Srgrimes 551590Srgrimes#include "defs.h" 561590Srgrimes#include "extern.h" 571590Srgrimes 581590Srgrimes#define LHSZ 128 591590Srgrimes#define LHMASK (LHSZ - 1) 601590Srgrimesstatic struct labhash { 611590Srgrimes struct labhash *lh_next; 621590Srgrimes u_int lh_hash; 631590Srgrimes struct s_command *lh_cmd; 641590Srgrimes int lh_ref; 651590Srgrimes} *labels[LHSZ]; 661590Srgrimes 6792922Simpstatic char *compile_addr(char *, struct s_addr *); 6892922Simpstatic char *compile_ccl(char **, char *); 69197361Sddsstatic char *compile_delimited(char *, char *, int); 7092922Simpstatic char *compile_flags(char *, struct s_subst *); 71171206Sssouhlalstatic regex_t *compile_re(char *, int); 7292922Simpstatic char *compile_subst(char *, struct s_subst *); 7392922Simpstatic char *compile_text(void); 74132145Stjrstatic char *compile_tr(char *, struct s_tr **); 751590Srgrimesstatic struct s_command 7692922Simp **compile_stream(struct s_command **); 7792922Simpstatic char *duptoeol(char *, const char *); 7892922Simpstatic void enterlabel(struct s_command *); 791590Srgrimesstatic struct s_command 8092922Simp *findlabel(char *); 8192922Simpstatic void fixuplabel(struct s_command *, struct s_command *); 8292922Simpstatic void uselabel(void); 831590Srgrimes 841590Srgrimes/* 851590Srgrimes * Command specification. This is used to drive the command parser. 861590Srgrimes */ 871590Srgrimesstruct s_format { 881590Srgrimes char code; /* Command code */ 891590Srgrimes int naddr; /* Number of address args */ 901590Srgrimes enum e_args args; /* Argument type */ 911590Srgrimes}; 921590Srgrimes 931590Srgrimesstatic struct s_format cmd_fmts[] = { 941590Srgrimes {'{', 2, GROUP}, 9510075Sjkh {'}', 0, ENDGROUP}, 961590Srgrimes {'a', 1, TEXT}, 971590Srgrimes {'b', 2, BRANCH}, 981590Srgrimes {'c', 2, TEXT}, 991590Srgrimes {'d', 2, EMPTY}, 1001590Srgrimes {'D', 2, EMPTY}, 1011590Srgrimes {'g', 2, EMPTY}, 1021590Srgrimes {'G', 2, EMPTY}, 1031590Srgrimes {'h', 2, EMPTY}, 1041590Srgrimes {'H', 2, EMPTY}, 1051590Srgrimes {'i', 1, TEXT}, 1061590Srgrimes {'l', 2, EMPTY}, 1071590Srgrimes {'n', 2, EMPTY}, 1081590Srgrimes {'N', 2, EMPTY}, 1091590Srgrimes {'p', 2, EMPTY}, 1101590Srgrimes {'P', 2, EMPTY}, 1111590Srgrimes {'q', 1, EMPTY}, 1121590Srgrimes {'r', 1, RFILE}, 1131590Srgrimes {'s', 2, SUBST}, 1141590Srgrimes {'t', 2, BRANCH}, 1151590Srgrimes {'w', 2, WFILE}, 1161590Srgrimes {'x', 2, EMPTY}, 1171590Srgrimes {'y', 2, TR}, 1181590Srgrimes {'!', 2, NONSEL}, 1191590Srgrimes {':', 0, LABEL}, 1201590Srgrimes {'#', 0, COMMENT}, 1211590Srgrimes {'=', 1, EMPTY}, 1221590Srgrimes {'\0', 0, COMMENT}, 1231590Srgrimes}; 1241590Srgrimes 1251590Srgrimes/* The compiled program. */ 1261590Srgrimesstruct s_command *prog; 1271590Srgrimes 1281590Srgrimes/* 1291590Srgrimes * Compile the program into prog. 1301590Srgrimes * Initialise appends. 1311590Srgrimes */ 1321590Srgrimesvoid 133122044Sdescompile(void) 1341590Srgrimes{ 13510075Sjkh *compile_stream(&prog) = NULL; 1361590Srgrimes fixuplabel(prog, NULL); 1371590Srgrimes uselabel(); 13886193Smikeh if (appendnum == 0) 13986193Smikeh appends = NULL; 14086193Smikeh else if ((appends = malloc(sizeof(struct s_appends) * appendnum)) == 14186193Smikeh NULL) 14280286Sobrien err(1, "malloc"); 14380286Sobrien if ((match = malloc((maxnsub + 1) * sizeof(regmatch_t))) == NULL) 14480286Sobrien err(1, "malloc"); 1451590Srgrimes} 1461590Srgrimes 1471590Srgrimes#define EATSPACE() do { \ 1481590Srgrimes if (p) \ 14917522Sache while (*p && isspace((unsigned char)*p)) \ 1501590Srgrimes p++; \ 1511590Srgrimes } while (0) 1521590Srgrimes 1531590Srgrimesstatic struct s_command ** 154122044Sdescompile_stream(struct s_command **link) 15510075Sjkh{ 15687766Smarkm char *p; 1571590Srgrimes static char lbuf[_POSIX2_LINE_MAX + 1]; /* To save stack */ 15810075Sjkh struct s_command *cmd, *cmd2, *stack; 1591590Srgrimes struct s_format *fp; 160171206Sssouhlal char re[_POSIX2_LINE_MAX + 1]; 1611590Srgrimes int naddr; /* Number of addresses */ 1621590Srgrimes 16310075Sjkh stack = 0; 1641590Srgrimes for (;;) { 16541602Sarchie if ((p = cu_fgets(lbuf, sizeof(lbuf), NULL)) == NULL) { 16610075Sjkh if (stack != 0) 16728066Scharnier errx(1, "%lu: %s: unexpected EOF (pending }'s)", 16828066Scharnier linenum, fname); 1691590Srgrimes return (link); 1701590Srgrimes } 1711590Srgrimes 1721590Srgrimessemicolon: EATSPACE(); 173122045Sdes if (p) { 174122045Sdes if (*p == '#' || *p == '\0') 175122045Sdes continue; 176122045Sdes else if (*p == ';') { 177122045Sdes p++; 178122045Sdes goto semicolon; 179122045Sdes } 180122045Sdes } 18180286Sobrien if ((*link = cmd = malloc(sizeof(struct s_command))) == NULL) 18280286Sobrien err(1, "malloc"); 1831590Srgrimes link = &cmd->next; 184192732Sbrian cmd->startline = cmd->nonsel = 0; 1851590Srgrimes /* First parse the addresses */ 1861590Srgrimes naddr = 0; 1871590Srgrimes 1881590Srgrimes/* Valid characters to start an address */ 1891590Srgrimes#define addrchar(c) (strchr("0123456789/\\$", (c))) 1901590Srgrimes if (addrchar(*p)) { 1911590Srgrimes naddr++; 19280286Sobrien if ((cmd->a1 = malloc(sizeof(struct s_addr))) == NULL) 19380286Sobrien err(1, "malloc"); 1941590Srgrimes p = compile_addr(p, cmd->a1); 1951590Srgrimes EATSPACE(); /* EXTENSION */ 1961590Srgrimes if (*p == ',') { 1971590Srgrimes p++; 1981590Srgrimes EATSPACE(); /* EXTENSION */ 19910075Sjkh naddr++; 20080286Sobrien if ((cmd->a2 = malloc(sizeof(struct s_addr))) 20180286Sobrien == NULL) 20280286Sobrien err(1, "malloc"); 2031590Srgrimes p = compile_addr(p, cmd->a2); 20410075Sjkh EATSPACE(); 20510075Sjkh } else 20610075Sjkh cmd->a2 = 0; 20710075Sjkh } else 20810075Sjkh cmd->a1 = cmd->a2 = 0; 2091590Srgrimes 2101590Srgrimesnonsel: /* Now parse the command */ 2111590Srgrimes if (!*p) 21228066Scharnier errx(1, "%lu: %s: command expected", linenum, fname); 2131590Srgrimes cmd->code = *p; 2141590Srgrimes for (fp = cmd_fmts; fp->code; fp++) 2151590Srgrimes if (fp->code == *p) 2161590Srgrimes break; 2171590Srgrimes if (!fp->code) 21828066Scharnier errx(1, "%lu: %s: invalid command code %c", linenum, fname, *p); 2191590Srgrimes if (naddr > fp->naddr) 22028066Scharnier errx(1, 22128066Scharnier "%lu: %s: command %c expects up to %d address(es), found %d", 22228066Scharnier linenum, fname, *p, fp->naddr, naddr); 2231590Srgrimes switch (fp->args) { 2241590Srgrimes case NONSEL: /* ! */ 22510075Sjkh p++; 22610075Sjkh EATSPACE(); 227184854Sdds cmd->nonsel = 1; 2281590Srgrimes goto nonsel; 2291590Srgrimes case GROUP: /* { */ 2301590Srgrimes p++; 2311590Srgrimes EATSPACE(); 23210075Sjkh cmd->next = stack; 23310075Sjkh stack = cmd; 23410075Sjkh link = &cmd->u.c; 23510075Sjkh if (*p) 23610075Sjkh goto semicolon; 2371590Srgrimes break; 23810075Sjkh case ENDGROUP: 23910075Sjkh /* 24010075Sjkh * Short-circuit command processing, since end of 24110075Sjkh * group is really just a noop. 24210075Sjkh */ 24310075Sjkh cmd->nonsel = 1; 24410075Sjkh if (stack == 0) 24528066Scharnier errx(1, "%lu: %s: unexpected }", linenum, fname); 24610075Sjkh cmd2 = stack; 24710075Sjkh stack = cmd2->next; 24810075Sjkh cmd2->next = cmd; 24910075Sjkh /*FALLTHROUGH*/ 2501590Srgrimes case EMPTY: /* d D g G h H l n N p P q x = \0 */ 2511590Srgrimes p++; 2521590Srgrimes EATSPACE(); 2531590Srgrimes if (*p == ';') { 2541590Srgrimes p++; 2551590Srgrimes link = &cmd->next; 2561590Srgrimes goto semicolon; 2571590Srgrimes } 2581590Srgrimes if (*p) 25928066Scharnier errx(1, "%lu: %s: extra characters at the end of %c command", 26028066Scharnier linenum, fname, cmd->code); 2611590Srgrimes break; 2621590Srgrimes case TEXT: /* a c i */ 2631590Srgrimes p++; 2641590Srgrimes EATSPACE(); 2651590Srgrimes if (*p != '\\') 26628066Scharnier errx(1, 26728066Scharnier"%lu: %s: command %c expects \\ followed by text", linenum, fname, cmd->code); 2681590Srgrimes p++; 2691590Srgrimes EATSPACE(); 2701590Srgrimes if (*p) 27128066Scharnier errx(1, 27228066Scharnier "%lu: %s: extra characters after \\ at the end of %c command", 27328066Scharnier linenum, fname, cmd->code); 2741590Srgrimes cmd->t = compile_text(); 2751590Srgrimes break; 2761590Srgrimes case COMMENT: /* \0 # */ 2771590Srgrimes break; 2781590Srgrimes case WFILE: /* w */ 2791590Srgrimes p++; 2801590Srgrimes EATSPACE(); 2811590Srgrimes if (*p == '\0') 28228066Scharnier errx(1, "%lu: %s: filename expected", linenum, fname); 2831590Srgrimes cmd->t = duptoeol(p, "w command"); 2841590Srgrimes if (aflag) 2851590Srgrimes cmd->u.fd = -1; 286122045Sdes else if ((cmd->u.fd = open(p, 2871590Srgrimes O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 2881590Srgrimes DEFFILEMODE)) == -1) 28928066Scharnier err(1, "%s", p); 2901590Srgrimes break; 2911590Srgrimes case RFILE: /* r */ 2921590Srgrimes p++; 2931590Srgrimes EATSPACE(); 2941590Srgrimes if (*p == '\0') 29528066Scharnier errx(1, "%lu: %s: filename expected", linenum, fname); 2961590Srgrimes else 2971590Srgrimes cmd->t = duptoeol(p, "read command"); 2981590Srgrimes break; 2991590Srgrimes case BRANCH: /* b t */ 3001590Srgrimes p++; 3011590Srgrimes EATSPACE(); 3021590Srgrimes if (*p == '\0') 3031590Srgrimes cmd->t = NULL; 3041590Srgrimes else 3051590Srgrimes cmd->t = duptoeol(p, "branch"); 3061590Srgrimes break; 3071590Srgrimes case LABEL: /* : */ 3081590Srgrimes p++; 3091590Srgrimes EATSPACE(); 3101590Srgrimes cmd->t = duptoeol(p, "label"); 3111590Srgrimes if (strlen(p) == 0) 31228066Scharnier errx(1, "%lu: %s: empty label", linenum, fname); 3131590Srgrimes enterlabel(cmd); 3141590Srgrimes break; 3151590Srgrimes case SUBST: /* s */ 3161590Srgrimes p++; 3171590Srgrimes if (*p == '\0' || *p == '\\') 31828066Scharnier errx(1, 319122045Sdes"%lu: %s: substitute pattern can not be delimited by newline or backslash", 32028066Scharnier linenum, fname); 321171284Sdelphij if ((cmd->u.s = calloc(1, sizeof(struct s_subst))) == NULL) 32280286Sobrien err(1, "malloc"); 323197361Sdds p = compile_delimited(p, re, 0); 3241590Srgrimes if (p == NULL) 32528066Scharnier errx(1, 32628066Scharnier "%lu: %s: unterminated substitute pattern", linenum, fname); 327184777Shrs 328184777Shrs /* Compile RE with no case sensitivity temporarily */ 329184777Shrs if (*re == '\0') 330184777Shrs cmd->u.s->re = NULL; 331184777Shrs else 332184777Shrs cmd->u.s->re = compile_re(re, 0); 333171284Sdelphij --p; 334171284Sdelphij p = compile_subst(p, cmd->u.s); 335171284Sdelphij p = compile_flags(p, cmd->u.s); 336184777Shrs 337184777Shrs /* Recompile RE with case sensitivity from "I" flag if any */ 338171206Sssouhlal if (*re == '\0') 339171206Sssouhlal cmd->u.s->re = NULL; 340171206Sssouhlal else 341171206Sssouhlal cmd->u.s->re = compile_re(re, cmd->u.s->icase); 3421590Srgrimes EATSPACE(); 3431590Srgrimes if (*p == ';') { 3441590Srgrimes p++; 3451590Srgrimes link = &cmd->next; 3461590Srgrimes goto semicolon; 3471590Srgrimes } 3481590Srgrimes break; 3491590Srgrimes case TR: /* y */ 3501590Srgrimes p++; 351132145Stjr p = compile_tr(p, &cmd->u.y); 3521590Srgrimes EATSPACE(); 3531590Srgrimes if (*p == ';') { 3541590Srgrimes p++; 3551590Srgrimes link = &cmd->next; 3561590Srgrimes goto semicolon; 3571590Srgrimes } 3581590Srgrimes if (*p) 35928066Scharnier errx(1, 36028066Scharnier"%lu: %s: extra text at the end of a transform command", linenum, fname); 3611590Srgrimes break; 3621590Srgrimes } 3631590Srgrimes } 3641590Srgrimes} 3651590Srgrimes 3661590Srgrimes/* 3671590Srgrimes * Get a delimited string. P points to the delimeter of the string; d points 3681590Srgrimes * to a buffer area. Newline and delimiter escapes are processed; other 3691590Srgrimes * escapes are ignored. 3701590Srgrimes * 3711590Srgrimes * Returns a pointer to the first character after the final delimiter or NULL 3721590Srgrimes * in the case of a non-terminated string. The character array d is filled 3731590Srgrimes * with the processed string. 3741590Srgrimes */ 3751590Srgrimesstatic char * 376197361Sddscompile_delimited(char *p, char *d, int is_tr) 3771590Srgrimes{ 3781590Srgrimes char c; 3791590Srgrimes 3801590Srgrimes c = *p++; 3811590Srgrimes if (c == '\0') 3821590Srgrimes return (NULL); 3831590Srgrimes else if (c == '\\') 38428066Scharnier errx(1, "%lu: %s: \\ can not be used as a string delimiter", 38528066Scharnier linenum, fname); 3861590Srgrimes else if (c == '\n') 38728066Scharnier errx(1, "%lu: %s: newline can not be used as a string delimiter", 38828066Scharnier linenum, fname); 3891590Srgrimes while (*p) { 390197356Sdds if (*p == '[' && *p != c) { 39110075Sjkh if ((d = compile_ccl(&p, d)) == NULL) 39228066Scharnier errx(1, "%lu: %s: unbalanced brackets ([])", linenum, fname); 39310075Sjkh continue; 39410075Sjkh } else if (*p == '\\' && p[1] == '[') { 39510075Sjkh *d++ = *p++; 39610075Sjkh } else if (*p == '\\' && p[1] == c) 3971590Srgrimes p++; 3981590Srgrimes else if (*p == '\\' && p[1] == 'n') { 3991590Srgrimes *d++ = '\n'; 4001590Srgrimes p += 2; 4011590Srgrimes continue; 402197361Sdds } else if (*p == '\\' && p[1] == '\\') { 403197361Sdds if (is_tr) 404197361Sdds p++; 405197361Sdds else 406197361Sdds *d++ = *p++; 407197361Sdds } else if (*p == c) { 4081590Srgrimes *d = '\0'; 4091590Srgrimes return (p + 1); 4101590Srgrimes } 4111590Srgrimes *d++ = *p++; 4121590Srgrimes } 4131590Srgrimes return (NULL); 4141590Srgrimes} 4151590Srgrimes 41610075Sjkh 41710075Sjkh/* compile_ccl: expand a POSIX character class */ 41810075Sjkhstatic char * 419122044Sdescompile_ccl(char **sp, char *t) 42010075Sjkh{ 42110075Sjkh int c, d; 42210075Sjkh char *s = *sp; 42310075Sjkh 42410075Sjkh *t++ = *s++; 42510075Sjkh if (*s == '^') 42610075Sjkh *t++ = *s++; 42710075Sjkh if (*s == ']') 42810075Sjkh *t++ = *s++; 42910075Sjkh for (; *s && (*t = *s) != ']'; s++, t++) 43010075Sjkh if (*s == '[' && ((d = *(s+1)) == '.' || d == ':' || d == '=')) { 43110075Sjkh *++t = *++s, t++, s++; 43210075Sjkh for (c = *s; (*t = *s) != ']' || c != d; s++, t++) 43310075Sjkh if ((c = *s) == '\0') 43410075Sjkh return NULL; 435197362Sdds } 43610075Sjkh return (*s == ']') ? *sp = ++s, ++t : NULL; 43710075Sjkh} 43810075Sjkh 4391590Srgrimes/* 440171206Sssouhlal * Compiles the regular expression in RE and returns a pointer to the compiled 441171206Sssouhlal * regular expression. 4421590Srgrimes * Cflags are passed to regcomp. 4431590Srgrimes */ 444171206Sssouhlalstatic regex_t * 445171206Sssouhlalcompile_re(char *re, int case_insensitive) 4461590Srgrimes{ 447171206Sssouhlal regex_t *rep; 448171206Sssouhlal int eval, flags; 4491590Srgrimes 450171206Sssouhlal 451171206Sssouhlal flags = rflags; 452171206Sssouhlal if (case_insensitive) 453171206Sssouhlal flags |= REG_ICASE; 454171206Sssouhlal if ((rep = malloc(sizeof(regex_t))) == NULL) 45580286Sobrien err(1, "malloc"); 456176126Sdwmalone if ((eval = regcomp(rep, re, flags)) != 0) 45728066Scharnier errx(1, "%lu: %s: RE error: %s", 458171206Sssouhlal linenum, fname, strregerror(eval, rep)); 459171206Sssouhlal if (maxnsub < rep->re_nsub) 460171206Sssouhlal maxnsub = rep->re_nsub; 461171206Sssouhlal return (rep); 4621590Srgrimes} 4631590Srgrimes 4641590Srgrimes/* 4651590Srgrimes * Compile the substitution string of a regular expression and set res to 4661590Srgrimes * point to a saved copy of it. Nsub is the number of parenthesized regular 4671590Srgrimes * expressions. 4681590Srgrimes */ 4691590Srgrimesstatic char * 470122044Sdescompile_subst(char *p, struct s_subst *s) 4711590Srgrimes{ 4721590Srgrimes static char lbuf[_POSIX2_LINE_MAX + 1]; 47387766Smarkm int asize, size; 47487766Smarkm u_char ref; 4751590Srgrimes char c, *text, *op, *sp; 47697703Sgreen int more = 1, sawesc = 0; 4771590Srgrimes 4781590Srgrimes c = *p++; /* Terminator character */ 4791590Srgrimes if (c == '\0') 4801590Srgrimes return (NULL); 4811590Srgrimes 4821590Srgrimes s->maxbref = 0; 4831590Srgrimes s->linenum = linenum; 4841590Srgrimes asize = 2 * _POSIX2_LINE_MAX + 1; 48580286Sobrien if ((text = malloc(asize)) == NULL) 48680286Sobrien err(1, "malloc"); 4871590Srgrimes size = 0; 4881590Srgrimes do { 4891590Srgrimes op = sp = text + size; 4901590Srgrimes for (; *p; p++) { 49197703Sgreen if (*p == '\\' || sawesc) { 49297703Sgreen /* 49397703Sgreen * If this is a continuation from the last 49497703Sgreen * buffer, we won't have a character to 49597703Sgreen * skip over. 49697703Sgreen */ 49797703Sgreen if (sawesc) 49897703Sgreen sawesc = 0; 49997703Sgreen else 50097703Sgreen p++; 50197703Sgreen 50297703Sgreen if (*p == '\0') { 50397703Sgreen /* 50497703Sgreen * This escaped character is continued 50597703Sgreen * in the next part of the line. Note 50697703Sgreen * this fact, then cause the loop to 50797703Sgreen * exit w/ normal EOL case and reenter 50897703Sgreen * above with the new buffer. 50997703Sgreen */ 51097703Sgreen sawesc = 1; 51197703Sgreen p--; 51297703Sgreen continue; 51397703Sgreen } else if (strchr("123456789", *p) != NULL) { 5141590Srgrimes *sp++ = '\\'; 5151590Srgrimes ref = *p - '0'; 5161590Srgrimes if (s->re != NULL && 5171590Srgrimes ref > s->re->re_nsub) 51828066Scharnier errx(1, "%lu: %s: \\%c not defined in the RE", 51928066Scharnier linenum, fname, *p); 5201590Srgrimes if (s->maxbref < ref) 5211590Srgrimes s->maxbref = ref; 5221590Srgrimes } else if (*p == '&' || *p == '\\') 5231590Srgrimes *sp++ = '\\'; 5241590Srgrimes } else if (*p == c) { 52541602Sarchie if (*++p == '\0' && more) { 52641602Sarchie if (cu_fgets(lbuf, sizeof(lbuf), &more)) 52741573Sarchie p = lbuf; 52841573Sarchie } 5291590Srgrimes *sp++ = '\0'; 5301590Srgrimes size += sp - op; 53180286Sobrien if ((s->new = realloc(text, size)) == NULL) 53280286Sobrien err(1, "realloc"); 5331590Srgrimes return (p); 5341590Srgrimes } else if (*p == '\n') { 53528066Scharnier errx(1, 53628066Scharnier"%lu: %s: unescaped newline inside substitute pattern", linenum, fname); 5371590Srgrimes /* NOTREACHED */ 5381590Srgrimes } 5391590Srgrimes *sp++ = *p; 5401590Srgrimes } 5411590Srgrimes size += sp - op; 5421590Srgrimes if (asize - size < _POSIX2_LINE_MAX + 1) { 5431590Srgrimes asize *= 2; 54480286Sobrien if ((text = realloc(text, asize)) == NULL) 54580286Sobrien err(1, "realloc"); 5461590Srgrimes } 54741602Sarchie } while (cu_fgets(p = lbuf, sizeof(lbuf), &more)); 54828066Scharnier errx(1, "%lu: %s: unterminated substitute in regular expression", 54928066Scharnier linenum, fname); 5501590Srgrimes /* NOTREACHED */ 5511590Srgrimes} 5521590Srgrimes 5531590Srgrimes/* 5541590Srgrimes * Compile the flags of the s command 5551590Srgrimes */ 5561590Srgrimesstatic char * 557122044Sdescompile_flags(char *p, struct s_subst *s) 5581590Srgrimes{ 5591590Srgrimes int gn; /* True if we have seen g or n */ 560148692Sdds unsigned long nval; 561276099Spfg char wfile[_POSIX2_LINE_MAX + 1], *q, *eq; 5621590Srgrimes 5631590Srgrimes s->n = 1; /* Default */ 5641590Srgrimes s->p = 0; 5651590Srgrimes s->wfile = NULL; 5661590Srgrimes s->wfd = -1; 567171206Sssouhlal s->icase = 0; 5681590Srgrimes for (gn = 0;;) { 5691590Srgrimes EATSPACE(); /* EXTENSION */ 5701590Srgrimes switch (*p) { 5711590Srgrimes case 'g': 5721590Srgrimes if (gn) 57328066Scharnier errx(1, 57428066Scharnier"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 5751590Srgrimes gn = 1; 5761590Srgrimes s->n = 0; 5771590Srgrimes break; 5781590Srgrimes case '\0': 5791590Srgrimes case '\n': 5801590Srgrimes case ';': 5811590Srgrimes return (p); 5821590Srgrimes case 'p': 5831590Srgrimes s->p = 1; 5841590Srgrimes break; 585259443Seadler case 'i': 586171206Sssouhlal case 'I': 587171206Sssouhlal s->icase = 1; 588171206Sssouhlal break; 5891590Srgrimes case '1': case '2': case '3': 5901590Srgrimes case '4': case '5': case '6': 5911590Srgrimes case '7': case '8': case '9': 5921590Srgrimes if (gn) 59328066Scharnier errx(1, 59428066Scharnier"%lu: %s: more than one number or 'g' in substitute flags", linenum, fname); 5951590Srgrimes gn = 1; 596148692Sdds errno = 0; 597148692Sdds nval = strtol(p, &p, 10); 598148692Sdds if (errno == ERANGE || nval > INT_MAX) 599148692Sdds errx(1, 600148692Sdds"%lu: %s: overflow in the 'N' substitute flag", linenum, fname); 601148692Sdds s->n = nval; 602148692Sdds p--; 6031590Srgrimes break; 6041590Srgrimes case 'w': 6051590Srgrimes p++; 6061590Srgrimes#ifdef HISTORIC_PRACTICE 6071590Srgrimes if (*p != ' ') { 60828066Scharnier warnx("%lu: %s: space missing before w wfile", linenum, fname); 6091590Srgrimes return (p); 6101590Srgrimes } 6111590Srgrimes#endif 6121590Srgrimes EATSPACE(); 6131590Srgrimes q = wfile; 614276099Spfg eq = wfile + sizeof(wfile) - 1; 6151590Srgrimes while (*p) { 6161590Srgrimes if (*p == '\n') 6171590Srgrimes break; 618276099Spfg if (q >= eq) 619276099Spfg err(1, "wfile too long"); 6201590Srgrimes *q++ = *p++; 6211590Srgrimes } 6221590Srgrimes *q = '\0'; 6231590Srgrimes if (q == wfile) 62428066Scharnier errx(1, "%lu: %s: no wfile specified", linenum, fname); 6251590Srgrimes s->wfile = strdup(wfile); 6261590Srgrimes if (!aflag && (s->wfd = open(wfile, 6271590Srgrimes O_WRONLY|O_APPEND|O_CREAT|O_TRUNC, 6281590Srgrimes DEFFILEMODE)) == -1) 62928066Scharnier err(1, "%s", wfile); 6301590Srgrimes return (p); 6311590Srgrimes default: 63228066Scharnier errx(1, "%lu: %s: bad flag in substitute command: '%c'", 63328066Scharnier linenum, fname, *p); 6341590Srgrimes break; 6351590Srgrimes } 6361590Srgrimes p++; 6371590Srgrimes } 6381590Srgrimes} 6391590Srgrimes 6401590Srgrimes/* 6411590Srgrimes * Compile a translation set of strings into a lookup table. 6421590Srgrimes */ 6431590Srgrimesstatic char * 644132145Stjrcompile_tr(char *p, struct s_tr **py) 6451590Srgrimes{ 646132145Stjr struct s_tr *y; 6471590Srgrimes int i; 648132145Stjr const char *op, *np; 6491590Srgrimes char old[_POSIX2_LINE_MAX + 1]; 6501590Srgrimes char new[_POSIX2_LINE_MAX + 1]; 651132145Stjr size_t oclen, oldlen, nclen, newlen; 652132145Stjr mbstate_t mbs1, mbs2; 6531590Srgrimes 654132145Stjr if ((*py = y = malloc(sizeof(*y))) == NULL) 655132145Stjr err(1, NULL); 656132145Stjr y->multis = NULL; 657132145Stjr y->nmultis = 0; 658132145Stjr 6591590Srgrimes if (*p == '\0' || *p == '\\') 66028066Scharnier errx(1, 66128066Scharnier "%lu: %s: transform pattern can not be delimited by newline or backslash", 66228066Scharnier linenum, fname); 663197361Sdds p = compile_delimited(p, old, 1); 66428066Scharnier if (p == NULL) 66528066Scharnier errx(1, "%lu: %s: unterminated transform source string", 66628066Scharnier linenum, fname); 667197361Sdds p = compile_delimited(p - 1, new, 1); 66828066Scharnier if (p == NULL) 66928066Scharnier errx(1, "%lu: %s: unterminated transform target string", 67028066Scharnier linenum, fname); 6711590Srgrimes EATSPACE(); 672132145Stjr op = old; 673132145Stjr oldlen = mbsrtowcs(NULL, &op, 0, NULL); 674132145Stjr if (oldlen == (size_t)-1) 675132145Stjr err(1, NULL); 676132145Stjr np = new; 677132145Stjr newlen = mbsrtowcs(NULL, &np, 0, NULL); 678132145Stjr if (newlen == (size_t)-1) 679132145Stjr err(1, NULL); 680132145Stjr if (newlen != oldlen) 68128066Scharnier errx(1, "%lu: %s: transform strings are not the same length", 68228066Scharnier linenum, fname); 683132145Stjr if (MB_CUR_MAX == 1) { 684132145Stjr /* 685132145Stjr * The single-byte encoding case is easy: generate a 686132145Stjr * lookup table. 687132145Stjr */ 688132145Stjr for (i = 0; i <= UCHAR_MAX; i++) 689132145Stjr y->bytetab[i] = (char)i; 690132145Stjr for (; *op; op++, np++) 691132145Stjr y->bytetab[(u_char)*op] = *np; 692132145Stjr } else { 693132145Stjr /* 694132145Stjr * Multi-byte encoding case: generate a lookup table as 695132145Stjr * above, but only for single-byte characters. The first 696132145Stjr * bytes of multi-byte characters have their lookup table 697132145Stjr * entries set to 0, which causes do_tr() to search through 698132145Stjr * an auxiliary vector of multi-byte mappings. 699132145Stjr */ 700132145Stjr memset(&mbs1, 0, sizeof(mbs1)); 701132145Stjr memset(&mbs2, 0, sizeof(mbs2)); 702132145Stjr for (i = 0; i <= UCHAR_MAX; i++) 703132145Stjr y->bytetab[i] = (btowc(i) != WEOF) ? i : 0; 704132145Stjr while (*op != '\0') { 705132145Stjr oclen = mbrlen(op, MB_LEN_MAX, &mbs1); 706132145Stjr if (oclen == (size_t)-1 || oclen == (size_t)-2) 707132145Stjr errc(1, EILSEQ, NULL); 708132145Stjr nclen = mbrlen(np, MB_LEN_MAX, &mbs2); 709132145Stjr if (nclen == (size_t)-1 || nclen == (size_t)-2) 710132145Stjr errc(1, EILSEQ, NULL); 711132145Stjr if (oclen == 1 && nclen == 1) 712132145Stjr y->bytetab[(u_char)*op] = *np; 713132145Stjr else { 714132145Stjr y->bytetab[(u_char)*op] = 0; 715132145Stjr y->multis = realloc(y->multis, 716132145Stjr (y->nmultis + 1) * sizeof(*y->multis)); 717132145Stjr if (y->multis == NULL) 718132145Stjr err(1, NULL); 719132145Stjr i = y->nmultis++; 720132145Stjr y->multis[i].fromlen = oclen; 721132145Stjr memcpy(y->multis[i].from, op, oclen); 722132145Stjr y->multis[i].tolen = nclen; 723132145Stjr memcpy(y->multis[i].to, np, nclen); 724132145Stjr } 725132145Stjr op += oclen; 726132145Stjr np += nclen; 727132145Stjr } 728132145Stjr } 7291590Srgrimes return (p); 7301590Srgrimes} 7311590Srgrimes 7321590Srgrimes/* 7331590Srgrimes * Compile the text following an a or i command. 7341590Srgrimes */ 7351590Srgrimesstatic char * 736122044Sdescompile_text(void) 7371590Srgrimes{ 73817195Sbde int asize, esc_nl, size; 7391590Srgrimes char *text, *p, *op, *s; 7401590Srgrimes char lbuf[_POSIX2_LINE_MAX + 1]; 7411590Srgrimes 7421590Srgrimes asize = 2 * _POSIX2_LINE_MAX + 1; 74380286Sobrien if ((text = malloc(asize)) == NULL) 74480286Sobrien err(1, "malloc"); 7451590Srgrimes size = 0; 74641602Sarchie while (cu_fgets(lbuf, sizeof(lbuf), NULL)) { 7471590Srgrimes op = s = text + size; 7481590Srgrimes p = lbuf; 7491590Srgrimes EATSPACE(); 75017195Sbde for (esc_nl = 0; *p != '\0'; p++) { 75117195Sbde if (*p == '\\' && p[1] != '\0' && *++p == '\n') 75217195Sbde esc_nl = 1; 7531590Srgrimes *s++ = *p; 7541590Srgrimes } 7551590Srgrimes size += s - op; 75617195Sbde if (!esc_nl) { 7571590Srgrimes *s = '\0'; 7581590Srgrimes break; 7591590Srgrimes } 7601590Srgrimes if (asize - size < _POSIX2_LINE_MAX + 1) { 7611590Srgrimes asize *= 2; 76280286Sobrien if ((text = realloc(text, asize)) == NULL) 76380286Sobrien err(1, "realloc"); 7641590Srgrimes } 7651590Srgrimes } 76639571Sbrian text[size] = '\0'; 76780286Sobrien if ((p = realloc(text, size + 1)) == NULL) 76880286Sobrien err(1, "realloc"); 76980286Sobrien return (p); 7701590Srgrimes} 7711590Srgrimes 7721590Srgrimes/* 7731590Srgrimes * Get an address and return a pointer to the first character after 7741590Srgrimes * it. Fill the structure pointed to according to the address. 7751590Srgrimes */ 7761590Srgrimesstatic char * 777122044Sdescompile_addr(char *p, struct s_addr *a) 7781590Srgrimes{ 779171206Sssouhlal char *end, re[_POSIX2_LINE_MAX + 1]; 780171206Sssouhlal int icase; 7811590Srgrimes 782171206Sssouhlal icase = 0; 783171206Sssouhlal 784192732Sbrian a->type = 0; 7851590Srgrimes switch (*p) { 7861590Srgrimes case '\\': /* Context address */ 7871590Srgrimes ++p; 7881590Srgrimes /* FALLTHROUGH */ 7891590Srgrimes case '/': /* Context address */ 790197361Sdds p = compile_delimited(p, re, 0); 7911590Srgrimes if (p == NULL) 79228066Scharnier errx(1, "%lu: %s: unterminated regular expression", linenum, fname); 793171206Sssouhlal /* Check for case insensitive regexp flag */ 794171206Sssouhlal if (*p == 'I') { 795171206Sssouhlal icase = 1; 796171206Sssouhlal p++; 797171206Sssouhlal } 798171206Sssouhlal if (*re == '\0') 799171206Sssouhlal a->u.r = NULL; 800171206Sssouhlal else 801171206Sssouhlal a->u.r = compile_re(re, icase); 8021590Srgrimes a->type = AT_RE; 8031590Srgrimes return (p); 8041590Srgrimes 8051590Srgrimes case '$': /* Last line */ 8061590Srgrimes a->type = AT_LAST; 8071590Srgrimes return (p + 1); 808192732Sbrian 809192732Sbrian case '+': /* Relative line number */ 810192732Sbrian a->type = AT_RELLINE; 811192732Sbrian p++; 812192732Sbrian /* FALLTHROUGH */ 8131590Srgrimes /* Line number */ 8148874Srgrimes case '0': case '1': case '2': case '3': case '4': 8151590Srgrimes case '5': case '6': case '7': case '8': case '9': 816192732Sbrian if (a->type == 0) 817192732Sbrian a->type = AT_LINE; 8181590Srgrimes a->u.l = strtol(p, &end, 10); 8191590Srgrimes return (end); 8201590Srgrimes default: 82128066Scharnier errx(1, "%lu: %s: expected context address", linenum, fname); 8221590Srgrimes return (NULL); 8231590Srgrimes } 8241590Srgrimes} 8251590Srgrimes 8261590Srgrimes/* 8271590Srgrimes * duptoeol -- 8281590Srgrimes * Return a copy of all the characters up to \n or \0. 8291590Srgrimes */ 8301590Srgrimesstatic char * 831122044Sdesduptoeol(char *s, const char *ctype) 8321590Srgrimes{ 8331590Srgrimes size_t len; 8341590Srgrimes int ws; 83580286Sobrien char *p, *start; 8361590Srgrimes 8371590Srgrimes ws = 0; 8381590Srgrimes for (start = s; *s != '\0' && *s != '\n'; ++s) 83917522Sache ws = isspace((unsigned char)*s); 8401590Srgrimes *s = '\0'; 8411590Srgrimes if (ws) 84228066Scharnier warnx("%lu: %s: whitespace after %s", linenum, fname, ctype); 8431590Srgrimes len = s - start + 1; 84480286Sobrien if ((p = malloc(len)) == NULL) 84580286Sobrien err(1, "malloc"); 84680286Sobrien return (memmove(p, start, len)); 8471590Srgrimes} 8481590Srgrimes 8491590Srgrimes/* 8501590Srgrimes * Convert goto label names to addresses, and count a and r commands, in 8511590Srgrimes * the given subset of the script. Free the memory used by labels in b 8521590Srgrimes * and t commands (but not by :). 8531590Srgrimes * 8541590Srgrimes * TODO: Remove } nodes 8551590Srgrimes */ 8561590Srgrimesstatic void 857122044Sdesfixuplabel(struct s_command *cp, struct s_command *end) 8581590Srgrimes{ 8591590Srgrimes 8601590Srgrimes for (; cp != end; cp = cp->next) 8611590Srgrimes switch (cp->code) { 8621590Srgrimes case 'a': 8631590Srgrimes case 'r': 8641590Srgrimes appendnum++; 8651590Srgrimes break; 8661590Srgrimes case 'b': 8671590Srgrimes case 't': 8681590Srgrimes /* Resolve branch target. */ 8691590Srgrimes if (cp->t == NULL) { 8701590Srgrimes cp->u.c = NULL; 8711590Srgrimes break; 8721590Srgrimes } 8731590Srgrimes if ((cp->u.c = findlabel(cp->t)) == NULL) 87428066Scharnier errx(1, "%lu: %s: undefined label '%s'", linenum, fname, cp->t); 8751590Srgrimes free(cp->t); 8761590Srgrimes break; 8771590Srgrimes case '{': 8781590Srgrimes /* Do interior commands. */ 8791590Srgrimes fixuplabel(cp->u.c, cp->next); 8801590Srgrimes break; 8811590Srgrimes } 8821590Srgrimes} 8831590Srgrimes 8841590Srgrimes/* 8851590Srgrimes * Associate the given command label for later lookup. 8861590Srgrimes */ 8871590Srgrimesstatic void 888122044Sdesenterlabel(struct s_command *cp) 8891590Srgrimes{ 89087766Smarkm struct labhash **lhp, *lh; 89187766Smarkm u_char *p; 89287766Smarkm u_int h, c; 8931590Srgrimes 8941590Srgrimes for (h = 0, p = (u_char *)cp->t; (c = *p) != 0; p++) 8951590Srgrimes h = (h << 5) + h + c; 8961590Srgrimes lhp = &labels[h & LHMASK]; 8971590Srgrimes for (lh = *lhp; lh != NULL; lh = lh->lh_next) 8981590Srgrimes if (lh->lh_hash == h && strcmp(cp->t, lh->lh_cmd->t) == 0) 89928066Scharnier errx(1, "%lu: %s: duplicate label '%s'", linenum, fname, cp->t); 90080286Sobrien if ((lh = malloc(sizeof *lh)) == NULL) 90180286Sobrien err(1, "malloc"); 9021590Srgrimes lh->lh_next = *lhp; 9031590Srgrimes lh->lh_hash = h; 9041590Srgrimes lh->lh_cmd = cp; 9051590Srgrimes lh->lh_ref = 0; 9061590Srgrimes *lhp = lh; 9071590Srgrimes} 9081590Srgrimes 9091590Srgrimes/* 9101590Srgrimes * Find the label contained in the command l in the command linked 9111590Srgrimes * list cp. L is excluded from the search. Return NULL if not found. 9121590Srgrimes */ 9131590Srgrimesstatic struct s_command * 914122044Sdesfindlabel(char *name) 9151590Srgrimes{ 91687766Smarkm struct labhash *lh; 91787766Smarkm u_char *p; 91887766Smarkm u_int h, c; 9191590Srgrimes 9201590Srgrimes for (h = 0, p = (u_char *)name; (c = *p) != 0; p++) 9211590Srgrimes h = (h << 5) + h + c; 9221590Srgrimes for (lh = labels[h & LHMASK]; lh != NULL; lh = lh->lh_next) { 9231590Srgrimes if (lh->lh_hash == h && strcmp(name, lh->lh_cmd->t) == 0) { 9241590Srgrimes lh->lh_ref = 1; 9251590Srgrimes return (lh->lh_cmd); 9261590Srgrimes } 9271590Srgrimes } 9281590Srgrimes return (NULL); 9291590Srgrimes} 9301590Srgrimes 9318874Srgrimes/* 9321590Srgrimes * Warn about any unused labels. As a side effect, release the label hash 9331590Srgrimes * table space. 9341590Srgrimes */ 9351590Srgrimesstatic void 936122044Sdesuselabel(void) 9371590Srgrimes{ 93887766Smarkm struct labhash *lh, *next; 93987766Smarkm int i; 9401590Srgrimes 9411590Srgrimes for (i = 0; i < LHSZ; i++) { 9421590Srgrimes for (lh = labels[i]; lh != NULL; lh = next) { 9431590Srgrimes next = lh->lh_next; 9441590Srgrimes if (!lh->lh_ref) 94528066Scharnier warnx("%lu: %s: unused label '%s'", 94628066Scharnier linenum, fname, lh->lh_cmd->t); 9471590Srgrimes free(lh); 9481590Srgrimes } 9491590Srgrimes } 9501590Srgrimes} 951