119304Speter/*- 219304Speter * Copyright (c) 1992, 1993, 1994 319304Speter * The Regents of the University of California. All rights reserved. 419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996 519304Speter * Keith Bostic. All rights reserved. 619304Speter * 719304Speter * See the LICENSE file for redistribution information. 819304Speter */ 919304Speter 1019304Speter#include "config.h" 1119304Speter 1219304Speter#ifndef lint 1319304Speterstatic const char sccsid[] = "@(#)ex_subst.c 10.37 (Berkeley) 9/15/96"; 1419304Speter#endif /* not lint */ 1519304Speter 1619304Speter#include <sys/types.h> 1719304Speter#include <sys/queue.h> 1819304Speter#include <sys/time.h> 1919304Speter 2019304Speter#include <bitstring.h> 2119304Speter#include <ctype.h> 2219304Speter#include <errno.h> 2319304Speter#include <limits.h> 2419304Speter#include <stdio.h> 2519304Speter#include <stdlib.h> 2619304Speter#include <string.h> 2719304Speter#include <unistd.h> 2819304Speter 2919304Speter#include "../common/common.h" 3019304Speter#include "../vi/vi.h" 3119304Speter 3219304Speter#define SUB_FIRST 0x01 /* The 'r' flag isn't reasonable. */ 3319304Speter#define SUB_MUSTSETR 0x02 /* The 'r' flag is required. */ 3419304Speter 3519304Speterstatic int re_conv __P((SCR *, char **, size_t *, int *)); 3619304Speterstatic int re_cscope_conv __P((SCR *, char **, size_t *, int *)); 3719304Speterstatic int re_sub __P((SCR *, 3819304Speter char *, char **, size_t *, size_t *, regmatch_t [10])); 3919304Speterstatic int re_tag_conv __P((SCR *, char **, size_t *, int *)); 4019304Speterstatic int s __P((SCR *, EXCMD *, char *, regex_t *, u_int)); 4119304Speter 4219304Speter/* 4319304Speter * ex_s -- 4419304Speter * [line [,line]] s[ubstitute] [[/;]pat[/;]/repl[/;] [cgr] [count] [#lp]] 4519304Speter * 4619304Speter * Substitute on lines matching a pattern. 4719304Speter * 4819304Speter * PUBLIC: int ex_s __P((SCR *, EXCMD *)); 4919304Speter */ 5019304Speterint 5119304Speterex_s(sp, cmdp) 5219304Speter SCR *sp; 5319304Speter EXCMD *cmdp; 5419304Speter{ 5519304Speter regex_t *re; 5619304Speter size_t blen, len; 5719304Speter u_int flags; 5819304Speter int delim; 5919304Speter char *bp, *ptrn, *rep, *p, *t; 6019304Speter 6119304Speter /* 6219304Speter * Skip leading white space. 6319304Speter * 6419304Speter * !!! 6519304Speter * Historic vi allowed any non-alphanumeric to serve as the 6619304Speter * substitution command delimiter. 6719304Speter * 6819304Speter * !!! 6919304Speter * If the arguments are empty, it's the same as &, i.e. we 7019304Speter * repeat the last substitution. 7119304Speter */ 7219304Speter if (cmdp->argc == 0) 7319304Speter goto subagain; 7419304Speter for (p = cmdp->argv[0]->bp, 7519304Speter len = cmdp->argv[0]->len; len > 0; --len, ++p) { 7619304Speter if (!isblank(*p)) 7719304Speter break; 7819304Speter } 7919304Speter if (len == 0) 8019304Spetersubagain: return (ex_subagain(sp, cmdp)); 8119304Speter 8219304Speter delim = *p++; 8319304Speter if (isalnum(delim) || delim == '\\') 8419304Speter return (s(sp, cmdp, p, &sp->subre_c, SUB_MUSTSETR)); 8519304Speter 8619304Speter /* 8719304Speter * !!! 8819304Speter * The full-blown substitute command reset the remembered 8919304Speter * state of the 'c' and 'g' suffices. 9019304Speter */ 9119304Speter sp->c_suffix = sp->g_suffix = 0; 9219304Speter 9319304Speter /* 9419304Speter * Get the pattern string, toss escaping characters. 9519304Speter * 9619304Speter * !!! 9719304Speter * Historic vi accepted any of the following forms: 9819304Speter * 9919304Speter * :s/abc/def/ change "abc" to "def" 10019304Speter * :s/abc/def change "abc" to "def" 10119304Speter * :s/abc/ delete "abc" 10219304Speter * :s/abc delete "abc" 10319304Speter * 10419304Speter * QUOTING NOTE: 10519304Speter * 10619304Speter * Only toss an escaping character if it escapes a delimiter. 10719304Speter * This means that "s/A/\\\\f" replaces "A" with "\\f". It 10819304Speter * would be nice to be more regular, i.e. for each layer of 10919304Speter * escaping a single escaping character is removed, but that's 11019304Speter * not how the historic vi worked. 11119304Speter */ 11219304Speter for (ptrn = t = p;;) { 11319304Speter if (p[0] == '\0' || p[0] == delim) { 11419304Speter if (p[0] == delim) 11519304Speter ++p; 11619304Speter /* 11719304Speter * !!! 11819304Speter * Nul terminate the pattern string -- it's passed 11919304Speter * to regcomp which doesn't understand anything else. 12019304Speter */ 12119304Speter *t = '\0'; 12219304Speter break; 12319304Speter } 12419304Speter if (p[0] == '\\') 12519304Speter if (p[1] == delim) 12619304Speter ++p; 12719304Speter else if (p[1] == '\\') 12819304Speter *t++ = *p++; 12919304Speter *t++ = *p++; 13019304Speter } 13119304Speter 13219304Speter /* 13319304Speter * If the pattern string is empty, use the last RE (not just the 13419304Speter * last substitution RE). 13519304Speter */ 13619304Speter if (*ptrn == '\0') { 13719304Speter if (sp->re == NULL) { 13819304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 13919304Speter return (1); 14019304Speter } 14119304Speter 14219304Speter /* Re-compile the RE if necessary. */ 14319304Speter if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 14419304Speter sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 14519304Speter return (1); 14619304Speter flags = 0; 14719304Speter } else { 14819304Speter /* 14919304Speter * !!! 15019304Speter * Compile the RE. Historic practice is that substitutes set 15119304Speter * the search direction as well as both substitute and search 15219304Speter * RE's. We compile the RE twice, as we don't want to bother 15319304Speter * ref counting the pattern string and (opaque) structure. 15419304Speter */ 15519304Speter if (re_compile(sp, ptrn, t - ptrn, 15619304Speter &sp->re, &sp->re_len, &sp->re_c, RE_C_SEARCH)) 15719304Speter return (1); 15819304Speter if (re_compile(sp, ptrn, t - ptrn, 15919304Speter &sp->subre, &sp->subre_len, &sp->subre_c, RE_C_SUBST)) 16019304Speter return (1); 16119304Speter 16219304Speter flags = SUB_FIRST; 16319304Speter sp->searchdir = FORWARD; 16419304Speter } 16519304Speter re = &sp->re_c; 16619304Speter 16719304Speter /* 16819304Speter * Get the replacement string. 16919304Speter * 17019304Speter * The special character & (\& if O_MAGIC not set) matches the 17119304Speter * entire RE. No handling of & is required here, it's done by 17219304Speter * re_sub(). 17319304Speter * 17419304Speter * The special character ~ (\~ if O_MAGIC not set) inserts the 17519304Speter * previous replacement string into this replacement string. 17619304Speter * Count ~'s to figure out how much space we need. We could 17719304Speter * special case nonexistent last patterns or whether or not 17819304Speter * O_MAGIC is set, but it's probably not worth the effort. 17919304Speter * 18019304Speter * QUOTING NOTE: 18119304Speter * 18219304Speter * Only toss an escaping character if it escapes a delimiter or 18319304Speter * if O_MAGIC is set and it escapes a tilde. 18419304Speter * 18519304Speter * !!! 18619304Speter * If the entire replacement pattern is "%", then use the last 18719304Speter * replacement pattern. This semantic was added to vi in System 18819304Speter * V and then percolated elsewhere, presumably around the time 18919304Speter * that it was added to their version of ed(1). 19019304Speter */ 19119304Speter if (p[0] == '\0' || p[0] == delim) { 19219304Speter if (p[0] == delim) 19319304Speter ++p; 19419304Speter if (sp->repl != NULL) 19519304Speter free(sp->repl); 19619304Speter sp->repl = NULL; 19719304Speter sp->repl_len = 0; 19819304Speter } else if (p[0] == '%' && (p[1] == '\0' || p[1] == delim)) 19919304Speter p += p[1] == delim ? 2 : 1; 20019304Speter else { 20119304Speter for (rep = p, len = 0; 20219304Speter p[0] != '\0' && p[0] != delim; ++p, ++len) 20319304Speter if (p[0] == '~') 20419304Speter len += sp->repl_len; 20519304Speter GET_SPACE_RET(sp, bp, blen, len); 20619304Speter for (t = bp, len = 0, p = rep;;) { 20719304Speter if (p[0] == '\0' || p[0] == delim) { 20819304Speter if (p[0] == delim) 20919304Speter ++p; 21019304Speter break; 21119304Speter } 21219304Speter if (p[0] == '\\') { 21319304Speter if (p[1] == delim) 21419304Speter ++p; 21519304Speter else if (p[1] == '\\') { 21619304Speter *t++ = *p++; 21719304Speter ++len; 21819304Speter } else if (p[1] == '~') { 21919304Speter ++p; 22019304Speter if (!O_ISSET(sp, O_MAGIC)) 22119304Speter goto tilde; 22219304Speter } 22319304Speter } else if (p[0] == '~' && O_ISSET(sp, O_MAGIC)) { 22419304Spetertilde: ++p; 22519304Speter memcpy(t, sp->repl, sp->repl_len); 22619304Speter t += sp->repl_len; 22719304Speter len += sp->repl_len; 22819304Speter continue; 22919304Speter } 23019304Speter *t++ = *p++; 23119304Speter ++len; 23219304Speter } 23319304Speter if ((sp->repl_len = len) != 0) { 23419304Speter if (sp->repl != NULL) 23519304Speter free(sp->repl); 23619304Speter if ((sp->repl = malloc(len)) == NULL) { 23719304Speter msgq(sp, M_SYSERR, NULL); 23819304Speter FREE_SPACE(sp, bp, blen); 23919304Speter return (1); 24019304Speter } 24119304Speter memcpy(sp->repl, bp, len); 24219304Speter } 24319304Speter FREE_SPACE(sp, bp, blen); 24419304Speter } 24519304Speter return (s(sp, cmdp, p, re, flags)); 24619304Speter} 24719304Speter 24819304Speter/* 24919304Speter * ex_subagain -- 25019304Speter * [line [,line]] & [cgr] [count] [#lp]] 25119304Speter * 25219304Speter * Substitute using the last substitute RE and replacement pattern. 25319304Speter * 25419304Speter * PUBLIC: int ex_subagain __P((SCR *, EXCMD *)); 25519304Speter */ 25619304Speterint 25719304Speterex_subagain(sp, cmdp) 25819304Speter SCR *sp; 25919304Speter EXCMD *cmdp; 26019304Speter{ 26119304Speter if (sp->subre == NULL) { 26219304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 26319304Speter return (1); 26419304Speter } 26519304Speter if (!F_ISSET(sp, SC_RE_SUBST) && re_compile(sp, 26619304Speter sp->subre, sp->subre_len, NULL, NULL, &sp->subre_c, RE_C_SUBST)) 26719304Speter return (1); 26819304Speter return (s(sp, 26919304Speter cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->subre_c, 0)); 27019304Speter} 27119304Speter 27219304Speter/* 27319304Speter * ex_subtilde -- 27419304Speter * [line [,line]] ~ [cgr] [count] [#lp]] 27519304Speter * 27619304Speter * Substitute using the last RE and last substitute replacement pattern. 27719304Speter * 27819304Speter * PUBLIC: int ex_subtilde __P((SCR *, EXCMD *)); 27919304Speter */ 28019304Speterint 28119304Speterex_subtilde(sp, cmdp) 28219304Speter SCR *sp; 28319304Speter EXCMD *cmdp; 28419304Speter{ 28519304Speter if (sp->re == NULL) { 28619304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 28719304Speter return (1); 28819304Speter } 28919304Speter if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 29019304Speter sp->re, sp->re_len, NULL, NULL, &sp->re_c, RE_C_SEARCH)) 29119304Speter return (1); 29219304Speter return (s(sp, 29319304Speter cmdp, cmdp->argc ? cmdp->argv[0]->bp : NULL, &sp->re_c, 0)); 29419304Speter} 29519304Speter 29619304Speter/* 29719304Speter * s -- 29819304Speter * Do the substitution. This stuff is *really* tricky. There are lots of 29919304Speter * special cases, and general nastiness. Don't mess with it unless you're 30019304Speter * pretty confident. 30119304Speter * 30219304Speter * The nasty part of the substitution is what happens when the replacement 30319304Speter * string contains newlines. It's a bit tricky -- consider the information 30419304Speter * that has to be retained for "s/f\(o\)o/^M\1^M\1/". The solution here is 30519304Speter * to build a set of newline offsets which we use to break the line up later, 30619304Speter * when the replacement is done. Don't change it unless you're *damned* 30719304Speter * confident. 30819304Speter */ 30919304Speter#define NEEDNEWLINE(sp) { \ 31019304Speter if (sp->newl_len == sp->newl_cnt) { \ 31119304Speter sp->newl_len += 25; \ 31219304Speter REALLOC(sp, sp->newl, size_t *, \ 31319304Speter sp->newl_len * sizeof(size_t)); \ 31419304Speter if (sp->newl == NULL) { \ 31519304Speter sp->newl_len = 0; \ 31619304Speter return (1); \ 31719304Speter } \ 31819304Speter } \ 31919304Speter} 32019304Speter 32119304Speter#define BUILD(sp, l, len) { \ 32219304Speter if (lbclen + (len) > lblen) { \ 32319304Speter lblen += MAX(lbclen + (len), 256); \ 32419304Speter REALLOC(sp, lb, char *, lblen); \ 32519304Speter if (lb == NULL) { \ 32619304Speter lbclen = 0; \ 32719304Speter return (1); \ 32819304Speter } \ 32919304Speter } \ 33019304Speter memcpy(lb + lbclen, l, len); \ 33119304Speter lbclen += len; \ 33219304Speter} 33319304Speter 33419304Speter#define NEEDSP(sp, len, pnt) { \ 33519304Speter if (lbclen + (len) > lblen) { \ 33619304Speter lblen += MAX(lbclen + (len), 256); \ 33719304Speter REALLOC(sp, lb, char *, lblen); \ 33819304Speter if (lb == NULL) { \ 33919304Speter lbclen = 0; \ 34019304Speter return (1); \ 34119304Speter } \ 34219304Speter pnt = lb + lbclen; \ 34319304Speter } \ 34419304Speter} 34519304Speter 34619304Speterstatic int 34719304Speters(sp, cmdp, s, re, flags) 34819304Speter SCR *sp; 34919304Speter EXCMD *cmdp; 35019304Speter char *s; 35119304Speter regex_t *re; 35219304Speter u_int flags; 35319304Speter{ 35419304Speter EVENT ev; 35519304Speter MARK from, to; 35619304Speter TEXTH tiq; 35719304Speter recno_t elno, lno, slno; 35819304Speter regmatch_t match[10]; 35919304Speter size_t blen, cnt, last, lbclen, lblen, len, llen; 36019304Speter size_t offset, saved_offset, scno; 36119304Speter int cflag, lflag, nflag, pflag, rflag; 36219304Speter int didsub, do_eol_match, eflags, empty_ok, eval; 36319304Speter int linechanged, matched, quit, rval; 36419304Speter char *bp, *lb; 36519304Speter 36619304Speter NEEDFILE(sp, cmdp); 36719304Speter 36819304Speter slno = sp->lno; 36919304Speter scno = sp->cno; 37019304Speter 37119304Speter /* 37219304Speter * !!! 37319304Speter * Historically, the 'g' and 'c' suffices were always toggled as flags, 37419304Speter * so ":s/A/B/" was the same as ":s/A/B/ccgg". If O_EDCOMPATIBLE was 37519304Speter * not set, they were initialized to 0 for all substitute commands. If 37619304Speter * O_EDCOMPATIBLE was set, they were initialized to 0 only if the user 37719304Speter * specified substitute/replacement patterns (see ex_s()). 37819304Speter */ 37919304Speter if (!O_ISSET(sp, O_EDCOMPATIBLE)) 38019304Speter sp->c_suffix = sp->g_suffix = 0; 38119304Speter 38219304Speter /* 38319304Speter * Historic vi permitted the '#', 'l' and 'p' options in vi mode, but 38419304Speter * it only displayed the last change. I'd disallow them, but they are 38519304Speter * useful in combination with the [v]global commands. In the current 38619304Speter * model the problem is combining them with the 'c' flag -- the screen 38719304Speter * would have to flip back and forth between the confirm screen and the 38819304Speter * ex print screen, which would be pretty awful. We do display all 38919304Speter * changes, though, for what that's worth. 39019304Speter * 39119304Speter * !!! 39219304Speter * Historic vi was fairly strict about the order of "options", the 39319304Speter * count, and "flags". I'm somewhat fuzzy on the difference between 39419304Speter * options and flags, anyway, so this is a simpler approach, and we 39519304Speter * just take it them in whatever order the user gives them. (The ex 39619304Speter * usage statement doesn't reflect this.) 39719304Speter */ 39819304Speter cflag = lflag = nflag = pflag = rflag = 0; 39919304Speter if (s == NULL) 40019304Speter goto noargs; 40119304Speter for (lno = OOBLNO; *s != '\0'; ++s) 40219304Speter switch (*s) { 40319304Speter case ' ': 40419304Speter case '\t': 40519304Speter continue; 40619304Speter case '+': 40719304Speter ++cmdp->flagoff; 40819304Speter break; 40919304Speter case '-': 41019304Speter --cmdp->flagoff; 41119304Speter break; 41219304Speter case '0': case '1': case '2': case '3': case '4': 41319304Speter case '5': case '6': case '7': case '8': case '9': 41419304Speter if (lno != OOBLNO) 41519304Speter goto usage; 41619304Speter errno = 0; 41719304Speter lno = strtoul(s, &s, 10); 41819304Speter if (*s == '\0') /* Loop increment correction. */ 41919304Speter --s; 42019304Speter if (errno == ERANGE) { 42119304Speter if (lno == LONG_MAX) 42219304Speter msgq(sp, M_ERR, "153|Count overflow"); 42319304Speter else if (lno == LONG_MIN) 42419304Speter msgq(sp, M_ERR, "154|Count underflow"); 42519304Speter else 42619304Speter msgq(sp, M_SYSERR, NULL); 42719304Speter return (1); 42819304Speter } 42919304Speter /* 43019304Speter * In historic vi, the count was inclusive from the 43119304Speter * second address. 43219304Speter */ 43319304Speter cmdp->addr1.lno = cmdp->addr2.lno; 43419304Speter cmdp->addr2.lno += lno - 1; 43519304Speter if (!db_exist(sp, cmdp->addr2.lno) && 43619304Speter db_last(sp, &cmdp->addr2.lno)) 43719304Speter return (1); 43819304Speter break; 43919304Speter case '#': 44019304Speter nflag = 1; 44119304Speter break; 44219304Speter case 'c': 44319304Speter sp->c_suffix = !sp->c_suffix; 44419304Speter 44519304Speter /* Ex text structure initialization. */ 44619304Speter if (F_ISSET(sp, SC_EX)) { 44719304Speter memset(&tiq, 0, sizeof(TEXTH)); 44819304Speter CIRCLEQ_INIT(&tiq); 44919304Speter } 45019304Speter break; 45119304Speter case 'g': 45219304Speter sp->g_suffix = !sp->g_suffix; 45319304Speter break; 45419304Speter case 'l': 45519304Speter lflag = 1; 45619304Speter break; 45719304Speter case 'p': 45819304Speter pflag = 1; 45919304Speter break; 46019304Speter case 'r': 46119304Speter if (LF_ISSET(SUB_FIRST)) { 46219304Speter msgq(sp, M_ERR, 46319304Speter "155|Regular expression specified; r flag meaningless"); 46419304Speter return (1); 46519304Speter } 46619304Speter if (!F_ISSET(sp, SC_RE_SEARCH)) { 46719304Speter ex_emsg(sp, NULL, EXM_NOPREVRE); 46819304Speter return (1); 46919304Speter } 47019304Speter rflag = 1; 47119304Speter re = &sp->re_c; 47219304Speter break; 47319304Speter default: 47419304Speter goto usage; 47519304Speter } 47619304Speter 47719304Speter if (*s != '\0' || !rflag && LF_ISSET(SUB_MUSTSETR)) { 47819304Speterusage: ex_emsg(sp, cmdp->cmd->usage, EXM_USAGE); 47919304Speter return (1); 48019304Speter } 48119304Speter 48219304Speternoargs: if (F_ISSET(sp, SC_VI) && sp->c_suffix && (lflag || nflag || pflag)) { 48319304Speter msgq(sp, M_ERR, 48419304Speter"156|The #, l and p flags may not be combined with the c flag in vi mode"); 48519304Speter return (1); 48619304Speter } 48719304Speter 48819304Speter /* 48919304Speter * bp: if interactive, line cache 49019304Speter * blen: if interactive, line cache length 49119304Speter * lb: build buffer pointer. 49219304Speter * lbclen: current length of built buffer. 49319304Speter * lblen; length of build buffer. 49419304Speter */ 49519304Speter bp = lb = NULL; 49619304Speter blen = lbclen = lblen = 0; 49719304Speter 49819304Speter /* For each line... */ 49919304Speter for (matched = quit = 0, lno = cmdp->addr1.lno, 50019304Speter elno = cmdp->addr2.lno; !quit && lno <= elno; ++lno) { 50119304Speter 50219304Speter /* Someone's unhappy, time to stop. */ 50319304Speter if (INTERRUPTED(sp)) 50419304Speter break; 50519304Speter 50619304Speter /* Get the line. */ 50719304Speter if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 50819304Speter goto err; 50919304Speter 51019304Speter /* 51119304Speter * Make a local copy if doing confirmation -- when calling 51219304Speter * the confirm routine we're likely to lose the cached copy. 51319304Speter */ 51419304Speter if (sp->c_suffix) { 51519304Speter if (bp == NULL) { 51619304Speter GET_SPACE_RET(sp, bp, blen, llen); 51719304Speter } else 51819304Speter ADD_SPACE_RET(sp, bp, blen, llen); 51919304Speter memcpy(bp, s, llen); 52019304Speter s = bp; 52119304Speter } 52219304Speter 52319304Speter /* Start searching from the beginning. */ 52419304Speter offset = 0; 52519304Speter len = llen; 52619304Speter 52719304Speter /* Reset the build buffer offset. */ 52819304Speter lbclen = 0; 52919304Speter 53019304Speter /* Reset empty match flag. */ 53119304Speter empty_ok = 1; 53219304Speter 53319304Speter /* 53419304Speter * We don't want to have to do a setline if the line didn't 53519304Speter * change -- keep track of whether or not this line changed. 53619304Speter * If doing confirmations, don't want to keep setting the 53719304Speter * line if change is refused -- keep track of substitutions. 53819304Speter */ 53919304Speter didsub = linechanged = 0; 54019304Speter 54119304Speter /* New line, do an EOL match. */ 54219304Speter do_eol_match = 1; 54319304Speter 54419304Speter /* It's not nul terminated, but we pretend it is. */ 54519304Speter eflags = REG_STARTEND; 54619304Speter 54719304Speter /* 54819304Speter * The search area is from s + offset to the EOL. 54919304Speter * 55019304Speter * Generally, match[0].rm_so is the offset of the start 55119304Speter * of the match from the start of the search, and offset 55219304Speter * is the offset of the start of the last search. 55319304Speter */ 55419304Speternextmatch: match[0].rm_so = 0; 55519304Speter match[0].rm_eo = len; 55619304Speter 55719304Speter /* Get the next match. */ 55819304Speter eval = regexec(re, (char *)s + offset, 10, match, eflags); 55919304Speter 56019304Speter /* 56119304Speter * There wasn't a match or if there was an error, deal with 56219304Speter * it. If there was a previous match in this line, resolve 56319304Speter * the changes into the database. Otherwise, just move on. 56419304Speter */ 56519304Speter if (eval == REG_NOMATCH) 56619304Speter goto endmatch; 56719304Speter if (eval != 0) { 56819304Speter re_error(sp, eval, re); 56919304Speter goto err; 57019304Speter } 57119304Speter matched = 1; 57219304Speter 57319304Speter /* Only the first search can match an anchored expression. */ 57419304Speter eflags |= REG_NOTBOL; 57519304Speter 57619304Speter /* 57719304Speter * !!! 57819304Speter * It's possible to match 0-length strings -- for example, the 57919304Speter * command s;a*;X;, when matched against the string "aabb" will 58019304Speter * result in "XbXbX", i.e. the matches are "aa", the space 58119304Speter * between the b's and the space between the b's and the end of 58219304Speter * the string. There is a similar space between the beginning 58319304Speter * of the string and the a's. The rule that we use (because vi 58419304Speter * historically used it) is that any 0-length match, occurring 58519304Speter * immediately after a match, is ignored. Otherwise, the above 58619304Speter * example would have resulted in "XXbXbX". Another example is 58719304Speter * incorrectly using " *" to replace groups of spaces with one 58819304Speter * space. 58919304Speter * 59019304Speter * The way we do this is that if we just had a successful match, 59119304Speter * the starting offset does not skip characters, and the match 59219304Speter * is empty, ignore the match and move forward. If there's no 59319304Speter * more characters in the string, we were attempting to match 59419304Speter * after the last character, so quit. 59519304Speter */ 59619304Speter if (!empty_ok && match[0].rm_so == 0 && match[0].rm_eo == 0) { 59719304Speter empty_ok = 1; 59819304Speter if (len == 0) 59919304Speter goto endmatch; 60019304Speter BUILD(sp, s + offset, 1) 60119304Speter ++offset; 60219304Speter --len; 60319304Speter goto nextmatch; 60419304Speter } 60519304Speter 60619304Speter /* Confirm change. */ 60719304Speter if (sp->c_suffix) { 60819304Speter /* 60919304Speter * Set the cursor position for confirmation. Note, 61019304Speter * if we matched on a '$', the cursor may be past 61119304Speter * the end of line. 61219304Speter */ 61319304Speter from.lno = to.lno = lno; 61419304Speter from.cno = match[0].rm_so + offset; 61519304Speter to.cno = match[0].rm_eo + offset; 61619304Speter /* 61719304Speter * Both ex and vi have to correct for a change before 61819304Speter * the first character in the line. 61919304Speter */ 62019304Speter if (llen == 0) 62119304Speter from.cno = to.cno = 0; 62219304Speter if (F_ISSET(sp, SC_VI)) { 62319304Speter /* 62419304Speter * Only vi has to correct for a change after 62519304Speter * the last character in the line. 62619304Speter * 62719304Speter * XXX 62819304Speter * It would be nice to change the vi code so 62919304Speter * that we could display a cursor past EOL. 63019304Speter */ 63119304Speter if (to.cno >= llen) 63219304Speter to.cno = llen - 1; 63319304Speter if (from.cno >= llen) 63419304Speter from.cno = llen - 1; 63519304Speter 63619304Speter sp->lno = from.lno; 63719304Speter sp->cno = from.cno; 63819304Speter if (vs_refresh(sp, 1)) 63919304Speter goto err; 64019304Speter 64119304Speter vs_update(sp, msg_cat(sp, 64219304Speter "169|Confirm change? [n]", NULL), NULL); 64319304Speter 64419304Speter if (v_event_get(sp, &ev, 0, 0)) 64519304Speter goto err; 64619304Speter switch (ev.e_event) { 64719304Speter case E_CHARACTER: 64819304Speter break; 64919304Speter case E_EOF: 65019304Speter case E_ERR: 65119304Speter case E_INTERRUPT: 65219304Speter goto lquit; 65319304Speter default: 65419304Speter v_event_err(sp, &ev); 65519304Speter goto lquit; 65619304Speter } 65719304Speter } else { 65819304Speter if (ex_print(sp, cmdp, &from, &to, 0) || 65919304Speter ex_scprint(sp, &from, &to)) 66019304Speter goto lquit; 66119304Speter if (ex_txt(sp, &tiq, 0, TXT_CR)) 66219304Speter goto err; 66319304Speter ev.e_c = tiq.cqh_first->lb[0]; 66419304Speter } 66519304Speter 66619304Speter switch (ev.e_c) { 66719304Speter case CH_YES: 66819304Speter break; 66919304Speter default: 67019304Speter case CH_NO: 67119304Speter didsub = 0; 67219304Speter BUILD(sp, s +offset, match[0].rm_eo); 67319304Speter goto skip; 67419304Speter case CH_QUIT: 67519304Speter /* Set the quit/interrupted flags. */ 67619304Speterlquit: quit = 1; 67719304Speter F_SET(sp->gp, G_INTERRUPTED); 67819304Speter 67919304Speter /* 68019304Speter * Resolve any changes, then return to (and 68119304Speter * exit from) the main loop. 68219304Speter */ 68319304Speter goto endmatch; 68419304Speter } 68519304Speter } 68619304Speter 68719304Speter /* 68819304Speter * Set the cursor to the last position changed, converting 68919304Speter * from 1-based to 0-based. 69019304Speter */ 69119304Speter sp->lno = lno; 69219304Speter sp->cno = match[0].rm_so; 69319304Speter 69419304Speter /* Copy the bytes before the match into the build buffer. */ 69519304Speter BUILD(sp, s + offset, match[0].rm_so); 69619304Speter 69719304Speter /* Substitute the matching bytes. */ 69819304Speter didsub = 1; 69919304Speter if (re_sub(sp, s + offset, &lb, &lbclen, &lblen, match)) 70019304Speter goto err; 70119304Speter 70219304Speter /* Set the change flag so we know this line was modified. */ 70319304Speter linechanged = 1; 70419304Speter 70519304Speter /* Move past the matched bytes. */ 70619304Speterskip: offset += match[0].rm_eo; 70719304Speter len -= match[0].rm_eo; 70819304Speter 70919304Speter /* A match cannot be followed by an empty pattern. */ 71019304Speter empty_ok = 0; 71119304Speter 71219304Speter /* 71319304Speter * If doing a global change with confirmation, we have to 71419304Speter * update the screen. The basic idea is to store the line 71519304Speter * so the screen update routines can find it, and restart. 71619304Speter */ 71719304Speter if (didsub && sp->c_suffix && sp->g_suffix) { 71819304Speter /* 71919304Speter * The new search offset will be the end of the 72019304Speter * modified line. 72119304Speter */ 72219304Speter saved_offset = lbclen; 72319304Speter 72419304Speter /* Copy the rest of the line. */ 72519304Speter if (len) 72619304Speter BUILD(sp, s + offset, len) 72719304Speter 72819304Speter /* Set the new offset. */ 72919304Speter offset = saved_offset; 73019304Speter 73119304Speter /* Store inserted lines, adjusting the build buffer. */ 73219304Speter last = 0; 73319304Speter if (sp->newl_cnt) { 73419304Speter for (cnt = 0; 73519304Speter cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 73619304Speter if (db_insert(sp, lno, 73719304Speter lb + last, sp->newl[cnt] - last)) 73819304Speter goto err; 73919304Speter last = sp->newl[cnt] + 1; 74019304Speter ++sp->rptlines[L_ADDED]; 74119304Speter } 74219304Speter lbclen -= last; 74319304Speter offset -= last; 74419304Speter sp->newl_cnt = 0; 74519304Speter } 74619304Speter 74719304Speter /* Store and retrieve the line. */ 74819304Speter if (db_set(sp, lno, lb + last, lbclen)) 74919304Speter goto err; 75019304Speter if (db_get(sp, lno, DBG_FATAL, &s, &llen)) 75119304Speter goto err; 75219304Speter ADD_SPACE_RET(sp, bp, blen, llen) 75319304Speter memcpy(bp, s, llen); 75419304Speter s = bp; 75519304Speter len = llen - offset; 75619304Speter 75719304Speter /* Restart the build. */ 75819304Speter lbclen = 0; 75919304Speter BUILD(sp, s, offset); 76019304Speter 76119304Speter /* 76219304Speter * If we haven't already done the after-the-string 76319304Speter * match, do one. Set REG_NOTEOL so the '$' pattern 76419304Speter * only matches once. 76519304Speter */ 76619304Speter if (!do_eol_match) 76719304Speter goto endmatch; 76819304Speter if (offset == len) { 76919304Speter do_eol_match = 0; 77019304Speter eflags |= REG_NOTEOL; 77119304Speter } 77219304Speter goto nextmatch; 77319304Speter } 77419304Speter 77519304Speter /* 77619304Speter * If it's a global: 77719304Speter * 77819304Speter * If at the end of the string, do a test for the after 77919304Speter * the string match. Set REG_NOTEOL so the '$' pattern 78019304Speter * only matches once. 78119304Speter */ 78219304Speter if (sp->g_suffix && do_eol_match) { 78319304Speter if (len == 0) { 78419304Speter do_eol_match = 0; 78519304Speter eflags |= REG_NOTEOL; 78619304Speter } 78719304Speter goto nextmatch; 78819304Speter } 78919304Speter 79019304Speterendmatch: if (!linechanged) 79119304Speter continue; 79219304Speter 79319304Speter /* Copy any remaining bytes into the build buffer. */ 79419304Speter if (len) 79519304Speter BUILD(sp, s + offset, len) 79619304Speter 79719304Speter /* Store inserted lines, adjusting the build buffer. */ 79819304Speter last = 0; 79919304Speter if (sp->newl_cnt) { 80019304Speter for (cnt = 0; 80119304Speter cnt < sp->newl_cnt; ++cnt, ++lno, ++elno) { 80219304Speter if (db_insert(sp, 80319304Speter lno, lb + last, sp->newl[cnt] - last)) 80419304Speter goto err; 80519304Speter last = sp->newl[cnt] + 1; 80619304Speter ++sp->rptlines[L_ADDED]; 80719304Speter } 80819304Speter lbclen -= last; 80919304Speter sp->newl_cnt = 0; 81019304Speter } 81119304Speter 81219304Speter /* Store the changed line. */ 81319304Speter if (db_set(sp, lno, lb + last, lbclen)) 81419304Speter goto err; 81519304Speter 81619304Speter /* Update changed line counter. */ 81719304Speter if (sp->rptlchange != lno) { 81819304Speter sp->rptlchange = lno; 81919304Speter ++sp->rptlines[L_CHANGED]; 82019304Speter } 82119304Speter 82219304Speter /* 82319304Speter * !!! 82419304Speter * Display as necessary. Historic practice is to only 82519304Speter * display the last line of a line split into multiple 82619304Speter * lines. 82719304Speter */ 82819304Speter if (lflag || nflag || pflag) { 82919304Speter from.lno = to.lno = lno; 83019304Speter from.cno = to.cno = 0; 83119304Speter if (lflag) 83219304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_LIST); 83319304Speter if (nflag) 83419304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_HASH); 83519304Speter if (pflag) 83619304Speter (void)ex_print(sp, cmdp, &from, &to, E_C_PRINT); 83719304Speter } 83819304Speter } 83919304Speter 84019304Speter /* 84119304Speter * !!! 84219304Speter * Historically, vi attempted to leave the cursor at the same place if 84319304Speter * the substitution was done at the current cursor position. Otherwise 84419304Speter * it moved it to the first non-blank of the last line changed. There 84519304Speter * were some problems: for example, :s/$/foo/ with the cursor on the 84619304Speter * last character of the line left the cursor on the last character, or 84719304Speter * the & command with multiple occurrences of the matching string in the 84819304Speter * line usually left the cursor in a fairly random position. 84919304Speter * 85019304Speter * We try to do the same thing, with the exception that if the user is 85119304Speter * doing substitution with confirmation, we move to the last line about 85219304Speter * which the user was consulted, as opposed to the last line that they 85319304Speter * actually changed. This prevents a screen flash if the user doesn't 85419304Speter * change many of the possible lines. 85519304Speter */ 85619304Speter if (!sp->c_suffix && (sp->lno != slno || sp->cno != scno)) { 85719304Speter sp->cno = 0; 85819304Speter (void)nonblank(sp, sp->lno, &sp->cno); 85919304Speter } 86019304Speter 86119304Speter /* 86219304Speter * If not in a global command, and nothing matched, say so. 86319304Speter * Else, if none of the lines displayed, put something up. 86419304Speter */ 86519304Speter rval = 0; 86619304Speter if (!matched) { 86719304Speter if (!F_ISSET(sp, SC_EX_GLOBAL)) { 86819304Speter msgq(sp, M_ERR, "157|No match found"); 86919304Speter goto err; 87019304Speter } 87119304Speter } else if (!lflag && !nflag && !pflag) 87219304Speter F_SET(cmdp, E_AUTOPRINT); 87319304Speter 87419304Speter if (0) { 87519304Spetererr: rval = 1; 87619304Speter } 87719304Speter 87819304Speter if (bp != NULL) 87919304Speter FREE_SPACE(sp, bp, blen); 88019304Speter if (lb != NULL) 88119304Speter free(lb); 88219304Speter return (rval); 88319304Speter} 88419304Speter 88519304Speter/* 88619304Speter * re_compile -- 88719304Speter * Compile the RE. 88819304Speter * 88919304Speter * PUBLIC: int re_compile __P((SCR *, 89019304Speter * PUBLIC: char *, size_t, char **, size_t *, regex_t *, u_int)); 89119304Speter */ 89219304Speterint 89319304Speterre_compile(sp, ptrn, plen, ptrnp, lenp, rep, flags) 89419304Speter SCR *sp; 89519304Speter char *ptrn, **ptrnp; 89619304Speter size_t plen, *lenp; 89719304Speter regex_t *rep; 89819304Speter u_int flags; 89919304Speter{ 90019304Speter size_t len; 90119304Speter int reflags, replaced, rval; 90219304Speter char *p; 90319304Speter 90419304Speter /* Set RE flags. */ 90519304Speter reflags = 0; 90619304Speter if (!LF_ISSET(RE_C_CSCOPE | RE_C_TAG)) { 90719304Speter if (O_ISSET(sp, O_EXTENDED)) 90819304Speter reflags |= REG_EXTENDED; 90919304Speter if (O_ISSET(sp, O_IGNORECASE)) 91019304Speter reflags |= REG_ICASE; 91119304Speter if (O_ISSET(sp, O_ICLOWER)) { 91219304Speter for (p = ptrn, len = plen; len > 0; ++p, --len) 91319304Speter if (isupper(*p)) 91419304Speter break; 91519304Speter if (len == 0) 91619304Speter reflags |= REG_ICASE; 91719304Speter } 91819304Speter } 91919304Speter 92019304Speter /* If we're replacing a saved value, clear the old one. */ 92119304Speter if (LF_ISSET(RE_C_SEARCH) && F_ISSET(sp, SC_RE_SEARCH)) { 92219304Speter regfree(&sp->re_c); 92319304Speter F_CLR(sp, SC_RE_SEARCH); 92419304Speter } 92519304Speter if (LF_ISSET(RE_C_SUBST) && F_ISSET(sp, SC_RE_SUBST)) { 92619304Speter regfree(&sp->subre_c); 92719304Speter F_CLR(sp, SC_RE_SUBST); 92819304Speter } 92919304Speter 93019304Speter /* 93119304Speter * If we're saving the string, it's a pattern we haven't seen before, 93219304Speter * so convert the vi-style RE's to POSIX 1003.2 RE's. Save a copy for 93319304Speter * later recompilation. Free any previously saved value. 93419304Speter */ 93519304Speter if (ptrnp != NULL) { 93619304Speter if (LF_ISSET(RE_C_CSCOPE)) { 93719304Speter if (re_cscope_conv(sp, &ptrn, &plen, &replaced)) 93819304Speter return (1); 93919304Speter /* 94019304Speter * XXX 94119304Speter * Currently, the match-any-<blank> expression used in 94219304Speter * re_cscope_conv() requires extended RE's. This may 94319304Speter * not be right or safe. 94419304Speter */ 94519304Speter reflags |= REG_EXTENDED; 94619304Speter } else if (LF_ISSET(RE_C_TAG)) { 94719304Speter if (re_tag_conv(sp, &ptrn, &plen, &replaced)) 94819304Speter return (1); 94919304Speter } else 95019304Speter if (re_conv(sp, &ptrn, &plen, &replaced)) 95119304Speter return (1); 95219304Speter 95319304Speter /* Discard previous pattern. */ 95419304Speter if (*ptrnp != NULL) { 95519304Speter free(*ptrnp); 95619304Speter *ptrnp = NULL; 95719304Speter } 95819304Speter if (lenp != NULL) 95919304Speter *lenp = plen; 96019304Speter 96119304Speter /* 96219304Speter * Copy the string into allocated memory. 96319304Speter * 96419304Speter * XXX 96519304Speter * Regcomp isn't 8-bit clean, so the pattern is nul-terminated 96619304Speter * for now. There's just no other solution. 96719304Speter */ 96819304Speter MALLOC(sp, *ptrnp, char *, plen + 1); 96919304Speter if (*ptrnp != NULL) { 97019304Speter memcpy(*ptrnp, ptrn, plen); 97119304Speter (*ptrnp)[plen] = '\0'; 97219304Speter } 97319304Speter 97419304Speter /* Free up conversion-routine-allocated memory. */ 97519304Speter if (replaced) 97619304Speter FREE_SPACE(sp, ptrn, 0); 97719304Speter 97819304Speter if (*ptrnp == NULL) 97919304Speter return (1); 98019304Speter 98119304Speter ptrn = *ptrnp; 98219304Speter } 98319304Speter 98419304Speter /* 98519304Speter * XXX 98619304Speter * Regcomp isn't 8-bit clean, so we just lost if the pattern 98719304Speter * contained a nul. Bummer! 98819304Speter */ 98919304Speter if ((rval = regcomp(rep, ptrn, /* plen, */ reflags)) != 0) { 99019304Speter if (!LF_ISSET(RE_C_SILENT)) 99119304Speter re_error(sp, rval, rep); 99219304Speter return (1); 99319304Speter } 99419304Speter 99519304Speter if (LF_ISSET(RE_C_SEARCH)) 99619304Speter F_SET(sp, SC_RE_SEARCH); 99719304Speter if (LF_ISSET(RE_C_SUBST)) 99819304Speter F_SET(sp, SC_RE_SUBST); 99919304Speter 100019304Speter return (0); 100119304Speter} 100219304Speter 100319304Speter/* 100419304Speter * re_conv -- 100519304Speter * Convert vi's regular expressions into something that the 100619304Speter * the POSIX 1003.2 RE functions can handle. 100719304Speter * 100819304Speter * There are three conversions we make to make vi's RE's (specifically 100919304Speter * the global, search, and substitute patterns) work with POSIX RE's. 101019304Speter * 101119304Speter * 1: If O_MAGIC is not set, strip backslashes from the magic character 101219304Speter * set (.[*~) that have them, and add them to the ones that don't. 101319304Speter * 2: If O_MAGIC is not set, the string "\~" is replaced with the text 101419304Speter * from the last substitute command's replacement string. If O_MAGIC 101519304Speter * is set, it's the string "~". 101619304Speter * 3: The pattern \<ptrn\> does "word" searches, convert it to use the 101719304Speter * new RE escapes. 101819304Speter * 101919304Speter * !!!/XXX 102019304Speter * This doesn't exactly match the historic behavior of vi because we do 102119304Speter * the ~ substitution before calling the RE engine, so magic characters 102219304Speter * in the replacement string will be expanded by the RE engine, and they 102319304Speter * weren't historically. It's a bug. 102419304Speter */ 102519304Speterstatic int 102619304Speterre_conv(sp, ptrnp, plenp, replacedp) 102719304Speter SCR *sp; 102819304Speter char **ptrnp; 102919304Speter size_t *plenp; 103019304Speter int *replacedp; 103119304Speter{ 103219304Speter size_t blen, len, needlen; 103319304Speter int magic; 103419304Speter char *bp, *p, *t; 103519304Speter 103619304Speter /* 103719304Speter * First pass through, we figure out how much space we'll need. 103819304Speter * We do it in two passes, on the grounds that most of the time 103919304Speter * the user is doing a search and won't have magic characters. 104019304Speter * That way we can skip most of the memory allocation and copies. 104119304Speter */ 104219304Speter magic = 0; 104319304Speter for (p = *ptrnp, len = *plenp, needlen = 0; len > 0; ++p, --len) 104419304Speter switch (*p) { 104519304Speter case '\\': 104619304Speter if (len > 1) { 104719304Speter --len; 104819304Speter switch (*++p) { 104919304Speter case '<': 105019304Speter magic = 1; 105119304Speter needlen += sizeof(RE_WSTART); 105219304Speter break; 105319304Speter case '>': 105419304Speter magic = 1; 105519304Speter needlen += sizeof(RE_WSTOP); 105619304Speter break; 105719304Speter case '~': 105819304Speter if (!O_ISSET(sp, O_MAGIC)) { 105919304Speter magic = 1; 106019304Speter needlen += sp->repl_len; 106119304Speter } 106219304Speter break; 106319304Speter case '.': 106419304Speter case '[': 106519304Speter case '*': 106619304Speter if (!O_ISSET(sp, O_MAGIC)) { 106719304Speter magic = 1; 106819304Speter needlen += 1; 106919304Speter } 107019304Speter break; 107119304Speter default: 107219304Speter needlen += 2; 107319304Speter } 107419304Speter } else 107519304Speter needlen += 1; 107619304Speter break; 107719304Speter case '~': 107819304Speter if (O_ISSET(sp, O_MAGIC)) { 107919304Speter magic = 1; 108019304Speter needlen += sp->repl_len; 108119304Speter } 108219304Speter break; 108319304Speter case '.': 108419304Speter case '[': 108519304Speter case '*': 108619304Speter if (!O_ISSET(sp, O_MAGIC)) { 108719304Speter magic = 1; 108819304Speter needlen += 2; 108919304Speter } 109019304Speter break; 109119304Speter default: 109219304Speter needlen += 1; 109319304Speter break; 109419304Speter } 109519304Speter 109619304Speter if (!magic) { 109719304Speter *replacedp = 0; 109819304Speter return (0); 109919304Speter } 110019304Speter 110119304Speter /* Get enough memory to hold the final pattern. */ 110219304Speter *replacedp = 1; 110319304Speter GET_SPACE_RET(sp, bp, blen, needlen); 110419304Speter 110519304Speter for (p = *ptrnp, len = *plenp, t = bp; len > 0; ++p, --len) 110619304Speter switch (*p) { 110719304Speter case '\\': 110819304Speter if (len > 1) { 110919304Speter --len; 111019304Speter switch (*++p) { 111119304Speter case '<': 111219304Speter memcpy(t, 111319304Speter RE_WSTART, sizeof(RE_WSTART) - 1); 111419304Speter t += sizeof(RE_WSTART) - 1; 111519304Speter break; 111619304Speter case '>': 111719304Speter memcpy(t, 111819304Speter RE_WSTOP, sizeof(RE_WSTOP) - 1); 111919304Speter t += sizeof(RE_WSTOP) - 1; 112019304Speter break; 112119304Speter case '~': 112219304Speter if (O_ISSET(sp, O_MAGIC)) 112319304Speter *t++ = '~'; 112419304Speter else { 112519304Speter memcpy(t, 112619304Speter sp->repl, sp->repl_len); 112719304Speter t += sp->repl_len; 112819304Speter } 112919304Speter break; 113019304Speter case '.': 113119304Speter case '[': 113219304Speter case '*': 113319304Speter if (O_ISSET(sp, O_MAGIC)) 113419304Speter *t++ = '\\'; 113519304Speter *t++ = *p; 113619304Speter break; 113719304Speter default: 113819304Speter *t++ = '\\'; 113919304Speter *t++ = *p; 114019304Speter } 114119304Speter } else 114219304Speter *t++ = '\\'; 114319304Speter break; 114419304Speter case '~': 114519304Speter if (O_ISSET(sp, O_MAGIC)) { 114619304Speter memcpy(t, sp->repl, sp->repl_len); 114719304Speter t += sp->repl_len; 114819304Speter } else 114919304Speter *t++ = '~'; 115019304Speter break; 115119304Speter case '.': 115219304Speter case '[': 115319304Speter case '*': 115419304Speter if (!O_ISSET(sp, O_MAGIC)) 115519304Speter *t++ = '\\'; 115619304Speter *t++ = *p; 115719304Speter break; 115819304Speter default: 115919304Speter *t++ = *p; 116019304Speter break; 116119304Speter } 116219304Speter 116319304Speter *ptrnp = bp; 116419304Speter *plenp = t - bp; 116519304Speter return (0); 116619304Speter} 116719304Speter 116819304Speter/* 116919304Speter * re_tag_conv -- 117019304Speter * Convert a tags search path into something that the POSIX 117119304Speter * 1003.2 RE functions can handle. 117219304Speter */ 117319304Speterstatic int 117419304Speterre_tag_conv(sp, ptrnp, plenp, replacedp) 117519304Speter SCR *sp; 117619304Speter char **ptrnp; 117719304Speter size_t *plenp; 117819304Speter int *replacedp; 117919304Speter{ 118019304Speter size_t blen, len; 118119304Speter int lastdollar; 118219304Speter char *bp, *p, *t; 118319304Speter 118419304Speter len = *plenp; 118519304Speter 118619304Speter /* Max memory usage is 2 times the length of the string. */ 118719304Speter *replacedp = 1; 118819304Speter GET_SPACE_RET(sp, bp, blen, len * 2); 118919304Speter 119019304Speter p = *ptrnp; 119119304Speter t = bp; 119219304Speter 119319304Speter /* If the last character is a '/' or '?', we just strip it. */ 119419304Speter if (len > 0 && (p[len - 1] == '/' || p[len - 1] == '?')) 119519304Speter --len; 119619304Speter 119719304Speter /* If the next-to-last or last character is a '$', it's magic. */ 119819304Speter if (len > 0 && p[len - 1] == '$') { 119919304Speter --len; 120019304Speter lastdollar = 1; 120119304Speter } else 120219304Speter lastdollar = 0; 120319304Speter 120419304Speter /* If the first character is a '/' or '?', we just strip it. */ 120519304Speter if (len > 0 && (p[0] == '/' || p[0] == '?')) { 120619304Speter ++p; 120719304Speter --len; 120819304Speter } 120919304Speter 121019304Speter /* If the first or second character is a '^', it's magic. */ 121119304Speter if (p[0] == '^') { 121219304Speter *t++ = *p++; 121319304Speter --len; 121419304Speter } 121519304Speter 121619304Speter /* 121719304Speter * Escape every other magic character we can find, meanwhile stripping 121819304Speter * the backslashes ctags inserts when escaping the search delimiter 121919304Speter * characters. 122019304Speter */ 122119304Speter for (; len > 0; --len) { 122219304Speter if (p[0] == '\\' && (p[1] == '/' || p[1] == '?')) { 122319304Speter ++p; 122419304Speter --len; 122519304Speter } else if (strchr("^.[]$*", p[0])) 122619304Speter *t++ = '\\'; 122719304Speter *t++ = *p++; 122819304Speter } 122919304Speter if (lastdollar) 123019304Speter *t++ = '$'; 123119304Speter 123219304Speter *ptrnp = bp; 123319304Speter *plenp = t - bp; 123419304Speter return (0); 123519304Speter} 123619304Speter 123719304Speter/* 123819304Speter * re_cscope_conv -- 123919304Speter * Convert a cscope search path into something that the POSIX 124019304Speter * 1003.2 RE functions can handle. 124119304Speter */ 124219304Speterstatic int 124319304Speterre_cscope_conv(sp, ptrnp, plenp, replacedp) 124419304Speter SCR *sp; 124519304Speter char **ptrnp; 124619304Speter size_t *plenp; 124719304Speter int *replacedp; 124819304Speter{ 124919304Speter size_t blen, len, nspaces; 125019304Speter char *bp, *p, *t; 125119304Speter 125219304Speter /* 125319304Speter * Each space in the source line printed by cscope represents an 125419304Speter * arbitrary sequence of spaces, tabs, and comments. 125519304Speter */ 125619304Speter#define CSCOPE_RE_SPACE "([ \t]|/\\*([^*]|\\*/)*\\*/)*" 125719304Speter for (nspaces = 0, p = *ptrnp, len = *plenp; len > 0; ++p, --len) 125819304Speter if (*p == ' ') 125919304Speter ++nspaces; 126019304Speter 126119304Speter /* 126219304Speter * Allocate plenty of space: 126319304Speter * the string, plus potential escaping characters; 126419304Speter * nspaces + 2 copies of CSCOPE_RE_SPACE; 126519304Speter * ^, $, nul terminator characters. 126619304Speter */ 126719304Speter *replacedp = 1; 126819304Speter len = (p - *ptrnp) * 2 + (nspaces + 2) * sizeof(CSCOPE_RE_SPACE) + 3; 126919304Speter GET_SPACE_RET(sp, bp, blen, len); 127019304Speter 127119304Speter p = *ptrnp; 127219304Speter t = bp; 127319304Speter 127419304Speter *t++ = '^'; 127519304Speter memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 127619304Speter t += sizeof(CSCOPE_RE_SPACE) - 1; 127719304Speter 127819304Speter for (len = *plenp; len > 0; ++p, --len) 127919304Speter if (*p == ' ') { 128019304Speter memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 128119304Speter t += sizeof(CSCOPE_RE_SPACE) - 1; 128219304Speter } else { 128319304Speter if (strchr("\\^.[]$*+?()|{}", *p)) 128419304Speter *t++ = '\\'; 128519304Speter *t++ = *p; 128619304Speter } 128719304Speter 128819304Speter memcpy(t, CSCOPE_RE_SPACE, sizeof(CSCOPE_RE_SPACE) - 1); 128919304Speter t += sizeof(CSCOPE_RE_SPACE) - 1; 129019304Speter *t++ = '$'; 129119304Speter 129219304Speter *ptrnp = bp; 129319304Speter *plenp = t - bp; 129419304Speter return (0); 129519304Speter} 129619304Speter 129719304Speter/* 129819304Speter * re_error -- 129919304Speter * Report a regular expression error. 130019304Speter * 130119304Speter * PUBLIC: void re_error __P((SCR *, int, regex_t *)); 130219304Speter */ 130319304Spetervoid 130419304Speterre_error(sp, errcode, preg) 130519304Speter SCR *sp; 130619304Speter int errcode; 130719304Speter regex_t *preg; 130819304Speter{ 130919304Speter size_t s; 131019304Speter char *oe; 131119304Speter 131219304Speter s = regerror(errcode, preg, "", 0); 131319304Speter if ((oe = malloc(s)) == NULL) 131419304Speter msgq(sp, M_SYSERR, NULL); 131519304Speter else { 131619304Speter (void)regerror(errcode, preg, oe, s); 131719304Speter msgq(sp, M_ERR, "RE error: %s", oe); 131819304Speter free(oe); 131919304Speter } 132019304Speter} 132119304Speter 132219304Speter/* 132319304Speter * re_sub -- 132419304Speter * Do the substitution for a regular expression. 132519304Speter */ 132619304Speterstatic int 132719304Speterre_sub(sp, ip, lbp, lbclenp, lblenp, match) 132819304Speter SCR *sp; 132919304Speter char *ip; /* Input line. */ 133019304Speter char **lbp; 133119304Speter size_t *lbclenp, *lblenp; 133219304Speter regmatch_t match[10]; 133319304Speter{ 133419304Speter enum { C_NOTSET, C_LOWER, C_ONELOWER, C_ONEUPPER, C_UPPER } conv; 133519304Speter size_t lbclen, lblen; /* Local copies. */ 133619304Speter size_t mlen; /* Match length. */ 133719304Speter size_t rpl; /* Remaining replacement length. */ 133819304Speter char *rp; /* Replacement pointer. */ 133919304Speter int ch; 134019304Speter int no; /* Match replacement offset. */ 134119304Speter char *p, *t; /* Buffer pointers. */ 134219304Speter char *lb; /* Local copies. */ 134319304Speter 134419304Speter lb = *lbp; /* Get local copies. */ 134519304Speter lbclen = *lbclenp; 134619304Speter lblen = *lblenp; 134719304Speter 134819304Speter /* 134919304Speter * QUOTING NOTE: 135019304Speter * 135119304Speter * There are some special sequences that vi provides in the 135219304Speter * replacement patterns. 135319304Speter * & string the RE matched (\& if nomagic set) 135419304Speter * \# n-th regular subexpression 135519304Speter * \E end \U, \L conversion 135619304Speter * \e end \U, \L conversion 135719304Speter * \l convert the next character to lower-case 135819304Speter * \L convert to lower-case, until \E, \e, or end of replacement 135919304Speter * \u convert the next character to upper-case 136019304Speter * \U convert to upper-case, until \E, \e, or end of replacement 136119304Speter * 136219304Speter * Otherwise, since this is the lowest level of replacement, discard 136319304Speter * all escaping characters. This (hopefully) matches historic practice. 136419304Speter */ 136519304Speter#define OUTCH(ch, nltrans) { \ 136619304Speter CHAR_T __ch = (ch); \ 136719304Speter u_int __value = KEY_VAL(sp, __ch); \ 136819304Speter if (nltrans && (__value == K_CR || __value == K_NL)) { \ 136919304Speter NEEDNEWLINE(sp); \ 137019304Speter sp->newl[sp->newl_cnt++] = lbclen; \ 137119304Speter } else if (conv != C_NOTSET) { \ 137219304Speter switch (conv) { \ 137319304Speter case C_ONELOWER: \ 137419304Speter conv = C_NOTSET; \ 137519304Speter /* FALLTHROUGH */ \ 137619304Speter case C_LOWER: \ 137719304Speter if (isupper(__ch)) \ 137819304Speter __ch = tolower(__ch); \ 137919304Speter break; \ 138019304Speter case C_ONEUPPER: \ 138119304Speter conv = C_NOTSET; \ 138219304Speter /* FALLTHROUGH */ \ 138319304Speter case C_UPPER: \ 138419304Speter if (islower(__ch)) \ 138519304Speter __ch = toupper(__ch); \ 138619304Speter break; \ 138719304Speter default: \ 138819304Speter abort(); \ 138919304Speter } \ 139019304Speter } \ 139119304Speter NEEDSP(sp, 1, p); \ 139219304Speter *p++ = __ch; \ 139319304Speter ++lbclen; \ 139419304Speter} 139519304Speter conv = C_NOTSET; 139619304Speter for (rp = sp->repl, rpl = sp->repl_len, p = lb + lbclen; rpl--;) { 139719304Speter switch (ch = *rp++) { 139819304Speter case '&': 139919304Speter if (O_ISSET(sp, O_MAGIC)) { 140019304Speter no = 0; 140119304Speter goto subzero; 140219304Speter } 140319304Speter break; 140419304Speter case '\\': 140519304Speter if (rpl == 0) 140619304Speter break; 140719304Speter --rpl; 140819304Speter switch (ch = *rp) { 140919304Speter case '&': 141019304Speter ++rp; 141119304Speter if (!O_ISSET(sp, O_MAGIC)) { 141219304Speter no = 0; 141319304Speter goto subzero; 141419304Speter } 141519304Speter break; 141619304Speter case '0': case '1': case '2': case '3': case '4': 141719304Speter case '5': case '6': case '7': case '8': case '9': 141819304Speter no = *rp++ - '0'; 141919304Spetersubzero: if (match[no].rm_so == -1 || 142019304Speter match[no].rm_eo == -1) 142119304Speter break; 142219304Speter mlen = match[no].rm_eo - match[no].rm_so; 142319304Speter for (t = ip + match[no].rm_so; mlen--; ++t) 142419304Speter OUTCH(*t, 0); 142519304Speter continue; 142619304Speter case 'e': 142719304Speter case 'E': 142819304Speter ++rp; 142919304Speter conv = C_NOTSET; 143019304Speter continue; 143119304Speter case 'l': 143219304Speter ++rp; 143319304Speter conv = C_ONELOWER; 143419304Speter continue; 143519304Speter case 'L': 143619304Speter ++rp; 143719304Speter conv = C_LOWER; 143819304Speter continue; 143919304Speter case 'u': 144019304Speter ++rp; 144119304Speter conv = C_ONEUPPER; 144219304Speter continue; 144319304Speter case 'U': 144419304Speter ++rp; 144519304Speter conv = C_UPPER; 144619304Speter continue; 144719304Speter default: 144819304Speter ++rp; 144919304Speter break; 145019304Speter } 145119304Speter } 145219304Speter OUTCH(ch, 1); 145319304Speter } 145419304Speter 145519304Speter *lbp = lb; /* Update caller's information. */ 145619304Speter *lbclenp = lbclen; 145719304Speter *lblenp = lblen; 145819304Speter return (0); 145919304Speter} 1460