1/* $OpenBSD: re_search.c,v 1.37 2023/03/08 04:43:11 guenther Exp $ */ 2 3/* This file is in the public domain. */ 4 5/* 6 * regular expression search commands for Mg 7 * 8 * This file contains functions to implement several of gnuemacs's regular 9 * expression functions for Mg. Several of the routines below are just minor 10 * re-arrangements of Mg's non-regular expression search functions. Some of 11 * them are similar in structure to the original MicroEMACS, others are 12 * modifications of Rich Ellison's code. Peter Newton re-wrote about half of 13 * them from scratch. 14 */ 15 16#ifdef REGEX 17#include <sys/queue.h> 18#include <sys/types.h> 19#include <regex.h> 20#include <signal.h> 21#include <stdio.h> 22#include <string.h> 23 24#include "def.h" 25#include "macro.h" 26 27#define SRCH_BEGIN (0) /* search sub-codes */ 28#define SRCH_FORW (-1) 29#define SRCH_BACK (-2) 30#define SRCH_NOPR (-3) 31#define SRCH_ACCM (-4) 32#define SRCH_MARK (-5) 33 34#define RE_NMATCH 10 /* max number of matches */ 35#define REPLEN 256 /* max length of replacement string */ 36 37char re_pat[NPAT]; /* regex pattern */ 38int re_srch_lastdir = SRCH_NOPR; /* last search flags */ 39int casefoldsearch = TRUE; /* does search ignore case? */ 40 41static int re_doreplace(RSIZE, char *); 42static int re_forwsrch(void); 43static int re_backsrch(void); 44static int re_readpattern(char *); 45static int killmatches(int); 46static int countmatches(int); 47 48/* 49 * Search forward. 50 * Get a search string from the user and search for it starting at ".". If 51 * found, move "." to just after the matched characters. display does all 52 * the hard stuff. If not found, it just prints a message. 53 */ 54int 55re_forwsearch(int f, int n) 56{ 57 int s; 58 59 if ((s = re_readpattern("RE Search")) != TRUE) 60 return (s); 61 if (re_forwsrch() == FALSE) { 62 dobeep(); 63 ewprintf("Search failed: \"%s\"", re_pat); 64 return (FALSE); 65 } 66 re_srch_lastdir = SRCH_FORW; 67 return (TRUE); 68} 69 70/* 71 * Reverse search. 72 * Get a search string from the user, and search, starting at "." 73 * and proceeding toward the front of the buffer. If found "." is left 74 * pointing at the first character of the pattern [the last character that 75 * was matched]. 76 */ 77int 78re_backsearch(int f, int n) 79{ 80 int s; 81 82 if ((s = re_readpattern("RE Search backward")) != TRUE) 83 return (s); 84 if (re_backsrch() == FALSE) { 85 dobeep(); 86 ewprintf("Search failed: \"%s\"", re_pat); 87 return (FALSE); 88 } 89 re_srch_lastdir = SRCH_BACK; 90 return (TRUE); 91} 92 93/* 94 * Search again, using the same search string and direction as the last search 95 * command. The direction has been saved in "srch_lastdir", so you know which 96 * way to go. 97 * 98 * XXX: This code has problems -- some incompatibility(?) with extend.c causes 99 * match to fail when it should not. 100 */ 101int 102re_searchagain(int f, int n) 103{ 104 if (re_srch_lastdir == SRCH_NOPR) { 105 dobeep(); 106 ewprintf("No last search"); 107 return (FALSE); 108 } 109 if (re_srch_lastdir == SRCH_FORW) { 110 if (re_forwsrch() == FALSE) { 111 dobeep(); 112 ewprintf("Search failed: \"%s\"", re_pat); 113 return (FALSE); 114 } 115 return (TRUE); 116 } 117 if (re_srch_lastdir == SRCH_BACK) 118 if (re_backsrch() == FALSE) { 119 dobeep(); 120 ewprintf("Search failed: \"%s\"", re_pat); 121 return (FALSE); 122 } 123 124 return (TRUE); 125} 126 127/* Compiled regex goes here-- changed only when new pattern read */ 128static regex_t regex_buff; 129static regmatch_t regex_match[RE_NMATCH]; 130 131/* 132 * Re-Query Replace. 133 * Replace strings selectively. Does a search and replace operation. 134 */ 135int 136re_queryrepl(int f, int n) 137{ 138 int rcnt = 0; /* replacements made so far */ 139 int plen, s; /* length of found string */ 140 char news[NPAT]; /* replacement string */ 141 142 if ((s = re_readpattern("RE Query replace")) != TRUE) 143 return (s); 144 if (eread("Query replace %s with: ", news, NPAT, 145 EFNUL | EFNEW | EFCR, re_pat) == NULL) 146 return (ABORT); 147 ewprintf("Query replacing %s with %s:", re_pat, news); 148 149 /* 150 * Search forward repeatedly, checking each time whether to insert 151 * or not. The "!" case makes the check always true, so it gets put 152 * into a tighter loop for efficiency. 153 */ 154 while (re_forwsrch() == TRUE) { 155retry: 156 update(CMODE); 157 switch (getkey(FALSE)) { 158 case ' ': 159 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 160 if (re_doreplace((RSIZE)plen, news) == FALSE) 161 return (FALSE); 162 rcnt++; 163 break; 164 165 case '.': 166 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 167 if (re_doreplace((RSIZE)plen, news) == FALSE) 168 return (FALSE); 169 rcnt++; 170 goto stopsearch; 171 172 case CCHR('G'): /* ^G */ 173 (void)ctrlg(FFRAND, 0); 174 goto stopsearch; 175 case CCHR('['): /* ESC */ 176 case '`': 177 goto stopsearch; 178 case '!': 179 do { 180 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 181 if (re_doreplace((RSIZE)plen, news) == FALSE) 182 return (FALSE); 183 rcnt++; 184 } while (re_forwsrch() == TRUE); 185 goto stopsearch; 186 187 case CCHR('?'): /* To not replace */ 188 break; 189 190 default: 191 ewprintf("<SP> replace, [.] rep-end, <DEL> don't, [!] repl rest <ESC> quit"); 192 goto retry; 193 } 194 } 195 196stopsearch: 197 curwp->w_rflag |= WFFULL; 198 update(CMODE); 199 if (!inmacro) { 200 if (rcnt == 0) 201 ewprintf("(No replacements done)"); 202 else if (rcnt == 1) 203 ewprintf("(1 replacement done)"); 204 else 205 ewprintf("(%d replacements done)", rcnt); 206 } 207 return (TRUE); 208} 209 210int 211re_repl(int f, int n) 212{ 213 int rcnt = 0; /* replacements made so far */ 214 int plen, s; /* length of found string */ 215 char news[NPAT]; /* replacement string */ 216 217 if ((s = re_readpattern("RE Replace")) != TRUE) 218 return (s); 219 if (eread("Replace %s with: ", news, NPAT, 220 EFNUL | EFNEW | EFCR, re_pat) == NULL) 221 return (ABORT); 222 223 while (re_forwsrch() == TRUE) { 224 plen = regex_match[0].rm_eo - regex_match[0].rm_so; 225 if (re_doreplace((RSIZE)plen, news) == FALSE) 226 return (FALSE); 227 rcnt++; 228 } 229 230 curwp->w_rflag |= WFFULL; 231 update(CMODE); 232 if (!inmacro) 233 ewprintf("(%d replacement(s) done)", rcnt); 234 235 return(TRUE); 236} 237 238/* 239 * Routine re_doreplace calls lreplace to make replacements needed by 240 * re_query replace. Its reason for existence is to deal with \1, \2. etc. 241 * plen: length to remove 242 * st: replacement string 243 */ 244static int 245re_doreplace(RSIZE plen, char *st) 246{ 247 int j, k, s, more, num, state; 248 struct line *clp; 249 char repstr[REPLEN]; 250 251 clp = curwp->w_dotp; 252 more = TRUE; 253 j = 0; 254 state = 0; 255 num = 0; 256 257 /* The following FSA parses the replacement string */ 258 while (more) { 259 switch (state) { 260 case 0: 261 if (*st == '\\') { 262 st++; 263 state = 1; 264 } else if (*st == '\0') 265 more = FALSE; 266 else { 267 repstr[j] = *st; 268 j++; 269 if (j >= REPLEN) 270 return (FALSE); 271 st++; 272 } 273 break; 274 case 1: 275 if (*st >= '0' && *st <= '9') { 276 num = *st - '0'; 277 st++; 278 state = 2; 279 } else if (*st == '\0') 280 more = FALSE; 281 else { 282 repstr[j] = *st; 283 j++; 284 if (j >= REPLEN) 285 return (FALSE); 286 st++; 287 state = 0; 288 } 289 break; 290 case 2: 291 if (*st >= '0' && *st <= '9') { 292 num = 10 * num + *st - '0'; 293 st++; 294 } else { 295 if (num >= RE_NMATCH) 296 return (FALSE); 297 k = regex_match[num].rm_eo - regex_match[num].rm_so; 298 if (j + k >= REPLEN) 299 return (FALSE); 300 bcopy(&(clp->l_text[regex_match[num].rm_so]), 301 &repstr[j], k); 302 j += k; 303 if (*st == '\0') 304 more = FALSE; 305 if (*st == '\\') { 306 st++; 307 state = 1; 308 } else { 309 repstr[j] = *st; 310 j++; 311 if (j >= REPLEN) 312 return (FALSE); 313 st++; 314 state = 0; 315 } 316 } 317 break; 318 } /* switch (state) */ 319 } /* while (more) */ 320 321 repstr[j] = '\0'; 322 s = lreplace(plen, repstr); 323 return (s); 324} 325 326/* 327 * This routine does the real work of a forward search. The pattern is 328 * sitting in the external variable "pat". If found, dot is updated, the 329 * window system is notified of the change, and TRUE is returned. If the 330 * string isn't found, FALSE is returned. 331 */ 332static int 333re_forwsrch(void) 334{ 335 int re_flags, tbo, tdotline, error; 336 struct line *clp; 337 338 clp = curwp->w_dotp; 339 tbo = curwp->w_doto; 340 tdotline = curwp->w_dotline; 341 342 if (tbo == clp->l_used) 343 /* 344 * Don't start matching past end of line -- must move to 345 * beginning of next line, unless line is empty or at 346 * end of file. 347 */ 348 if (clp != curbp->b_headp && llength(clp) != 0) { 349 clp = lforw(clp); 350 tdotline++; 351 tbo = 0; 352 } 353 /* 354 * Note this loop does not process the last line, but this editor 355 * always makes the last line empty so this is good. 356 */ 357 while (clp != (curbp->b_headp)) { 358 re_flags = REG_STARTEND; 359 if (tbo != 0) 360 re_flags |= REG_NOTBOL; 361 regex_match[0].rm_so = tbo; 362 regex_match[0].rm_eo = llength(clp); 363 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 364 RE_NMATCH, regex_match, re_flags); 365 if (error != 0) { 366 clp = lforw(clp); 367 tdotline++; 368 tbo = 0; 369 } else { 370 curwp->w_doto = regex_match[0].rm_eo; 371 curwp->w_dotp = clp; 372 curwp->w_dotline = tdotline; 373 curwp->w_rflag |= WFMOVE; 374 return (TRUE); 375 } 376 } 377 return (FALSE); 378} 379 380/* 381 * This routine does the real work of a backward search. The pattern is sitting 382 * in the external variable "re_pat". If found, dot is updated, the window 383 * system is notified of the change, and TRUE is returned. If the string isn't 384 * found, FALSE is returned. 385 */ 386static int 387re_backsrch(void) 388{ 389 struct line *clp; 390 int tbo, tdotline; 391 regmatch_t lastmatch; 392 393 clp = curwp->w_dotp; 394 tbo = curwp->w_doto; 395 tdotline = curwp->w_dotline; 396 397 /* Start search one position to the left of dot */ 398 tbo = tbo - 1; 399 if (tbo < 0) { 400 /* must move up one line */ 401 clp = lback(clp); 402 tdotline--; 403 tbo = llength(clp); 404 } 405 406 /* 407 * Note this loop does not process the last line, but this editor 408 * always makes the last line empty so this is good. 409 */ 410 while (clp != (curbp->b_headp)) { 411 regex_match[0].rm_so = 0; 412 regex_match[0].rm_eo = llength(clp); 413 lastmatch.rm_so = -1; 414 /* 415 * Keep searching until we don't match any longer. Assumes a 416 * non-match does not modify the regex_match array. We have to 417 * do this character-by-character after the first match since 418 * POSIX regexps don't give you a way to do reverse matches. 419 */ 420 while (!regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 421 RE_NMATCH, regex_match, REG_STARTEND) && 422 regex_match[0].rm_so <= tbo) { 423 memcpy(&lastmatch, ®ex_match[0], sizeof(regmatch_t)); 424 regex_match[0].rm_so++; 425 regex_match[0].rm_eo = llength(clp); 426 } 427 if (lastmatch.rm_so == -1) { 428 clp = lback(clp); 429 tdotline--; 430 tbo = llength(clp); 431 } else { 432 memcpy(®ex_match[0], &lastmatch, sizeof(regmatch_t)); 433 curwp->w_doto = regex_match[0].rm_so; 434 curwp->w_dotp = clp; 435 curwp->w_dotline = tdotline; 436 curwp->w_rflag |= WFMOVE; 437 return (TRUE); 438 } 439 } 440 return (FALSE); 441} 442 443/* 444 * Read a pattern. 445 * Stash it in the external variable "re_pat". The "pat" is 446 * not updated if the user types in an empty line. If the user typed 447 * an empty line, and there is no old pattern, it is an error. 448 * Display the old pattern, in the style of Jeff Lomicka. There is 449 * some do-it-yourself control expansion. 450 */ 451static int 452re_readpattern(char *re_prompt) 453{ 454 static int dofree = 0; 455 int flags, error, s; 456 char tpat[NPAT], *rep; 457 458 if (re_pat[0] == '\0') 459 rep = eread("%s: ", tpat, NPAT, EFNEW | EFCR, re_prompt); 460 else 461 rep = eread("%s (default %s): ", tpat, NPAT, 462 EFNUL | EFNEW | EFCR, re_prompt, re_pat); 463 if (rep == NULL) 464 return (ABORT); 465 if (rep[0] != '\0') { 466 /* New pattern given */ 467 (void)strlcpy(re_pat, tpat, sizeof(re_pat)); 468 if (casefoldsearch) 469 flags = REG_EXTENDED | REG_ICASE; 470 else 471 flags = REG_EXTENDED; 472 if (dofree) 473 regfree(®ex_buff); 474 error = regcomp(®ex_buff, re_pat, flags); 475 if (error != 0) { 476 char message[256]; 477 regerror(error, ®ex_buff, message, sizeof(message)); 478 dobeep(); 479 ewprintf("Regex Error: %s", message); 480 re_pat[0] = '\0'; 481 return (FALSE); 482 } 483 dofree = 1; 484 s = TRUE; 485 } else if (rep[0] == '\0' && re_pat[0] != '\0') 486 /* Just using old pattern */ 487 s = TRUE; 488 else 489 s = FALSE; 490 return (s); 491} 492 493/* 494 * Cause case to not matter in searches. This is the default. If called 495 * with argument cause case to matter. 496 */ 497int 498setcasefold(int f, int n) 499{ 500 if (f & FFARG) { 501 casefoldsearch = FALSE; 502 ewprintf("Case-fold-search unset"); 503 } else { 504 casefoldsearch = TRUE; 505 ewprintf("Case-fold-search set"); 506 } 507 508 /* 509 * Invalidate the regular expression pattern since I'm too lazy to 510 * recompile it. 511 */ 512 re_pat[0] = '\0'; 513 return (TRUE); 514} 515 516/* 517 * Delete all lines after dot that contain a string matching regex. 518 */ 519int 520delmatchlines(int f, int n) 521{ 522 int s; 523 524 if ((s = re_readpattern("Flush lines (containing match for regexp)")) 525 != TRUE) 526 return (s); 527 528 s = killmatches(TRUE); 529 return (s); 530} 531 532/* 533 * Delete all lines after dot that don't contain a string matching regex. 534 */ 535int 536delnonmatchlines(int f, int n) 537{ 538 int s; 539 540 if ((s = re_readpattern("Keep lines (containing match for regexp)")) 541 != TRUE) 542 return (s); 543 544 s = killmatches(FALSE); 545 return (s); 546} 547 548/* 549 * This function does the work of deleting matching lines. 550 */ 551static int 552killmatches(int cond) 553{ 554 int s, error; 555 int count = 0; 556 struct line *clp; 557 558 clp = curwp->w_dotp; 559 if (curwp->w_doto == llength(clp)) 560 /* Consider dot on next line */ 561 clp = lforw(clp); 562 563 while (clp != (curbp->b_headp)) { 564 /* see if line matches */ 565 regex_match[0].rm_so = 0; 566 regex_match[0].rm_eo = llength(clp); 567 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 568 RE_NMATCH, regex_match, REG_STARTEND); 569 570 /* Delete line when appropriate */ 571 if ((cond == FALSE && error) || (cond == TRUE && !error)) { 572 curwp->w_doto = 0; 573 curwp->w_dotp = clp; 574 count++; 575 s = ldelete(llength(clp) + 1, KNONE); 576 clp = curwp->w_dotp; 577 curwp->w_rflag |= WFMOVE; 578 if (s == FALSE) 579 return (FALSE); 580 } else 581 clp = lforw(clp); 582 } 583 584 ewprintf("%d line(s) deleted", count); 585 if (count > 0) 586 curwp->w_rflag |= WFMOVE; 587 588 return (TRUE); 589} 590 591/* 592 * Count lines matching regex. 593 */ 594int 595cntmatchlines(int f, int n) 596{ 597 int s; 598 599 if ((s = re_readpattern("Count lines (matching regexp)")) != TRUE) 600 return (s); 601 s = countmatches(TRUE); 602 603 return (s); 604} 605 606/* 607 * Count lines that fail to match regex. 608 */ 609int 610cntnonmatchlines(int f, int n) 611{ 612 int s; 613 614 if ((s = re_readpattern("Count lines (not matching regexp)")) != TRUE) 615 return (s); 616 s = countmatches(FALSE); 617 618 return (s); 619} 620 621/* 622 * This function does the work of counting matching lines. 623 */ 624int 625countmatches(int cond) 626{ 627 int error; 628 int count = 0; 629 struct line *clp; 630 631 clp = curwp->w_dotp; 632 if (curwp->w_doto == llength(clp)) 633 /* Consider dot on next line */ 634 clp = lforw(clp); 635 636 while (clp != (curbp->b_headp)) { 637 /* see if line matches */ 638 regex_match[0].rm_so = 0; 639 regex_match[0].rm_eo = llength(clp); 640 error = regexec(®ex_buff, ltext(clp) ? ltext(clp) : "", 641 RE_NMATCH, regex_match, REG_STARTEND); 642 643 /* Count line when appropriate */ 644 if ((cond == FALSE && error) || (cond == TRUE && !error)) 645 count++; 646 clp = lforw(clp); 647 } 648 649 if (cond) 650 ewprintf("Number of lines matching: %d", count); 651 else 652 ewprintf("Number of lines not matching: %d", count); 653 654 return (TRUE); 655} 656#endif /* REGEX */ 657