1/* $OpenBSD: search.c,v 1.15 2022/12/10 16:06:18 millert Exp $ */ 2 3/*- 4 * Copyright (c) 1992, 1993, 1994 5 * The Regents of the University of California. All rights reserved. 6 * Copyright (c) 1992, 1993, 1994, 1995, 1996 7 * Keith Bostic. All rights reserved. 8 * 9 * See the LICENSE file for redistribution information. 10 */ 11 12#include "config.h" 13 14#include <sys/types.h> 15#include <sys/queue.h> 16 17#include <bitstring.h> 18#include <ctype.h> 19#include <errno.h> 20#include <limits.h> 21#include <stdio.h> 22#include <stdlib.h> 23#include <string.h> 24#include <unistd.h> 25 26#include "common.h" 27 28typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t; 29 30static void search_msg(SCR *, smsg_t); 31static int search_init(SCR *, dir_t, char *, size_t, char **, u_int); 32 33/* 34 * search_init -- 35 * Set up a search. 36 */ 37static int 38search_init(SCR *sp, dir_t dir, char *ptrn, size_t plen, char **epp, 39 u_int flags) 40{ 41 recno_t lno; 42 int delim; 43 char *p, *t; 44 45 /* If the file is empty, it's a fast search. */ 46 if (sp->lno <= 1) { 47 if (db_last(sp, &lno)) 48 return (1); 49 if (lno == 0) { 50 if (LF_ISSET(SEARCH_MSG)) 51 search_msg(sp, S_EMPTY); 52 return (1); 53 } 54 } 55 56 if (LF_ISSET(SEARCH_PARSE)) { /* Parse the string. */ 57 /* 58 * Use the saved pattern if no pattern specified, or if only 59 * one or two delimiter characters specified. 60 * 61 * !!! 62 * Historically, only the pattern itself was saved, vi didn't 63 * preserve addressing or delta information. 64 */ 65 if (ptrn == NULL) 66 goto prev; 67 if (plen == 1) { 68 if (epp != NULL) 69 *epp = ptrn + 1; 70 goto prev; 71 } 72 if (ptrn[0] == ptrn[1]) { 73 if (epp != NULL) 74 *epp = ptrn + 2; 75 76 /* Complain if we don't have a previous pattern. */ 77prev: if (sp->re == NULL) { 78 search_msg(sp, S_NOPREV); 79 return (1); 80 } 81 /* Re-compile the search pattern if necessary. */ 82 if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp, 83 sp->re, sp->re_len, NULL, NULL, &sp->re_c, 84 RE_C_SEARCH | 85 (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT))) 86 return (1); 87 88 /* Set the search direction. */ 89 if (LF_ISSET(SEARCH_SET)) 90 sp->searchdir = dir; 91 return (0); 92 } 93 94 /* 95 * Set the delimiter, and move forward to the terminating 96 * delimiter, handling escaped delimiters. 97 * 98 * QUOTING NOTE: 99 * Only discard an escape character if it escapes a delimiter. 100 */ 101 for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) { 102 if (--plen == 0 || p[0] == delim) { 103 if (plen != 0) 104 ++p; 105 break; 106 } 107 if (plen > 1 && p[0] == '\\') { 108 if (p[1] == delim) { 109 ++p; 110 --plen; 111 } else if (p[1] == '\\') { 112 *t++ = *p++; 113 --plen; 114 } 115 } 116 } 117 if (epp != NULL) 118 *epp = p; 119 120 plen = t - ptrn; 121 } 122 123 /* Compile the RE. */ 124 if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c, 125 RE_C_SEARCH | 126 (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) | 127 (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0))) 128 return (1); 129 130 /* Set the search direction. */ 131 if (LF_ISSET(SEARCH_SET)) 132 sp->searchdir = dir; 133 134 return (0); 135} 136 137/* 138 * f_search -- 139 * Do a forward search. 140 * 141 * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int); 142 */ 143int 144f_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn, 145 u_int flags) 146{ 147 busy_t btype; 148 recno_t lno; 149 regmatch_t match[1]; 150 size_t coff, len; 151 int cnt, eval, rval, wrapped = 0; 152 char *l; 153 154 if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags)) 155 return (1); 156 157 if (LF_ISSET(SEARCH_FILE)) { 158 lno = 1; 159 coff = 0; 160 } else { 161 if (db_get(sp, fm->lno, DBG_FATAL, &l, &len)) 162 return (1); 163 lno = fm->lno; 164 165 /* 166 * If doing incremental search, start searching at the previous 167 * column, so that we search a minimal distance and still match 168 * special patterns, e.g., \< for beginning of a word. 169 * 170 * Otherwise, start searching immediately after the cursor. If 171 * at the end of the line, start searching on the next line. 172 * This is incompatible (read bug fix) with the historic vi -- 173 * searches for the '$' pattern never moved forward, and the 174 * "-t foo" didn't work if the 'f' was the first character in 175 * the file. 176 */ 177 if (LF_ISSET(SEARCH_INCR)) { 178 if ((coff = fm->cno) != 0) 179 --coff; 180 } else if (fm->cno + 1 >= len) { 181 coff = 0; 182 lno = fm->lno + 1; 183 if (db_get(sp, lno, 0, &l, &len)) { 184 if (!O_ISSET(sp, O_WRAPSCAN)) { 185 if (LF_ISSET(SEARCH_MSG)) 186 search_msg(sp, S_EOF); 187 return (1); 188 } 189 lno = 1; 190 wrapped = 1; 191 } 192 } else 193 coff = fm->cno + 1; 194 } 195 196 btype = BUSY_ON; 197 for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) { 198 if (cnt-- == 0) { 199 if (INTERRUPTED(sp)) 200 break; 201 if (LF_ISSET(SEARCH_MSG)) { 202 search_busy(sp, btype); 203 btype = BUSY_UPDATE; 204 } 205 cnt = INTERRUPT_CHECK; 206 } 207 if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) { 208 if (wrapped) { 209 if (LF_ISSET(SEARCH_MSG)) 210 search_msg(sp, S_NOTFOUND); 211 break; 212 } 213 if (!O_ISSET(sp, O_WRAPSCAN)) { 214 if (LF_ISSET(SEARCH_MSG)) 215 search_msg(sp, S_EOF); 216 break; 217 } 218 lno = 0; 219 wrapped = 1; 220 continue; 221 } 222 223 /* If already at EOL, just keep going. */ 224 if (len != 0 && coff == len) 225 continue; 226 227 /* Set the termination. */ 228 match[0].rm_so = coff; 229 match[0].rm_eo = len; 230 231 /* Search the line. */ 232 eval = regexec(&sp->re_c, l, 1, match, 233 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND); 234 if (eval == REG_NOMATCH) 235 continue; 236 if (eval != 0) { 237 if (LF_ISSET(SEARCH_MSG)) 238 re_error(sp, eval, &sp->re_c); 239 else 240 (void)sp->gp->scr_bell(sp); 241 break; 242 } 243 244 /* Warn if the search wrapped. */ 245 if (wrapped && LF_ISSET(SEARCH_WMSG)) 246 search_msg(sp, S_WRAP); 247 248 rm->lno = lno; 249 rm->cno = match[0].rm_so; 250 251 /* 252 * If a change command, it's possible to move beyond the end 253 * of a line. Historic vi generally got this wrong (e.g. try 254 * "c?$<cr>"). Not all that sure this gets it right, there 255 * are lots of strange cases. 256 */ 257 if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len) 258 rm->cno = len != 0 ? len - 1 : 0; 259 260 rval = 0; 261 break; 262 } 263 264 if (LF_ISSET(SEARCH_MSG)) 265 search_busy(sp, BUSY_OFF); 266 return (rval); 267} 268 269/* 270 * b_search -- 271 * Do a backward search. 272 * 273 * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int); 274 */ 275int 276b_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn, 277 u_int flags) 278{ 279 busy_t btype; 280 recno_t lno; 281 regmatch_t match[1]; 282 size_t coff, last, len; 283 int cnt, eval, rval, wrapped; 284 char *l; 285 286 if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags)) 287 return (1); 288 289 /* 290 * If doing incremental search, set the "starting" position past the 291 * current column, so that we search a minimal distance and still 292 * match special patterns, e.g., \> for the end of a word. This is 293 * safe when the cursor is at the end of a line because we only use 294 * it for comparison with the location of the match. 295 * 296 * Otherwise, start searching immediately before the cursor. If in 297 * the first column, start search on the previous line. 298 */ 299 if (LF_ISSET(SEARCH_INCR)) { 300 lno = fm->lno; 301 coff = fm->cno + 1; 302 } else { 303 if (fm->cno == 0) { 304 if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) { 305 if (LF_ISSET(SEARCH_MSG)) 306 search_msg(sp, S_SOF); 307 return (1); 308 } 309 lno = fm->lno - 1; 310 } else 311 lno = fm->lno; 312 coff = fm->cno; 313 } 314 315 btype = BUSY_ON; 316 for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) { 317 if (cnt-- == 0) { 318 if (INTERRUPTED(sp)) 319 break; 320 if (LF_ISSET(SEARCH_MSG)) { 321 search_busy(sp, btype); 322 btype = BUSY_UPDATE; 323 } 324 cnt = INTERRUPT_CHECK; 325 } 326 if ((wrapped && lno < fm->lno) || lno == 0) { 327 if (wrapped) { 328 if (LF_ISSET(SEARCH_MSG)) 329 search_msg(sp, S_NOTFOUND); 330 break; 331 } 332 if (!O_ISSET(sp, O_WRAPSCAN)) { 333 if (LF_ISSET(SEARCH_MSG)) 334 search_msg(sp, S_SOF); 335 break; 336 } 337 if (db_last(sp, &lno)) 338 break; 339 if (lno == 0) { 340 if (LF_ISSET(SEARCH_MSG)) 341 search_msg(sp, S_EMPTY); 342 break; 343 } 344 ++lno; 345 wrapped = 1; 346 continue; 347 } 348 349 if (db_get(sp, lno, 0, &l, &len)) 350 break; 351 352 /* Set the termination. */ 353 match[0].rm_so = 0; 354 match[0].rm_eo = len; 355 356 /* Search the line. */ 357 eval = regexec(&sp->re_c, l, 1, match, 358 (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND); 359 if (eval == REG_NOMATCH) 360 continue; 361 if (eval != 0) { 362 if (LF_ISSET(SEARCH_MSG)) 363 re_error(sp, eval, &sp->re_c); 364 else 365 (void)sp->gp->scr_bell(sp); 366 break; 367 } 368 369 /* Check for a match starting past the cursor. */ 370 if (coff != 0 && match[0].rm_so >= coff) 371 continue; 372 373 /* Warn if the search wrapped. */ 374 if (wrapped && LF_ISSET(SEARCH_WMSG)) 375 search_msg(sp, S_WRAP); 376 377 /* 378 * We now have the first match on the line. Step through the 379 * line character by character until find the last acceptable 380 * match. This is painful, we need a better interface to regex 381 * to make this work. 382 */ 383 for (;;) { 384 last = match[0].rm_so++; 385 if (match[0].rm_so >= len) 386 break; 387 match[0].rm_eo = len; 388 eval = regexec(&sp->re_c, l, 1, match, 389 (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | 390 REG_STARTEND); 391 if (eval == REG_NOMATCH) 392 break; 393 if (eval != 0) { 394 if (LF_ISSET(SEARCH_MSG)) 395 re_error(sp, eval, &sp->re_c); 396 else 397 (void)sp->gp->scr_bell(sp); 398 goto err; 399 } 400 if (coff && match[0].rm_so >= coff) 401 break; 402 } 403 rm->lno = lno; 404 405 /* See comment in f_search(). */ 406 if (!LF_ISSET(SEARCH_EOL) && last >= len) 407 rm->cno = len != 0 ? len - 1 : 0; 408 else 409 rm->cno = last; 410 rval = 0; 411 break; 412 } 413 414err: if (LF_ISSET(SEARCH_MSG)) 415 search_busy(sp, BUSY_OFF); 416 return (rval); 417} 418 419/* 420 * search_msg -- 421 * Display one of the search messages. 422 */ 423static void 424search_msg(SCR *sp, smsg_t msg) 425{ 426 switch (msg) { 427 case S_EMPTY: 428 msgq(sp, M_ERR, "File empty; nothing to search"); 429 break; 430 case S_EOF: 431 msgq(sp, M_ERR, 432 "Reached end-of-file without finding the pattern"); 433 break; 434 case S_NOPREV: 435 msgq(sp, M_ERR, "No previous search pattern"); 436 break; 437 case S_NOTFOUND: 438 msgq(sp, M_ERR, "Pattern not found"); 439 break; 440 case S_SOF: 441 msgq(sp, M_ERR, 442 "Reached top-of-file without finding the pattern"); 443 break; 444 case S_WRAP: 445 msgq(sp, M_ERR, "Search wrapped"); 446 break; 447 default: 448 abort(); 449 } 450} 451 452/* 453 * search_busy -- 454 * Put up the busy searching message. 455 * 456 * PUBLIC: void search_busy(SCR *, busy_t); 457 */ 458void 459search_busy(SCR *sp, busy_t btype) 460{ 461 sp->gp->scr_busy(sp, "Searching...", btype); 462} 463