1/*	$OpenBSD: search.c,v 1.15 2022/12/10 16:06:18 millert Exp $	*/
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 *	Keith Bostic.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/types.h>
15#include <sys/queue.h>
16
17#include <bitstring.h>
18#include <ctype.h>
19#include <errno.h>
20#include <limits.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24#include <unistd.h>
25
26#include "common.h"
27
28typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
29
30static void	search_msg(SCR *, smsg_t);
31static int	search_init(SCR *, dir_t, char *, size_t, char **, u_int);
32
33/*
34 * search_init --
35 *	Set up a search.
36 */
37static int
38search_init(SCR *sp, dir_t dir, char *ptrn, size_t plen, char **epp,
39    u_int flags)
40{
41	recno_t lno;
42	int delim;
43	char *p, *t;
44
45	/* If the file is empty, it's a fast search. */
46	if (sp->lno <= 1) {
47		if (db_last(sp, &lno))
48			return (1);
49		if (lno == 0) {
50			if (LF_ISSET(SEARCH_MSG))
51				search_msg(sp, S_EMPTY);
52			return (1);
53		}
54	}
55
56	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
57		/*
58		 * Use the saved pattern if no pattern specified, or if only
59		 * one or two delimiter characters specified.
60		 *
61		 * !!!
62		 * Historically, only the pattern itself was saved, vi didn't
63		 * preserve addressing or delta information.
64		 */
65		if (ptrn == NULL)
66			goto prev;
67		if (plen == 1) {
68			if (epp != NULL)
69				*epp = ptrn + 1;
70			goto prev;
71		}
72		if (ptrn[0] == ptrn[1]) {
73			if (epp != NULL)
74				*epp = ptrn + 2;
75
76			/* Complain if we don't have a previous pattern. */
77prev:			if (sp->re == NULL) {
78				search_msg(sp, S_NOPREV);
79				return (1);
80			}
81			/* Re-compile the search pattern if necessary. */
82			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
83			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
84			    RE_C_SEARCH |
85			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
86				return (1);
87
88			/* Set the search direction. */
89			if (LF_ISSET(SEARCH_SET))
90				sp->searchdir = dir;
91			return (0);
92		}
93
94		/*
95		 * Set the delimiter, and move forward to the terminating
96		 * delimiter, handling escaped delimiters.
97		 *
98		 * QUOTING NOTE:
99		 * Only discard an escape character if it escapes a delimiter.
100		 */
101		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
102			if (--plen == 0 || p[0] == delim) {
103				if (plen != 0)
104					++p;
105				break;
106			}
107			if (plen > 1 && p[0] == '\\') {
108				if (p[1] == delim) {
109					++p;
110					--plen;
111				} else if (p[1] == '\\') {
112					*t++ = *p++;
113					--plen;
114				}
115			}
116		}
117		if (epp != NULL)
118			*epp = p;
119
120		plen = t - ptrn;
121	}
122
123	/* Compile the RE. */
124	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
125	    RE_C_SEARCH |
126	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
127	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0)))
128		return (1);
129
130	/* Set the search direction. */
131	if (LF_ISSET(SEARCH_SET))
132		sp->searchdir = dir;
133
134	return (0);
135}
136
137/*
138 * f_search --
139 *	Do a forward search.
140 *
141 * PUBLIC: int f_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
142 */
143int
144f_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
145    u_int flags)
146{
147	busy_t btype;
148	recno_t lno;
149	regmatch_t match[1];
150	size_t coff, len;
151	int cnt, eval, rval, wrapped = 0;
152	char *l;
153
154	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
155		return (1);
156
157	if (LF_ISSET(SEARCH_FILE)) {
158		lno = 1;
159		coff = 0;
160	} else {
161		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
162			return (1);
163		lno = fm->lno;
164
165		/*
166		 * If doing incremental search, start searching at the previous
167		 * column, so that we search a minimal distance and still match
168		 * special patterns, e.g., \< for beginning of a word.
169		 *
170		 * Otherwise, start searching immediately after the cursor.  If
171		 * at the end of the line, start searching on the next line.
172		 * This is incompatible (read bug fix) with the historic vi --
173		 * searches for the '$' pattern never moved forward, and the
174		 * "-t foo" didn't work if the 'f' was the first character in
175		 * the file.
176		 */
177		if (LF_ISSET(SEARCH_INCR)) {
178			if ((coff = fm->cno) != 0)
179				--coff;
180		} else if (fm->cno + 1 >= len) {
181			coff = 0;
182			lno = fm->lno + 1;
183			if (db_get(sp, lno, 0, &l, &len)) {
184				if (!O_ISSET(sp, O_WRAPSCAN)) {
185					if (LF_ISSET(SEARCH_MSG))
186						search_msg(sp, S_EOF);
187					return (1);
188				}
189				lno = 1;
190				wrapped = 1;
191			}
192		} else
193			coff = fm->cno + 1;
194	}
195
196	btype = BUSY_ON;
197	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
198		if (cnt-- == 0) {
199			if (INTERRUPTED(sp))
200				break;
201			if (LF_ISSET(SEARCH_MSG)) {
202				search_busy(sp, btype);
203				btype = BUSY_UPDATE;
204			}
205			cnt = INTERRUPT_CHECK;
206		}
207		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
208			if (wrapped) {
209				if (LF_ISSET(SEARCH_MSG))
210					search_msg(sp, S_NOTFOUND);
211				break;
212			}
213			if (!O_ISSET(sp, O_WRAPSCAN)) {
214				if (LF_ISSET(SEARCH_MSG))
215					search_msg(sp, S_EOF);
216				break;
217			}
218			lno = 0;
219			wrapped = 1;
220			continue;
221		}
222
223		/* If already at EOL, just keep going. */
224		if (len != 0 && coff == len)
225			continue;
226
227		/* Set the termination. */
228		match[0].rm_so = coff;
229		match[0].rm_eo = len;
230
231		/* Search the line. */
232		eval = regexec(&sp->re_c, l, 1, match,
233		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
234		if (eval == REG_NOMATCH)
235			continue;
236		if (eval != 0) {
237			if (LF_ISSET(SEARCH_MSG))
238				re_error(sp, eval, &sp->re_c);
239			else
240				(void)sp->gp->scr_bell(sp);
241			break;
242		}
243
244		/* Warn if the search wrapped. */
245		if (wrapped && LF_ISSET(SEARCH_WMSG))
246			search_msg(sp, S_WRAP);
247
248		rm->lno = lno;
249		rm->cno = match[0].rm_so;
250
251		/*
252		 * If a change command, it's possible to move beyond the end
253		 * of a line.  Historic vi generally got this wrong (e.g. try
254		 * "c?$<cr>").  Not all that sure this gets it right, there
255		 * are lots of strange cases.
256		 */
257		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
258			rm->cno = len != 0 ? len - 1 : 0;
259
260		rval = 0;
261		break;
262	}
263
264	if (LF_ISSET(SEARCH_MSG))
265		search_busy(sp, BUSY_OFF);
266	return (rval);
267}
268
269/*
270 * b_search --
271 *	Do a backward search.
272 *
273 * PUBLIC: int b_search(SCR *, MARK *, MARK *, char *, size_t, char **, u_int);
274 */
275int
276b_search(SCR *sp, MARK *fm, MARK *rm, char *ptrn, size_t plen, char **eptrn,
277    u_int flags)
278{
279	busy_t btype;
280	recno_t lno;
281	regmatch_t match[1];
282	size_t coff, last, len;
283	int cnt, eval, rval, wrapped;
284	char *l;
285
286	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
287		return (1);
288
289	/*
290	 * If doing incremental search, set the "starting" position past the
291	 * current column, so that we search a minimal distance and still
292	 * match special patterns, e.g., \> for the end of a word.  This is
293	 * safe when the cursor is at the end of a line because we only use
294	 * it for comparison with the location of the match.
295	 *
296	 * Otherwise, start searching immediately before the cursor.  If in
297	 * the first column, start search on the previous line.
298	 */
299	if (LF_ISSET(SEARCH_INCR)) {
300		lno = fm->lno;
301		coff = fm->cno + 1;
302	} else {
303		if (fm->cno == 0) {
304			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
305				if (LF_ISSET(SEARCH_MSG))
306					search_msg(sp, S_SOF);
307				return (1);
308			}
309			lno = fm->lno - 1;
310		} else
311			lno = fm->lno;
312		coff = fm->cno;
313	}
314
315	btype = BUSY_ON;
316	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
317		if (cnt-- == 0) {
318			if (INTERRUPTED(sp))
319				break;
320			if (LF_ISSET(SEARCH_MSG)) {
321				search_busy(sp, btype);
322				btype = BUSY_UPDATE;
323			}
324			cnt = INTERRUPT_CHECK;
325		}
326		if ((wrapped && lno < fm->lno) || lno == 0) {
327			if (wrapped) {
328				if (LF_ISSET(SEARCH_MSG))
329					search_msg(sp, S_NOTFOUND);
330				break;
331			}
332			if (!O_ISSET(sp, O_WRAPSCAN)) {
333				if (LF_ISSET(SEARCH_MSG))
334					search_msg(sp, S_SOF);
335				break;
336			}
337			if (db_last(sp, &lno))
338				break;
339			if (lno == 0) {
340				if (LF_ISSET(SEARCH_MSG))
341					search_msg(sp, S_EMPTY);
342				break;
343			}
344			++lno;
345			wrapped = 1;
346			continue;
347		}
348
349		if (db_get(sp, lno, 0, &l, &len))
350			break;
351
352		/* Set the termination. */
353		match[0].rm_so = 0;
354		match[0].rm_eo = len;
355
356		/* Search the line. */
357		eval = regexec(&sp->re_c, l, 1, match,
358		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
359		if (eval == REG_NOMATCH)
360			continue;
361		if (eval != 0) {
362			if (LF_ISSET(SEARCH_MSG))
363				re_error(sp, eval, &sp->re_c);
364			else
365				(void)sp->gp->scr_bell(sp);
366			break;
367		}
368
369		/* Check for a match starting past the cursor. */
370		if (coff != 0 && match[0].rm_so >= coff)
371			continue;
372
373		/* Warn if the search wrapped. */
374		if (wrapped && LF_ISSET(SEARCH_WMSG))
375			search_msg(sp, S_WRAP);
376
377		/*
378		 * We now have the first match on the line.  Step through the
379		 * line character by character until find the last acceptable
380		 * match.  This is painful, we need a better interface to regex
381		 * to make this work.
382		 */
383		for (;;) {
384			last = match[0].rm_so++;
385			if (match[0].rm_so >= len)
386				break;
387			match[0].rm_eo = len;
388			eval = regexec(&sp->re_c, l, 1, match,
389			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
390			    REG_STARTEND);
391			if (eval == REG_NOMATCH)
392				break;
393			if (eval != 0) {
394				if (LF_ISSET(SEARCH_MSG))
395					re_error(sp, eval, &sp->re_c);
396				else
397					(void)sp->gp->scr_bell(sp);
398				goto err;
399			}
400			if (coff && match[0].rm_so >= coff)
401				break;
402		}
403		rm->lno = lno;
404
405		/* See comment in f_search(). */
406		if (!LF_ISSET(SEARCH_EOL) && last >= len)
407			rm->cno = len != 0 ? len - 1 : 0;
408		else
409			rm->cno = last;
410		rval = 0;
411		break;
412	}
413
414err:	if (LF_ISSET(SEARCH_MSG))
415		search_busy(sp, BUSY_OFF);
416	return (rval);
417}
418
419/*
420 * search_msg --
421 *	Display one of the search messages.
422 */
423static void
424search_msg(SCR *sp, smsg_t msg)
425{
426	switch (msg) {
427	case S_EMPTY:
428		msgq(sp, M_ERR, "File empty; nothing to search");
429		break;
430	case S_EOF:
431		msgq(sp, M_ERR,
432		    "Reached end-of-file without finding the pattern");
433		break;
434	case S_NOPREV:
435		msgq(sp, M_ERR, "No previous search pattern");
436		break;
437	case S_NOTFOUND:
438		msgq(sp, M_ERR, "Pattern not found");
439		break;
440	case S_SOF:
441		msgq(sp, M_ERR,
442		    "Reached top-of-file without finding the pattern");
443		break;
444	case S_WRAP:
445		msgq(sp, M_ERR, "Search wrapped");
446		break;
447	default:
448		abort();
449	}
450}
451
452/*
453 * search_busy --
454 *	Put up the busy searching message.
455 *
456 * PUBLIC: void search_busy(SCR *, busy_t);
457 */
458void
459search_busy(SCR *sp, busy_t btype)
460{
461	sp->gp->scr_busy(sp, "Searching...", btype);
462}
463