1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "$Id: search.c,v 10.27 2015/03/13 18:41:35 zy Exp $";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <errno.h>
23#include <limits.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27#include <unistd.h>
28
29#include "common.h"
30
31typedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
32
33static void	search_msg(SCR *, smsg_t);
34static int	search_init(SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int);
35
36/*
37 * search_init --
38 *	Set up a search.
39 */
40static int
41search_init(
42	SCR *sp,
43	dir_t dir,
44	CHAR_T *ptrn,
45	size_t plen,
46	CHAR_T **epp,
47	u_int flags)
48{
49	recno_t lno;
50	int delim;
51	CHAR_T *p, *t;
52
53	/* If the file is empty, it's a fast search. */
54	if (sp->lno <= 1) {
55		if (db_last(sp, &lno))
56			return (1);
57		if (lno == 0) {
58			if (LF_ISSET(SEARCH_MSG))
59				search_msg(sp, S_EMPTY);
60			return (1);
61		}
62	}
63
64	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
65		/*
66		 * Use the saved pattern if no pattern specified, or if only
67		 * one or two delimiter characters specified.
68		 *
69		 * !!!
70		 * Historically, only the pattern itself was saved, vi didn't
71		 * preserve addressing or delta information.
72		 */
73		if (ptrn == NULL)
74			goto prev;
75		if (plen == 1) {
76			if (epp != NULL)
77				*epp = ptrn + 1;
78			goto prev;
79		}
80		if (ptrn[0] == ptrn[1]) {
81			if (epp != NULL)
82				*epp = ptrn + 2;
83
84			/* Complain if we don't have a previous pattern. */
85prev:			if (sp->re == NULL) {
86				search_msg(sp, S_NOPREV);
87				return (1);
88			}
89			/* Re-compile the search pattern if necessary. */
90			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
91			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
92			    RE_C_SEARCH |
93			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
94				return (1);
95
96			/* Set the search direction. */
97			if (LF_ISSET(SEARCH_SET))
98				sp->searchdir = dir;
99			return (0);
100		}
101
102		/*
103		 * Set the delimiter, and move forward to the terminating
104		 * delimiter, handling escaped delimiters.
105		 *
106		 * QUOTING NOTE:
107		 * Only discard an escape character if it escapes a delimiter.
108		 */
109		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
110			if (--plen == 0 || p[0] == delim) {
111				if (plen != 0)
112					++p;
113				break;
114			}
115			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
116				++p;
117				--plen;
118			}
119		}
120		if (epp != NULL)
121			*epp = p;
122
123		plen = t - ptrn;
124	}
125
126	/* Compile the RE. */
127	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
128	    RE_C_SEARCH |
129	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
130	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
131	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
132		return (1);
133
134	/* Set the search direction. */
135	if (LF_ISSET(SEARCH_SET))
136		sp->searchdir = dir;
137
138	return (0);
139}
140
141/*
142 * f_search --
143 *	Do a forward search.
144 *
145 * PUBLIC: int f_search(SCR *,
146 * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
147 */
148int
149f_search(
150	SCR *sp,
151	MARK *fm,
152	MARK *rm,
153	CHAR_T *ptrn,
154	size_t plen,
155	CHAR_T **eptrn,
156	u_int flags)
157{
158	busy_t btype;
159	recno_t lno;
160	regmatch_t match[1];
161	size_t coff, len;
162	int cnt, eval, rval, wrapped = 0;
163	CHAR_T *l;
164
165	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
166		return (1);
167
168	if (LF_ISSET(SEARCH_FILE)) {
169		lno = 1;
170		coff = 0;
171	} else {
172		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
173			return (1);
174		lno = fm->lno;
175
176		/*
177		 * If doing incremental search, start searching at the previous
178		 * column, so that we search a minimal distance and still match
179		 * special patterns, e.g., \< for beginning of a word.
180		 *
181		 * Otherwise, start searching immediately after the cursor.  If
182		 * at the end of the line, start searching on the next line.
183		 * This is incompatible (read bug fix) with the historic vi --
184		 * searches for the '$' pattern never moved forward, and the
185		 * "-t foo" didn't work if the 'f' was the first character in
186		 * the file.
187		 */
188		if (LF_ISSET(SEARCH_INCR)) {
189			if ((coff = fm->cno) != 0)
190				--coff;
191		} else if (fm->cno + 1 >= len) {
192			coff = 0;
193			lno = fm->lno + 1;
194			if (db_get(sp, lno, 0, &l, &len)) {
195				if (!O_ISSET(sp, O_WRAPSCAN)) {
196					if (LF_ISSET(SEARCH_MSG))
197						search_msg(sp, S_EOF);
198					return (1);
199				}
200				lno = 1;
201				wrapped = 1;
202			}
203		} else
204			coff = fm->cno + 1;
205	}
206
207	btype = BUSY_ON;
208	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
209		if (cnt-- == 0) {
210			if (INTERRUPTED(sp))
211				break;
212			if (LF_ISSET(SEARCH_MSG)) {
213				search_busy(sp, btype);
214				btype = BUSY_UPDATE;
215			}
216			cnt = INTERRUPT_CHECK;
217		}
218		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
219			if (wrapped) {
220				if (LF_ISSET(SEARCH_MSG))
221					search_msg(sp, S_NOTFOUND);
222				break;
223			}
224			if (!O_ISSET(sp, O_WRAPSCAN)) {
225				if (LF_ISSET(SEARCH_MSG))
226					search_msg(sp, S_EOF);
227				break;
228			}
229			lno = 0;
230			wrapped = 1;
231			continue;
232		}
233
234		/* If already at EOL, just keep going. */
235		if (len != 0 && coff == len)
236			continue;
237
238		/* Set the termination. */
239		match[0].rm_so = coff;
240		match[0].rm_eo = len;
241
242#if defined(DEBUG) && 0
243		TRACE(sp, "F search: %lu from %u to %u\n",
244		    lno, coff, len != 0 ? len - 1 : len);
245#endif
246		/* Search the line. */
247		eval = regexec(&sp->re_c, l, 1, match,
248		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
249		if (eval == REG_NOMATCH)
250			continue;
251		if (eval != 0) {
252			if (LF_ISSET(SEARCH_MSG))
253				re_error(sp, eval, &sp->re_c);
254			else
255				(void)sp->gp->scr_bell(sp);
256			break;
257		}
258
259		/* Warn if the search wrapped. */
260		if (wrapped && LF_ISSET(SEARCH_WMSG))
261			search_msg(sp, S_WRAP);
262
263#if defined(DEBUG) && 0
264		TRACE(sp, "F search: %qu to %qu\n",
265		    match[0].rm_so, match[0].rm_eo);
266#endif
267		rm->lno = lno;
268		rm->cno = match[0].rm_so;
269
270		/*
271		 * If a change command, it's possible to move beyond the end
272		 * of a line.  Historic vi generally got this wrong (e.g. try
273		 * "c?$<cr>").  Not all that sure this gets it right, there
274		 * are lots of strange cases.
275		 */
276		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
277			rm->cno = len != 0 ? len - 1 : 0;
278
279		rval = 0;
280		break;
281	}
282
283	if (LF_ISSET(SEARCH_MSG))
284		search_busy(sp, BUSY_OFF);
285	return (rval);
286}
287
288/*
289 * b_search --
290 *	Do a backward search.
291 *
292 * PUBLIC: int b_search(SCR *,
293 * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
294 */
295int
296b_search(
297	SCR *sp,
298	MARK *fm,
299	MARK *rm,
300	CHAR_T *ptrn,
301	size_t plen,
302	CHAR_T **eptrn,
303	u_int flags)
304{
305	busy_t btype;
306	recno_t lno;
307	regmatch_t match[1];
308	size_t coff, last, len;
309	int cnt, eval, rval, wrapped;
310	CHAR_T *l;
311
312	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
313		return (1);
314
315	/*
316	 * If doing incremental search, set the "starting" position past the
317	 * current column, so that we search a minimal distance and still
318	 * match special patterns, e.g., \> for the end of a word.  This is
319	 * safe when the cursor is at the end of a line because we only use
320	 * it for comparison with the location of the match.
321	 *
322	 * Otherwise, start searching immediately before the cursor.  If in
323	 * the first column, start search on the previous line.
324	 */
325	if (LF_ISSET(SEARCH_INCR)) {
326		lno = fm->lno;
327		coff = fm->cno + 1;
328	} else {
329		if (fm->cno == 0) {
330			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
331				if (LF_ISSET(SEARCH_MSG))
332					search_msg(sp, S_SOF);
333				return (1);
334			}
335			lno = fm->lno - 1;
336		} else
337			lno = fm->lno;
338		coff = fm->cno;
339	}
340
341	btype = BUSY_ON;
342	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
343		if (cnt-- == 0) {
344			if (INTERRUPTED(sp))
345				break;
346			if (LF_ISSET(SEARCH_MSG)) {
347				search_busy(sp, btype);
348				btype = BUSY_UPDATE;
349			}
350			cnt = INTERRUPT_CHECK;
351		}
352		if ((wrapped && lno < fm->lno) || lno == 0) {
353			if (wrapped) {
354				if (LF_ISSET(SEARCH_MSG))
355					search_msg(sp, S_NOTFOUND);
356				break;
357			}
358			if (!O_ISSET(sp, O_WRAPSCAN)) {
359				if (LF_ISSET(SEARCH_MSG))
360					search_msg(sp, S_SOF);
361				break;
362			}
363			if (db_last(sp, &lno))
364				break;
365			if (lno == 0) {
366				if (LF_ISSET(SEARCH_MSG))
367					search_msg(sp, S_EMPTY);
368				break;
369			}
370			++lno;
371			wrapped = 1;
372			continue;
373		}
374
375		if (db_get(sp, lno, 0, &l, &len))
376			break;
377
378		/* Set the termination. */
379		match[0].rm_so = 0;
380		match[0].rm_eo = len;
381
382#if defined(DEBUG) && 0
383		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
384#endif
385		/* Search the line. */
386		eval = regexec(&sp->re_c, l, 1, match,
387		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
388		if (eval == REG_NOMATCH)
389			continue;
390		if (eval != 0) {
391			if (LF_ISSET(SEARCH_MSG))
392				re_error(sp, eval, &sp->re_c);
393			else
394				(void)sp->gp->scr_bell(sp);
395			break;
396		}
397
398		/* Check for a match starting past the cursor. */
399		if (coff != 0 && match[0].rm_so >= coff)
400			continue;
401
402		/* Warn if the search wrapped. */
403		if (wrapped && LF_ISSET(SEARCH_WMSG))
404			search_msg(sp, S_WRAP);
405
406#if defined(DEBUG) && 0
407		TRACE(sp, "B found: %qu to %qu\n",
408		    match[0].rm_so, match[0].rm_eo);
409#endif
410		/*
411		 * We now have the first match on the line.  Step through the
412		 * line character by character until find the last acceptable
413		 * match.  This is painful, we need a better interface to regex
414		 * to make this work.
415		 */
416		for (;;) {
417			last = match[0].rm_so++;
418			if (match[0].rm_so >= len)
419				break;
420			match[0].rm_eo = len;
421			eval = regexec(&sp->re_c, l, 1, match,
422			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
423			    REG_STARTEND);
424			if (eval == REG_NOMATCH)
425				break;
426			if (eval != 0) {
427				if (LF_ISSET(SEARCH_MSG))
428					re_error(sp, eval, &sp->re_c);
429				else
430					(void)sp->gp->scr_bell(sp);
431				goto err;
432			}
433			if (coff && match[0].rm_so >= coff)
434				break;
435		}
436		rm->lno = lno;
437
438		/* See comment in f_search(). */
439		if (!LF_ISSET(SEARCH_EOL) && last >= len)
440			rm->cno = len != 0 ? len - 1 : 0;
441		else
442			rm->cno = last;
443		rval = 0;
444		break;
445	}
446
447err:	if (LF_ISSET(SEARCH_MSG))
448		search_busy(sp, BUSY_OFF);
449	return (rval);
450}
451
452/*
453 * search_msg --
454 *	Display one of the search messages.
455 */
456static void
457search_msg(
458	SCR *sp,
459	smsg_t msg)
460{
461	switch (msg) {
462	case S_EMPTY:
463		msgq(sp, M_ERR, "072|File empty; nothing to search");
464		break;
465	case S_EOF:
466		msgq(sp, M_ERR,
467		    "073|Reached end-of-file without finding the pattern");
468		break;
469	case S_NOPREV:
470		msgq(sp, M_ERR, "074|No previous search pattern");
471		break;
472	case S_NOTFOUND:
473		msgq(sp, M_ERR, "075|Pattern not found");
474		break;
475	case S_SOF:
476		msgq(sp, M_ERR,
477		    "076|Reached top-of-file without finding the pattern");
478		break;
479	case S_WRAP:
480		msgq(sp, M_ERR, "077|Search wrapped");
481		break;
482	default:
483		abort();
484	}
485}
486
487/*
488 * search_busy --
489 *	Put up the busy searching message.
490 *
491 * PUBLIC: void search_busy(SCR *, busy_t);
492 */
493void
494search_busy(
495	SCR *sp,
496	busy_t btype)
497{
498	sp->gp->scr_busy(sp, "078|Searching...", btype);
499}
500