119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
13281373Sbaptstatic const char sccsid[] = "$Id: search.c,v 10.27 2015/03/13 18:41:35 zy Exp $";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
18254225Speter#include <sys/time.h>
1919304Speter
2019304Speter#include <bitstring.h>
2119304Speter#include <ctype.h>
2219304Speter#include <errno.h>
2319304Speter#include <limits.h>
2419304Speter#include <stdio.h>
2519304Speter#include <stdlib.h>
2619304Speter#include <string.h>
2719304Speter#include <unistd.h>
2819304Speter
2919304Speter#include "common.h"
3019304Speter
3119304Spetertypedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
3219304Speter
33281373Sbaptstatic void	search_msg(SCR *, smsg_t);
34281373Sbaptstatic int	search_init(SCR *, dir_t, CHAR_T *, size_t, CHAR_T **, u_int);
3519304Speter
3619304Speter/*
3719304Speter * search_init --
3819304Speter *	Set up a search.
3919304Speter */
4019304Speterstatic int
41254225Spetersearch_init(
42254225Speter	SCR *sp,
43254225Speter	dir_t dir,
44254225Speter	CHAR_T *ptrn,
45254225Speter	size_t plen,
46254225Speter	CHAR_T **epp,
47254225Speter	u_int flags)
4819304Speter{
4919304Speter	recno_t lno;
5019304Speter	int delim;
51254225Speter	CHAR_T *p, *t;
5219304Speter
5319304Speter	/* If the file is empty, it's a fast search. */
5419304Speter	if (sp->lno <= 1) {
5519304Speter		if (db_last(sp, &lno))
5619304Speter			return (1);
5719304Speter		if (lno == 0) {
5819304Speter			if (LF_ISSET(SEARCH_MSG))
5919304Speter				search_msg(sp, S_EMPTY);
6019304Speter			return (1);
6119304Speter		}
6219304Speter	}
6319304Speter
6419304Speter	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
6519304Speter		/*
6619304Speter		 * Use the saved pattern if no pattern specified, or if only
6719304Speter		 * one or two delimiter characters specified.
6819304Speter		 *
6919304Speter		 * !!!
7019304Speter		 * Historically, only the pattern itself was saved, vi didn't
7119304Speter		 * preserve addressing or delta information.
7219304Speter		 */
7319304Speter		if (ptrn == NULL)
7419304Speter			goto prev;
7519304Speter		if (plen == 1) {
7619304Speter			if (epp != NULL)
7719304Speter				*epp = ptrn + 1;
7819304Speter			goto prev;
7919304Speter		}
8019304Speter		if (ptrn[0] == ptrn[1]) {
8119304Speter			if (epp != NULL)
8219304Speter				*epp = ptrn + 2;
8319304Speter
8419304Speter			/* Complain if we don't have a previous pattern. */
8519304Speterprev:			if (sp->re == NULL) {
8619304Speter				search_msg(sp, S_NOPREV);
8719304Speter				return (1);
8819304Speter			}
8919304Speter			/* Re-compile the search pattern if necessary. */
9019304Speter			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
9119304Speter			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
9219304Speter			    RE_C_SEARCH |
9319304Speter			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
9419304Speter				return (1);
9519304Speter
9619304Speter			/* Set the search direction. */
9719304Speter			if (LF_ISSET(SEARCH_SET))
9819304Speter				sp->searchdir = dir;
9919304Speter			return (0);
10019304Speter		}
10119304Speter
10219304Speter		/*
10319304Speter		 * Set the delimiter, and move forward to the terminating
10419304Speter		 * delimiter, handling escaped delimiters.
10519304Speter		 *
10619304Speter		 * QUOTING NOTE:
10719304Speter		 * Only discard an escape character if it escapes a delimiter.
10819304Speter		 */
10919304Speter		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
11019304Speter			if (--plen == 0 || p[0] == delim) {
11119304Speter				if (plen != 0)
11219304Speter					++p;
11319304Speter				break;
11419304Speter			}
11519304Speter			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
11619304Speter				++p;
11719304Speter				--plen;
11819304Speter			}
11919304Speter		}
12019304Speter		if (epp != NULL)
12119304Speter			*epp = p;
12219304Speter
12319304Speter		plen = t - ptrn;
12419304Speter	}
12519304Speter
12619304Speter	/* Compile the RE. */
12719304Speter	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
12819304Speter	    RE_C_SEARCH |
12919304Speter	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
13019304Speter	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
13119304Speter	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
13219304Speter		return (1);
13319304Speter
13419304Speter	/* Set the search direction. */
13519304Speter	if (LF_ISSET(SEARCH_SET))
13619304Speter		sp->searchdir = dir;
13719304Speter
13819304Speter	return (0);
13919304Speter}
14019304Speter
14119304Speter/*
14219304Speter * f_search --
14319304Speter *	Do a forward search.
14419304Speter *
145281373Sbapt * PUBLIC: int f_search(SCR *,
146281373Sbapt * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
14719304Speter */
14819304Speterint
149254225Speterf_search(
150254225Speter	SCR *sp,
151254225Speter	MARK *fm,
152254225Speter	MARK *rm,
153254225Speter	CHAR_T *ptrn,
154254225Speter	size_t plen,
155254225Speter	CHAR_T **eptrn,
156254225Speter	u_int flags)
15719304Speter{
15819304Speter	busy_t btype;
15919304Speter	recno_t lno;
16019304Speter	regmatch_t match[1];
16119304Speter	size_t coff, len;
162281373Sbapt	int cnt, eval, rval, wrapped = 0;
163254225Speter	CHAR_T *l;
16419304Speter
16519304Speter	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
16619304Speter		return (1);
16719304Speter
16819304Speter	if (LF_ISSET(SEARCH_FILE)) {
16919304Speter		lno = 1;
17019304Speter		coff = 0;
17119304Speter	} else {
17219304Speter		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
17319304Speter			return (1);
17419304Speter		lno = fm->lno;
17519304Speter
17619304Speter		/*
17719304Speter		 * If doing incremental search, start searching at the previous
17819304Speter		 * column, so that we search a minimal distance and still match
17919304Speter		 * special patterns, e.g., \< for beginning of a word.
18019304Speter		 *
18119304Speter		 * Otherwise, start searching immediately after the cursor.  If
18219304Speter		 * at the end of the line, start searching on the next line.
18319304Speter		 * This is incompatible (read bug fix) with the historic vi --
18419304Speter		 * searches for the '$' pattern never moved forward, and the
18519304Speter		 * "-t foo" didn't work if the 'f' was the first character in
18619304Speter		 * the file.
18719304Speter		 */
18819304Speter		if (LF_ISSET(SEARCH_INCR)) {
18919304Speter			if ((coff = fm->cno) != 0)
19019304Speter				--coff;
19119304Speter		} else if (fm->cno + 1 >= len) {
19219304Speter			coff = 0;
19319304Speter			lno = fm->lno + 1;
19419304Speter			if (db_get(sp, lno, 0, &l, &len)) {
19519304Speter				if (!O_ISSET(sp, O_WRAPSCAN)) {
19619304Speter					if (LF_ISSET(SEARCH_MSG))
19719304Speter						search_msg(sp, S_EOF);
19819304Speter					return (1);
19919304Speter				}
20019304Speter				lno = 1;
201281373Sbapt				wrapped = 1;
20219304Speter			}
20319304Speter		} else
20419304Speter			coff = fm->cno + 1;
20519304Speter	}
20619304Speter
20719304Speter	btype = BUSY_ON;
208281373Sbapt	for (cnt = INTERRUPT_CHECK, rval = 1;; ++lno, coff = 0) {
20919304Speter		if (cnt-- == 0) {
21019304Speter			if (INTERRUPTED(sp))
21119304Speter				break;
21219304Speter			if (LF_ISSET(SEARCH_MSG)) {
21319304Speter				search_busy(sp, btype);
21419304Speter				btype = BUSY_UPDATE;
21519304Speter			}
21619304Speter			cnt = INTERRUPT_CHECK;
21719304Speter		}
218254225Speter		if ((wrapped && lno > fm->lno) || db_get(sp, lno, 0, &l, &len)) {
21919304Speter			if (wrapped) {
22019304Speter				if (LF_ISSET(SEARCH_MSG))
22119304Speter					search_msg(sp, S_NOTFOUND);
22219304Speter				break;
22319304Speter			}
22419304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
22519304Speter				if (LF_ISSET(SEARCH_MSG))
22619304Speter					search_msg(sp, S_EOF);
22719304Speter				break;
22819304Speter			}
22919304Speter			lno = 0;
23019304Speter			wrapped = 1;
23119304Speter			continue;
23219304Speter		}
23319304Speter
23419304Speter		/* If already at EOL, just keep going. */
23519304Speter		if (len != 0 && coff == len)
23619304Speter			continue;
23719304Speter
23819304Speter		/* Set the termination. */
23919304Speter		match[0].rm_so = coff;
24019304Speter		match[0].rm_eo = len;
24119304Speter
24219304Speter#if defined(DEBUG) && 0
24319304Speter		TRACE(sp, "F search: %lu from %u to %u\n",
24419304Speter		    lno, coff, len != 0 ? len - 1 : len);
24519304Speter#endif
24619304Speter		/* Search the line. */
24719304Speter		eval = regexec(&sp->re_c, l, 1, match,
24819304Speter		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
24919304Speter		if (eval == REG_NOMATCH)
25019304Speter			continue;
25119304Speter		if (eval != 0) {
25219304Speter			if (LF_ISSET(SEARCH_MSG))
25319304Speter				re_error(sp, eval, &sp->re_c);
25419304Speter			else
25519304Speter				(void)sp->gp->scr_bell(sp);
25619304Speter			break;
25719304Speter		}
25819304Speter
25919304Speter		/* Warn if the search wrapped. */
26019304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
26119304Speter			search_msg(sp, S_WRAP);
26219304Speter
26319304Speter#if defined(DEBUG) && 0
26419304Speter		TRACE(sp, "F search: %qu to %qu\n",
26519304Speter		    match[0].rm_so, match[0].rm_eo);
26619304Speter#endif
26719304Speter		rm->lno = lno;
26819304Speter		rm->cno = match[0].rm_so;
26919304Speter
27019304Speter		/*
27119304Speter		 * If a change command, it's possible to move beyond the end
27219304Speter		 * of a line.  Historic vi generally got this wrong (e.g. try
27319304Speter		 * "c?$<cr>").  Not all that sure this gets it right, there
27419304Speter		 * are lots of strange cases.
27519304Speter		 */
27619304Speter		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
27719304Speter			rm->cno = len != 0 ? len - 1 : 0;
27819304Speter
27919304Speter		rval = 0;
28019304Speter		break;
28119304Speter	}
28219304Speter
28319304Speter	if (LF_ISSET(SEARCH_MSG))
28419304Speter		search_busy(sp, BUSY_OFF);
28519304Speter	return (rval);
28619304Speter}
28719304Speter
28819304Speter/*
28919304Speter * b_search --
29019304Speter *	Do a backward search.
29119304Speter *
292281373Sbapt * PUBLIC: int b_search(SCR *,
293281373Sbapt * PUBLIC:    MARK *, MARK *, CHAR_T *, size_t, CHAR_T **, u_int);
29419304Speter */
29519304Speterint
296254225Speterb_search(
297254225Speter	SCR *sp,
298254225Speter	MARK *fm,
299254225Speter	MARK *rm,
300254225Speter	CHAR_T *ptrn,
301254225Speter	size_t plen,
302254225Speter	CHAR_T **eptrn,
303254225Speter	u_int flags)
30419304Speter{
30519304Speter	busy_t btype;
30619304Speter	recno_t lno;
30719304Speter	regmatch_t match[1];
30819304Speter	size_t coff, last, len;
30919304Speter	int cnt, eval, rval, wrapped;
310254225Speter	CHAR_T *l;
31119304Speter
31219304Speter	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
31319304Speter		return (1);
31419304Speter
31519304Speter	/*
31619304Speter	 * If doing incremental search, set the "starting" position past the
31719304Speter	 * current column, so that we search a minimal distance and still
31819304Speter	 * match special patterns, e.g., \> for the end of a word.  This is
31919304Speter	 * safe when the cursor is at the end of a line because we only use
32019304Speter	 * it for comparison with the location of the match.
32119304Speter	 *
32219304Speter	 * Otherwise, start searching immediately before the cursor.  If in
32319304Speter	 * the first column, start search on the previous line.
32419304Speter	 */
32519304Speter	if (LF_ISSET(SEARCH_INCR)) {
32619304Speter		lno = fm->lno;
32719304Speter		coff = fm->cno + 1;
32819304Speter	} else {
32919304Speter		if (fm->cno == 0) {
33019304Speter			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
33119304Speter				if (LF_ISSET(SEARCH_MSG))
33219304Speter					search_msg(sp, S_SOF);
33319304Speter				return (1);
33419304Speter			}
33519304Speter			lno = fm->lno - 1;
33619304Speter		} else
33719304Speter			lno = fm->lno;
33819304Speter		coff = fm->cno;
33919304Speter	}
34019304Speter
34119304Speter	btype = BUSY_ON;
34219304Speter	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
34319304Speter		if (cnt-- == 0) {
34419304Speter			if (INTERRUPTED(sp))
34519304Speter				break;
34619304Speter			if (LF_ISSET(SEARCH_MSG)) {
34719304Speter				search_busy(sp, btype);
34819304Speter				btype = BUSY_UPDATE;
34919304Speter			}
35019304Speter			cnt = INTERRUPT_CHECK;
35119304Speter		}
352254225Speter		if ((wrapped && lno < fm->lno) || lno == 0) {
35319304Speter			if (wrapped) {
35419304Speter				if (LF_ISSET(SEARCH_MSG))
35519304Speter					search_msg(sp, S_NOTFOUND);
35619304Speter				break;
35719304Speter			}
35819304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
35919304Speter				if (LF_ISSET(SEARCH_MSG))
36019304Speter					search_msg(sp, S_SOF);
36119304Speter				break;
36219304Speter			}
36319304Speter			if (db_last(sp, &lno))
36419304Speter				break;
36519304Speter			if (lno == 0) {
36619304Speter				if (LF_ISSET(SEARCH_MSG))
36719304Speter					search_msg(sp, S_EMPTY);
36819304Speter				break;
36919304Speter			}
37019304Speter			++lno;
37119304Speter			wrapped = 1;
37219304Speter			continue;
37319304Speter		}
37419304Speter
37519304Speter		if (db_get(sp, lno, 0, &l, &len))
37619304Speter			break;
37719304Speter
37819304Speter		/* Set the termination. */
37919304Speter		match[0].rm_so = 0;
38019304Speter		match[0].rm_eo = len;
38119304Speter
38219304Speter#if defined(DEBUG) && 0
38319304Speter		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
38419304Speter#endif
38519304Speter		/* Search the line. */
38619304Speter		eval = regexec(&sp->re_c, l, 1, match,
38719304Speter		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
38819304Speter		if (eval == REG_NOMATCH)
38919304Speter			continue;
39019304Speter		if (eval != 0) {
39119304Speter			if (LF_ISSET(SEARCH_MSG))
39219304Speter				re_error(sp, eval, &sp->re_c);
39319304Speter			else
39419304Speter				(void)sp->gp->scr_bell(sp);
39519304Speter			break;
39619304Speter		}
39719304Speter
39819304Speter		/* Check for a match starting past the cursor. */
39919304Speter		if (coff != 0 && match[0].rm_so >= coff)
40019304Speter			continue;
40119304Speter
40219304Speter		/* Warn if the search wrapped. */
40319304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
40419304Speter			search_msg(sp, S_WRAP);
40519304Speter
40619304Speter#if defined(DEBUG) && 0
40719304Speter		TRACE(sp, "B found: %qu to %qu\n",
40819304Speter		    match[0].rm_so, match[0].rm_eo);
40919304Speter#endif
41019304Speter		/*
41119304Speter		 * We now have the first match on the line.  Step through the
41219304Speter		 * line character by character until find the last acceptable
41319304Speter		 * match.  This is painful, we need a better interface to regex
41419304Speter		 * to make this work.
41519304Speter		 */
41619304Speter		for (;;) {
41719304Speter			last = match[0].rm_so++;
41819304Speter			if (match[0].rm_so >= len)
41919304Speter				break;
42019304Speter			match[0].rm_eo = len;
42119304Speter			eval = regexec(&sp->re_c, l, 1, match,
42219304Speter			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
42319304Speter			    REG_STARTEND);
42419304Speter			if (eval == REG_NOMATCH)
42519304Speter				break;
42619304Speter			if (eval != 0) {
42719304Speter				if (LF_ISSET(SEARCH_MSG))
42819304Speter					re_error(sp, eval, &sp->re_c);
42919304Speter				else
43019304Speter					(void)sp->gp->scr_bell(sp);
43119304Speter				goto err;
43219304Speter			}
43319304Speter			if (coff && match[0].rm_so >= coff)
43419304Speter				break;
43519304Speter		}
43619304Speter		rm->lno = lno;
43719304Speter
43819304Speter		/* See comment in f_search(). */
43919304Speter		if (!LF_ISSET(SEARCH_EOL) && last >= len)
44019304Speter			rm->cno = len != 0 ? len - 1 : 0;
44119304Speter		else
44219304Speter			rm->cno = last;
44319304Speter		rval = 0;
44419304Speter		break;
44519304Speter	}
44619304Speter
44719304Spetererr:	if (LF_ISSET(SEARCH_MSG))
44819304Speter		search_busy(sp, BUSY_OFF);
44919304Speter	return (rval);
45019304Speter}
45119304Speter
45219304Speter/*
45319304Speter * search_msg --
45419304Speter *	Display one of the search messages.
45519304Speter */
45619304Speterstatic void
457254225Spetersearch_msg(
458254225Speter	SCR *sp,
459254225Speter	smsg_t msg)
46019304Speter{
46119304Speter	switch (msg) {
46219304Speter	case S_EMPTY:
46319304Speter		msgq(sp, M_ERR, "072|File empty; nothing to search");
46419304Speter		break;
46519304Speter	case S_EOF:
46619304Speter		msgq(sp, M_ERR,
46719304Speter		    "073|Reached end-of-file without finding the pattern");
46819304Speter		break;
46919304Speter	case S_NOPREV:
47019304Speter		msgq(sp, M_ERR, "074|No previous search pattern");
47119304Speter		break;
47219304Speter	case S_NOTFOUND:
47319304Speter		msgq(sp, M_ERR, "075|Pattern not found");
47419304Speter		break;
47519304Speter	case S_SOF:
47619304Speter		msgq(sp, M_ERR,
47719304Speter		    "076|Reached top-of-file without finding the pattern");
47819304Speter		break;
47919304Speter	case S_WRAP:
48019304Speter		msgq(sp, M_ERR, "077|Search wrapped");
48119304Speter		break;
48219304Speter	default:
48319304Speter		abort();
48419304Speter	}
48519304Speter}
48619304Speter
48719304Speter/*
48819304Speter * search_busy --
48919304Speter *	Put up the busy searching message.
49019304Speter *
491281373Sbapt * PUBLIC: void search_busy(SCR *, busy_t);
49219304Speter */
49319304Spetervoid
494254225Spetersearch_busy(
495254225Speter	SCR *sp,
496254225Speter	busy_t btype)
49719304Speter{
49819304Speter	sp->gp->scr_busy(sp, "078|Searching...", btype);
49919304Speter}
500