search.c revision 19304
119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
1319304Speterstatic const char sccsid[] = "@(#)search.c	10.25 (Berkeley) 6/30/96";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
1819304Speter
1919304Speter#include <bitstring.h>
2019304Speter#include <ctype.h>
2119304Speter#include <errno.h>
2219304Speter#include <limits.h>
2319304Speter#include <stdio.h>
2419304Speter#include <stdlib.h>
2519304Speter#include <string.h>
2619304Speter#include <unistd.h>
2719304Speter
2819304Speter#include "common.h"
2919304Speter
3019304Spetertypedef enum { S_EMPTY, S_EOF, S_NOPREV, S_NOTFOUND, S_SOF, S_WRAP } smsg_t;
3119304Speter
3219304Speterstatic void	search_msg __P((SCR *, smsg_t));
3319304Speterstatic int	search_init __P((SCR *, dir_t, char *, size_t, char **, u_int));
3419304Speter
3519304Speter/*
3619304Speter * search_init --
3719304Speter *	Set up a search.
3819304Speter */
3919304Speterstatic int
4019304Spetersearch_init(sp, dir, ptrn, plen, epp, flags)
4119304Speter	SCR *sp;
4219304Speter	dir_t dir;
4319304Speter	char *ptrn, **epp;
4419304Speter	size_t plen;
4519304Speter	u_int flags;
4619304Speter{
4719304Speter	recno_t lno;
4819304Speter	int delim;
4919304Speter	char *p, *t;
5019304Speter
5119304Speter	/* If the file is empty, it's a fast search. */
5219304Speter	if (sp->lno <= 1) {
5319304Speter		if (db_last(sp, &lno))
5419304Speter			return (1);
5519304Speter		if (lno == 0) {
5619304Speter			if (LF_ISSET(SEARCH_MSG))
5719304Speter				search_msg(sp, S_EMPTY);
5819304Speter			return (1);
5919304Speter		}
6019304Speter	}
6119304Speter
6219304Speter	if (LF_ISSET(SEARCH_PARSE)) {		/* Parse the string. */
6319304Speter		/*
6419304Speter		 * Use the saved pattern if no pattern specified, or if only
6519304Speter		 * one or two delimiter characters specified.
6619304Speter		 *
6719304Speter		 * !!!
6819304Speter		 * Historically, only the pattern itself was saved, vi didn't
6919304Speter		 * preserve addressing or delta information.
7019304Speter		 */
7119304Speter		if (ptrn == NULL)
7219304Speter			goto prev;
7319304Speter		if (plen == 1) {
7419304Speter			if (epp != NULL)
7519304Speter				*epp = ptrn + 1;
7619304Speter			goto prev;
7719304Speter		}
7819304Speter		if (ptrn[0] == ptrn[1]) {
7919304Speter			if (epp != NULL)
8019304Speter				*epp = ptrn + 2;
8119304Speter
8219304Speter			/* Complain if we don't have a previous pattern. */
8319304Speterprev:			if (sp->re == NULL) {
8419304Speter				search_msg(sp, S_NOPREV);
8519304Speter				return (1);
8619304Speter			}
8719304Speter			/* Re-compile the search pattern if necessary. */
8819304Speter			if (!F_ISSET(sp, SC_RE_SEARCH) && re_compile(sp,
8919304Speter			    sp->re, sp->re_len, NULL, NULL, &sp->re_c,
9019304Speter			    RE_C_SEARCH |
9119304Speter			    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT)))
9219304Speter				return (1);
9319304Speter
9419304Speter			/* Set the search direction. */
9519304Speter			if (LF_ISSET(SEARCH_SET))
9619304Speter				sp->searchdir = dir;
9719304Speter			return (0);
9819304Speter		}
9919304Speter
10019304Speter		/*
10119304Speter		 * Set the delimiter, and move forward to the terminating
10219304Speter		 * delimiter, handling escaped delimiters.
10319304Speter		 *
10419304Speter		 * QUOTING NOTE:
10519304Speter		 * Only discard an escape character if it escapes a delimiter.
10619304Speter		 */
10719304Speter		for (delim = *ptrn, p = t = ++ptrn;; *t++ = *p++) {
10819304Speter			if (--plen == 0 || p[0] == delim) {
10919304Speter				if (plen != 0)
11019304Speter					++p;
11119304Speter				break;
11219304Speter			}
11319304Speter			if (plen > 1 && p[0] == '\\' && p[1] == delim) {
11419304Speter				++p;
11519304Speter				--plen;
11619304Speter			}
11719304Speter		}
11819304Speter		if (epp != NULL)
11919304Speter			*epp = p;
12019304Speter
12119304Speter		plen = t - ptrn;
12219304Speter	}
12319304Speter
12419304Speter	/* Compile the RE. */
12519304Speter	if (re_compile(sp, ptrn, plen, &sp->re, &sp->re_len, &sp->re_c,
12619304Speter	    RE_C_SEARCH |
12719304Speter	    (LF_ISSET(SEARCH_MSG) ? 0 : RE_C_SILENT) |
12819304Speter	    (LF_ISSET(SEARCH_TAG) ? RE_C_TAG : 0) |
12919304Speter	    (LF_ISSET(SEARCH_CSCOPE) ? RE_C_CSCOPE : 0)))
13019304Speter		return (1);
13119304Speter
13219304Speter	/* Set the search direction. */
13319304Speter	if (LF_ISSET(SEARCH_SET))
13419304Speter		sp->searchdir = dir;
13519304Speter
13619304Speter	return (0);
13719304Speter}
13819304Speter
13919304Speter/*
14019304Speter * f_search --
14119304Speter *	Do a forward search.
14219304Speter *
14319304Speter * PUBLIC: int f_search __P((SCR *,
14419304Speter * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
14519304Speter */
14619304Speterint
14719304Speterf_search(sp, fm, rm, ptrn, plen, eptrn, flags)
14819304Speter	SCR *sp;
14919304Speter	MARK *fm, *rm;
15019304Speter	char *ptrn, **eptrn;
15119304Speter	size_t plen;
15219304Speter	u_int flags;
15319304Speter{
15419304Speter	busy_t btype;
15519304Speter	recno_t lno;
15619304Speter	regmatch_t match[1];
15719304Speter	size_t coff, len;
15819304Speter	int cnt, eval, rval, wrapped;
15919304Speter	char *l;
16019304Speter
16119304Speter	if (search_init(sp, FORWARD, ptrn, plen, eptrn, flags))
16219304Speter		return (1);
16319304Speter
16419304Speter	if (LF_ISSET(SEARCH_FILE)) {
16519304Speter		lno = 1;
16619304Speter		coff = 0;
16719304Speter	} else {
16819304Speter		if (db_get(sp, fm->lno, DBG_FATAL, &l, &len))
16919304Speter			return (1);
17019304Speter		lno = fm->lno;
17119304Speter
17219304Speter		/*
17319304Speter		 * If doing incremental search, start searching at the previous
17419304Speter		 * column, so that we search a minimal distance and still match
17519304Speter		 * special patterns, e.g., \< for beginning of a word.
17619304Speter		 *
17719304Speter		 * Otherwise, start searching immediately after the cursor.  If
17819304Speter		 * at the end of the line, start searching on the next line.
17919304Speter		 * This is incompatible (read bug fix) with the historic vi --
18019304Speter		 * searches for the '$' pattern never moved forward, and the
18119304Speter		 * "-t foo" didn't work if the 'f' was the first character in
18219304Speter		 * the file.
18319304Speter		 */
18419304Speter		if (LF_ISSET(SEARCH_INCR)) {
18519304Speter			if ((coff = fm->cno) != 0)
18619304Speter				--coff;
18719304Speter		} else if (fm->cno + 1 >= len) {
18819304Speter			coff = 0;
18919304Speter			lno = fm->lno + 1;
19019304Speter			if (db_get(sp, lno, 0, &l, &len)) {
19119304Speter				if (!O_ISSET(sp, O_WRAPSCAN)) {
19219304Speter					if (LF_ISSET(SEARCH_MSG))
19319304Speter						search_msg(sp, S_EOF);
19419304Speter					return (1);
19519304Speter				}
19619304Speter				lno = 1;
19719304Speter			}
19819304Speter		} else
19919304Speter			coff = fm->cno + 1;
20019304Speter	}
20119304Speter
20219304Speter	btype = BUSY_ON;
20319304Speter	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; ++lno, coff = 0) {
20419304Speter		if (cnt-- == 0) {
20519304Speter			if (INTERRUPTED(sp))
20619304Speter				break;
20719304Speter			if (LF_ISSET(SEARCH_MSG)) {
20819304Speter				search_busy(sp, btype);
20919304Speter				btype = BUSY_UPDATE;
21019304Speter			}
21119304Speter			cnt = INTERRUPT_CHECK;
21219304Speter		}
21319304Speter		if (wrapped && lno > fm->lno || db_get(sp, lno, 0, &l, &len)) {
21419304Speter			if (wrapped) {
21519304Speter				if (LF_ISSET(SEARCH_MSG))
21619304Speter					search_msg(sp, S_NOTFOUND);
21719304Speter				break;
21819304Speter			}
21919304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
22019304Speter				if (LF_ISSET(SEARCH_MSG))
22119304Speter					search_msg(sp, S_EOF);
22219304Speter				break;
22319304Speter			}
22419304Speter			lno = 0;
22519304Speter			wrapped = 1;
22619304Speter			continue;
22719304Speter		}
22819304Speter
22919304Speter		/* If already at EOL, just keep going. */
23019304Speter		if (len != 0 && coff == len)
23119304Speter			continue;
23219304Speter
23319304Speter		/* Set the termination. */
23419304Speter		match[0].rm_so = coff;
23519304Speter		match[0].rm_eo = len;
23619304Speter
23719304Speter#if defined(DEBUG) && 0
23819304Speter		TRACE(sp, "F search: %lu from %u to %u\n",
23919304Speter		    lno, coff, len != 0 ? len - 1 : len);
24019304Speter#endif
24119304Speter		/* Search the line. */
24219304Speter		eval = regexec(&sp->re_c, l, 1, match,
24319304Speter		    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) | REG_STARTEND);
24419304Speter		if (eval == REG_NOMATCH)
24519304Speter			continue;
24619304Speter		if (eval != 0) {
24719304Speter			if (LF_ISSET(SEARCH_MSG))
24819304Speter				re_error(sp, eval, &sp->re_c);
24919304Speter			else
25019304Speter				(void)sp->gp->scr_bell(sp);
25119304Speter			break;
25219304Speter		}
25319304Speter
25419304Speter		/* Warn if the search wrapped. */
25519304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
25619304Speter			search_msg(sp, S_WRAP);
25719304Speter
25819304Speter#if defined(DEBUG) && 0
25919304Speter		TRACE(sp, "F search: %qu to %qu\n",
26019304Speter		    match[0].rm_so, match[0].rm_eo);
26119304Speter#endif
26219304Speter		rm->lno = lno;
26319304Speter		rm->cno = match[0].rm_so;
26419304Speter
26519304Speter		/*
26619304Speter		 * If a change command, it's possible to move beyond the end
26719304Speter		 * of a line.  Historic vi generally got this wrong (e.g. try
26819304Speter		 * "c?$<cr>").  Not all that sure this gets it right, there
26919304Speter		 * are lots of strange cases.
27019304Speter		 */
27119304Speter		if (!LF_ISSET(SEARCH_EOL) && rm->cno >= len)
27219304Speter			rm->cno = len != 0 ? len - 1 : 0;
27319304Speter
27419304Speter		rval = 0;
27519304Speter		break;
27619304Speter	}
27719304Speter
27819304Speter	if (LF_ISSET(SEARCH_MSG))
27919304Speter		search_busy(sp, BUSY_OFF);
28019304Speter	return (rval);
28119304Speter}
28219304Speter
28319304Speter/*
28419304Speter * b_search --
28519304Speter *	Do a backward search.
28619304Speter *
28719304Speter * PUBLIC: int b_search __P((SCR *,
28819304Speter * PUBLIC:    MARK *, MARK *, char *, size_t, char **, u_int));
28919304Speter */
29019304Speterint
29119304Speterb_search(sp, fm, rm, ptrn, plen, eptrn, flags)
29219304Speter	SCR *sp;
29319304Speter	MARK *fm, *rm;
29419304Speter	char *ptrn, **eptrn;
29519304Speter	size_t plen;
29619304Speter	u_int flags;
29719304Speter{
29819304Speter	busy_t btype;
29919304Speter	recno_t lno;
30019304Speter	regmatch_t match[1];
30119304Speter	size_t coff, last, len;
30219304Speter	int cnt, eval, rval, wrapped;
30319304Speter	char *l;
30419304Speter
30519304Speter	if (search_init(sp, BACKWARD, ptrn, plen, eptrn, flags))
30619304Speter		return (1);
30719304Speter
30819304Speter	/*
30919304Speter	 * If doing incremental search, set the "starting" position past the
31019304Speter	 * current column, so that we search a minimal distance and still
31119304Speter	 * match special patterns, e.g., \> for the end of a word.  This is
31219304Speter	 * safe when the cursor is at the end of a line because we only use
31319304Speter	 * it for comparison with the location of the match.
31419304Speter	 *
31519304Speter	 * Otherwise, start searching immediately before the cursor.  If in
31619304Speter	 * the first column, start search on the previous line.
31719304Speter	 */
31819304Speter	if (LF_ISSET(SEARCH_INCR)) {
31919304Speter		lno = fm->lno;
32019304Speter		coff = fm->cno + 1;
32119304Speter	} else {
32219304Speter		if (fm->cno == 0) {
32319304Speter			if (fm->lno == 1 && !O_ISSET(sp, O_WRAPSCAN)) {
32419304Speter				if (LF_ISSET(SEARCH_MSG))
32519304Speter					search_msg(sp, S_SOF);
32619304Speter				return (1);
32719304Speter			}
32819304Speter			lno = fm->lno - 1;
32919304Speter		} else
33019304Speter			lno = fm->lno;
33119304Speter		coff = fm->cno;
33219304Speter	}
33319304Speter
33419304Speter	btype = BUSY_ON;
33519304Speter	for (cnt = INTERRUPT_CHECK, rval = 1, wrapped = 0;; --lno, coff = 0) {
33619304Speter		if (cnt-- == 0) {
33719304Speter			if (INTERRUPTED(sp))
33819304Speter				break;
33919304Speter			if (LF_ISSET(SEARCH_MSG)) {
34019304Speter				search_busy(sp, btype);
34119304Speter				btype = BUSY_UPDATE;
34219304Speter			}
34319304Speter			cnt = INTERRUPT_CHECK;
34419304Speter		}
34519304Speter		if (wrapped && lno < fm->lno || lno == 0) {
34619304Speter			if (wrapped) {
34719304Speter				if (LF_ISSET(SEARCH_MSG))
34819304Speter					search_msg(sp, S_NOTFOUND);
34919304Speter				break;
35019304Speter			}
35119304Speter			if (!O_ISSET(sp, O_WRAPSCAN)) {
35219304Speter				if (LF_ISSET(SEARCH_MSG))
35319304Speter					search_msg(sp, S_SOF);
35419304Speter				break;
35519304Speter			}
35619304Speter			if (db_last(sp, &lno))
35719304Speter				break;
35819304Speter			if (lno == 0) {
35919304Speter				if (LF_ISSET(SEARCH_MSG))
36019304Speter					search_msg(sp, S_EMPTY);
36119304Speter				break;
36219304Speter			}
36319304Speter			++lno;
36419304Speter			wrapped = 1;
36519304Speter			continue;
36619304Speter		}
36719304Speter
36819304Speter		if (db_get(sp, lno, 0, &l, &len))
36919304Speter			break;
37019304Speter
37119304Speter		/* Set the termination. */
37219304Speter		match[0].rm_so = 0;
37319304Speter		match[0].rm_eo = len;
37419304Speter
37519304Speter#if defined(DEBUG) && 0
37619304Speter		TRACE(sp, "B search: %lu from 0 to %qu\n", lno, match[0].rm_eo);
37719304Speter#endif
37819304Speter		/* Search the line. */
37919304Speter		eval = regexec(&sp->re_c, l, 1, match,
38019304Speter		    (match[0].rm_eo == len ? 0 : REG_NOTEOL) | REG_STARTEND);
38119304Speter		if (eval == REG_NOMATCH)
38219304Speter			continue;
38319304Speter		if (eval != 0) {
38419304Speter			if (LF_ISSET(SEARCH_MSG))
38519304Speter				re_error(sp, eval, &sp->re_c);
38619304Speter			else
38719304Speter				(void)sp->gp->scr_bell(sp);
38819304Speter			break;
38919304Speter		}
39019304Speter
39119304Speter		/* Check for a match starting past the cursor. */
39219304Speter		if (coff != 0 && match[0].rm_so >= coff)
39319304Speter			continue;
39419304Speter
39519304Speter		/* Warn if the search wrapped. */
39619304Speter		if (wrapped && LF_ISSET(SEARCH_WMSG))
39719304Speter			search_msg(sp, S_WRAP);
39819304Speter
39919304Speter#if defined(DEBUG) && 0
40019304Speter		TRACE(sp, "B found: %qu to %qu\n",
40119304Speter		    match[0].rm_so, match[0].rm_eo);
40219304Speter#endif
40319304Speter		/*
40419304Speter		 * We now have the first match on the line.  Step through the
40519304Speter		 * line character by character until find the last acceptable
40619304Speter		 * match.  This is painful, we need a better interface to regex
40719304Speter		 * to make this work.
40819304Speter		 */
40919304Speter		for (;;) {
41019304Speter			last = match[0].rm_so++;
41119304Speter			if (match[0].rm_so >= len)
41219304Speter				break;
41319304Speter			match[0].rm_eo = len;
41419304Speter			eval = regexec(&sp->re_c, l, 1, match,
41519304Speter			    (match[0].rm_so == 0 ? 0 : REG_NOTBOL) |
41619304Speter			    REG_STARTEND);
41719304Speter			if (eval == REG_NOMATCH)
41819304Speter				break;
41919304Speter			if (eval != 0) {
42019304Speter				if (LF_ISSET(SEARCH_MSG))
42119304Speter					re_error(sp, eval, &sp->re_c);
42219304Speter				else
42319304Speter					(void)sp->gp->scr_bell(sp);
42419304Speter				goto err;
42519304Speter			}
42619304Speter			if (coff && match[0].rm_so >= coff)
42719304Speter				break;
42819304Speter		}
42919304Speter		rm->lno = lno;
43019304Speter
43119304Speter		/* See comment in f_search(). */
43219304Speter		if (!LF_ISSET(SEARCH_EOL) && last >= len)
43319304Speter			rm->cno = len != 0 ? len - 1 : 0;
43419304Speter		else
43519304Speter			rm->cno = last;
43619304Speter		rval = 0;
43719304Speter		break;
43819304Speter	}
43919304Speter
44019304Spetererr:	if (LF_ISSET(SEARCH_MSG))
44119304Speter		search_busy(sp, BUSY_OFF);
44219304Speter	return (rval);
44319304Speter}
44419304Speter
44519304Speter/*
44619304Speter * search_msg --
44719304Speter *	Display one of the search messages.
44819304Speter */
44919304Speterstatic void
45019304Spetersearch_msg(sp, msg)
45119304Speter	SCR *sp;
45219304Speter	smsg_t msg;
45319304Speter{
45419304Speter	switch (msg) {
45519304Speter	case S_EMPTY:
45619304Speter		msgq(sp, M_ERR, "072|File empty; nothing to search");
45719304Speter		break;
45819304Speter	case S_EOF:
45919304Speter		msgq(sp, M_ERR,
46019304Speter		    "073|Reached end-of-file without finding the pattern");
46119304Speter		break;
46219304Speter	case S_NOPREV:
46319304Speter		msgq(sp, M_ERR, "074|No previous search pattern");
46419304Speter		break;
46519304Speter	case S_NOTFOUND:
46619304Speter		msgq(sp, M_ERR, "075|Pattern not found");
46719304Speter		break;
46819304Speter	case S_SOF:
46919304Speter		msgq(sp, M_ERR,
47019304Speter		    "076|Reached top-of-file without finding the pattern");
47119304Speter		break;
47219304Speter	case S_WRAP:
47319304Speter		msgq(sp, M_ERR, "077|Search wrapped");
47419304Speter		break;
47519304Speter	default:
47619304Speter		abort();
47719304Speter	}
47819304Speter}
47919304Speter
48019304Speter/*
48119304Speter * search_busy --
48219304Speter *	Put up the busy searching message.
48319304Speter *
48419304Speter * PUBLIC: void search_busy __P((SCR *, busy_t));
48519304Speter */
48619304Spetervoid
48719304Spetersearch_busy(sp, btype)
48819304Speter	SCR *sp;
48919304Speter	busy_t btype;
49019304Speter{
49119304Speter	sp->gp->scr_busy(sp, "078|Searching...", btype);
49219304Speter}
493