1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "$Id: v_search.c,v 10.31 2012/02/08 07:26:59 zy Exp $";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <errno.h>
23#include <limits.h>
24#include <stdio.h>
25#include <stdlib.h>
26#include <string.h>
27
28#include "../common/common.h"
29#include "vi.h"
30
/*
 * Forward declarations for the two file-local workers: both are defined
 * later in this file but are called by the public entry points that
 * precede them.  (__P() is presumably the project's prototype-wrapping
 * macro -- it is not defined in this file.)
 */
static int v_exaddr __P((SCR *, VICMD *, dir_t));
static int v_search __P((SCR *, VICMD *, CHAR_T *, size_t, u_int, dir_t));
33
34/*
35 * v_srch -- [count]?RE[? offset]
36 *	Ex address search backward.
37 *
38 * PUBLIC: int v_searchb __P((SCR *, VICMD *));
39 */
40int
41v_searchb(SCR *sp, VICMD *vp)
42{
43	return (v_exaddr(sp, vp, BACKWARD));
44}
45
46/*
47 * v_searchf -- [count]/RE[/ offset]
48 *	Ex address search forward.
49 *
50 * PUBLIC: int v_searchf __P((SCR *, VICMD *));
51 */
52int
53v_searchf(SCR *sp, VICMD *vp)
54{
55	return (v_exaddr(sp, vp, FORWARD));
56}
57
58/*
59 * v_exaddr --
60 *	Do a vi search (which is really an ex address).
61 */
62static int
63v_exaddr(SCR *sp, VICMD *vp, dir_t dir)
64{
65	static EXCMDLIST fake = { L("search") };
66	EXCMD *cmdp;
67	GS *gp;
68	TEXT *tp;
69	recno_t s_lno;
70	size_t len, s_cno, tlen;
71	int err, nb, type;
72	char buf[20];
73	CHAR_T *cmd, *t;
74	CHAR_T *w;
75	size_t wlen;
76
77	/*
78	 * !!!
79	 * If using the search command as a motion, any addressing components
80	 * are lost, i.e. y/ptrn/+2, when repeated, is the same as y/ptrn/.
81	 */
82	if (F_ISSET(vp, VC_ISDOT))
83		return (v_search(sp, vp,
84		    NULL, 0, SEARCH_PARSE | SEARCH_MSG | SEARCH_SET, dir));
85
86	/* Get the search pattern. */
87	if (v_tcmd(sp, vp, dir == BACKWARD ? CH_BSEARCH : CH_FSEARCH,
88	    TXT_BS | TXT_CR | TXT_ESCAPE | TXT_PROMPT |
89	    (O_ISSET(sp, O_SEARCHINCR) ? TXT_SEARCHINCR : 0)))
90		return (1);
91
92	tp = TAILQ_FIRST(sp->tiq);
93
94	/* If the user backspaced over the prompt, do nothing. */
95	if (tp->term == TERM_BS)
96		return (1);
97
98	/*
99	 * If the user was doing an incremental search, then we've already
100	 * updated the cursor and moved to the right location.  Return the
101	 * correct values, we're done.
102	 */
103	if (tp->term == TERM_SEARCH) {
104		vp->m_stop.lno = sp->lno;
105		vp->m_stop.cno = sp->cno;
106		if (ISMOTION(vp))
107			return (v_correct(sp, vp, 0));
108		vp->m_final = vp->m_stop;
109		return (0);
110	}
111
112	/*
113	 * If the user entered <escape> or <carriage-return>, the length is
114	 * 1 and the right thing will happen, i.e. the prompt will be used
115	 * as a command character.
116	 *
117	 * Build a fake ex command structure.
118	 */
119	gp = sp->gp;
120	gp->excmd.cp = tp->lb;
121	gp->excmd.clen = tp->len;
122	F_INIT(&gp->excmd, E_VISEARCH);
123
124	/*
125	 * XXX
126	 * Warn if the search wraps.  This is a pretty special case, but it's
127	 * nice feature that wasn't in the original implementations of ex/vi.
128	 * (It was added at some point to System V's version.)  This message
129	 * is only displayed if there are no keys in the queue. The problem is
130	 * the command is going to succeed, and the message is informational,
131	 * not an error.  If a macro displays it repeatedly, e.g., the pattern
132	 * only occurs once in the file and wrapscan is set, you lose big.  For
133	 * example, if the macro does something like:
134	 *
135	 *	:map K /pattern/^MjK
136	 *
137	 * Each search will display the message, but the following "/pattern/"
138	 * will immediately overwrite it, with strange results.  The System V
139	 * vi displays the "wrapped" message multiple times, but because it's
140	 * overwritten each time, it's not as noticeable.  As we don't discard
141	 * messages, it's a real problem for us.
142	 */
143	if (!KEYS_WAITING(sp))
144		F_SET(&gp->excmd, E_SEARCH_WMSG);
145
146	/* Save the current line/column. */
147	s_lno = sp->lno;
148	s_cno = sp->cno;
149
150	/*
151	 * !!!
152	 * Historically, vi / and ? commands were full-blown ex addresses,
153	 * including ';' delimiters, trailing <blank>'s, multiple search
154	 * strings (separated by semi-colons) and, finally, full-blown z
155	 * commands after the / and ? search strings.  (If the search was
156	 * being used as a motion, the trailing z command was ignored.
157	 * Also, we do some argument checking on the z command, to be sure
158	 * that it's not some other random command.) For multiple search
159	 * strings, leading <blank>'s at the second and subsequent strings
160	 * were eaten as well.  This has some (unintended?) side-effects:
161	 * the command /ptrn/;3 is legal and results in moving to line 3.
162	 * I suppose you could use it to optionally move to line 3...
163	 *
164	 * !!!
165	 * Historically, if any part of the search command failed, the cursor
166	 * remained unmodified (even if ; was used).  We have to play games
167	 * because the underlying ex parser thinks we're modifying the cursor
168	 * as we go, but I think we're compatible with historic practice.
169	 *
170	 * !!!
171	 * Historically, the command "/STRING/;   " failed, apparently it
172	 * confused the parser.  We're not that compatible.
173	 */
174	cmdp = &gp->excmd;
175	if (ex_range(sp, cmdp, &err))
176		return (1);
177
178	/*
179	 * Remember where any remaining command information is, and clean
180	 * up the fake ex command.
181	 */
182	cmd = cmdp->cp;
183	len = cmdp->clen;
184	gp->excmd.clen = 0;
185
186	if (err)
187		goto err2;
188
189	/* Copy out the new cursor position and make sure it's okay. */
190	switch (cmdp->addrcnt) {
191	case 1:
192		vp->m_stop = cmdp->addr1;
193		break;
194	case 2:
195		vp->m_stop = cmdp->addr2;
196		break;
197	}
198	if (!db_exist(sp, vp->m_stop.lno)) {
199		ex_badaddr(sp, &fake,
200		    vp->m_stop.lno == 0 ? A_ZERO : A_EOF, NUM_OK);
201		goto err2;
202	}
203
204	/*
205	 * !!!
206	 * Historic practice is that a trailing 'z' was ignored if it was a
207	 * motion command.  Should probably be an error, but not worth the
208	 * effort.
209	 */
210	if (ISMOTION(vp))
211		return (v_correct(sp, vp, F_ISSET(cmdp, E_DELTA)));
212
213	/*
214	 * !!!
215	 * Historically, if it wasn't a motion command, a delta in the search
216	 * pattern turns it into a first nonblank movement.
217	 */
218	nb = F_ISSET(cmdp, E_DELTA);
219
220	/* Check for the 'z' command. */
221	if (len != 0) {
222		if (*cmd != 'z')
223			goto err1;
224
225		/* No blanks, just like the z command. */
226		for (t = cmd + 1, tlen = len - 1; tlen > 0; ++t, --tlen)
227			if (!isdigit(*t))
228				break;
229		if (tlen &&
230		    (*t == '-' || *t == '.' || *t == '+' || *t == '^')) {
231			++t;
232			--tlen;
233			type = 1;
234		} else
235			type = 0;
236		if (tlen)
237			goto err1;
238
239		/* The z command will do the nonblank for us. */
240		nb = 0;
241
242		/* Default to z+. */
243		if (!type &&
244		    v_event_push(sp, NULL, L("+"), 1, CH_NOMAP | CH_QUOTED))
245			return (1);
246
247		/* Push the user's command. */
248		if (v_event_push(sp, NULL, cmd, len, CH_NOMAP | CH_QUOTED))
249			return (1);
250
251		/* Push line number so get correct z display. */
252		tlen = snprintf(buf,
253		    sizeof(buf), "%lu", (u_long)vp->m_stop.lno);
254		CHAR2INT(sp, buf, tlen, w, wlen);
255		if (v_event_push(sp, NULL, w, wlen, CH_NOMAP | CH_QUOTED))
256			return (1);
257
258		/* Don't refresh until after 'z' happens. */
259		F_SET(VIP(sp), VIP_S_REFRESH);
260	}
261
262	/* Non-motion commands move to the end of the range. */
263	vp->m_final = vp->m_stop;
264	if (nb) {
265		F_CLR(vp, VM_RCM_MASK);
266		F_SET(vp, VM_RCM_SETFNB);
267	}
268	return (0);
269
270err1:	msgq(sp, M_ERR,
271	    "188|Characters after search string, line offset and/or z command");
272err2:	vp->m_final.lno = s_lno;
273	vp->m_final.cno = s_cno;
274	return (1);
275}
276
277/*
278 * v_searchN -- N
279 *	Reverse last search.
280 *
281 * PUBLIC: int v_searchN __P((SCR *, VICMD *));
282 */
283int
284v_searchN(SCR *sp, VICMD *vp)
285{
286	dir_t dir;
287
288	switch (sp->searchdir) {
289	case BACKWARD:
290		dir = FORWARD;
291		break;
292	case FORWARD:
293		dir = BACKWARD;
294		break;
295	default:
296		dir = sp->searchdir;
297		break;
298	}
299	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, dir));
300}
301
302/*
303 * v_searchn -- n
304 *	Repeat last search.
305 *
306 * PUBLIC: int v_searchn __P((SCR *, VICMD *));
307 */
308int
309v_searchn(SCR *sp, VICMD *vp)
310{
311	return (v_search(sp, vp, NULL, 0, SEARCH_PARSE, sp->searchdir));
312}
313
314/*
315 * is_special --
316 *	Test if the character is special in a basic RE.
317 */
318static int
319is_special(CHAR_T c)
320{
321	/*
322	 * !!!
323	 * `*' and `$' are ordinary when appear at the beginning of a RE,
324	 * but it's safe to distinguish them from the ordinary characters.
325	 * The tilde is vi-specific, of course.
326	 */
327	return (STRCHR(L(".[*\\^$~"), c) && c);
328}
329
/*
 * Rear delimiter for word search when the keyword ends in
 * (i.e., consists of) a non-word character.  See v_searchw below.
 *
 * RE_NWSTOP reads as "one character that is neither alphanumeric nor an
 * underscore, or end of line".  NOTE(review): this relies on grouping and
 * alternation being active in the RE syntax the search code compiles
 * with -- confirm.
 *
 * RE_NWSTOP_LEN is presumably the pattern length without the terminating
 * NUL, assuming SIZE() yields the element count of the literal -- TODO
 * confirm against the definition of SIZE().
 */
#define RE_NWSTOP	L("([^[:alnum:]_]|$)")
#define RE_NWSTOP_LEN	(SIZE(RE_NWSTOP) - 1)
336
337/*
338 * v_searchw -- [count]^A
339 *	Search for the word under the cursor.
340 *
341 * PUBLIC: int v_searchw __P((SCR *, VICMD *));
342 */
343int
344v_searchw(SCR *sp, VICMD *vp)
345{
346	size_t blen, len;
347	int rval;
348	CHAR_T *bp, *p;
349
350	/* An upper bound for the SIZE of the RE under construction. */
351	len = VIP(sp)->klen + MAX(RE_WSTART_LEN, 1)
352	    + MAX(RE_WSTOP_LEN, RE_NWSTOP_LEN);
353	GET_SPACE_RETW(sp, bp, blen, len);
354	p = bp;
355
356	/* Only the first character can be non-word, see v_curword. */
357	if (inword(VIP(sp)->keyw[0])) {
358		MEMCPY(p, RE_WSTART, RE_WSTART_LEN);
359		p += RE_WSTART_LEN;
360	} else if (is_special(VIP(sp)->keyw[0])) {
361		MEMCPY(p, L("\\"), 1);
362		p += 1;
363	}
364
365	MEMCPY(p, VIP(sp)->keyw, VIP(sp)->klen);
366	p += VIP(sp)->klen;
367
368	if (inword(p[-1])) {
369		MEMCPY(p, RE_WSTOP, RE_WSTOP_LEN);
370		p += RE_WSTOP_LEN;
371	} else {
372		/*
373		 * The keyword is a single non-word character.
374		 * We want it to stay the same when typing ^A several times
375		 * in a row, just the way the other cases behave.
376		 */
377		MEMCPY(p, RE_NWSTOP, RE_NWSTOP_LEN);
378		p += RE_NWSTOP_LEN;
379	}
380
381	len = p - bp;
382	rval = v_search(sp, vp, bp, len, SEARCH_SET, FORWARD);
383
384	FREE_SPACEW(sp, bp, blen);
385	return (rval);
386}
387
388/*
389 * v_search --
390 *	The search commands.
391 */
392static int
393v_search(SCR *sp, VICMD *vp, CHAR_T *ptrn, size_t plen, u_int flags, dir_t dir)
394{
395	/* Display messages. */
396	LF_SET(SEARCH_MSG);
397
398	/* If it's a motion search, offset past end-of-line is okay. */
399	if (ISMOTION(vp))
400		LF_SET(SEARCH_EOL);
401
402	/*
403	 * XXX
404	 * Warn if the search wraps.  See the comment above, in v_exaddr().
405	 */
406	if (!KEYS_WAITING(sp))
407		LF_SET(SEARCH_WMSG);
408
409	switch (dir) {
410	case BACKWARD:
411		if (b_search(sp,
412		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
413			return (1);
414		break;
415	case FORWARD:
416		if (f_search(sp,
417		    &vp->m_start, &vp->m_stop, ptrn, plen, NULL, flags))
418			return (1);
419		break;
420	case NOTSET:
421		msgq(sp, M_ERR, "189|No previous search pattern");
422		return (1);
423	default:
424		abort();
425	}
426
427	/* Correct motion commands, otherwise, simply move to the location. */
428	if (ISMOTION(vp)) {
429		if (v_correct(sp, vp, 0))
430			return(1);
431	} else
432		vp->m_final = vp->m_stop;
433	return (0);
434}
435
436/*
437 * v_correct --
438 *	Handle command with a search as the motion.
439 *
440 * !!!
441 * Historically, commands didn't affect the line searched to/from if the
442 * motion command was a search and the final position was the start/end
443 * of the line.  There were some special cases and vi was not consistent;
444 * it was fairly easy to confuse it.  For example, given the two lines:
445 *
446 *	abcdefghi
447 *	ABCDEFGHI
448 *
449 * placing the cursor on the 'A' and doing y?$ would so confuse it that 'h'
450 * 'k' and put would no longer work correctly.  In any case, we try to do
451 * the right thing, but it's not going to exactly match historic practice.
452 *
453 * PUBLIC: int v_correct __P((SCR *, VICMD *, int));
454 */
455int
456v_correct(SCR *sp, VICMD *vp, int isdelta)
457{
458	dir_t dir;
459	MARK m;
460	size_t len;
461
462	/*
463	 * !!!
464	 * We may have wrapped if wrapscan was set, and we may have returned
465	 * to the position where the cursor started.  Historic vi didn't cope
466	 * with this well.  Yank wouldn't beep, but the first put after the
467	 * yank would move the cursor right one column (without adding any
468	 * text) and the second would put a copy of the current line.  The
469	 * change and delete commands would beep, but would leave the cursor
470	 * on the colon command line.  I believe that there are macros that
471	 * depend on delete, at least, failing.  For now, commands that use
472	 * search as a motion component fail when the search returns to the
473	 * original cursor position.
474	 */
475	if (vp->m_start.lno == vp->m_stop.lno &&
476	    vp->m_start.cno == vp->m_stop.cno) {
477		msgq(sp, M_BERR, "190|Search wrapped to original position");
478		return (1);
479	}
480
481	/*
482	 * !!!
483	 * Searches become line mode operations if there was a delta specified
484	 * to the search pattern.
485	 */
486	if (isdelta)
487		F_SET(vp, VM_LMODE);
488
489	/*
490	 * If the motion is in the reverse direction, switch the start and
491	 * stop MARK's so that it's in a forward direction.  (There's no
492	 * reason for this other than to make the tests below easier.  The
493	 * code in vi.c:vi() would have done the switch.)  Both forward
494	 * and backward motions can happen for any kind of search command
495	 * because of the wrapscan option.
496	 */
497	if (vp->m_start.lno > vp->m_stop.lno ||
498	    (vp->m_start.lno == vp->m_stop.lno &&
499	    vp->m_start.cno > vp->m_stop.cno)) {
500		m = vp->m_start;
501		vp->m_start = vp->m_stop;
502		vp->m_stop = m;
503		dir = BACKWARD;
504	} else
505		dir = FORWARD;
506
507	/*
508	 * BACKWARD:
509	 *	Delete and yank commands move to the end of the range.
510	 *	Ignore others.
511	 *
512	 * FORWARD:
513	 *	Delete and yank commands don't move.  Ignore others.
514	 */
515	vp->m_final = vp->m_start;
516
517	/*
518	 * !!!
519	 * Delta'd searches don't correct based on column positions.
520	 */
521	if (isdelta)
522		return (0);
523
524	/*
525	 * !!!
526	 * Backward searches starting at column 0, and forward searches ending
527	 * at column 0 are corrected to the last column of the previous line.
528	 * Otherwise, adjust the starting/ending point to the character before
529	 * the current one (this is safe because we know the search had to move
530	 * to succeed).
531	 *
532	 * Searches become line mode operations if they start at the first
533	 * nonblank and end at column 0 of another line.
534	 */
535	if (vp->m_start.lno < vp->m_stop.lno && vp->m_stop.cno == 0) {
536		if (db_get(sp, --vp->m_stop.lno, DBG_FATAL, NULL, &len))
537			return (1);
538		vp->m_stop.cno = len ? len - 1 : 0;
539		len = 0;
540		if (nonblank(sp, vp->m_start.lno, &len))
541			return (1);
542		if (vp->m_start.cno <= len)
543			F_SET(vp, VM_LMODE);
544	} else
545		--vp->m_stop.cno;
546
547	return (0);
548}
549