119304Speter/*-
219304Speter * Copyright (c) 1992, 1993, 1994
319304Speter *	The Regents of the University of California.  All rights reserved.
419304Speter * Copyright (c) 1992, 1993, 1994, 1995, 1996
519304Speter *	Keith Bostic.  All rights reserved.
619304Speter *
719304Speter * See the LICENSE file for redistribution information.
819304Speter */
919304Speter
1019304Speter#include "config.h"
1119304Speter
1219304Speter#ifndef lint
1319304Speterstatic const char sccsid[] = "@(#)v_word.c	10.5 (Berkeley) 3/6/96";
1419304Speter#endif /* not lint */
1519304Speter
1619304Speter#include <sys/types.h>
1719304Speter#include <sys/queue.h>
1819304Speter#include <sys/time.h>
1919304Speter
2019304Speter#include <bitstring.h>
2119304Speter#include <ctype.h>
2219304Speter#include <limits.h>
2319304Speter#include <stdio.h>
2419304Speter
2519304Speter#include "../common/common.h"
2619304Speter#include "vi.h"
2719304Speter
2819304Speter/*
2919304Speter * There are two types of "words".  Bigwords are easy -- groups of anything
3019304Speter * delimited by whitespace.  Normal words are trickier.  They are either a
3119304Speter * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just skip the whitespace and go to the beginning
 * or end of the word.  Otherwise, figure out if the next character is in a
 * different group.
3519304Speter * If it is, go to the beginning or end of that group, otherwise, go to the
3619304Speter * beginning or end of the current group.  The historic version of vi didn't
3719304Speter * get this right, so, for example, there were cases where "4e" was not the
3819304Speter * same as "eeee" -- in particular, single character words, and commands that
3919304Speter * began in whitespace were almost always handled incorrectly.  To get it right
4019304Speter * you have to resolve the cursor after each search so that the look-ahead to
4119304Speter * figure out what type of "word" the cursor is in will be correct.
4219304Speter *
4319304Speter * Empty lines, and lines that consist of only white-space characters count
4419304Speter * as a single word, and the beginning and end of the file counts as an
4519304Speter * infinite number of words.
4619304Speter *
4719304Speter * Movements associated with commands are different than movement commands.
4819304Speter * For example, in "abc  def", with the cursor on the 'a', "cw" is from
4919304Speter * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
5019304Speter * space is discarded from the change movement.  Another example is that,
5119304Speter * in the same string, a "cw" on any white space character replaces that
5219304Speter * single character, and nothing else.  Ain't nothin' in here that's easy.
5319304Speter *
5419304Speter * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
5519304Speter * would treat groups of empty lines as individual words, i.e. the command
5619304Speter * would move the cursor to each new empty line.  The 'e' and 'E' commands
5719304Speter * would treat groups of empty lines as a single word, i.e. the first use
5819304Speter * would move past the group of lines.  The 'b' command would just beep at
5919304Speter * you, or, if you did it from the start of the line as part of a motion
6019304Speter * command, go absolutely nuts.  If the lines contained only white-space
6119304Speter * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
6219304Speter * 'b', 'E' and 'e' commands would treat the group as a single word, and
6319304Speter * the 'B' and 'b' commands will treat the lines as individual words.  This
6419304Speter * implementation treats all of these cases as a single white-space word.
6519304Speter */
6619304Speter
6719304Speterenum which {BIGWORD, LITTLEWORD};
6819304Speter
6919304Speterstatic int bword __P((SCR *, VICMD *, enum which));
7019304Speterstatic int eword __P((SCR *, VICMD *, enum which));
7119304Speterstatic int fword __P((SCR *, VICMD *, enum which));
7219304Speter
7319304Speter/*
7419304Speter * v_wordW -- [count]W
7519304Speter *	Move forward a bigword at a time.
7619304Speter *
7719304Speter * PUBLIC: int v_wordW __P((SCR *, VICMD *));
7819304Speter */
7919304Speterint
8019304Speterv_wordW(sp, vp)
8119304Speter	SCR *sp;
8219304Speter	VICMD *vp;
8319304Speter{
8419304Speter	return (fword(sp, vp, BIGWORD));
8519304Speter}
8619304Speter
8719304Speter/*
8819304Speter * v_wordw -- [count]w
8919304Speter *	Move forward a word at a time.
9019304Speter *
9119304Speter * PUBLIC: int v_wordw __P((SCR *, VICMD *));
9219304Speter */
9319304Speterint
9419304Speterv_wordw(sp, vp)
9519304Speter	SCR *sp;
9619304Speter	VICMD *vp;
9719304Speter{
9819304Speter	return (fword(sp, vp, LITTLEWORD));
9919304Speter}
10019304Speter
10119304Speter/*
10219304Speter * fword --
10319304Speter *	Move forward by words.
10419304Speter */
10519304Speterstatic int
10619304Speterfword(sp, vp, type)
10719304Speter	SCR *sp;
10819304Speter	VICMD *vp;
10919304Speter	enum which type;
11019304Speter{
11119304Speter	enum { INWORD, NOTWORD } state;
11219304Speter	VCS cs;
11319304Speter	u_long cnt;
11419304Speter
11519304Speter	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
11619304Speter	cs.cs_lno = vp->m_start.lno;
11719304Speter	cs.cs_cno = vp->m_start.cno;
11819304Speter	if (cs_init(sp, &cs))
11919304Speter		return (1);
12019304Speter
12119304Speter	/*
12219304Speter	 * If in white-space:
12319304Speter	 *	If the count is 1, and it's a change command, we're done.
12419304Speter	 *	Else, move to the first non-white-space character, which
12519304Speter	 *	counts as a single word move.  If it's a motion command,
12619304Speter	 *	don't move off the end of the line.
12719304Speter	 */
12819304Speter	if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
12919304Speter		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
13019304Speter			if (ISCMD(vp->rkp, 'c'))
13119304Speter				return (0);
13219304Speter			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
13319304Speter				if (cs_fspace(sp, &cs))
13419304Speter					return (1);
13519304Speter				goto ret;
13619304Speter			}
13719304Speter		}
13819304Speter		if (cs_fblank(sp, &cs))
13919304Speter			return (1);
14019304Speter		--cnt;
14119304Speter	}
14219304Speter
14319304Speter	/*
14419304Speter	 * Cyclically move to the next word -- this involves skipping
14519304Speter	 * over word characters and then any trailing non-word characters.
14619304Speter	 * Note, for the 'w' command, the definition of a word keeps
14719304Speter	 * switching.
14819304Speter	 */
14919304Speter	if (type == BIGWORD)
15019304Speter		while (cnt--) {
15119304Speter			for (;;) {
15219304Speter				if (cs_next(sp, &cs))
15319304Speter					return (1);
15419304Speter				if (cs.cs_flags == CS_EOF)
15519304Speter					goto ret;
15619304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
15719304Speter					break;
15819304Speter			}
15919304Speter			/*
16019304Speter			 * If a motion command and we're at the end of the
16119304Speter			 * last word, we're done.  Delete and yank eat any
16219304Speter			 * trailing blanks, but we don't move off the end
16319304Speter			 * of the line regardless.
16419304Speter			 */
16519304Speter			if (cnt == 0 && ISMOTION(vp)) {
16619304Speter				if ((ISCMD(vp->rkp, 'd') ||
16719304Speter				    ISCMD(vp->rkp, 'y')) &&
16819304Speter				    cs_fspace(sp, &cs))
16919304Speter					return (1);
17019304Speter				break;
17119304Speter			}
17219304Speter
17319304Speter			/* Eat whitespace characters. */
17419304Speter			if (cs_fblank(sp, &cs))
17519304Speter				return (1);
17619304Speter			if (cs.cs_flags == CS_EOF)
17719304Speter				goto ret;
17819304Speter		}
17919304Speter	else
18019304Speter		while (cnt--) {
18119304Speter			state = cs.cs_flags == 0 &&
18219304Speter			    inword(cs.cs_ch) ? INWORD : NOTWORD;
18319304Speter			for (;;) {
18419304Speter				if (cs_next(sp, &cs))
18519304Speter					return (1);
18619304Speter				if (cs.cs_flags == CS_EOF)
18719304Speter					goto ret;
18819304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
18919304Speter					break;
19019304Speter				if (state == INWORD) {
19119304Speter					if (!inword(cs.cs_ch))
19219304Speter						break;
19319304Speter				} else
19419304Speter					if (inword(cs.cs_ch))
19519304Speter						break;
19619304Speter			}
19719304Speter			/* See comment above. */
19819304Speter			if (cnt == 0 && ISMOTION(vp)) {
19919304Speter				if ((ISCMD(vp->rkp, 'd') ||
20019304Speter				    ISCMD(vp->rkp, 'y')) &&
20119304Speter				    cs_fspace(sp, &cs))
20219304Speter					return (1);
20319304Speter				break;
20419304Speter			}
20519304Speter
20619304Speter			/* Eat whitespace characters. */
20719304Speter			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
20819304Speter				if (cs_fblank(sp, &cs))
20919304Speter					return (1);
21019304Speter			if (cs.cs_flags == CS_EOF)
21119304Speter				goto ret;
21219304Speter		}
21319304Speter
21419304Speter	/*
21519304Speter	 * If we didn't move, we must be at EOF.
21619304Speter	 *
21719304Speter	 * !!!
21819304Speter	 * That's okay for motion commands, however.
21919304Speter	 */
22019304Speterret:	if (!ISMOTION(vp) &&
22119304Speter	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
22219304Speter		v_eof(sp, &vp->m_start);
22319304Speter		return (1);
22419304Speter	}
22519304Speter
22619304Speter	/* Adjust the end of the range for motion commands. */
22719304Speter	vp->m_stop.lno = cs.cs_lno;
22819304Speter	vp->m_stop.cno = cs.cs_cno;
22919304Speter	if (ISMOTION(vp) && cs.cs_flags == 0)
23019304Speter		--vp->m_stop.cno;
23119304Speter
23219304Speter	/*
23319304Speter	 * Non-motion commands move to the end of the range.  Delete
23419304Speter	 * and yank stay at the start, ignore others.
23519304Speter	 */
23619304Speter	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
23719304Speter	return (0);
23819304Speter}
23919304Speter
24019304Speter/*
24119304Speter * v_wordE -- [count]E
24219304Speter *	Move forward to the end of the bigword.
24319304Speter *
24419304Speter * PUBLIC: int v_wordE __P((SCR *, VICMD *));
24519304Speter */
24619304Speterint
24719304Speterv_wordE(sp, vp)
24819304Speter	SCR *sp;
24919304Speter	VICMD *vp;
25019304Speter{
25119304Speter	return (eword(sp, vp, BIGWORD));
25219304Speter}
25319304Speter
25419304Speter/*
25519304Speter * v_worde -- [count]e
25619304Speter *	Move forward to the end of the word.
25719304Speter *
25819304Speter * PUBLIC: int v_worde __P((SCR *, VICMD *));
25919304Speter */
26019304Speterint
26119304Speterv_worde(sp, vp)
26219304Speter	SCR *sp;
26319304Speter	VICMD *vp;
26419304Speter{
26519304Speter	return (eword(sp, vp, LITTLEWORD));
26619304Speter}
26719304Speter
26819304Speter/*
26919304Speter * eword --
27019304Speter *	Move forward to the end of the word.
27119304Speter */
27219304Speterstatic int
27319304Spetereword(sp, vp, type)
27419304Speter	SCR *sp;
27519304Speter	VICMD *vp;
27619304Speter	enum which type;
27719304Speter{
27819304Speter	enum { INWORD, NOTWORD } state;
27919304Speter	VCS cs;
28019304Speter	u_long cnt;
28119304Speter
28219304Speter	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
28319304Speter	cs.cs_lno = vp->m_start.lno;
28419304Speter	cs.cs_cno = vp->m_start.cno;
28519304Speter	if (cs_init(sp, &cs))
28619304Speter		return (1);
28719304Speter
28819304Speter	/*
28919304Speter	 * !!!
29019304Speter	 * If in whitespace, or the next character is whitespace, move past
29119304Speter	 * it.  (This doesn't count as a word move.)  Stay at the character
29219304Speter	 * past the current one, it sets word "state" for the 'e' command.
29319304Speter	 */
29419304Speter	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
29519304Speter		if (cs_next(sp, &cs))
29619304Speter			return (1);
29719304Speter		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
29819304Speter			goto start;
29919304Speter	}
30019304Speter	if (cs_fblank(sp, &cs))
30119304Speter		return (1);
30219304Speter
30319304Speter	/*
30419304Speter	 * Cyclically move to the next word -- this involves skipping
30519304Speter	 * over word characters and then any trailing non-word characters.
30619304Speter	 * Note, for the 'e' command, the definition of a word keeps
30719304Speter	 * switching.
30819304Speter	 */
30919304Speterstart:	if (type == BIGWORD)
31019304Speter		while (cnt--) {
31119304Speter			for (;;) {
31219304Speter				if (cs_next(sp, &cs))
31319304Speter					return (1);
31419304Speter				if (cs.cs_flags == CS_EOF)
31519304Speter					goto ret;
31619304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
31719304Speter					break;
31819304Speter			}
31919304Speter			/*
32019304Speter			 * When we reach the start of the word after the last
32119304Speter			 * word, we're done.  If we changed state, back up one
32219304Speter			 * to the end of the previous word.
32319304Speter			 */
32419304Speter			if (cnt == 0) {
32519304Speter				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
32619304Speter					return (1);
32719304Speter				break;
32819304Speter			}
32919304Speter
33019304Speter			/* Eat whitespace characters. */
33119304Speter			if (cs_fblank(sp, &cs))
33219304Speter				return (1);
33319304Speter			if (cs.cs_flags == CS_EOF)
33419304Speter				goto ret;
33519304Speter		}
33619304Speter	else
33719304Speter		while (cnt--) {
33819304Speter			state = cs.cs_flags == 0 &&
33919304Speter			    inword(cs.cs_ch) ? INWORD : NOTWORD;
34019304Speter			for (;;) {
34119304Speter				if (cs_next(sp, &cs))
34219304Speter					return (1);
34319304Speter				if (cs.cs_flags == CS_EOF)
34419304Speter					goto ret;
34519304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
34619304Speter					break;
34719304Speter				if (state == INWORD) {
34819304Speter					if (!inword(cs.cs_ch))
34919304Speter						break;
35019304Speter				} else
35119304Speter					if (inword(cs.cs_ch))
35219304Speter						break;
35319304Speter			}
35419304Speter			/* See comment above. */
35519304Speter			if (cnt == 0) {
35619304Speter				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
35719304Speter					return (1);
35819304Speter				break;
35919304Speter			}
36019304Speter
36119304Speter			/* Eat whitespace characters. */
36219304Speter			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
36319304Speter				if (cs_fblank(sp, &cs))
36419304Speter					return (1);
36519304Speter			if (cs.cs_flags == CS_EOF)
36619304Speter				goto ret;
36719304Speter		}
36819304Speter
36919304Speter	/*
37019304Speter	 * If we didn't move, we must be at EOF.
37119304Speter	 *
37219304Speter	 * !!!
37319304Speter	 * That's okay for motion commands, however.
37419304Speter	 */
37519304Speterret:	if (!ISMOTION(vp) &&
37619304Speter	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
37719304Speter		v_eof(sp, &vp->m_start);
37819304Speter		return (1);
37919304Speter	}
38019304Speter
38119304Speter	/* Set the end of the range for motion commands. */
38219304Speter	vp->m_stop.lno = cs.cs_lno;
38319304Speter	vp->m_stop.cno = cs.cs_cno;
38419304Speter
38519304Speter	/*
38619304Speter	 * Non-motion commands move to the end of the range.
38719304Speter	 * Delete and yank stay at the start, ignore others.
38819304Speter	 */
38919304Speter	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
39019304Speter	return (0);
39119304Speter}
39219304Speter
39319304Speter/*
39419304Speter * v_WordB -- [count]B
39519304Speter *	Move backward a bigword at a time.
39619304Speter *
39719304Speter * PUBLIC: int v_wordB __P((SCR *, VICMD *));
39819304Speter */
39919304Speterint
40019304Speterv_wordB(sp, vp)
40119304Speter	SCR *sp;
40219304Speter	VICMD *vp;
40319304Speter{
40419304Speter	return (bword(sp, vp, BIGWORD));
40519304Speter}
40619304Speter
40719304Speter/*
40819304Speter * v_wordb -- [count]b
40919304Speter *	Move backward a word at a time.
41019304Speter *
41119304Speter * PUBLIC: int v_wordb __P((SCR *, VICMD *));
41219304Speter */
41319304Speterint
41419304Speterv_wordb(sp, vp)
41519304Speter	SCR *sp;
41619304Speter	VICMD *vp;
41719304Speter{
41819304Speter	return (bword(sp, vp, LITTLEWORD));
41919304Speter}
42019304Speter
42119304Speter/*
42219304Speter * bword --
42319304Speter *	Move backward by words.
42419304Speter */
42519304Speterstatic int
42619304Speterbword(sp, vp, type)
42719304Speter	SCR *sp;
42819304Speter	VICMD *vp;
42919304Speter	enum which type;
43019304Speter{
43119304Speter	enum { INWORD, NOTWORD } state;
43219304Speter	VCS cs;
43319304Speter	u_long cnt;
43419304Speter
43519304Speter	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
43619304Speter	cs.cs_lno = vp->m_start.lno;
43719304Speter	cs.cs_cno = vp->m_start.cno;
43819304Speter	if (cs_init(sp, &cs))
43919304Speter		return (1);
44019304Speter
44119304Speter	/*
44219304Speter	 * !!!
44319304Speter	 * If in whitespace, or the previous character is whitespace, move
44419304Speter	 * past it.  (This doesn't count as a word move.)  Stay at the
44519304Speter	 * character before the current one, it sets word "state" for the
44619304Speter	 * 'b' command.
44719304Speter	 */
44819304Speter	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
44919304Speter		if (cs_prev(sp, &cs))
45019304Speter			return (1);
45119304Speter		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
45219304Speter			goto start;
45319304Speter	}
45419304Speter	if (cs_bblank(sp, &cs))
45519304Speter		return (1);
45619304Speter
45719304Speter	/*
45819304Speter	 * Cyclically move to the beginning of the previous word -- this
45919304Speter	 * involves skipping over word characters and then any trailing
46019304Speter	 * non-word characters.  Note, for the 'b' command, the definition
46119304Speter	 * of a word keeps switching.
46219304Speter	 */
46319304Speterstart:	if (type == BIGWORD)
46419304Speter		while (cnt--) {
46519304Speter			for (;;) {
46619304Speter				if (cs_prev(sp, &cs))
46719304Speter					return (1);
46819304Speter				if (cs.cs_flags == CS_SOF)
46919304Speter					goto ret;
47019304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
47119304Speter					break;
47219304Speter			}
47319304Speter			/*
47419304Speter			 * When we reach the end of the word before the last
47519304Speter			 * word, we're done.  If we changed state, move forward
47619304Speter			 * one to the end of the next word.
47719304Speter			 */
47819304Speter			if (cnt == 0) {
47919304Speter				if (cs.cs_flags == 0 && cs_next(sp, &cs))
48019304Speter					return (1);
48119304Speter				break;
48219304Speter			}
48319304Speter
48419304Speter			/* Eat whitespace characters. */
48519304Speter			if (cs_bblank(sp, &cs))
48619304Speter				return (1);
48719304Speter			if (cs.cs_flags == CS_SOF)
48819304Speter				goto ret;
48919304Speter		}
49019304Speter	else
49119304Speter		while (cnt--) {
49219304Speter			state = cs.cs_flags == 0 &&
49319304Speter			    inword(cs.cs_ch) ? INWORD : NOTWORD;
49419304Speter			for (;;) {
49519304Speter				if (cs_prev(sp, &cs))
49619304Speter					return (1);
49719304Speter				if (cs.cs_flags == CS_SOF)
49819304Speter					goto ret;
49919304Speter				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
50019304Speter					break;
50119304Speter				if (state == INWORD) {
50219304Speter					if (!inword(cs.cs_ch))
50319304Speter						break;
50419304Speter				} else
50519304Speter					if (inword(cs.cs_ch))
50619304Speter						break;
50719304Speter			}
50819304Speter			/* See comment above. */
50919304Speter			if (cnt == 0) {
51019304Speter				if (cs.cs_flags == 0 && cs_next(sp, &cs))
51119304Speter					return (1);
51219304Speter				break;
51319304Speter			}
51419304Speter
51519304Speter			/* Eat whitespace characters. */
51619304Speter			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
51719304Speter				if (cs_bblank(sp, &cs))
51819304Speter					return (1);
51919304Speter			if (cs.cs_flags == CS_SOF)
52019304Speter				goto ret;
52119304Speter		}
52219304Speter
52319304Speter	/* If we didn't move, we must be at SOF. */
52419304Speterret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
52519304Speter		v_sof(sp, &vp->m_start);
52619304Speter		return (1);
52719304Speter	}
52819304Speter
52919304Speter	/* Set the end of the range for motion commands. */
53019304Speter	vp->m_stop.lno = cs.cs_lno;
53119304Speter	vp->m_stop.cno = cs.cs_cno;
53219304Speter
53319304Speter	/*
53419304Speter	 * All commands move to the end of the range.  Motion commands
53519304Speter	 * adjust the starting point to the character before the current
53619304Speter	 * one.
53719304Speter	 *
53819304Speter	 * !!!
53919304Speter	 * The historic vi didn't get this right -- the `yb' command yanked
54019304Speter	 * the right stuff and even updated the cursor value, but the cursor
54119304Speter	 * was not actually updated on the screen.
54219304Speter	 */
54319304Speter	vp->m_final = vp->m_stop;
54419304Speter	if (ISMOTION(vp))
54519304Speter		--vp->m_start.cno;
54619304Speter	return (0);
54719304Speter}
548