1/*	$OpenBSD: v_word.c,v 1.7 2014/11/12 04:28:41 bentley Exp $	*/
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 *	Keith Bostic.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/types.h>
15#include <sys/queue.h>
16#include <sys/time.h>
17
18#include <bitstring.h>
19#include <ctype.h>
20#include <limits.h>
21#include <stdio.h>
22
23#include "../common/common.h"
24#include "vi.h"
25
26/*
27 * There are two types of "words".  Bigwords are easy -- groups of anything
28 * delimited by whitespace.  Normal words are trickier.  They are either a
29 * group of characters, numbers and underscores, or a group of anything but,
30 * delimited by whitespace.  When for a word, if you're in whitespace, it's
31 * easy, just remove the whitespace and go to the beginning or end of the
32 * word.  Otherwise, figure out if the next character is in a different group.
33 * If it is, go to the beginning or end of that group, otherwise, go to the
34 * beginning or end of the current group.  The historic version of vi didn't
35 * get this right, so, for example, there were cases where "4e" was not the
36 * same as "eeee" -- in particular, single character words, and commands that
37 * began in whitespace were almost always handled incorrectly.  To get it right
38 * you have to resolve the cursor after each search so that the look-ahead to
39 * figure out what type of "word" the cursor is in will be correct.
40 *
41 * Empty lines, and lines that consist of only white-space characters count
42 * as a single word, and the beginning and end of the file counts as an
43 * infinite number of words.
44 *
45 * Movements associated with commands are different than movement commands.
46 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
47 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
48 * space is discarded from the change movement.  Another example is that,
49 * in the same string, a "cw" on any white space character replaces that
50 * single character, and nothing else.  Ain't nothin' in here that's easy.
51 *
52 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
53 * would treat groups of empty lines as individual words, i.e. the command
54 * would move the cursor to each new empty line.  The 'e' and 'E' commands
55 * would treat groups of empty lines as a single word, i.e. the first use
56 * would move past the group of lines.  The 'b' command would just beep at
57 * you, or, if you did it from the start of the line as part of a motion
58 * command, go absolutely nuts.  If the lines contained only white-space
59 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
60 * 'b', 'E' and 'e' commands would treat the group as a single word, and
61 * the 'B' and 'b' commands will treat the lines as individual words.  This
62 * implementation treats all of these cases as a single white-space word.
63 */
64
65enum which {BIGWORD, LITTLEWORD};
66
67static int bword(SCR *, VICMD *, enum which);
68static int eword(SCR *, VICMD *, enum which);
69static int fword(SCR *, VICMD *, enum which);
70
71/*
72 * v_wordW -- [count]W
73 *	Move forward a bigword at a time.
74 *
75 * PUBLIC: int v_wordW(SCR *, VICMD *);
76 */
77int
78v_wordW(SCR *sp, VICMD *vp)
79{
80	return (fword(sp, vp, BIGWORD));
81}
82
83/*
84 * v_wordw -- [count]w
85 *	Move forward a word at a time.
86 *
87 * PUBLIC: int v_wordw(SCR *, VICMD *);
88 */
89int
90v_wordw(SCR *sp, VICMD *vp)
91{
92	return (fword(sp, vp, LITTLEWORD));
93}
94
95/*
96 * fword --
97 *	Move forward by words.
98 */
99static int
100fword(SCR *sp, VICMD *vp, enum which type)
101{
102	enum { INWORD, NOTWORD } state;
103	VCS cs;
104	u_long cnt;
105
106	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
107	cs.cs_lno = vp->m_start.lno;
108	cs.cs_cno = vp->m_start.cno;
109	if (cs_init(sp, &cs))
110		return (1);
111
112	/*
113	 * If in white-space:
114	 *	If the count is 1, and it's a change command, we're done.
115	 *	Else, move to the first non-white-space character, which
116	 *	counts as a single word move.  If it's a motion command,
117	 *	don't move off the end of the line.
118	 */
119	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
120		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
121			if (ISCMD(vp->rkp, 'c'))
122				return (0);
123			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
124				if (cs_fspace(sp, &cs))
125					return (1);
126				goto ret;
127			}
128		}
129		if (cs_fblank(sp, &cs))
130			return (1);
131		--cnt;
132	}
133
134	/*
135	 * Cyclically move to the next word -- this involves skipping
136	 * over word characters and then any trailing non-word characters.
137	 * Note, for the 'w' command, the definition of a word keeps
138	 * switching.
139	 */
140	if (type == BIGWORD)
141		while (cnt--) {
142			for (;;) {
143				if (cs_next(sp, &cs))
144					return (1);
145				if (cs.cs_flags == CS_EOF)
146					goto ret;
147				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
148					break;
149			}
150			/*
151			 * If a motion command and we're at the end of the
152			 * last word, we're done.  Delete and yank eat any
153			 * trailing blanks, but we don't move off the end
154			 * of the line regardless.
155			 */
156			if (cnt == 0 && ISMOTION(vp)) {
157				if ((ISCMD(vp->rkp, 'd') ||
158				    ISCMD(vp->rkp, 'y')) &&
159				    cs_fspace(sp, &cs))
160					return (1);
161				break;
162			}
163
164			/* Eat whitespace characters. */
165			if (cs_fblank(sp, &cs))
166				return (1);
167			if (cs.cs_flags == CS_EOF)
168				goto ret;
169		}
170	else
171		while (cnt--) {
172			state = cs.cs_flags == 0 &&
173			    inword(cs.cs_ch) ? INWORD : NOTWORD;
174			for (;;) {
175				if (cs_next(sp, &cs))
176					return (1);
177				if (cs.cs_flags == CS_EOF)
178					goto ret;
179				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
180					break;
181				if (state == INWORD) {
182					if (!inword(cs.cs_ch))
183						break;
184				} else
185					if (inword(cs.cs_ch))
186						break;
187			}
188			/* See comment above. */
189			if (cnt == 0 && ISMOTION(vp)) {
190				if ((ISCMD(vp->rkp, 'd') ||
191				    ISCMD(vp->rkp, 'y')) &&
192				    cs_fspace(sp, &cs))
193					return (1);
194				break;
195			}
196
197			/* Eat whitespace characters. */
198			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
199				if (cs_fblank(sp, &cs))
200					return (1);
201			if (cs.cs_flags == CS_EOF)
202				goto ret;
203		}
204
205	/*
206	 * If we didn't move, we must be at EOF.
207	 *
208	 * !!!
209	 * That's okay for motion commands, however.
210	 */
211ret:	if (!ISMOTION(vp) &&
212	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
213		v_eof(sp, &vp->m_start);
214		return (1);
215	}
216
217	/* Adjust the end of the range for motion commands. */
218	vp->m_stop.lno = cs.cs_lno;
219	vp->m_stop.cno = cs.cs_cno;
220	if (ISMOTION(vp) && cs.cs_flags == 0)
221		--vp->m_stop.cno;
222
223	/*
224	 * Non-motion commands move to the end of the range.  Delete
225	 * and yank stay at the start, ignore others.
226	 */
227	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
228	return (0);
229}
230
231/*
232 * v_wordE -- [count]E
233 *	Move forward to the end of the bigword.
234 *
235 * PUBLIC: int v_wordE(SCR *, VICMD *);
236 */
237int
238v_wordE(SCR *sp, VICMD *vp)
239{
240	return (eword(sp, vp, BIGWORD));
241}
242
243/*
244 * v_worde -- [count]e
245 *	Move forward to the end of the word.
246 *
247 * PUBLIC: int v_worde(SCR *, VICMD *);
248 */
249int
250v_worde(SCR *sp, VICMD *vp)
251{
252	return (eword(sp, vp, LITTLEWORD));
253}
254
255/*
256 * eword --
257 *	Move forward to the end of the word.
258 */
259static int
260eword(SCR *sp, VICMD *vp, enum which type)
261{
262	enum { INWORD, NOTWORD } state;
263	VCS cs;
264	u_long cnt;
265
266	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
267	cs.cs_lno = vp->m_start.lno;
268	cs.cs_cno = vp->m_start.cno;
269	if (cs_init(sp, &cs))
270		return (1);
271
272	/*
273	 * !!!
274	 * If in whitespace, or the next character is whitespace, move past
275	 * it.  (This doesn't count as a word move.)  Stay at the character
276	 * past the current one, it sets word "state" for the 'e' command.
277	 */
278	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
279		if (cs_next(sp, &cs))
280			return (1);
281		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
282			goto start;
283	}
284	if (cs_fblank(sp, &cs))
285		return (1);
286
287	/*
288	 * Cyclically move to the next word -- this involves skipping
289	 * over word characters and then any trailing non-word characters.
290	 * Note, for the 'e' command, the definition of a word keeps
291	 * switching.
292	 */
293start:	if (type == BIGWORD)
294		while (cnt--) {
295			for (;;) {
296				if (cs_next(sp, &cs))
297					return (1);
298				if (cs.cs_flags == CS_EOF)
299					goto ret;
300				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
301					break;
302			}
303			/*
304			 * When we reach the start of the word after the last
305			 * word, we're done.  If we changed state, back up one
306			 * to the end of the previous word.
307			 */
308			if (cnt == 0) {
309				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
310					return (1);
311				break;
312			}
313
314			/* Eat whitespace characters. */
315			if (cs_fblank(sp, &cs))
316				return (1);
317			if (cs.cs_flags == CS_EOF)
318				goto ret;
319		}
320	else
321		while (cnt--) {
322			state = cs.cs_flags == 0 &&
323			    inword(cs.cs_ch) ? INWORD : NOTWORD;
324			for (;;) {
325				if (cs_next(sp, &cs))
326					return (1);
327				if (cs.cs_flags == CS_EOF)
328					goto ret;
329				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
330					break;
331				if (state == INWORD) {
332					if (!inword(cs.cs_ch))
333						break;
334				} else
335					if (inword(cs.cs_ch))
336						break;
337			}
338			/* See comment above. */
339			if (cnt == 0) {
340				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
341					return (1);
342				break;
343			}
344
345			/* Eat whitespace characters. */
346			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
347				if (cs_fblank(sp, &cs))
348					return (1);
349			if (cs.cs_flags == CS_EOF)
350				goto ret;
351		}
352
353	/*
354	 * If we didn't move, we must be at EOF.
355	 *
356	 * !!!
357	 * That's okay for motion commands, however.
358	 */
359ret:	if (!ISMOTION(vp) &&
360	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
361		v_eof(sp, &vp->m_start);
362		return (1);
363	}
364
365	/* Set the end of the range for motion commands. */
366	vp->m_stop.lno = cs.cs_lno;
367	vp->m_stop.cno = cs.cs_cno;
368
369	/*
370	 * Non-motion commands move to the end of the range.
371	 * Delete and yank stay at the start, ignore others.
372	 */
373	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
374	return (0);
375}
376
377/*
378 * v_WordB -- [count]B
379 *	Move backward a bigword at a time.
380 *
381 * PUBLIC: int v_wordB(SCR *, VICMD *);
382 */
383int
384v_wordB(SCR *sp, VICMD *vp)
385{
386	return (bword(sp, vp, BIGWORD));
387}
388
389/*
390 * v_wordb -- [count]b
391 *	Move backward a word at a time.
392 *
393 * PUBLIC: int v_wordb(SCR *, VICMD *);
394 */
395int
396v_wordb(SCR *sp, VICMD *vp)
397{
398	return (bword(sp, vp, LITTLEWORD));
399}
400
401/*
402 * bword --
403 *	Move backward by words.
404 */
405static int
406bword(SCR *sp, VICMD *vp, enum which type)
407{
408	enum { INWORD, NOTWORD } state;
409	VCS cs;
410	u_long cnt;
411
412	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
413	cs.cs_lno = vp->m_start.lno;
414	cs.cs_cno = vp->m_start.cno;
415	if (cs_init(sp, &cs))
416		return (1);
417
418	/*
419	 * !!!
420	 * If in whitespace, or the previous character is whitespace, move
421	 * past it.  (This doesn't count as a word move.)  Stay at the
422	 * character before the current one, it sets word "state" for the
423	 * 'b' command.
424	 */
425	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
426		if (cs_prev(sp, &cs))
427			return (1);
428		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
429			goto start;
430	}
431	if (cs_bblank(sp, &cs))
432		return (1);
433
434	/*
435	 * Cyclically move to the beginning of the previous word -- this
436	 * involves skipping over word characters and then any trailing
437	 * non-word characters.  Note, for the 'b' command, the definition
438	 * of a word keeps switching.
439	 */
440start:	if (type == BIGWORD)
441		while (cnt--) {
442			for (;;) {
443				if (cs_prev(sp, &cs))
444					return (1);
445				if (cs.cs_flags == CS_SOF)
446					goto ret;
447				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
448					break;
449			}
450			/*
451			 * When we reach the end of the word before the last
452			 * word, we're done.  If we changed state, move forward
453			 * one to the end of the next word.
454			 */
455			if (cnt == 0) {
456				if (cs.cs_flags == 0 && cs_next(sp, &cs))
457					return (1);
458				break;
459			}
460
461			/* Eat whitespace characters. */
462			if (cs_bblank(sp, &cs))
463				return (1);
464			if (cs.cs_flags == CS_SOF)
465				goto ret;
466		}
467	else
468		while (cnt--) {
469			state = cs.cs_flags == 0 &&
470			    inword(cs.cs_ch) ? INWORD : NOTWORD;
471			for (;;) {
472				if (cs_prev(sp, &cs))
473					return (1);
474				if (cs.cs_flags == CS_SOF)
475					goto ret;
476				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
477					break;
478				if (state == INWORD) {
479					if (!inword(cs.cs_ch))
480						break;
481				} else
482					if (inword(cs.cs_ch))
483						break;
484			}
485			/* See comment above. */
486			if (cnt == 0) {
487				if (cs.cs_flags == 0 && cs_next(sp, &cs))
488					return (1);
489				break;
490			}
491
492			/* Eat whitespace characters. */
493			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
494				if (cs_bblank(sp, &cs))
495					return (1);
496			if (cs.cs_flags == CS_SOF)
497				goto ret;
498		}
499
500	/* If we didn't move, we must be at SOF. */
501ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
502		v_sof(sp, &vp->m_start);
503		return (1);
504	}
505
506	/* Set the end of the range for motion commands. */
507	vp->m_stop.lno = cs.cs_lno;
508	vp->m_stop.cno = cs.cs_cno;
509
510	/*
511	 * All commands move to the end of the range.  Motion commands
512	 * adjust the starting point to the character before the current
513	 * one.
514	 *
515	 * !!!
516	 * The historic vi didn't get this right -- the `yb' command yanked
517	 * the right stuff and even updated the cursor value, but the cursor
518	 * was not actually updated on the screen.
519	 */
520	vp->m_final = vp->m_stop;
521	if (ISMOTION(vp))
522		--vp->m_start.cno;
523	return (0);
524}
525