1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#include <sys/types.h>
13#include <sys/queue.h>
14#include <sys/time.h>
15
16#include <bitstring.h>
17#include <ctype.h>
18#include <limits.h>
19#include <stdio.h>
20
21#include "../common/common.h"
22#include "vi.h"
23
24/*
25 * There are two types of "words".  Bigwords are easy -- groups of anything
26 * delimited by whitespace.  Normal words are trickier.  They are either a
27 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just skip over the whitespace and go to the
 * beginning or end of the word.  Otherwise, figure out if the next
 * character is in a different group.
31 * If it is, go to the beginning or end of that group, otherwise, go to the
32 * beginning or end of the current group.  The historic version of vi didn't
33 * get this right, so, for example, there were cases where "4e" was not the
34 * same as "eeee" -- in particular, single character words, and commands that
35 * began in whitespace were almost always handled incorrectly.  To get it right
36 * you have to resolve the cursor after each search so that the look-ahead to
37 * figure out what type of "word" the cursor is in will be correct.
38 *
39 * Empty lines, and lines that consist of only white-space characters count
40 * as a single word, and the beginning and end of the file counts as an
41 * infinite number of words.
42 *
43 * Movements associated with commands are different than movement commands.
44 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
45 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
46 * space is discarded from the change movement.  Another example is that,
47 * in the same string, a "cw" on any white space character replaces that
48 * single character, and nothing else.  Ain't nothin' in here that's easy.
49 *
50 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
51 * would treat groups of empty lines as individual words, i.e. the command
52 * would move the cursor to each new empty line.  The 'e' and 'E' commands
53 * would treat groups of empty lines as a single word, i.e. the first use
54 * would move past the group of lines.  The 'b' command would just beep at
55 * you, or, if you did it from the start of the line as part of a motion
56 * command, go absolutely nuts.  If the lines contained only white-space
57 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
58 * 'b', 'E' and 'e' commands would treat the group as a single word, and
59 * the 'B' and 'b' commands will treat the lines as individual words.  This
60 * implementation treats all of these cases as a single white-space word.
61 */
62
/* Selects which definition of "word" the shared helpers below apply. */
enum which {BIGWORD, LITTLEWORD};

/*
 * Shared workers: bword() backs the 'b'/'B' commands, eword() the 'e'/'E'
 * commands and fword() the 'w'/'W' commands.
 */
static int bword(SCR *, VICMD *, enum which);
static int eword(SCR *, VICMD *, enum which);
static int fword(SCR *, VICMD *, enum which);
68
69/*
70 * v_wordW -- [count]W
71 *	Move forward a bigword at a time.
72 *
73 * PUBLIC: int v_wordW(SCR *, VICMD *);
74 */
75int
76v_wordW(SCR *sp, VICMD *vp)
77{
78	return (fword(sp, vp, BIGWORD));
79}
80
81/*
82 * v_wordw -- [count]w
83 *	Move forward a word at a time.
84 *
85 * PUBLIC: int v_wordw(SCR *, VICMD *);
86 */
87int
88v_wordw(SCR *sp, VICMD *vp)
89{
90	return (fword(sp, vp, LITTLEWORD));
91}
92
93/*
94 * fword --
95 *	Move forward by words.
96 */
97static int
98fword(SCR *sp, VICMD *vp, enum which type)
99{
100	enum { INWORD, NOTWORD } state;
101	VCS cs;
102	u_long cnt;
103
104	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
105	cs.cs_lno = vp->m_start.lno;
106	cs.cs_cno = vp->m_start.cno;
107	if (cs_init(sp, &cs))
108		return (1);
109
110	/*
111	 * If in white-space:
112	 *	If the count is 1, and it's a change command, we're done.
113	 *	Else, move to the first non-white-space character, which
114	 *	counts as a single word move.  If it's a motion command,
115	 *	don't move off the end of the line.
116	 */
117	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK(cs.cs_ch))) {
118		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
119			if (ISCMD(vp->rkp, 'c'))
120				return (0);
121			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
122				if (cs_fspace(sp, &cs))
123					return (1);
124				goto ret;
125			}
126		}
127		if (cs_fblank(sp, &cs))
128			return (1);
129		--cnt;
130	}
131
132	/*
133	 * Cyclically move to the next word -- this involves skipping
134	 * over word characters and then any trailing non-word characters.
135	 * Note, for the 'w' command, the definition of a word keeps
136	 * switching.
137	 */
138	if (type == BIGWORD)
139		while (cnt--) {
140			for (;;) {
141				if (cs_next(sp, &cs))
142					return (1);
143				if (cs.cs_flags == CS_EOF)
144					goto ret;
145				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
146					break;
147			}
148			/*
149			 * If a motion command and we're at the end of the
150			 * last word, we're done.  Delete and yank eat any
151			 * trailing blanks, but we don't move off the end
152			 * of the line regardless.
153			 */
154			if (cnt == 0 && ISMOTION(vp)) {
155				if ((ISCMD(vp->rkp, 'd') ||
156				    ISCMD(vp->rkp, 'y')) &&
157				    cs_fspace(sp, &cs))
158					return (1);
159				break;
160			}
161
162			/* Eat whitespace characters. */
163			if (cs_fblank(sp, &cs))
164				return (1);
165			if (cs.cs_flags == CS_EOF)
166				goto ret;
167		}
168	else
169		while (cnt--) {
170			state = cs.cs_flags == 0 &&
171			    inword(cs.cs_ch) ? INWORD : NOTWORD;
172			for (;;) {
173				if (cs_next(sp, &cs))
174					return (1);
175				if (cs.cs_flags == CS_EOF)
176					goto ret;
177				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
178					break;
179				if (state == INWORD) {
180					if (!inword(cs.cs_ch))
181						break;
182				} else
183					if (inword(cs.cs_ch))
184						break;
185			}
186			/* See comment above. */
187			if (cnt == 0 && ISMOTION(vp)) {
188				if ((ISCMD(vp->rkp, 'd') ||
189				    ISCMD(vp->rkp, 'y')) &&
190				    cs_fspace(sp, &cs))
191					return (1);
192				break;
193			}
194
195			/* Eat whitespace characters. */
196			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
197				if (cs_fblank(sp, &cs))
198					return (1);
199			if (cs.cs_flags == CS_EOF)
200				goto ret;
201		}
202
203	/*
204	 * If we didn't move, we must be at EOF.
205	 *
206	 * !!!
207	 * That's okay for motion commands, however.
208	 */
209ret:	if (!ISMOTION(vp) &&
210	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
211		v_eof(sp, &vp->m_start);
212		return (1);
213	}
214
215	/* Adjust the end of the range for motion commands. */
216	vp->m_stop.lno = cs.cs_lno;
217	vp->m_stop.cno = cs.cs_cno;
218	if (ISMOTION(vp) && cs.cs_flags == 0)
219		--vp->m_stop.cno;
220
221	/*
222	 * Non-motion commands move to the end of the range.  Delete
223	 * and yank stay at the start, ignore others.
224	 */
225	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
226	return (0);
227}
228
229/*
230 * v_wordE -- [count]E
231 *	Move forward to the end of the bigword.
232 *
233 * PUBLIC: int v_wordE(SCR *, VICMD *);
234 */
235int
236v_wordE(SCR *sp, VICMD *vp)
237{
238	return (eword(sp, vp, BIGWORD));
239}
240
241/*
242 * v_worde -- [count]e
243 *	Move forward to the end of the word.
244 *
245 * PUBLIC: int v_worde(SCR *, VICMD *);
246 */
247int
248v_worde(SCR *sp, VICMD *vp)
249{
250	return (eword(sp, vp, LITTLEWORD));
251}
252
253/*
254 * eword --
255 *	Move forward to the end of the word.
256 */
257static int
258eword(SCR *sp, VICMD *vp, enum which type)
259{
260	enum { INWORD, NOTWORD } state;
261	VCS cs;
262	u_long cnt;
263
264	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
265	cs.cs_lno = vp->m_start.lno;
266	cs.cs_cno = vp->m_start.cno;
267	if (cs_init(sp, &cs))
268		return (1);
269
270	/*
271	 * !!!
272	 * If in whitespace, or the next character is whitespace, move past
273	 * it.  (This doesn't count as a word move.)  Stay at the character
274	 * past the current one, it sets word "state" for the 'e' command.
275	 */
276	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
277		if (cs_next(sp, &cs))
278			return (1);
279		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
280			goto start;
281	}
282	if (cs_fblank(sp, &cs))
283		return (1);
284
285	/*
286	 * Cyclically move to the next word -- this involves skipping
287	 * over word characters and then any trailing non-word characters.
288	 * Note, for the 'e' command, the definition of a word keeps
289	 * switching.
290	 */
291start:	if (type == BIGWORD)
292		while (cnt--) {
293			for (;;) {
294				if (cs_next(sp, &cs))
295					return (1);
296				if (cs.cs_flags == CS_EOF)
297					goto ret;
298				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
299					break;
300			}
301			/*
302			 * When we reach the start of the word after the last
303			 * word, we're done.  If we changed state, back up one
304			 * to the end of the previous word.
305			 */
306			if (cnt == 0) {
307				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
308					return (1);
309				break;
310			}
311
312			/* Eat whitespace characters. */
313			if (cs_fblank(sp, &cs))
314				return (1);
315			if (cs.cs_flags == CS_EOF)
316				goto ret;
317		}
318	else
319		while (cnt--) {
320			state = cs.cs_flags == 0 &&
321			    inword(cs.cs_ch) ? INWORD : NOTWORD;
322			for (;;) {
323				if (cs_next(sp, &cs))
324					return (1);
325				if (cs.cs_flags == CS_EOF)
326					goto ret;
327				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
328					break;
329				if (state == INWORD) {
330					if (!inword(cs.cs_ch))
331						break;
332				} else
333					if (inword(cs.cs_ch))
334						break;
335			}
336			/* See comment above. */
337			if (cnt == 0) {
338				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
339					return (1);
340				break;
341			}
342
343			/* Eat whitespace characters. */
344			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
345				if (cs_fblank(sp, &cs))
346					return (1);
347			if (cs.cs_flags == CS_EOF)
348				goto ret;
349		}
350
351	/*
352	 * If we didn't move, we must be at EOF.
353	 *
354	 * !!!
355	 * That's okay for motion commands, however.
356	 */
357ret:	if (!ISMOTION(vp) &&
358	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
359		v_eof(sp, &vp->m_start);
360		return (1);
361	}
362
363	/* Set the end of the range for motion commands. */
364	vp->m_stop.lno = cs.cs_lno;
365	vp->m_stop.cno = cs.cs_cno;
366
367	/*
368	 * Non-motion commands move to the end of the range.
369	 * Delete and yank stay at the start, ignore others.
370	 */
371	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
372	return (0);
373}
374
375/*
376 * v_WordB -- [count]B
377 *	Move backward a bigword at a time.
378 *
379 * PUBLIC: int v_wordB(SCR *, VICMD *);
380 */
381int
382v_wordB(SCR *sp, VICMD *vp)
383{
384	return (bword(sp, vp, BIGWORD));
385}
386
387/*
388 * v_wordb -- [count]b
389 *	Move backward a word at a time.
390 *
391 * PUBLIC: int v_wordb(SCR *, VICMD *);
392 */
393int
394v_wordb(SCR *sp, VICMD *vp)
395{
396	return (bword(sp, vp, LITTLEWORD));
397}
398
399/*
400 * bword --
401 *	Move backward by words.
402 */
403static int
404bword(SCR *sp, VICMD *vp, enum which type)
405{
406	enum { INWORD, NOTWORD } state;
407	VCS cs;
408	u_long cnt;
409
410	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
411	cs.cs_lno = vp->m_start.lno;
412	cs.cs_cno = vp->m_start.cno;
413	if (cs_init(sp, &cs))
414		return (1);
415
416	/*
417	 * !!!
418	 * If in whitespace, or the previous character is whitespace, move
419	 * past it.  (This doesn't count as a word move.)  Stay at the
420	 * character before the current one, it sets word "state" for the
421	 * 'b' command.
422	 */
423	if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch)) {
424		if (cs_prev(sp, &cs))
425			return (1);
426		if (cs.cs_flags == 0 && !ISBLANK(cs.cs_ch))
427			goto start;
428	}
429	if (cs_bblank(sp, &cs))
430		return (1);
431
432	/*
433	 * Cyclically move to the beginning of the previous word -- this
434	 * involves skipping over word characters and then any trailing
435	 * non-word characters.  Note, for the 'b' command, the definition
436	 * of a word keeps switching.
437	 */
438start:	if (type == BIGWORD)
439		while (cnt--) {
440			for (;;) {
441				if (cs_prev(sp, &cs))
442					return (1);
443				if (cs.cs_flags == CS_SOF)
444					goto ret;
445				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
446					break;
447			}
448			/*
449			 * When we reach the end of the word before the last
450			 * word, we're done.  If we changed state, move forward
451			 * one to the end of the next word.
452			 */
453			if (cnt == 0) {
454				if (cs.cs_flags == 0 && cs_next(sp, &cs))
455					return (1);
456				break;
457			}
458
459			/* Eat whitespace characters. */
460			if (cs_bblank(sp, &cs))
461				return (1);
462			if (cs.cs_flags == CS_SOF)
463				goto ret;
464		}
465	else
466		while (cnt--) {
467			state = cs.cs_flags == 0 &&
468			    inword(cs.cs_ch) ? INWORD : NOTWORD;
469			for (;;) {
470				if (cs_prev(sp, &cs))
471					return (1);
472				if (cs.cs_flags == CS_SOF)
473					goto ret;
474				if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
475					break;
476				if (state == INWORD) {
477					if (!inword(cs.cs_ch))
478						break;
479				} else
480					if (inword(cs.cs_ch))
481						break;
482			}
483			/* See comment above. */
484			if (cnt == 0) {
485				if (cs.cs_flags == 0 && cs_next(sp, &cs))
486					return (1);
487				break;
488			}
489
490			/* Eat whitespace characters. */
491			if (cs.cs_flags != 0 || ISBLANK(cs.cs_ch))
492				if (cs_bblank(sp, &cs))
493					return (1);
494			if (cs.cs_flags == CS_SOF)
495				goto ret;
496		}
497
498	/* If we didn't move, we must be at SOF. */
499ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
500		v_sof(sp, &vp->m_start);
501		return (1);
502	}
503
504	/* Set the end of the range for motion commands. */
505	vp->m_stop.lno = cs.cs_lno;
506	vp->m_stop.cno = cs.cs_cno;
507
508	/*
509	 * All commands move to the end of the range.  Motion commands
510	 * adjust the starting point to the character before the current
511	 * one.
512	 *
513	 * !!!
514	 * The historic vi didn't get this right -- the `yb' command yanked
515	 * the right stuff and even updated the cursor value, but the cursor
516	 * was not actually updated on the screen.
517	 */
518	vp->m_final = vp->m_stop;
519	if (ISMOTION(vp))
520		--vp->m_start.cno;
521	return (0);
522}
523