v_word.c revision 19304
1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "@(#)v_word.c	10.5 (Berkeley) 3/6/96";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <limits.h>
23#include <stdio.h>
24
25#include "../common/common.h"
26#include "vi.h"
27
/*
 * There are two types of "words".  Bigwords are easy -- groups of anything
 * delimited by whitespace.  Normal words are trickier.  They are either a
 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just remove the whitespace and go to the beginning
 * or end of the word.  Otherwise, figure out if the next character is in a
 * different group.  If it is, go to the beginning or end of that group,
 * otherwise, go to the beginning or end of the current group.  The historic
 * version of vi didn't get this right, so, for example, there were cases
 * where "4e" was not the same as "eeee" -- in particular, single character
 * words, and commands that began in whitespace were almost always handled
 * incorrectly.  To get it right you have to resolve the cursor after each
 * search so that the look-ahead to figure out what type of "word" the cursor
 * is in will be correct.
 *
 * Empty lines, and lines that consist of only white-space characters count
 * as a single word, and the beginning and end of the file counts as an
 * infinite number of words.
 *
 * Movements associated with commands are different than movement commands.
 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
 * space is discarded from the change movement.  Another example is that,
 * in the same string, a "cw" on any white space character replaces that
 * single character, and nothing else.  Ain't nothin' in here that's easy.
 *
 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
 * would treat groups of empty lines as individual words, i.e. the command
 * would move the cursor to each new empty line.  The 'e' and 'E' commands
 * would treat groups of empty lines as a single word, i.e. the first use
 * would move past the group of lines.  The 'b' command would just beep at
 * you, or, if you did it from the start of the line as part of a motion
 * command, go absolutely nuts.  If the lines contained only white-space
 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
 * 'b', 'E' and 'e' commands would treat the group as a single word, and
 * the 'B' and 'b' commands will treat the lines as individual words.  This
 * implementation treats all of these cases as a single white-space word.
 */
66
67enum which {BIGWORD, LITTLEWORD};
68
69static int bword __P((SCR *, VICMD *, enum which));
70static int eword __P((SCR *, VICMD *, enum which));
71static int fword __P((SCR *, VICMD *, enum which));
72
73/*
74 * v_wordW -- [count]W
75 *	Move forward a bigword at a time.
76 *
77 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
78 */
79int
80v_wordW(sp, vp)
81	SCR *sp;
82	VICMD *vp;
83{
84	return (fword(sp, vp, BIGWORD));
85}
86
87/*
88 * v_wordw -- [count]w
89 *	Move forward a word at a time.
90 *
91 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
92 */
93int
94v_wordw(sp, vp)
95	SCR *sp;
96	VICMD *vp;
97{
98	return (fword(sp, vp, LITTLEWORD));
99}
100
101/*
102 * fword --
103 *	Move forward by words.
104 */
105static int
106fword(sp, vp, type)
107	SCR *sp;
108	VICMD *vp;
109	enum which type;
110{
111	enum { INWORD, NOTWORD } state;
112	VCS cs;
113	u_long cnt;
114
115	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
116	cs.cs_lno = vp->m_start.lno;
117	cs.cs_cno = vp->m_start.cno;
118	if (cs_init(sp, &cs))
119		return (1);
120
121	/*
122	 * If in white-space:
123	 *	If the count is 1, and it's a change command, we're done.
124	 *	Else, move to the first non-white-space character, which
125	 *	counts as a single word move.  If it's a motion command,
126	 *	don't move off the end of the line.
127	 */
128	if (cs.cs_flags == CS_EMP || cs.cs_flags == 0 && isblank(cs.cs_ch)) {
129		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
130			if (ISCMD(vp->rkp, 'c'))
131				return (0);
132			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
133				if (cs_fspace(sp, &cs))
134					return (1);
135				goto ret;
136			}
137		}
138		if (cs_fblank(sp, &cs))
139			return (1);
140		--cnt;
141	}
142
143	/*
144	 * Cyclically move to the next word -- this involves skipping
145	 * over word characters and then any trailing non-word characters.
146	 * Note, for the 'w' command, the definition of a word keeps
147	 * switching.
148	 */
149	if (type == BIGWORD)
150		while (cnt--) {
151			for (;;) {
152				if (cs_next(sp, &cs))
153					return (1);
154				if (cs.cs_flags == CS_EOF)
155					goto ret;
156				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
157					break;
158			}
159			/*
160			 * If a motion command and we're at the end of the
161			 * last word, we're done.  Delete and yank eat any
162			 * trailing blanks, but we don't move off the end
163			 * of the line regardless.
164			 */
165			if (cnt == 0 && ISMOTION(vp)) {
166				if ((ISCMD(vp->rkp, 'd') ||
167				    ISCMD(vp->rkp, 'y')) &&
168				    cs_fspace(sp, &cs))
169					return (1);
170				break;
171			}
172
173			/* Eat whitespace characters. */
174			if (cs_fblank(sp, &cs))
175				return (1);
176			if (cs.cs_flags == CS_EOF)
177				goto ret;
178		}
179	else
180		while (cnt--) {
181			state = cs.cs_flags == 0 &&
182			    inword(cs.cs_ch) ? INWORD : NOTWORD;
183			for (;;) {
184				if (cs_next(sp, &cs))
185					return (1);
186				if (cs.cs_flags == CS_EOF)
187					goto ret;
188				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
189					break;
190				if (state == INWORD) {
191					if (!inword(cs.cs_ch))
192						break;
193				} else
194					if (inword(cs.cs_ch))
195						break;
196			}
197			/* See comment above. */
198			if (cnt == 0 && ISMOTION(vp)) {
199				if ((ISCMD(vp->rkp, 'd') ||
200				    ISCMD(vp->rkp, 'y')) &&
201				    cs_fspace(sp, &cs))
202					return (1);
203				break;
204			}
205
206			/* Eat whitespace characters. */
207			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
208				if (cs_fblank(sp, &cs))
209					return (1);
210			if (cs.cs_flags == CS_EOF)
211				goto ret;
212		}
213
214	/*
215	 * If we didn't move, we must be at EOF.
216	 *
217	 * !!!
218	 * That's okay for motion commands, however.
219	 */
220ret:	if (!ISMOTION(vp) &&
221	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
222		v_eof(sp, &vp->m_start);
223		return (1);
224	}
225
226	/* Adjust the end of the range for motion commands. */
227	vp->m_stop.lno = cs.cs_lno;
228	vp->m_stop.cno = cs.cs_cno;
229	if (ISMOTION(vp) && cs.cs_flags == 0)
230		--vp->m_stop.cno;
231
232	/*
233	 * Non-motion commands move to the end of the range.  Delete
234	 * and yank stay at the start, ignore others.
235	 */
236	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
237	return (0);
238}
239
240/*
241 * v_wordE -- [count]E
242 *	Move forward to the end of the bigword.
243 *
244 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
245 */
246int
247v_wordE(sp, vp)
248	SCR *sp;
249	VICMD *vp;
250{
251	return (eword(sp, vp, BIGWORD));
252}
253
254/*
255 * v_worde -- [count]e
256 *	Move forward to the end of the word.
257 *
258 * PUBLIC: int v_worde __P((SCR *, VICMD *));
259 */
260int
261v_worde(sp, vp)
262	SCR *sp;
263	VICMD *vp;
264{
265	return (eword(sp, vp, LITTLEWORD));
266}
267
268/*
269 * eword --
270 *	Move forward to the end of the word.
271 */
272static int
273eword(sp, vp, type)
274	SCR *sp;
275	VICMD *vp;
276	enum which type;
277{
278	enum { INWORD, NOTWORD } state;
279	VCS cs;
280	u_long cnt;
281
282	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
283	cs.cs_lno = vp->m_start.lno;
284	cs.cs_cno = vp->m_start.cno;
285	if (cs_init(sp, &cs))
286		return (1);
287
288	/*
289	 * !!!
290	 * If in whitespace, or the next character is whitespace, move past
291	 * it.  (This doesn't count as a word move.)  Stay at the character
292	 * past the current one, it sets word "state" for the 'e' command.
293	 */
294	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
295		if (cs_next(sp, &cs))
296			return (1);
297		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
298			goto start;
299	}
300	if (cs_fblank(sp, &cs))
301		return (1);
302
303	/*
304	 * Cyclically move to the next word -- this involves skipping
305	 * over word characters and then any trailing non-word characters.
306	 * Note, for the 'e' command, the definition of a word keeps
307	 * switching.
308	 */
309start:	if (type == BIGWORD)
310		while (cnt--) {
311			for (;;) {
312				if (cs_next(sp, &cs))
313					return (1);
314				if (cs.cs_flags == CS_EOF)
315					goto ret;
316				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
317					break;
318			}
319			/*
320			 * When we reach the start of the word after the last
321			 * word, we're done.  If we changed state, back up one
322			 * to the end of the previous word.
323			 */
324			if (cnt == 0) {
325				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
326					return (1);
327				break;
328			}
329
330			/* Eat whitespace characters. */
331			if (cs_fblank(sp, &cs))
332				return (1);
333			if (cs.cs_flags == CS_EOF)
334				goto ret;
335		}
336	else
337		while (cnt--) {
338			state = cs.cs_flags == 0 &&
339			    inword(cs.cs_ch) ? INWORD : NOTWORD;
340			for (;;) {
341				if (cs_next(sp, &cs))
342					return (1);
343				if (cs.cs_flags == CS_EOF)
344					goto ret;
345				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
346					break;
347				if (state == INWORD) {
348					if (!inword(cs.cs_ch))
349						break;
350				} else
351					if (inword(cs.cs_ch))
352						break;
353			}
354			/* See comment above. */
355			if (cnt == 0) {
356				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
357					return (1);
358				break;
359			}
360
361			/* Eat whitespace characters. */
362			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
363				if (cs_fblank(sp, &cs))
364					return (1);
365			if (cs.cs_flags == CS_EOF)
366				goto ret;
367		}
368
369	/*
370	 * If we didn't move, we must be at EOF.
371	 *
372	 * !!!
373	 * That's okay for motion commands, however.
374	 */
375ret:	if (!ISMOTION(vp) &&
376	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
377		v_eof(sp, &vp->m_start);
378		return (1);
379	}
380
381	/* Set the end of the range for motion commands. */
382	vp->m_stop.lno = cs.cs_lno;
383	vp->m_stop.cno = cs.cs_cno;
384
385	/*
386	 * Non-motion commands move to the end of the range.
387	 * Delete and yank stay at the start, ignore others.
388	 */
389	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
390	return (0);
391}
392
393/*
394 * v_WordB -- [count]B
395 *	Move backward a bigword at a time.
396 *
397 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
398 */
399int
400v_wordB(sp, vp)
401	SCR *sp;
402	VICMD *vp;
403{
404	return (bword(sp, vp, BIGWORD));
405}
406
407/*
408 * v_wordb -- [count]b
409 *	Move backward a word at a time.
410 *
411 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
412 */
413int
414v_wordb(sp, vp)
415	SCR *sp;
416	VICMD *vp;
417{
418	return (bword(sp, vp, LITTLEWORD));
419}
420
421/*
422 * bword --
423 *	Move backward by words.
424 */
425static int
426bword(sp, vp, type)
427	SCR *sp;
428	VICMD *vp;
429	enum which type;
430{
431	enum { INWORD, NOTWORD } state;
432	VCS cs;
433	u_long cnt;
434
435	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
436	cs.cs_lno = vp->m_start.lno;
437	cs.cs_cno = vp->m_start.cno;
438	if (cs_init(sp, &cs))
439		return (1);
440
441	/*
442	 * !!!
443	 * If in whitespace, or the previous character is whitespace, move
444	 * past it.  (This doesn't count as a word move.)  Stay at the
445	 * character before the current one, it sets word "state" for the
446	 * 'b' command.
447	 */
448	if (cs.cs_flags == 0 && !isblank(cs.cs_ch)) {
449		if (cs_prev(sp, &cs))
450			return (1);
451		if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
452			goto start;
453	}
454	if (cs_bblank(sp, &cs))
455		return (1);
456
457	/*
458	 * Cyclically move to the beginning of the previous word -- this
459	 * involves skipping over word characters and then any trailing
460	 * non-word characters.  Note, for the 'b' command, the definition
461	 * of a word keeps switching.
462	 */
463start:	if (type == BIGWORD)
464		while (cnt--) {
465			for (;;) {
466				if (cs_prev(sp, &cs))
467					return (1);
468				if (cs.cs_flags == CS_SOF)
469					goto ret;
470				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
471					break;
472			}
473			/*
474			 * When we reach the end of the word before the last
475			 * word, we're done.  If we changed state, move forward
476			 * one to the end of the next word.
477			 */
478			if (cnt == 0) {
479				if (cs.cs_flags == 0 && cs_next(sp, &cs))
480					return (1);
481				break;
482			}
483
484			/* Eat whitespace characters. */
485			if (cs_bblank(sp, &cs))
486				return (1);
487			if (cs.cs_flags == CS_SOF)
488				goto ret;
489		}
490	else
491		while (cnt--) {
492			state = cs.cs_flags == 0 &&
493			    inword(cs.cs_ch) ? INWORD : NOTWORD;
494			for (;;) {
495				if (cs_prev(sp, &cs))
496					return (1);
497				if (cs.cs_flags == CS_SOF)
498					goto ret;
499				if (cs.cs_flags != 0 || isblank(cs.cs_ch))
500					break;
501				if (state == INWORD) {
502					if (!inword(cs.cs_ch))
503						break;
504				} else
505					if (inword(cs.cs_ch))
506						break;
507			}
508			/* See comment above. */
509			if (cnt == 0) {
510				if (cs.cs_flags == 0 && cs_next(sp, &cs))
511					return (1);
512				break;
513			}
514
515			/* Eat whitespace characters. */
516			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
517				if (cs_bblank(sp, &cs))
518					return (1);
519			if (cs.cs_flags == CS_SOF)
520				goto ret;
521		}
522
523	/* If we didn't move, we must be at SOF. */
524ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
525		v_sof(sp, &vp->m_start);
526		return (1);
527	}
528
529	/* Set the end of the range for motion commands. */
530	vp->m_stop.lno = cs.cs_lno;
531	vp->m_stop.cno = cs.cs_cno;
532
533	/*
534	 * All commands move to the end of the range.  Motion commands
535	 * adjust the starting point to the character before the current
536	 * one.
537	 *
538	 * !!!
539	 * The historic vi didn't get this right -- the `yb' command yanked
540	 * the right stuff and even updated the cursor value, but the cursor
541	 * was not actually updated on the screen.
542	 */
543	vp->m_final = vp->m_stop;
544	if (ISMOTION(vp))
545		--vp->m_start.cno;
546	return (0);
547}
548