1/*	$NetBSD: v_word.c,v 1.2 2008/12/05 22:51:43 christos Exp $ */
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 *	Keith Bostic.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#ifndef lint
15static const char sccsid[] = "Id: v_word.c,v 10.6 2001/06/25 15:19:36 skimo Exp (Berkeley) Date: 2001/06/25 15:19:36";
16#endif /* not lint */
17
18#include <sys/types.h>
19#include <sys/queue.h>
20#include <sys/time.h>
21
22#include <bitstring.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26
27#include "../common/common.h"
28#include "vi.h"
29
30/*
31 * There are two types of "words".  Bigwords are easy -- groups of anything
32 * delimited by whitespace.  Normal words are trickier.  They are either a
33 * group of characters, numbers and underscores, or a group of anything but,
 * delimited by whitespace.  When searching for a word, if you're in
 * whitespace, it's easy, just remove the whitespace and go to the beginning
 * or end of the word.  Otherwise, figure out if the next character is in a
 * different group.
37 * If it is, go to the beginning or end of that group, otherwise, go to the
38 * beginning or end of the current group.  The historic version of vi didn't
39 * get this right, so, for example, there were cases where "4e" was not the
40 * same as "eeee" -- in particular, single character words, and commands that
41 * began in whitespace were almost always handled incorrectly.  To get it right
42 * you have to resolve the cursor after each search so that the look-ahead to
43 * figure out what type of "word" the cursor is in will be correct.
44 *
45 * Empty lines, and lines that consist of only white-space characters count
46 * as a single word, and the beginning and end of the file counts as an
47 * infinite number of words.
48 *
49 * Movements associated with commands are different than movement commands.
50 * For example, in "abc  def", with the cursor on the 'a', "cw" is from
51 * 'a' to 'c', while "w" is from 'a' to 'd'.  In general, trailing white
52 * space is discarded from the change movement.  Another example is that,
53 * in the same string, a "cw" on any white space character replaces that
54 * single character, and nothing else.  Ain't nothin' in here that's easy.
55 *
56 * One historic note -- in the original vi, the 'w', 'W' and 'B' commands
57 * would treat groups of empty lines as individual words, i.e. the command
58 * would move the cursor to each new empty line.  The 'e' and 'E' commands
59 * would treat groups of empty lines as a single word, i.e. the first use
60 * would move past the group of lines.  The 'b' command would just beep at
61 * you, or, if you did it from the start of the line as part of a motion
62 * command, go absolutely nuts.  If the lines contained only white-space
63 * characters, the 'w' and 'W' commands would just beep at you, and the 'B',
64 * 'b', 'E' and 'e' commands would treat the group as a single word, and
65 * the 'B' and 'b' commands will treat the lines as individual words.  This
66 * implementation treats all of these cases as a single white-space word.
67 */
68
69enum which {BIGWORD, LITTLEWORD};
70
71static int bword __P((SCR *, VICMD *, enum which));
72static int eword __P((SCR *, VICMD *, enum which));
73static int fword __P((SCR *, VICMD *, enum which));
74
75/*
76 * v_wordW -- [count]W
77 *	Move forward a bigword at a time.
78 *
79 * PUBLIC: int v_wordW __P((SCR *, VICMD *));
80 */
81int
82v_wordW(SCR *sp, VICMD *vp)
83{
84	return (fword(sp, vp, BIGWORD));
85}
86
87/*
88 * v_wordw -- [count]w
89 *	Move forward a word at a time.
90 *
91 * PUBLIC: int v_wordw __P((SCR *, VICMD *));
92 */
93int
94v_wordw(SCR *sp, VICMD *vp)
95{
96	return (fword(sp, vp, LITTLEWORD));
97}
98
99/*
100 * fword --
101 *	Move forward by words.
102 */
103static int
104fword(SCR *sp, VICMD *vp, enum which type)
105{
106	enum { INWORD, NOTWORD } state;
107	VCS cs;
108	u_long cnt;
109
110	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
111	cs.cs_lno = vp->m_start.lno;
112	cs.cs_cno = vp->m_start.cno;
113	if (cs_init(sp, &cs))
114		return (1);
115
116	/*
117	 * If in white-space:
118	 *	If the count is 1, and it's a change command, we're done.
119	 *	Else, move to the first non-white-space character, which
120	 *	counts as a single word move.  If it's a motion command,
121	 *	don't move off the end of the line.
122	 */
123	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
124		if (ISMOTION(vp) && cs.cs_flags != CS_EMP && cnt == 1) {
125			if (ISCMD(vp->rkp, 'c'))
126				return (0);
127			if (ISCMD(vp->rkp, 'd') || ISCMD(vp->rkp, 'y')) {
128				if (cs_fspace(sp, &cs))
129					return (1);
130				goto ret;
131			}
132		}
133		if (cs_fblank(sp, &cs))
134			return (1);
135		--cnt;
136	}
137
138	/*
139	 * Cyclically move to the next word -- this involves skipping
140	 * over word characters and then any trailing non-word characters.
141	 * Note, for the 'w' command, the definition of a word keeps
142	 * switching.
143	 */
144	if (type == BIGWORD)
145		while (cnt--) {
146			for (;;) {
147				if (cs_next(sp, &cs))
148					return (1);
149				if (cs.cs_flags == CS_EOF)
150					goto ret;
151				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
152					break;
153			}
154			/*
155			 * If a motion command and we're at the end of the
156			 * last word, we're done.  Delete and yank eat any
157			 * trailing blanks, but we don't move off the end
158			 * of the line regardless.
159			 */
160			if (cnt == 0 && ISMOTION(vp)) {
161				if ((ISCMD(vp->rkp, 'd') ||
162				    ISCMD(vp->rkp, 'y')) &&
163				    cs_fspace(sp, &cs))
164					return (1);
165				break;
166			}
167
168			/* Eat whitespace characters. */
169			if (cs_fblank(sp, &cs))
170				return (1);
171			if (cs.cs_flags == CS_EOF)
172				goto ret;
173		}
174	else
175		while (cnt--) {
176			state = cs.cs_flags == 0 &&
177			    inword(cs.cs_ch) ? INWORD : NOTWORD;
178			for (;;) {
179				if (cs_next(sp, &cs))
180					return (1);
181				if (cs.cs_flags == CS_EOF)
182					goto ret;
183				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
184					break;
185				if (state == INWORD) {
186					if (!inword(cs.cs_ch))
187						break;
188				} else
189					if (inword(cs.cs_ch))
190						break;
191			}
192			/* See comment above. */
193			if (cnt == 0 && ISMOTION(vp)) {
194				if ((ISCMD(vp->rkp, 'd') ||
195				    ISCMD(vp->rkp, 'y')) &&
196				    cs_fspace(sp, &cs))
197					return (1);
198				break;
199			}
200
201			/* Eat whitespace characters. */
202			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
203				if (cs_fblank(sp, &cs))
204					return (1);
205			if (cs.cs_flags == CS_EOF)
206				goto ret;
207		}
208
209	/*
210	 * If we didn't move, we must be at EOF.
211	 *
212	 * !!!
213	 * That's okay for motion commands, however.
214	 */
215ret:	if (!ISMOTION(vp) &&
216	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
217		v_eof(sp, &vp->m_start);
218		return (1);
219	}
220
221	/* Adjust the end of the range for motion commands. */
222	vp->m_stop.lno = cs.cs_lno;
223	vp->m_stop.cno = cs.cs_cno;
224	if (ISMOTION(vp) && cs.cs_flags == 0)
225		--vp->m_stop.cno;
226
227	/*
228	 * Non-motion commands move to the end of the range.  Delete
229	 * and yank stay at the start, ignore others.
230	 */
231	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
232	return (0);
233}
234
235/*
236 * v_wordE -- [count]E
237 *	Move forward to the end of the bigword.
238 *
239 * PUBLIC: int v_wordE __P((SCR *, VICMD *));
240 */
241int
242v_wordE(SCR *sp, VICMD *vp)
243{
244	return (eword(sp, vp, BIGWORD));
245}
246
247/*
248 * v_worde -- [count]e
249 *	Move forward to the end of the word.
250 *
251 * PUBLIC: int v_worde __P((SCR *, VICMD *));
252 */
253int
254v_worde(SCR *sp, VICMD *vp)
255{
256	return (eword(sp, vp, LITTLEWORD));
257}
258
259/*
260 * eword --
261 *	Move forward to the end of the word.
262 */
263static int
264eword(SCR *sp, VICMD *vp, enum which type)
265{
266	enum { INWORD, NOTWORD } state;
267	VCS cs;
268	u_long cnt;
269
270	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
271	cs.cs_lno = vp->m_start.lno;
272	cs.cs_cno = vp->m_start.cno;
273	if (cs_init(sp, &cs))
274		return (1);
275
276	/*
277	 * !!!
278	 * If in whitespace, or the next character is whitespace, move past
279	 * it.  (This doesn't count as a word move.)  Stay at the character
280	 * past the current one, it sets word "state" for the 'e' command.
281	 */
282	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
283		if (cs_next(sp, &cs))
284			return (1);
285		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
286			goto start;
287	}
288	if (cs_fblank(sp, &cs))
289		return (1);
290
291	/*
292	 * Cyclically move to the next word -- this involves skipping
293	 * over word characters and then any trailing non-word characters.
294	 * Note, for the 'e' command, the definition of a word keeps
295	 * switching.
296	 */
297start:	if (type == BIGWORD)
298		while (cnt--) {
299			for (;;) {
300				if (cs_next(sp, &cs))
301					return (1);
302				if (cs.cs_flags == CS_EOF)
303					goto ret;
304				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
305					break;
306			}
307			/*
308			 * When we reach the start of the word after the last
309			 * word, we're done.  If we changed state, back up one
310			 * to the end of the previous word.
311			 */
312			if (cnt == 0) {
313				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
314					return (1);
315				break;
316			}
317
318			/* Eat whitespace characters. */
319			if (cs_fblank(sp, &cs))
320				return (1);
321			if (cs.cs_flags == CS_EOF)
322				goto ret;
323		}
324	else
325		while (cnt--) {
326			state = cs.cs_flags == 0 &&
327			    inword(cs.cs_ch) ? INWORD : NOTWORD;
328			for (;;) {
329				if (cs_next(sp, &cs))
330					return (1);
331				if (cs.cs_flags == CS_EOF)
332					goto ret;
333				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
334					break;
335				if (state == INWORD) {
336					if (!inword(cs.cs_ch))
337						break;
338				} else
339					if (inword(cs.cs_ch))
340						break;
341			}
342			/* See comment above. */
343			if (cnt == 0) {
344				if (cs.cs_flags == 0 && cs_prev(sp, &cs))
345					return (1);
346				break;
347			}
348
349			/* Eat whitespace characters. */
350			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
351				if (cs_fblank(sp, &cs))
352					return (1);
353			if (cs.cs_flags == CS_EOF)
354				goto ret;
355		}
356
357	/*
358	 * If we didn't move, we must be at EOF.
359	 *
360	 * !!!
361	 * That's okay for motion commands, however.
362	 */
363ret:	if (!ISMOTION(vp) &&
364	    cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
365		v_eof(sp, &vp->m_start);
366		return (1);
367	}
368
369	/* Set the end of the range for motion commands. */
370	vp->m_stop.lno = cs.cs_lno;
371	vp->m_stop.cno = cs.cs_cno;
372
373	/*
374	 * Non-motion commands move to the end of the range.
375	 * Delete and yank stay at the start, ignore others.
376	 */
377	vp->m_final = ISMOTION(vp) ? vp->m_start : vp->m_stop;
378	return (0);
379}
380
381/*
382 * v_WordB -- [count]B
383 *	Move backward a bigword at a time.
384 *
385 * PUBLIC: int v_wordB __P((SCR *, VICMD *));
386 */
387int
388v_wordB(SCR *sp, VICMD *vp)
389{
390	return (bword(sp, vp, BIGWORD));
391}
392
393/*
394 * v_wordb -- [count]b
395 *	Move backward a word at a time.
396 *
397 * PUBLIC: int v_wordb __P((SCR *, VICMD *));
398 */
399int
400v_wordb(SCR *sp, VICMD *vp)
401{
402	return (bword(sp, vp, LITTLEWORD));
403}
404
405/*
406 * bword --
407 *	Move backward by words.
408 */
409static int
410bword(SCR *sp, VICMD *vp, enum which type)
411{
412	enum { INWORD, NOTWORD } state;
413	VCS cs;
414	u_long cnt;
415
416	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
417	cs.cs_lno = vp->m_start.lno;
418	cs.cs_cno = vp->m_start.cno;
419	if (cs_init(sp, &cs))
420		return (1);
421
422	/*
423	 * !!!
424	 * If in whitespace, or the previous character is whitespace, move
425	 * past it.  (This doesn't count as a word move.)  Stay at the
426	 * character before the current one, it sets word "state" for the
427	 * 'b' command.
428	 */
429	if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch)) {
430		if (cs_prev(sp, &cs))
431			return (1);
432		if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
433			goto start;
434	}
435	if (cs_bblank(sp, &cs))
436		return (1);
437
438	/*
439	 * Cyclically move to the beginning of the previous word -- this
440	 * involves skipping over word characters and then any trailing
441	 * non-word characters.  Note, for the 'b' command, the definition
442	 * of a word keeps switching.
443	 */
444start:	if (type == BIGWORD)
445		while (cnt--) {
446			for (;;) {
447				if (cs_prev(sp, &cs))
448					return (1);
449				if (cs.cs_flags == CS_SOF)
450					goto ret;
451				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
452					break;
453			}
454			/*
455			 * When we reach the end of the word before the last
456			 * word, we're done.  If we changed state, move forward
457			 * one to the end of the next word.
458			 */
459			if (cnt == 0) {
460				if (cs.cs_flags == 0 && cs_next(sp, &cs))
461					return (1);
462				break;
463			}
464
465			/* Eat whitespace characters. */
466			if (cs_bblank(sp, &cs))
467				return (1);
468			if (cs.cs_flags == CS_SOF)
469				goto ret;
470		}
471	else
472		while (cnt--) {
473			state = cs.cs_flags == 0 &&
474			    inword(cs.cs_ch) ? INWORD : NOTWORD;
475			for (;;) {
476				if (cs_prev(sp, &cs))
477					return (1);
478				if (cs.cs_flags == CS_SOF)
479					goto ret;
480				if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
481					break;
482				if (state == INWORD) {
483					if (!inword(cs.cs_ch))
484						break;
485				} else
486					if (inword(cs.cs_ch))
487						break;
488			}
489			/* See comment above. */
490			if (cnt == 0) {
491				if (cs.cs_flags == 0 && cs_next(sp, &cs))
492					return (1);
493				break;
494			}
495
496			/* Eat whitespace characters. */
497			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
498				if (cs_bblank(sp, &cs))
499					return (1);
500			if (cs.cs_flags == CS_SOF)
501				goto ret;
502		}
503
504	/* If we didn't move, we must be at SOF. */
505ret:	if (cs.cs_lno == vp->m_start.lno && cs.cs_cno == vp->m_start.cno) {
506		v_sof(sp, &vp->m_start);
507		return (1);
508	}
509
510	/* Set the end of the range for motion commands. */
511	vp->m_stop.lno = cs.cs_lno;
512	vp->m_stop.cno = cs.cs_cno;
513
514	/*
515	 * All commands move to the end of the range.  Motion commands
516	 * adjust the starting point to the character before the current
517	 * one.
518	 *
519	 * !!!
520	 * The historic vi didn't get this right -- the `yb' command yanked
521	 * the right stuff and even updated the cursor value, but the cursor
522	 * was not actually updated on the screen.
523	 */
524	vp->m_final = vp->m_stop;
525	if (ISMOTION(vp))
526		--vp->m_start.cno;
527	return (0);
528}
529