1/*-
2 * Copyright (c) 1992, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 * Copyright (c) 1992, 1993, 1994, 1995, 1996
5 *	Keith Bostic.  All rights reserved.
6 *
7 * See the LICENSE file for redistribution information.
8 */
9
10#include "config.h"
11
12#ifndef lint
13static const char sccsid[] = "$Id: v_sentence.c,v 10.9 2001/06/25 15:19:35 skimo Exp $";
14#endif /* not lint */
15
16#include <sys/types.h>
17#include <sys/queue.h>
18#include <sys/time.h>
19
20#include <bitstring.h>
21#include <ctype.h>
22#include <limits.h>
23#include <stdio.h>
24
25#include "../common/common.h"
26#include "vi.h"
27
28/*
29 * !!!
30 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  One or more empty lines were also
 * treated as a separate sentence.  The Berkeley documentation for historical
33 * vi states that any number of ')', ']', '"' and '\'' characters can be
34 * between the delimiter character and the spaces or end of line, however,
35 * the historical implementation did not handle additional '"' characters.
36 * We follow the documentation here, not the implementation.
37 *
38 * Once again, historical vi didn't do sentence movements associated with
39 * counts consistently, mostly in the presence of lines containing only
40 * white-space characters.
41 *
42 * This implementation also permits a single tab to delimit sentences, and
43 * treats lines containing only white-space characters as empty lines.
44 * Finally, tabs are eaten (along with spaces) when skipping to the start
45 * of the text following a "sentence".
46 */
47
48/*
49 * v_sentencef -- [count])
50 *	Move forward count sentences.
51 *
52 * PUBLIC: int v_sentencef(SCR *, VICMD *);
53 */
54int
55v_sentencef(SCR *sp, VICMD *vp)
56{
57	enum { BLANK, NONE, PERIOD } state;
58	VCS cs;
59	size_t len;
60	u_long cnt;
61
62	cs.cs_lno = vp->m_start.lno;
63	cs.cs_cno = vp->m_start.cno;
64	if (cs_init(sp, &cs))
65		return (1);
66
67	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
68
69	/*
70	 * !!!
71	 * If in white-space, the next start of sentence counts as one.
72	 * This may not handle "  .  " correctly, but it's real unclear
73	 * what correctly means in that case.
74	 */
75	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
76		if (cs_fblank(sp, &cs))
77			return (1);
78		if (--cnt == 0) {
79			if (vp->m_start.lno != cs.cs_lno ||
80			    vp->m_start.cno != cs.cs_cno)
81				goto okret;
82			return (1);
83		}
84	}
85
86	for (state = NONE;;) {
87		if (cs_next(sp, &cs))
88			return (1);
89		if (cs.cs_flags == CS_EOF)
90			break;
91		if (cs.cs_flags == CS_EOL) {
92			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
93				if (cs_next(sp, &cs))
94					return (1);
95				if (cs.cs_flags == 0 &&
96				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
97					return (1);
98				goto okret;
99			}
100			state = NONE;
101			continue;
102		}
103		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
104			if (--cnt == 0)
105				goto okret;
106			if (cs_fblank(sp, &cs))
107				return (1);
108			if (--cnt == 0)
109				goto okret;
110			state = NONE;
111			continue;
112		}
113		switch (cs.cs_ch) {
114		case '.':
115		case '?':
116		case '!':
117			state = PERIOD;
118			break;
119		case ')':
120		case ']':
121		case '"':
122		case '\'':
123			if (state != PERIOD)
124				state = NONE;
125			break;
126		case '\t':
127			if (state == PERIOD)
128				state = BLANK;
129			/* FALLTHROUGH */
130		case ' ':
131			if (state == PERIOD) {
132				state = BLANK;
133				break;
134			}
135			if (state == BLANK && --cnt == 0) {
136				if (cs_fblank(sp, &cs))
137					return (1);
138				goto okret;
139			}
140			/* FALLTHROUGH */
141		default:
142			state = NONE;
143			break;
144		}
145	}
146
147	/* EOF is a movement sink, but it's an error not to have moved. */
148	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
149		v_eof(sp, NULL);
150		return (1);
151	}
152
153okret:	vp->m_stop.lno = cs.cs_lno;
154	vp->m_stop.cno = cs.cs_cno;
155
156	/*
157	 * !!!
158	 * Historic, uh, features, yeah, that's right, call 'em features.
159	 * If the starting and ending cursor positions are at the first
160	 * column in their lines, i.e. the movement is cutting entire lines,
161	 * the buffer is in line mode, and the ending position is the last
162	 * character of the previous line.  Note check to make sure that
163	 * it's not within a single line.
164	 *
165	 * Non-motion commands move to the end of the range.  Delete and
166	 * yank stay at the start.  Ignore others.  Adjust the end of the
167	 * range for motion commands.
168	 */
169	if (ISMOTION(vp)) {
170		if (vp->m_start.cno == 0 &&
171		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
172			if (vp->m_start.lno < vp->m_stop.lno) {
173				if (db_get(sp,
174				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
175					return (1);
176				vp->m_stop.cno = len ? len - 1 : 0;
177			}
178			F_SET(vp, VM_LMODE);
179		} else
180			--vp->m_stop.cno;
181		vp->m_final = vp->m_start;
182	} else
183		vp->m_final = vp->m_stop;
184	return (0);
185}
186
187/*
188 * v_sentenceb -- [count](
189 *	Move backward count sentences.
190 *
191 * PUBLIC: int v_sentenceb(SCR *, VICMD *);
192 */
193int
194v_sentenceb(SCR *sp, VICMD *vp)
195{
196	VCS cs;
197	recno_t slno;
198	size_t len, scno;
199	u_long cnt;
200	int last;
201
202	/*
203	 * !!!
204	 * Historic vi permitted the user to hit SOF repeatedly.
205	 */
206	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
207		return (0);
208
209	cs.cs_lno = vp->m_start.lno;
210	cs.cs_cno = vp->m_start.cno;
211	if (cs_init(sp, &cs))
212		return (1);
213
214	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
215
216	/*
217	 * !!!
218	 * In empty lines, skip to the previous non-white-space character.
219	 * If in text, skip to the prevous white-space character.  Believe
220	 * it or not, in the paragraph:
221	 *	ab cd.
222	 *	AB CD.
223	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
224	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
225	 * Berkeley was once a major center of drug activity.
226	 */
227	if (cs.cs_flags == CS_EMP) {
228		if (cs_bblank(sp, &cs))
229			return (1);
230		for (;;) {
231			if (cs_prev(sp, &cs))
232				return (1);
233			if (cs.cs_flags != CS_EOL)
234				break;
235		}
236	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
237		for (;;) {
238			if (cs_prev(sp, &cs))
239				return (1);
240			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
241				break;
242		}
243
244	for (last = 0;;) {
245		if (cs_prev(sp, &cs))
246			return (1);
247		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
248			break;
249		if (cs.cs_flags == CS_EOL) {
250			last = 1;
251			continue;
252		}
253		if (cs.cs_flags == CS_EMP) {
254			if (--cnt == 0)
255				goto ret;
256			if (cs_bblank(sp, &cs))
257				return (1);
258			last = 0;
259			continue;
260		}
261		switch (cs.cs_ch) {
262		case '.':
263		case '?':
264		case '!':
265			if (!last || --cnt != 0) {
266				last = 0;
267				continue;
268			}
269
270ret:			slno = cs.cs_lno;
271			scno = cs.cs_cno;
272
273			/*
274			 * Move to the start of the sentence, skipping blanks
275			 * and special characters.
276			 */
277			do {
278				if (cs_next(sp, &cs))
279					return (1);
280			} while (!cs.cs_flags &&
281			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
282			    cs.cs_ch == '"' || cs.cs_ch == '\''));
283			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
284			    cs_fblank(sp, &cs))
285				return (1);
286
287			/*
288			 * If it was ".  xyz", with the cursor on the 'x', or
289			 * "end.  ", with the cursor in the spaces, or the
290			 * beginning of a sentence preceded by an empty line,
291			 * we can end up where we started.  Fix it.
292			 */
293			if (vp->m_start.lno != cs.cs_lno ||
294			    vp->m_start.cno != cs.cs_cno)
295				goto okret;
296
297			/*
298			 * Well, if an empty line preceded possible blanks
299			 * and the sentence, it could be a real sentence.
300			 */
301			for (;;) {
302				if (cs_prev(sp, &cs))
303					return (1);
304				if (cs.cs_flags == CS_EOL)
305					continue;
306				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
307					continue;
308				break;
309			}
310			if (cs.cs_flags == CS_EMP)
311				goto okret;
312
313			/* But it wasn't; try again. */
314			++cnt;
315			cs.cs_lno = slno;
316			cs.cs_cno = scno;
317			last = 0;
318			break;
319		case '\t':
320			last = 1;
321			break;
322		default:
323			last =
324			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
325			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
326			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
327		}
328	}
329
330okret:	vp->m_stop.lno = cs.cs_lno;
331	vp->m_stop.cno = cs.cs_cno;
332
333	/*
334	 * !!!
335	 * If the starting and stopping cursor positions are at the first
336	 * columns in the line, i.e. the movement is cutting an entire line,
337	 * the buffer is in line mode, and the starting position is the last
338	 * character of the previous line.
339	 *
340	 * All commands move to the end of the range.  Adjust the start of
341	 * the range for motion commands.
342	 */
343	if (ISMOTION(vp))
344		if (vp->m_start.cno == 0 &&
345		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
346			if (db_get(sp,
347			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
348				return (1);
349			vp->m_start.cno = len ? len - 1 : 0;
350			F_SET(vp, VM_LMODE);
351		} else
352			--vp->m_start.cno;
353	vp->m_final = vp->m_stop;
354	return (0);
355}
356