1/*	$NetBSD: v_sentence.c,v 1.2 2008/12/05 22:51:43 christos Exp $ */
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 *	Keith Bostic.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#ifndef lint
15static const char sccsid[] = "Id: v_sentence.c,v 10.9 2001/06/25 15:19:35 skimo Exp (Berkeley) Date: 2001/06/25 15:19:35";
16#endif /* not lint */
17
18#include <sys/types.h>
19#include <sys/queue.h>
20#include <sys/time.h>
21
22#include <bitstring.h>
23#include <ctype.h>
24#include <limits.h>
25#include <stdio.h>
26
27#include "../common/common.h"
28#include "vi.h"
29
30/*
31 * !!!
32 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
33 * followed by TWO spaces or a newline.  One or more empty lines was also
34 * treated as a separate sentence.  The Berkeley documentation for historical
35 * vi states that any number of ')', ']', '"' and '\'' characters can be
36 * between the delimiter character and the spaces or end of line, however,
37 * the historical implementation did not handle additional '"' characters.
38 * We follow the documentation here, not the implementation.
39 *
40 * Once again, historical vi didn't do sentence movements associated with
41 * counts consistently, mostly in the presence of lines containing only
42 * white-space characters.
43 *
44 * This implementation also permits a single tab to delimit sentences, and
45 * treats lines containing only white-space characters as empty lines.
46 * Finally, tabs are eaten (along with spaces) when skipping to the start
47 * of the text following a "sentence".
48 */
49
50/*
51 * v_sentencef -- [count])
52 *	Move forward count sentences.
53 *
54 * PUBLIC: int v_sentencef __P((SCR *, VICMD *));
55 */
56int
57v_sentencef(SCR *sp, VICMD *vp)
58{
59	enum { BLANK, NONE, PERIOD } state;
60	VCS cs;
61	size_t len;
62	u_long cnt;
63
64	cs.cs_lno = vp->m_start.lno;
65	cs.cs_cno = vp->m_start.cno;
66	if (cs_init(sp, &cs))
67		return (1);
68
69	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
70
71	/*
72	 * !!!
73	 * If in white-space, the next start of sentence counts as one.
74	 * This may not handle "  .  " correctly, but it's real unclear
75	 * what correctly means in that case.
76	 */
77	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))) {
78		if (cs_fblank(sp, &cs))
79			return (1);
80		if (--cnt == 0) {
81			if (vp->m_start.lno != cs.cs_lno ||
82			    vp->m_start.cno != cs.cs_cno)
83				goto okret;
84			return (1);
85		}
86	}
87
88	for (state = NONE;;) {
89		if (cs_next(sp, &cs))
90			return (1);
91		if (cs.cs_flags == CS_EOF)
92			break;
93		if (cs.cs_flags == CS_EOL) {
94			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
95				if (cs_next(sp, &cs))
96					return (1);
97				if (cs.cs_flags == 0 &&
98				    ISBLANK2(cs.cs_ch) && cs_fblank(sp, &cs))
99					return (1);
100				goto okret;
101			}
102			state = NONE;
103			continue;
104		}
105		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
106			if (--cnt == 0)
107				goto okret;
108			if (cs_fblank(sp, &cs))
109				return (1);
110			if (--cnt == 0)
111				goto okret;
112			state = NONE;
113			continue;
114		}
115		switch (cs.cs_ch) {
116		case '.':
117		case '?':
118		case '!':
119			state = PERIOD;
120			break;
121		case ')':
122		case ']':
123		case '"':
124		case '\'':
125			if (state != PERIOD)
126				state = NONE;
127			break;
128		case '\t':
129			if (state == PERIOD)
130				state = BLANK;
131			/* FALLTHROUGH */
132		case ' ':
133			if (state == PERIOD) {
134				state = BLANK;
135				break;
136			}
137			if (state == BLANK && --cnt == 0) {
138				if (cs_fblank(sp, &cs))
139					return (1);
140				goto okret;
141			}
142			/* FALLTHROUGH */
143		default:
144			state = NONE;
145			break;
146		}
147	}
148
149	/* EOF is a movement sink, but it's an error not to have moved. */
150	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
151		v_eof(sp, NULL);
152		return (1);
153	}
154
155okret:	vp->m_stop.lno = cs.cs_lno;
156	vp->m_stop.cno = cs.cs_cno;
157
158	/*
159	 * !!!
160	 * Historic, uh, features, yeah, that's right, call 'em features.
161	 * If the starting and ending cursor positions are at the first
162	 * column in their lines, i.e. the movement is cutting entire lines,
163	 * the buffer is in line mode, and the ending position is the last
164	 * character of the previous line.  Note check to make sure that
165	 * it's not within a single line.
166	 *
167	 * Non-motion commands move to the end of the range.  Delete and
168	 * yank stay at the start.  Ignore others.  Adjust the end of the
169	 * range for motion commands.
170	 */
171	if (ISMOTION(vp)) {
172		if (vp->m_start.cno == 0 &&
173		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
174			if (vp->m_start.lno < vp->m_stop.lno) {
175				if (db_get(sp,
176				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
177					return (1);
178				vp->m_stop.cno = len ? len - 1 : 0;
179			}
180			F_SET(vp, VM_LMODE);
181		} else
182			--vp->m_stop.cno;
183		vp->m_final = vp->m_start;
184	} else
185		vp->m_final = vp->m_stop;
186	return (0);
187}
188
189/*
190 * v_sentenceb -- [count](
191 *	Move backward count sentences.
192 *
193 * PUBLIC: int v_sentenceb __P((SCR *, VICMD *));
194 */
195int
196v_sentenceb(SCR *sp, VICMD *vp)
197{
198	VCS cs;
199	db_recno_t slno;
200	size_t len, scno;
201	u_long cnt;
202	int last;
203
204	/*
205	 * !!!
206	 * Historic vi permitted the user to hit SOF repeatedly.
207	 */
208	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
209		return (0);
210
211	cs.cs_lno = vp->m_start.lno;
212	cs.cs_cno = vp->m_start.cno;
213	if (cs_init(sp, &cs))
214		return (1);
215
216	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
217
218	/*
219	 * !!!
220	 * In empty lines, skip to the previous non-white-space character.
221	 * If in text, skip to the prevous white-space character.  Believe
222	 * it or not, in the paragraph:
223	 *	ab cd.
224	 *	AB CD.
225	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
226	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
227	 * Berkeley was once a major center of drug activity.
228	 */
229	if (cs.cs_flags == CS_EMP) {
230		if (cs_bblank(sp, &cs))
231			return (1);
232		for (;;) {
233			if (cs_prev(sp, &cs))
234				return (1);
235			if (cs.cs_flags != CS_EOL)
236				break;
237		}
238	} else if (cs.cs_flags == 0 && !ISBLANK2(cs.cs_ch))
239		for (;;) {
240			if (cs_prev(sp, &cs))
241				return (1);
242			if (cs.cs_flags != 0 || ISBLANK2(cs.cs_ch))
243				break;
244		}
245
246	for (last = 0;;) {
247		if (cs_prev(sp, &cs))
248			return (1);
249		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
250			break;
251		if (cs.cs_flags == CS_EOL) {
252			last = 1;
253			continue;
254		}
255		if (cs.cs_flags == CS_EMP) {
256			if (--cnt == 0)
257				goto ret;
258			if (cs_bblank(sp, &cs))
259				return (1);
260			last = 0;
261			continue;
262		}
263		switch (cs.cs_ch) {
264		case '.':
265		case '?':
266		case '!':
267			if (!last || --cnt != 0) {
268				last = 0;
269				continue;
270			}
271
272ret:			slno = cs.cs_lno;
273			scno = cs.cs_cno;
274
275			/*
276			 * Move to the start of the sentence, skipping blanks
277			 * and special characters.
278			 */
279			do {
280				if (cs_next(sp, &cs))
281					return (1);
282			} while (!cs.cs_flags &&
283			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
284			    cs.cs_ch == '"' || cs.cs_ch == '\''));
285			if ((cs.cs_flags || ISBLANK2(cs.cs_ch)) &&
286			    cs_fblank(sp, &cs))
287				return (1);
288
289			/*
290			 * If it was ".  xyz", with the cursor on the 'x', or
291			 * "end.  ", with the cursor in the spaces, or the
292			 * beginning of a sentence preceded by an empty line,
293			 * we can end up where we started.  Fix it.
294			 */
295			if (vp->m_start.lno != cs.cs_lno ||
296			    vp->m_start.cno != cs.cs_cno)
297				goto okret;
298
299			/*
300			 * Well, if an empty line preceded possible blanks
301			 * and the sentence, it could be a real sentence.
302			 */
303			for (;;) {
304				if (cs_prev(sp, &cs))
305					return (1);
306				if (cs.cs_flags == CS_EOL)
307					continue;
308				if (cs.cs_flags == 0 && ISBLANK2(cs.cs_ch))
309					continue;
310				break;
311			}
312			if (cs.cs_flags == CS_EMP)
313				goto okret;
314
315			/* But it wasn't; try again. */
316			++cnt;
317			cs.cs_lno = slno;
318			cs.cs_cno = scno;
319			last = 0;
320			break;
321		case '\t':
322			last = 1;
323			break;
324		default:
325			last =
326			    cs.cs_flags == CS_EOL || ISBLANK2(cs.cs_ch) ||
327			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
328			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
329		}
330	}
331
332okret:	vp->m_stop.lno = cs.cs_lno;
333	vp->m_stop.cno = cs.cs_cno;
334
335	/*
336	 * !!!
337	 * If the starting and stopping cursor positions are at the first
338	 * columns in the line, i.e. the movement is cutting an entire line,
339	 * the buffer is in line mode, and the starting position is the last
340	 * character of the previous line.
341	 *
342	 * All commands move to the end of the range.  Adjust the start of
343	 * the range for motion commands.
344	 */
345	if (ISMOTION(vp)) {
346		if (vp->m_start.cno == 0 &&
347		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
348			if (db_get(sp,
349			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
350				return (1);
351			vp->m_start.cno = len ? len - 1 : 0;
352			F_SET(vp, VM_LMODE);
353		} else
354			--vp->m_start.cno;
355	}
356	vp->m_final = vp->m_stop;
357	return (0);
358}
359