1/*	$OpenBSD: v_sentence.c,v 1.8 2022/12/26 19:16:04 jmc Exp $	*/
2
3/*-
4 * Copyright (c) 1992, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 * Copyright (c) 1992, 1993, 1994, 1995, 1996
7 *	Keith Bostic.  All rights reserved.
8 *
9 * See the LICENSE file for redistribution information.
10 */
11
12#include "config.h"
13
14#include <sys/types.h>
15#include <sys/queue.h>
16#include <sys/time.h>
17
18#include <bitstring.h>
19#include <ctype.h>
20#include <limits.h>
21#include <stdio.h>
22
23#include "../common/common.h"
24#include "vi.h"
25
26/*
27 * !!!
28 * In historic vi, a sentence was delimited by a '.', '?' or '!' character
 * followed by TWO spaces or a newline.  One or more empty lines were also
30 * treated as a separate sentence.  The Berkeley documentation for historical
31 * vi states that any number of ')', ']', '"' and '\'' characters can be
32 * between the delimiter character and the spaces or end of line, however,
33 * the historical implementation did not handle additional '"' characters.
34 * We follow the documentation here, not the implementation.
35 *
36 * Once again, historical vi didn't do sentence movements associated with
37 * counts consistently, mostly in the presence of lines containing only
38 * white-space characters.
39 *
40 * This implementation also permits a single tab to delimit sentences, and
41 * treats lines containing only white-space characters as empty lines.
42 * Finally, tabs are eaten (along with spaces) when skipping to the start
43 * of the text following a "sentence".
44 */
45
46/*
47 * v_sentencef -- [count])
48 *	Move forward count sentences.
49 *
50 * PUBLIC: int v_sentencef(SCR *, VICMD *);
51 */
52int
53v_sentencef(SCR *sp, VICMD *vp)
54{
55	enum { BLANK, NONE, PERIOD } state;
56	VCS cs;
57	size_t len;
58	u_long cnt;
59
60	cs.cs_lno = vp->m_start.lno;
61	cs.cs_cno = vp->m_start.cno;
62	if (cs_init(sp, &cs))
63		return (1);
64
65	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
66
67	/*
68	 * !!!
69	 * If in white-space, the next start of sentence counts as one.
70	 * This may not handle "  .  " correctly, but it's real unclear
71	 * what correctly means in that case.
72	 */
73	if (cs.cs_flags == CS_EMP || (cs.cs_flags == 0 && isblank(cs.cs_ch))) {
74		if (cs_fblank(sp, &cs))
75			return (1);
76		if (--cnt == 0) {
77			if (vp->m_start.lno != cs.cs_lno ||
78			    vp->m_start.cno != cs.cs_cno)
79				goto okret;
80			return (1);
81		}
82	}
83
84	for (state = NONE;;) {
85		if (cs_next(sp, &cs))
86			return (1);
87		if (cs.cs_flags == CS_EOF)
88			break;
89		if (cs.cs_flags == CS_EOL) {
90			if ((state == PERIOD || state == BLANK) && --cnt == 0) {
91				if (cs_next(sp, &cs))
92					return (1);
93				if (cs.cs_flags == 0 &&
94				    isblank(cs.cs_ch) && cs_fblank(sp, &cs))
95					return (1);
96				goto okret;
97			}
98			state = NONE;
99			continue;
100		}
101		if (cs.cs_flags == CS_EMP) {	/* An EMP is two sentences. */
102			if (--cnt == 0)
103				goto okret;
104			if (cs_fblank(sp, &cs))
105				return (1);
106			if (--cnt == 0)
107				goto okret;
108			state = NONE;
109			continue;
110		}
111		switch (cs.cs_ch) {
112		case '.':
113		case '?':
114		case '!':
115			state = PERIOD;
116			break;
117		case ')':
118		case ']':
119		case '"':
120		case '\'':
121			if (state != PERIOD)
122				state = NONE;
123			break;
124		case '\t':
125			if (state == PERIOD)
126				state = BLANK;
127			/* FALLTHROUGH */
128		case ' ':
129			if (state == PERIOD) {
130				state = BLANK;
131				break;
132			}
133			if (state == BLANK && --cnt == 0) {
134				if (cs_fblank(sp, &cs))
135					return (1);
136				goto okret;
137			}
138			/* FALLTHROUGH */
139		default:
140			state = NONE;
141			break;
142		}
143	}
144
145	/* EOF is a movement sink, but it's an error not to have moved. */
146	if (vp->m_start.lno == cs.cs_lno && vp->m_start.cno == cs.cs_cno) {
147		v_eof(sp, NULL);
148		return (1);
149	}
150
151okret:	vp->m_stop.lno = cs.cs_lno;
152	vp->m_stop.cno = cs.cs_cno;
153
154	/*
155	 * !!!
156	 * Historic, uh, features, yeah, that's right, call 'em features.
157	 * If the starting and ending cursor positions are at the first
158	 * column in their lines, i.e. the movement is cutting entire lines,
159	 * the buffer is in line mode, and the ending position is the last
160	 * character of the previous line.  Note check to make sure that
161	 * it's not within a single line.
162	 *
163	 * Non-motion commands move to the end of the range.  Delete and
164	 * yank stay at the start.  Ignore others.  Adjust the end of the
165	 * range for motion commands.
166	 */
167	if (ISMOTION(vp)) {
168		if (vp->m_start.cno == 0 &&
169		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
170			if (vp->m_start.lno < vp->m_stop.lno) {
171				if (db_get(sp,
172				    --vp->m_stop.lno, DBG_FATAL, NULL, &len))
173					return (1);
174				vp->m_stop.cno = len ? len - 1 : 0;
175			}
176			F_SET(vp, VM_LMODE);
177		} else
178			--vp->m_stop.cno;
179		vp->m_final = vp->m_start;
180	} else
181		vp->m_final = vp->m_stop;
182	return (0);
183}
184
185/*
186 * v_sentenceb -- [count](
187 *	Move backward count sentences.
188 *
189 * PUBLIC: int v_sentenceb(SCR *, VICMD *);
190 */
191int
192v_sentenceb(SCR *sp, VICMD *vp)
193{
194	VCS cs;
195	recno_t slno;
196	size_t len, scno;
197	u_long cnt;
198	int last;
199
200	/*
201	 * !!!
202	 * Historic vi permitted the user to hit SOF repeatedly.
203	 */
204	if (vp->m_start.lno == 1 && vp->m_start.cno == 0)
205		return (0);
206
207	cs.cs_lno = vp->m_start.lno;
208	cs.cs_cno = vp->m_start.cno;
209	if (cs_init(sp, &cs))
210		return (1);
211
212	cnt = F_ISSET(vp, VC_C1SET) ? vp->count : 1;
213
214	/*
215	 * !!!
216	 * In empty lines, skip to the previous non-white-space character.
217	 * If in text, skip to the previous white-space character.  Believe
218	 * it or not, in the paragraph:
219	 *	ab cd.
220	 *	AB CD.
221	 * if the cursor is on the 'A' or 'B', ( moves to the 'a'.  If it
222	 * is on the ' ', 'C' or 'D', it moves to the 'A'.  Yes, Virginia,
223	 * Berkeley was once a major center of drug activity.
224	 */
225	if (cs.cs_flags == CS_EMP) {
226		if (cs_bblank(sp, &cs))
227			return (1);
228		for (;;) {
229			if (cs_prev(sp, &cs))
230				return (1);
231			if (cs.cs_flags != CS_EOL)
232				break;
233		}
234	} else if (cs.cs_flags == 0 && !isblank(cs.cs_ch))
235		for (;;) {
236			if (cs_prev(sp, &cs))
237				return (1);
238			if (cs.cs_flags != 0 || isblank(cs.cs_ch))
239				break;
240		}
241
242	for (last = 0;;) {
243		if (cs_prev(sp, &cs))
244			return (1);
245		if (cs.cs_flags == CS_SOF)	/* SOF is a movement sink. */
246			break;
247		if (cs.cs_flags == CS_EOL) {
248			last = 1;
249			continue;
250		}
251		if (cs.cs_flags == CS_EMP) {
252			if (--cnt == 0)
253				goto ret;
254			if (cs_bblank(sp, &cs))
255				return (1);
256			last = 0;
257			continue;
258		}
259		switch (cs.cs_ch) {
260		case '.':
261		case '?':
262		case '!':
263			if (!last || --cnt != 0) {
264				last = 0;
265				continue;
266			}
267
268ret:			slno = cs.cs_lno;
269			scno = cs.cs_cno;
270
271			/*
272			 * Move to the start of the sentence, skipping blanks
273			 * and special characters.
274			 */
275			do {
276				if (cs_next(sp, &cs))
277					return (1);
278			} while (!cs.cs_flags &&
279			    (cs.cs_ch == ')' || cs.cs_ch == ']' ||
280			    cs.cs_ch == '"' || cs.cs_ch == '\''));
281			if ((cs.cs_flags || isblank(cs.cs_ch)) &&
282			    cs_fblank(sp, &cs))
283				return (1);
284
285			/*
286			 * If it was ".  xyz", with the cursor on the 'x', or
287			 * "end.  ", with the cursor in the spaces, or the
288			 * beginning of a sentence preceded by an empty line,
289			 * we can end up where we started.  Fix it.
290			 */
291			if (vp->m_start.lno != cs.cs_lno ||
292			    vp->m_start.cno != cs.cs_cno)
293				goto okret;
294
295			/*
296			 * Well, if an empty line preceded possible blanks
297			 * and the sentence, it could be a real sentence.
298			 */
299			for (;;) {
300				if (cs_prev(sp, &cs))
301					return (1);
302				if (cs.cs_flags == CS_EOL)
303					continue;
304				if (cs.cs_flags == 0 && isblank(cs.cs_ch))
305					continue;
306				break;
307			}
308			if (cs.cs_flags == CS_EMP)
309				goto okret;
310
311			/* But it wasn't; try again. */
312			++cnt;
313			cs.cs_lno = slno;
314			cs.cs_cno = scno;
315			last = 0;
316			break;
317		case '\t':
318			last = 1;
319			break;
320		default:
321			last =
322			    cs.cs_flags == CS_EOL || isblank(cs.cs_ch) ||
323			    cs.cs_ch == ')' || cs.cs_ch == ']' ||
324			    cs.cs_ch == '"' || cs.cs_ch == '\'' ? 1 : 0;
325		}
326	}
327
328okret:	vp->m_stop.lno = cs.cs_lno;
329	vp->m_stop.cno = cs.cs_cno;
330
331	/*
332	 * !!!
333	 * If the starting and stopping cursor positions are at the first
334	 * columns in the line, i.e. the movement is cutting an entire line,
335	 * the buffer is in line mode, and the starting position is the last
336	 * character of the previous line.
337	 *
338	 * All commands move to the end of the range.  Adjust the start of
339	 * the range for motion commands.
340	 */
341	if (ISMOTION(vp)) {
342		if (vp->m_start.cno == 0 &&
343		    (cs.cs_flags != 0 || vp->m_stop.cno == 0)) {
344			if (db_get(sp,
345			    --vp->m_start.lno, DBG_FATAL, NULL, &len))
346				return (1);
347			vp->m_start.cno = len ? len - 1 : 0;
348			F_SET(vp, VM_LMODE);
349		} else
350			--vp->m_start.cno;
351	}
352	vp->m_final = vp->m_stop;
353	return (0);
354}
355