1/*	$NetBSD: line.c,v 1.4 2003/08/07 09:27:59 agc Exp $	*/
2
3/*
4 * Copyright (c) 1988 Mark Nudelman
5 * Copyright (c) 1988, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34#ifndef lint
35#if 0
36static char sccsid[] = "@(#)line.c	8.1 (Berkeley) 6/6/93";
37#else
38__RCSID("$NetBSD: line.c,v 1.4 2003/08/07 09:27:59 agc Exp $");
39#endif
40#endif /* not lint */
41
42/*
43 * Routines to manipulate the "line buffer".
44 * The line buffer holds a line of output as it is being built
45 * in preparation for output to the screen.
46 * We keep track of the PRINTABLE length of the line as it is being built.
47 */
48
49#include <sys/types.h>
50#include <ctype.h>
51
52#include "less.h"
53#include "extern.h"
54
static char linebuf[1024];	/* Buffer which holds the current output line;
				   also filled directly by forw_raw_line()
				   and back_raw_line() */
static char *curr;		/* Next free position in linebuf while a line
				   is being built by pappend() */
static int column;		/* Printable length of the line built so far,
				   accounting for backspaces, tabs, etc. */
59/*
60 * A ridiculously complex state machine takes care of backspaces.  The
61 * complexity arises from the attempt to deal with all cases, especially
62 * involving long lines with underlining, boldfacing or whatever.  There
63 * are still some cases which will break it.
64 *
 *	There are seven states:
66 *	LN_NORMAL is the normal state (not in underline mode).
67 *	LN_UNDERLINE means we are in underline mode.  We expect to get
68 *		either a sequence like "_\bX" or "X\b_" to continue
69 *		underline mode, or anything else to end underline mode.
70 *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
71 *		like "X\bX\b...X\bX" to continue boldface mode, or anything
72 *		else to end boldface mode.
73 *	LN_UL_X means we are one character after LN_UNDERLINE
74 *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
75 *	LN_UL_XB means we are one character after LN_UL_X
76 *		(we have gotten the backspace in "_\bX" or "X\b_";
77 *		we expect one more ordinary character,
78 *		which will put us back in state LN_UNDERLINE).
79 *	LN_BO_X means we are one character after LN_BOLDFACE
80 *		(we have gotten the 'X' in "X\bX").
81 *	LN_BO_XB means we are one character after LN_BO_X
82 *		(we have gotten the backspace in "X\bX";
83 *		we expect one more 'X' which will put us back
84 *		in LN_BOLDFACE).
85 */
static int ln_state;		/* Currently in normal/underline/bold/etc mode? */

/*
 * States of the backspace state machine, stored in ln_state.
 * LN_NORMAL must remain 0: NEW_COLUMN() tests ln_state for truth to
 * decide whether room for an "exit mode" sequence must be reserved.
 */
enum {
	LN_NORMAL	= 0,	/* Not in underline, boldface or whatever mode */
	LN_UNDERLINE	= 1,	/* In underline, need next char */
	LN_UL_X		= 2,	/* In underline, got char, need \b */
	LN_UL_XB	= 3,	/* In underline, got char & \b, need one more */
	LN_BOLDFACE	= 4,	/* In boldface, need next char */
	LN_BO_X		= 5,	/* In boldface, got char, need \b */
	LN_BO_XB	= 6	/* In boldface, got char & \b, need same char */
};

char *line;			/* Pointer to the current line.
				   Usually points to linebuf; back_raw_line()
				   may leave it pointing into the middle. */
97/*
98 * Rewind the line buffer.
99 */
100void
101prewind()
102{
103	line = curr = linebuf;
104	ln_state = LN_NORMAL;
105	column = 0;
106}
107
108/*
109 * Append a character to the line buffer.
110 * Expand tabs into spaces, handle underlining, boldfacing, etc.
111 * Returns 0 if ok, 1 if couldn't fit in buffer.
112 */
/*
 * Account for `addon' more printable columns.
 * If they do not fit within sc_width (keeping room for the "exit mode"
 * sequence of width ue_width whenever ln_state is not normal), this
 * macro RETURNS 1 FROM THE ENCLOSING FUNCTION; otherwise it adds
 * `addon' to column.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * in any context, and the argument is parenthesized so that compound
 * expressions are evaluated as one value.
 */
#define	NEW_COLUMN(addon) \
	do { \
		if (column + (addon) + (ln_state ? ue_width : 0) > sc_width) \
			return (1); \
		column += (addon); \
	} while (0)
118
/*
 * pappend(c): append one input character to the line buffer.
 *
 *	c	the character to append; '\0' means "end of line": any
 *		open underline/boldface mode is closed and the buffer
 *		is NUL-terminated.
 *
 * Returns 0 if the character was accepted, 1 if it does not fit
 * (either the buffer is nearly full or the printable width would
 * exceed sc_width).
 *
 * Unless bs_mode is set, the backspace state machine is run first; it
 * may rewrite the tail of the buffer (inserting UL_CHAR/UE_CHAR/
 * BO_CHAR/BE_CHAR markers and backing `curr' up over a char/backspace
 * pair) and may replace `c' before the character is finally stored by
 * the code following the switch.
 */
int
pappend(c)
	int c;
{
	if (c == '\0') {
		/*
		 * Terminate any special modes, if necessary.
		 * Append a '\0' to the end of the line.
		 */
		switch (ln_state) {
		case LN_UL_X:
			/*
			 * Insert the "exit underline" marker in front of
			 * the last char stored.
			 */
			curr[0] = curr[-1];
			curr[-1] = UE_CHAR;
			curr++;
			break;
		case LN_BO_X:
			/*
			 * Insert the "exit boldface" marker in front of
			 * the last char stored.
			 */
			curr[0] = curr[-1];
			curr[-1] = BE_CHAR;
			curr++;
			break;
		case LN_UL_XB:
		case LN_UNDERLINE:
			*curr++ = UE_CHAR;
			break;
		case LN_BO_XB:
		case LN_BOLDFACE:
			*curr++ = BE_CHAR;
			break;
		}
		ln_state = LN_NORMAL;
		*curr = '\0';
		return(0);
	}

	if (curr > linebuf + sizeof(linebuf) - 12)
		/*
		 * Almost out of room in the line buffer.
		 * Don't take any chances.
		 * {{ Linebuf is supposed to be big enough that this
		 *    will never happen, but may need to be made
		 *    bigger for wide screens or lots of backspaces. }}
		 */
		return(1);

	if (!bs_mode) {
		/*
		 * Advance the state machine.
		 */
		switch (ln_state) {
		case LN_NORMAL:
			/*
			 * Nothing to do unless at least two chars are
			 * stored and the last one is a backspace (stored
			 * as 'H' with the 0200 bit set).
			 */
			if (curr <= linebuf + 1
			    || curr[-1] != (char)('H' | 0200))
				break;
			/*
			 * Give back the two columns charged for the
			 * backspace when it was stored.
			 */
			column -= 2;
			if (c == curr[-2])
				goto enter_boldface;
			if (c == '_' || curr[-2] == '_')
				goto enter_underline;
			/*
			 * Plain overstrike: drop the earlier char and the
			 * backspace so that c is stored in their place.
			 * NOTE(review): only the backspace's columns were
			 * given back above although the overstruck char is
			 * dropped too -- confirm the column accounting.
			 */
			curr -= 2;
			break;

enter_boldface:
			/*
			 * We have "X\bX" (including the current char).
			 * Switch into boldface mode.
			 */
			column--;
			if (column + bo_width + be_width + 1 >= sc_width)
				/*
				 * Not enough room left on the screen to
				 * enter and exit boldface mode.
				 */
				return (1);

			if (bo_width > 0 && curr > linebuf + 2
			    && curr[-3] == ' ') {
				/*
				 * Special case for magic cookie terminals:
				 * if the previous char was a space, replace
				 * it with the "enter boldface" sequence.
				 */
				curr[-3] = BO_CHAR;
				column += bo_width-1;
			} else {
				/*
				 * Insert BO_CHAR in front of the char,
				 * moving the char into the backspace's slot.
				 */
				curr[-1] = curr[-2];
				curr[-2] = BO_CHAR;
				column += bo_width;
				curr++;
			}
			goto ln_bo_xb_case;

enter_underline:
			/*
			 * We have either "_\bX" or "X\b_" (including
			 * the current char).  Switch into underline mode.
			 */
			column--;
			if (column + ul_width + ue_width + 1 >= sc_width)
				/*
				 * Not enough room left on the screen to
				 * enter and exit underline mode.
				 */
				return (1);

			if (ul_width > 0 &&
			    curr > linebuf + 2 && curr[-3] == ' ')
			{
				/*
				 * Special case for magic cookie terminals:
				 * if the previous char was a space, replace
				 * it with the "enter underline" sequence.
				 */
				curr[-3] = UL_CHAR;
				column += ul_width-1;
			} else
			{
				/*
				 * Insert UL_CHAR in front of the char,
				 * moving the char into the backspace's slot.
				 */
				curr[-1] = curr[-2];
				curr[-2] = UL_CHAR;
				column += ul_width;
				curr++;
			}
			goto ln_ul_xb_case;
			/*NOTREACHED*/
		case LN_UL_XB:
			/*
			 * Termination of a sequence "_\bX" or "X\b_".
			 */
			if (c != '_' && curr[-2] != '_' && c == curr[-2])
			{
				/*
				 * We seem to have run on from underlining
				 * into boldfacing - this is a nasty fix, but
				 * until this whole routine is rewritten as a
				 * real DFA, ...  well ...
				 */
				curr[0] = curr[-2];
				curr[-2] = UE_CHAR;
				curr[-1] = BO_CHAR;
				curr += 2; /* char & non-existent backspace */
				ln_state = LN_BO_XB;
				goto ln_bo_xb_case;
			}
ln_ul_xb_case:
			/*
			 * If the '_' came last, the char to keep is the
			 * one stored before the backspace.
			 */
			if (c == '_')
				c = curr[-2];
			/*
			 * Back up over the char and the backspace so that
			 * c is stored in their place.
			 */
			curr -= 2;
			ln_state = LN_UNDERLINE;
			break;
		case LN_BO_XB:
			/*
			 * Termination of a sequence "X\bX".
			 */
			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
			{
				/*
				 * We seem to have run on from
				 * boldfacing into underlining.
				 */
				curr[0] = curr[-2];
				curr[-2] = BE_CHAR;
				curr[-1] = UL_CHAR;
				curr += 2; /* char & non-existent backspace */
				ln_state = LN_UL_XB;
				goto ln_ul_xb_case;
			}
ln_bo_xb_case:
			/*
			 * Back up over the char and the backspace so that
			 * c is stored in their place.
			 */
			curr -= 2;
			ln_state = LN_BOLDFACE;
			break;
		case LN_UNDERLINE:
			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
				/*
				 * We have just barely enough room to
				 * exit underline mode and handle a possible
				 * underline/boldface run on mixup.
				 */
				return (1);
			ln_state = LN_UL_X;
			break;
		case LN_BOLDFACE:
			if (c == '\b')
			{
				ln_state = LN_BO_XB;
				break;
			}
			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
				/*
				 * We have just barely enough room to
				 * exit boldface mode and handle a possible
				 * boldface/underline run on mixup.
				 */
				return (1);
			ln_state = LN_BO_X;
			break;
		case LN_UL_X:
			if (c == '\b')
				ln_state = LN_UL_XB;
			else
			{
				/*
				 * Exit underline mode.
				 * We have to shuffle the chars a bit
				 * to make this work.
				 */
				curr[0] = curr[-1];
				curr[-1] = UE_CHAR;
				column += ue_width;
				if (ue_width > 0 && curr[0] == ' ')
					/*
					 * Another special case for magic
					 * cookie terminals: if the next
					 * char is a space, replace it
					 * with the "exit underline" sequence.
					 * (curr is not advanced, so the
					 * space is overwritten below.)
					 */
					column--;
				else
					curr++;
				ln_state = LN_NORMAL;
			}
			break;
		case LN_BO_X:
			if (c == '\b')
				ln_state = LN_BO_XB;
			else
			{
				/*
				 * Exit boldface mode.
				 * We have to shuffle the chars a bit
				 * to make this work.
				 */
				curr[0] = curr[-1];
				curr[-1] = BE_CHAR;
				column += be_width;
				if (be_width > 0 && curr[0] == ' ')
					/*
					 * Another special case for magic
					 * cookie terminals: if the next
					 * char is a space, replace it
					 * with the "exit boldface" sequence.
					 * (curr is not advanced, so the
					 * space is overwritten below.)
					 */
					column--;
				else
					curr++;
				ln_state = LN_NORMAL;
			}
			break;
		}
	}

	if (c == '\t') {
		/*
		 * Expand a tab into spaces: charge columns up to the
		 * next multiple of tabstop, but store the tab itself.
		 */
		do {
			NEW_COLUMN(1);
		} while ((column % tabstop) != 0);
		*curr++ = '\t';
		return (0);
	}

	if (c == '\b') {
		/*
		 * In normal state a backspace is charged two columns;
		 * inside an underline/boldface sequence it takes one back.
		 * It is stored as 'H' with the 0200 bit set.
		 */
		if (ln_state == LN_NORMAL)
			NEW_COLUMN(2);
		else
			column--;
		*curr++ = ('H' | 0200);
		return(0);
	}

	if (CONTROL_CHAR(c)) {
		/*
		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
		 * put_line() to prefix the char with a ^.  We don't actually
		 * put the ^ in the buffer because we sometimes need to move
		 * chars around, and such movement might separate the ^ from
		 * its following character.
		 */
		NEW_COLUMN(2);
		*curr++ = (CARAT_CHAR(c) | 0200);
		return(0);
	}

	/*
	 * Ordinary character.  Just put it in the buffer.
	 */
	NEW_COLUMN(1);
	*curr++ = c;
	return (0);
}
408
409/*
410 * Analogous to forw_line(), but deals with "raw lines":
411 * lines which are not split for screen width.
412 * {{ This is supposed to be more efficient than forw_line(). }}
413 */
414off_t
415forw_raw_line(curr_pos)
416	off_t curr_pos;
417{
418	char *p;
419	int c;
420	off_t new_pos;
421
422	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
423		(c = ch_forw_get()) == EOI)
424		return (NULL_POSITION);
425
426	p = linebuf;
427
428	for (;;)
429	{
430		if (c == '\n' || c == EOI)
431		{
432			new_pos = ch_tell();
433			break;
434		}
435		if (p >= &linebuf[sizeof(linebuf)-1])
436		{
437			/*
438			 * Overflowed the input buffer.
439			 * Pretend the line ended here.
440			 * {{ The line buffer is supposed to be big
441			 *    enough that this never happens. }}
442			 */
443			new_pos = ch_tell() - 1;
444			break;
445		}
446		*p++ = c;
447		c = ch_forw_get();
448	}
449	*p = '\0';
450	line = linebuf;
451	return (new_pos);
452}
453
454/*
455 * Analogous to back_line(), but deals with "raw lines".
456 * {{ This is supposed to be more efficient than back_line(). }}
457 */
458off_t
459back_raw_line(curr_pos)
460	off_t curr_pos;
461{
462	char *p;
463	int c;
464	off_t new_pos;
465
466	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
467		ch_seek(curr_pos-1))
468		return (NULL_POSITION);
469
470	p = &linebuf[sizeof(linebuf)];
471	*--p = '\0';
472
473	for (;;)
474	{
475		c = ch_back_get();
476		if (c == '\n')
477		{
478			/*
479			 * This is the newline ending the previous line.
480			 * We have hit the beginning of the line.
481			 */
482			new_pos = ch_tell() + 1;
483			break;
484		}
485		if (c == EOI)
486		{
487			/*
488			 * We have hit the beginning of the file.
489			 * This must be the first line in the file.
490			 * This must, of course, be the beginning of the line.
491			 */
492			new_pos = (off_t)0;
493			break;
494		}
495		if (p <= linebuf)
496		{
497			/*
498			 * Overflowed the input buffer.
499			 * Pretend the line ended here.
500			 */
501			new_pos = ch_tell() + 1;
502			break;
503		}
504		*--p = c;
505	}
506	line = p;
507	return (new_pos);
508}
509