line.c revision 161478
1/* $FreeBSD: head/contrib/less/line.c 161478 2006-08-20 15:50:51Z delphij $ */
2/*
3 * Copyright (C) 1984-2005  Mark Nudelman
4 *
5 * You may distribute under the terms of either the GNU General Public
6 * License or the Less License, as specified in the README file.
7 *
8 * For more information about less, or for information on how to
9 * contact the author, see the README file.
10 */
11
12
13/*
14 * Routines to manipulate the "line buffer".
15 * The line buffer holds a line of output as it is being built
16 * in preparation for output to the screen.
17 */
18
19#include "less.h"
20#include "charset.h"
21
static char *linebuf = NULL;	/* Buffer which holds the current output line */
static char *attr = NULL;	/* Extension of linebuf to hold attributes */
public int size_linebuf = 0;	/* Size of line buffer (and attr buffer) */

static int cshift;		/* Current left-shift of output line buffer */
public int hshift;		/* Desired left-shift of output line buffer */
public int tabstops[TABSTOP_MAX] = { 0 }; /* Custom tabstops */
public int ntabstops = 1;	/* Number of tabstops */
public int tabdefault = 8;	/* Default repeated tabstops */

static int curr;		/* Index into linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
static int overstrike;		/* Next char should overstrike previous char */
/*
 * Bold/underline bits of the most recently stored character that had
 * either of them (see store_char); do_append uses it to resolve an
 * underscore overstruck with an underscore.
 */
static int last_overstrike = AT_NORMAL;
static int is_null_line;	/* There is no current line */
static int lmargin;		/* Left margin */
static int line_matches;	/* Number of search matches in this line */
/*
 * A carriage return held back by pappend until the following character
 * is seen ('\0' when nothing is pending), and its file position.
 */
static char pendc;
static POSITION pendpos;
/*
 * Characters that end / may appear inside an ANSI escape sequence.
 * Set by init_line from LESSANSIENDCHARS / LESSANSIMIDCHARS, with
 * built-in defaults.
 */
static char *end_ansi_chars;
static char *mid_ansi_chars;

/* Forward declarations of static functions used before their definition. */
static int attr_swidth();
static int attr_ewidth();
static int do_append();

/* Variables defined in other modules. */
extern int sigs;
extern int bs_mode;
extern int linenums;
extern int ctldisp;
extern int twiddle;
extern int binattr;
extern int status_col;
extern int auto_wrap, ignaw;
extern int bo_s_width, bo_e_width;
extern int ul_s_width, ul_e_width;
extern int bl_s_width, bl_e_width;
extern int so_s_width, so_e_width;
extern int sc_width, sc_height;
extern int utf_mode;
extern POSITION start_attnpos;
extern POSITION end_attnpos;

/*
 * State of the UTF-8 sequence currently being assembled by pappend:
 * the bytes seen so far, the expected total length (0 when no sequence
 * is in progress), the number of bytes stored so far, and the file
 * position of the lead byte.
 */
static char mbc_buf[MAX_UTF_CHAR_LEN];
static int mbc_buf_len = 0;
static int mbc_buf_index = 0;
static POSITION mbc_pos;
70
71/*
72 * Initialize from environment variables.
73 */
74	public void
75init_line()
76{
77	end_ansi_chars = lgetenv("LESSANSIENDCHARS");
78	if (end_ansi_chars == NULL || *end_ansi_chars == '\0')
79		end_ansi_chars = "m";
80
81	mid_ansi_chars = lgetenv("LESSANSIMIDCHARS");
82	if (mid_ansi_chars == NULL || *mid_ansi_chars == '\0')
83		mid_ansi_chars = "0123456789;[?!\"'#%()*+ ";
84
85	linebuf = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
86	attr = (char *) ecalloc(LINEBUF_SIZE, sizeof(char));
87	size_linebuf = LINEBUF_SIZE;
88}
89
90/*
91 * Expand the line buffer.
92 */
93	static int
94expand_linebuf()
95{
96	/* Double the size of the line buffer. */
97	int new_size = size_linebuf * 2;
98
99	/* Just realloc to expand the buffer, if we can. */
100#if HAVE_REALLOC
101	char *new_buf = (char *) realloc(linebuf, new_size);
102	char *new_attr = (char *) realloc(attr, new_size);
103#else
104	char *new_buf = (char *) calloc(new_size, sizeof(char));
105	char *new_attr = (char *) calloc(new_size, sizeof(char));
106#endif
107	if (new_buf == NULL || new_attr == NULL)
108	{
109		if (new_attr != NULL)
110			free(new_attr);
111		if (new_buf != NULL)
112			free(new_buf);
113		return 1;
114	}
115#if HAVE_REALLOC
116	/*
117	 * We realloc'd the buffers; they already have the old contents.
118	 */
119	#if 0
120	memset(new_buf + size_linebuf, 0, new_size - size_linebuf);
121	memset(new_attr + size_linebuf, 0, new_size - size_linebuf);
122	#endif
123#else
124	/*
125	 * We just calloc'd the buffers; copy the old contents.
126	 */
127	memcpy(new_buf, linebuf, size_linebuf * sizeof(char));
128	memcpy(new_attr, attr, size_linebuf * sizeof(char));
129	free(attr);
130	free(linebuf);
131#endif
132	linebuf = new_buf;
133	attr = new_attr;
134	size_linebuf = new_size;
135	return 0;
136}
137
138/*
139 * Is a character ASCII?
140 */
141	public int
142is_ascii_char(ch)
143	LWCHAR ch;
144{
145	return (ch <= 0x7F);
146}
147
148/*
149 * Rewind the line buffer.
150 */
151	public void
152prewind()
153{
154	curr = 0;
155	column = 0;
156	cshift = 0;
157	overstrike = 0;
158	last_overstrike = AT_NORMAL;
159	mbc_buf_len = 0;
160	is_null_line = 0;
161	pendc = '\0';
162	lmargin = 0;
163	if (status_col)
164		lmargin += 1;
165#if HILITE_SEARCH
166	line_matches = 0;
167#endif
168}
169
170/*
171 * Insert the line number (of the given position) into the line buffer.
172 */
173	public void
174plinenum(pos)
175	POSITION pos;
176{
177	register LINENUM linenum = 0;
178	register int i;
179
180	if (linenums == OPT_ONPLUS)
181	{
182		/*
183		 * Get the line number and put it in the current line.
184		 * {{ Note: since find_linenum calls forw_raw_line,
185		 *    it may seek in the input file, requiring the caller
186		 *    of plinenum to re-seek if necessary. }}
187		 * {{ Since forw_raw_line modifies linebuf, we must
188		 *    do this first, before storing anything in linebuf. }}
189		 */
190		linenum = find_linenum(pos);
191	}
192
193	/*
194	 * Display a status column if the -J option is set.
195	 */
196	if (status_col)
197	{
198		linebuf[curr] = ' ';
199		if (start_attnpos != NULL_POSITION &&
200		    pos >= start_attnpos && pos < end_attnpos)
201			attr[curr] = AT_NORMAL|AT_HILITE;
202		else
203			attr[curr] = AT_NORMAL;
204		curr++;
205		column++;
206	}
207	/*
208	 * Display the line number at the start of each line
209	 * if the -N option is set.
210	 */
211	if (linenums == OPT_ONPLUS)
212	{
213		char buf[INT_STRLEN_BOUND(pos) + 2];
214		int n;
215
216		linenumtoa(linenum, buf);
217		n = strlen(buf);
218		if (n < MIN_LINENUM_WIDTH)
219			n = MIN_LINENUM_WIDTH;
220		sprintf(linebuf+curr, "%*s ", n, buf);
221		n++;  /* One space after the line number. */
222		for (i = 0; i < n; i++)
223			attr[curr+i] = AT_NORMAL;
224		curr += n;
225		column += n;
226		lmargin += n;
227	}
228
229	/*
230	 * Append enough spaces to bring us to the lmargin.
231	 */
232	while (column < lmargin)
233	{
234		linebuf[curr] = ' ';
235		attr[curr++] = AT_NORMAL;
236		column++;
237	}
238}
239
/*
 * Shift the input line left.
 * This means discarding N printable chars at the start of the buffer.
 *
 * shift is the number of screen columns to discard.  It is clamped so
 * that no more than what lies to the right of lmargin is removed;
 * everything before lmargin stays where it is.  The surviving text is
 * copied down over the discarded part.  On return, curr is the new end
 * of the buffer, and column has been decreased and cshift increased
 * by the number of columns actually discarded.
 */
	static void
pshift(shift)
	int shift;
{
	LWCHAR prev_ch = 0;	/* Previous char examined (0 if not multibyte) */
	unsigned char c;
	int shifted = 0;	/* Columns discarded so far */
	int to;			/* Write index into linebuf/attr */
	int from;		/* Read index into linebuf/attr */
	int len;		/* Length in bytes of the char at from */
	int width;		/* Columns occupied by the char at from */
	int prev_attr;
	int next_attr;

	/* Never discard more than what is stored after the margin. */
	if (shift > column - lmargin)
		shift = column - lmargin;
	if (shift > curr - lmargin)
		shift = curr - lmargin;

	to = from = lmargin;
	/*
	 * We keep on going when shifted == shift
	 * to get all combining chars.
	 */
	while (shifted <= shift && from < curr)
	{
		c = linebuf[from];
		if (c == ESC && ctldisp == OPT_ONPLUS)
		{
			/* Keep cumulative effect.  */
			linebuf[to] = c;
			attr[to++] = attr[from++];
			/*
			 * Copy the rest of the escape sequence, up to and
			 * including the first char that is not a "middle" char.
			 */
			while (from < curr && linebuf[from])
			{
				linebuf[to] = linebuf[from];
				attr[to++] = attr[from];
				if (!is_ansi_middle(linebuf[from++]))
					break;
			}
			continue;
		}

		width = 0;

		if (!IS_ASCII_OCTET(c) && utf_mode)
		{
			/* Assumes well-formedness validation already done.  */
			LWCHAR ch;

			len = utf_len(c);
			if (from + len > curr)
				break;
			ch = get_wchar(linebuf + from);
			/* Composing and combining chars occupy no column. */
			if (!is_composing_char(ch) && !is_combining_char(prev_ch, ch))
				width = is_wide_char(ch) ? 2 : 1;
			prev_ch = ch;
		} else
		{
			len = 1;
			if (c == '\b')
				/* XXX - Incorrect if several '\b' in a row.  */
				width = (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
			else if (!control_char(c))
				width = 1;
			prev_ch = 0;
		}

		/*
		 * A two-column char straddles the shift boundary: only one
		 * column remains to be discarded.  Replace it by a blank.
		 */
		if (width == 2 && shift - shifted == 1) {
			/* Should never happen when called by pshift_all().  */
			attr[to] = attr[from];
			/*
			 * Assume a wide_char will never be the first half of a
			 * combining_char pair, so reset prev_ch in case we're
			 * followed by a '\b'.
			 */
			prev_ch = linebuf[to++] = ' ';
			from += len;
			shifted++;
			continue;
		}

		/* Adjust width for magic cookies. */
		prev_attr = (to > 0) ? attr[to-1] : AT_NORMAL;
		next_attr = (from + len < curr) ? attr[from + len] : prev_attr;
		if (!is_at_equiv(attr[from], prev_attr) &&
			!is_at_equiv(attr[from], next_attr))
		{
			/*
			 * This char's attribute differs from both neighbours,
			 * so its enter/exit sequences go away with it.
			 */
			width += attr_swidth(attr[from]);
			if (from + len < curr)
				width += attr_ewidth(attr[from]);
			if (is_at_equiv(prev_attr, next_attr))
			{
				width += attr_ewidth(prev_attr);
				if (from + len < curr)
					width += attr_swidth(next_attr);
			}
		}

		/* Stop when this char does not fit in what is left to discard. */
		if (shift - shifted < width)
			break;
		from += len;
		shifted += width;
		/* Backspaces have negative width; never go below zero. */
		if (shifted < 0)
			shifted = 0;
	}
	/* Move the remainder of the line down over the discarded part. */
	while (from < curr)
	{
		linebuf[to] = linebuf[from];
		attr[to++] = attr[from++];
	}
	curr = to;
	column -= shifted;
	cshift += shifted;
}
358
359/*
360 *
361 */
362	public void
363pshift_all()
364{
365	pshift(column);
366}
367
368/*
369 * Return the printing width of the start (enter) sequence
370 * for a given character attribute.
371 */
372	static int
373attr_swidth(a)
374	int a;
375{
376	int w = 0;
377
378	a = apply_at_specials(a);
379
380	if (a & AT_UNDERLINE)
381		w += ul_s_width;
382	if (a & AT_BOLD)
383		w += bo_s_width;
384	if (a & AT_BLINK)
385		w += bl_s_width;
386	if (a & AT_STANDOUT)
387		w += so_s_width;
388
389	return w;
390}
391
392/*
393 * Return the printing width of the end (exit) sequence
394 * for a given character attribute.
395 */
396	static int
397attr_ewidth(a)
398	int a;
399{
400	int w = 0;
401
402	a = apply_at_specials(a);
403
404	if (a & AT_UNDERLINE)
405		w += ul_e_width;
406	if (a & AT_BOLD)
407		w += bo_e_width;
408	if (a & AT_BLINK)
409		w += bl_e_width;
410	if (a & AT_STANDOUT)
411		w += so_e_width;
412
413	return w;
414}
415
416/*
417 * Return the printing width of a given character and attribute,
418 * if the character were added to the current position in the line buffer.
419 * Adding a character with a given attribute may cause an enter or exit
420 * attribute sequence to be inserted, so this must be taken into account.
421 */
422	static int
423pwidth(ch, a, prev_ch)
424	LWCHAR ch;
425	int a;
426	LWCHAR prev_ch;
427{
428	int w;
429
430	if (ch == '\b')
431		/*
432		 * Backspace moves backwards one or two positions.
433		 * XXX - Incorrect if several '\b' in a row.
434		 */
435		return (utf_mode && is_wide_char(prev_ch)) ? -2 : -1;
436
437	if (!utf_mode || is_ascii_char(ch))
438	{
439		if (control_char((char)ch))
440		{
441			/*
442			 * Control characters do unpredictable things,
443			 * so we don't even try to guess; say it doesn't move.
444			 * This can only happen if the -r flag is in effect.
445			 */
446			return (0);
447		}
448	} else
449	{
450		if (is_composing_char(ch) || is_combining_char(prev_ch, ch))
451		{
452			/*
453			 * Composing and combining chars take up no space.
454			 *
455			 * Some terminals, upon failure to compose a
456			 * composing character with the character(s) that
457			 * precede(s) it will actually take up one column
458			 * for the composing character; there isn't much
459			 * we could do short of testing the (complex)
460			 * composition process ourselves and printing
461			 * a binary representation when it fails.
462			 */
463			return (0);
464		}
465	}
466
467	/*
468	 * Other characters take one or two columns,
469	 * plus the width of any attribute enter/exit sequence.
470	 */
471	w = 1;
472	if (is_wide_char(ch))
473		w++;
474	if (curr > 0 && !is_at_equiv(attr[curr-1], a))
475		w += attr_ewidth(attr[curr-1]);
476	if ((apply_at_specials(a) != AT_NORMAL) &&
477	    (curr == 0 || !is_at_equiv(attr[curr-1], a)))
478		w += attr_swidth(a);
479	return (w);
480}
481
/*
 * Delete to the previous base character in the line buffer.
 * Return 1 if one is found.
 *
 * Characters are removed from the end of the buffer one at a time,
 * and column is reduced by each one's pwidth, until a character of
 * positive width has been removed.  Returns 0 if the left margin, or
 * a character stored with the AT_ANSI or AT_BINARY attribute, is
 * reached first.
 */
	static int
backc()
{
	LWCHAR prev_ch;
	char *p = linebuf + curr;
	/* ch is the last char in the buffer; p now points at its start. */
	LWCHAR ch = step_char(&p, -1, linebuf + lmargin);
	int width;

	/* This assumes that there is no '\b' in linebuf.  */
	while (   curr > lmargin
	       && column > lmargin
	       && (!(attr[curr - 1] & (AT_ANSI|AT_BINARY))))
	{
		/* Drop ch from the buffer. */
		curr = p - linebuf;
		/* Fetch the char before it; pwidth needs it as context. */
		prev_ch = step_char(&p, -1, linebuf + lmargin);
		width = pwidth(ch, attr[curr], prev_ch);
		column -= width;
		if (width > 0)
			return 1;
		/* Zero-width char removed; continue with the one before. */
		ch = prev_ch;
	}

	return 0;
}
510
511/*
512 * Are we currently within a recognized ANSI escape sequence?
513 */
514	static int
515in_ansi_esc_seq()
516{
517	char *p;
518
519	/*
520	 * Search backwards for either an ESC (which means we ARE in a seq);
521	 * or an end char (which means we're NOT in a seq).
522	 */
523	for (p = &linebuf[curr];  p > linebuf; )
524	{
525		LWCHAR ch = step_char(&p, -1, linebuf);
526		if (ch == ESC)
527			return (1);
528		if (!is_ansi_middle(ch))
529			return (0);
530	}
531	return (0);
532}
533
534/*
535 * Is a character the end of an ANSI escape sequence?
536 */
537	public int
538is_ansi_end(ch)
539	LWCHAR ch;
540{
541	if (!is_ascii_char(ch))
542		return (0);
543	return (strchr(end_ansi_chars, (char) ch) != NULL);
544}
545
546/*
547 *
548 */
549	public int
550is_ansi_middle(ch)
551	LWCHAR ch;
552{
553	if (!is_ascii_char(ch))
554		return (0);
555	if (is_ansi_end(ch))
556		return (0);
557	return (strchr(mid_ansi_chars, (char) ch) != NULL);
558}
559
560/*
561 * Append a character and attribute to the line buffer.
562 */
563#define	STORE_CHAR(ch,a,rep,pos) \
564	do { \
565		if (store_char((ch),(a),(rep),(pos))) return (1); \
566	} while (0)
567
568	static int
569store_char(ch, a, rep, pos)
570	LWCHAR ch;
571	int a;
572	char *rep;
573	POSITION pos;
574{
575	int w;
576	int replen;
577	char cs;
578
579	w = (a & (AT_UNDERLINE|AT_BOLD));	/* Pre-use w.  */
580	if (w != AT_NORMAL)
581		last_overstrike = w;
582
583#if HILITE_SEARCH
584	{
585		int matches;
586		if (is_hilited(pos, pos+1, 0, &matches))
587		{
588			/*
589			 * This character should be highlighted.
590			 * Override the attribute passed in.
591			 */
592			if (a != AT_ANSI)
593				a |= AT_HILITE;
594		}
595		line_matches += matches;
596	}
597#endif
598
599	if (ctldisp == OPT_ONPLUS && in_ansi_esc_seq())
600	{
601		if (!is_ansi_end(ch) && !is_ansi_middle(ch)) {
602			/* Remove whole unrecognized sequence.  */
603			do {
604				--curr;
605			} while (linebuf[curr] != ESC);
606			return 0;
607		}
608		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
609		w = 0;
610	}
611	else if (ctldisp == OPT_ONPLUS && ch == ESC)
612	{
613		a = AT_ANSI;	/* Will force re-AT_'ing around it.  */
614		w = 0;
615	}
616	else
617	{
618		char *p = &linebuf[curr];
619		LWCHAR prev_ch = step_char(&p, -1, linebuf);
620		w = pwidth(ch, a, prev_ch);
621	}
622
623	if (ctldisp != OPT_ON && column + w + attr_ewidth(a) > sc_width)
624		/*
625		 * Won't fit on screen.
626		 */
627		return (1);
628
629	if (rep == NULL)
630	{
631		cs = (char) ch;
632		rep = &cs;
633		replen = 1;
634	} else
635	{
636		replen = utf_len(rep[0]);
637	}
638	if (curr + replen >= size_linebuf-6)
639	{
640		/*
641		 * Won't fit in line buffer.
642		 * Try to expand it.
643		 */
644		if (expand_linebuf())
645			return (1);
646	}
647
648	while (replen-- > 0)
649	{
650		linebuf[curr] = *rep++;
651		attr[curr] = a;
652		curr++;
653	}
654	column += w;
655	return (0);
656}
657
658/*
659 * Append a tab to the line buffer.
660 * Store spaces to represent the tab.
661 */
662#define	STORE_TAB(a,pos) \
663	do { if (store_tab((a),(pos))) return (1); } while (0)
664
665	static int
666store_tab(attr, pos)
667	int attr;
668	POSITION pos;
669{
670	int to_tab = column + cshift - lmargin;
671	int i;
672
673	if (ntabstops < 2 || to_tab >= tabstops[ntabstops-1])
674		to_tab = tabdefault -
675		     ((to_tab - tabstops[ntabstops-1]) % tabdefault);
676	else
677	{
678		for (i = ntabstops - 2;  i >= 0;  i--)
679			if (to_tab >= tabstops[i])
680				break;
681		to_tab = tabstops[i+1] - to_tab;
682	}
683
684	if (column + to_tab - 1 + pwidth(' ', attr, 0) + attr_ewidth(attr) > sc_width)
685		return 1;
686
687	do {
688		STORE_CHAR(' ', attr, " ", pos);
689	} while (--to_tab > 0);
690	return 0;
691}
692
693#define STORE_PRCHAR(c, pos) \
694	do { if (store_prchar((c), (pos))) return 1; } while (0)
695
696	static int
697store_prchar(c, pos)
698	char c;
699	POSITION pos;
700{
701	char *s;
702
703	/*
704	 * Convert to printable representation.
705	 */
706	s = prchar(c);
707
708	/*
709	 * Make sure we can get the entire representation
710	 * of the character on this line.
711	 */
712	if (column + (int) strlen(s) - 1 +
713            pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
714		return 1;
715
716	for ( ;  *s != 0;  s++)
717		STORE_CHAR(*s, AT_BINARY, NULL, pos);
718
719	return 0;
720}
721
722	static int
723flush_mbc_buf(pos)
724	POSITION pos;
725{
726	int i;
727
728	for (i = 0; i < mbc_buf_index; i++)
729		if (store_prchar(mbc_buf[i], pos))
730			return mbc_buf_index - i;
731
732	return 0;
733}
734
/*
 * Append a character to the line buffer.
 * Expand tabs into spaces, handle underlining, boldfacing, etc.
 * Returns 0 if ok, nonzero if the character couldn't be stored.
 * A nonzero value tells the caller how many chars to back up:
 * 1 when not in UTF-8 mode (and for a pending char that could not be
 * stored), otherwise a count derived from the multibyte buffer.
 *
 * c is the next byte of input and pos its position in the file.
 * In UTF-8 mode, bytes of a multibyte sequence are collected in
 * mbc_buf and appended as one character once the sequence is complete;
 * invalid or truncated sequences are stored byte by byte in printable
 * form via flush_mbc_buf.
 */
	public int
pappend(c, pos)
	char c;
	POSITION pos;
{
	int r;

	/* First store the char held back by the previous call, if any. */
	if (pendc)
	{
		if (do_append(pendc, NULL, pendpos))
			/*
			 * Oops.  We've probably lost the char which
			 * was in pendc, since caller won't back up.
			 */
			return (1);
		pendc = '\0';
	}

	if (c == '\r' && bs_mode == BS_SPECIAL)
	{
		if (mbc_buf_len > 0)  /* utf_mode must be on. */
		{
			/* Flush incomplete (truncated) sequence. */
			r = flush_mbc_buf(mbc_pos);
			/* NOTE(review): the +1 presumably accounts for c itself. */
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			if (r)
				return (mbc_buf_index);
		}

		/*
		 * Don't put the CR into the buffer until we see
		 * the next char.  If the next char is a newline,
		 * discard the CR.
		 */
		pendc = c;
		pendpos = pos;
		return (0);
	}

	if (!utf_mode)
	{
		r = do_append((LWCHAR) c, NULL, pos);
	} else
	{
		/* Perform strict validation in all possible cases. */
		if (mbc_buf_len == 0)
		{
			/* No sequence in progress: c starts a new character. */
		retry:
			mbc_buf_index = 1;
			*mbc_buf = c;
			if (IS_ASCII_OCTET(c))
				r = do_append((LWCHAR) c, NULL, pos);
			else if (IS_UTF8_LEAD(c))
			{
				/* Start collecting; nothing is stored yet. */
				mbc_buf_len = utf_len(c);
				mbc_pos = pos;
				return (0);
			} else
				/* UTF8_INVALID or stray UTF8_TRAIL */
				r = flush_mbc_buf(pos);
		} else if (IS_UTF8_TRAIL(c))
		{
			/* Continuation byte of the sequence in progress. */
			mbc_buf[mbc_buf_index++] = c;
			if (mbc_buf_index < mbc_buf_len)
				return (0);
			if (is_utf8_well_formed(mbc_buf))
				r = do_append(get_wchar(mbc_buf), mbc_buf, mbc_pos);
			else
				/* Complete, but not shortest form, sequence. */
				mbc_buf_index = r = flush_mbc_buf(mbc_pos);
			mbc_buf_len = 0;
		} else
		{
			/* Flush incomplete (truncated) sequence.  */
			r = flush_mbc_buf(mbc_pos);
			mbc_buf_index = r + 1;
			mbc_buf_len = 0;
			/* Handle new char.  */
			if (!r)
				goto retry;
 		}
	}

	/*
	 * If we need to shift the line, do it.
	 * But wait until we get to at least the middle of the screen,
	 * so shifting it doesn't affect the chars we're currently
	 * pappending.  (Bold & underline can get messed up otherwise.)
	 */
	if (cshift < hshift && column > sc_width / 2)
	{
		linebuf[curr] = '\0';
		pshift(hshift - cshift);
	}
	if (r)
	{
		/* How many chars should caller back up? */
		r = (!utf_mode) ? 1 : mbc_buf_index;
	}
	return (r);
}
842
	static int
do_append(ch, rep, pos)
	LWCHAR ch;
	char *rep;
	POSITION pos;
{
	/*
	 * Append one complete character to the line buffer, interpreting
	 * backspaces, overstrikes, tabs, control characters and (in UTF-8
	 * mode) binary characters.
	 * ch is the character, rep the bytes which represent it (or NULL
	 * to store ch as a single byte), pos its position in the file.
	 * Returns 0 if ok, 1 if the character could not be stored.  The
	 * STORE_* macros used below return 1 from this function on failure.
	 */
	register int a;
	LWCHAR prev_ch;

	a = AT_NORMAL;

	if (ch == '\b')
	{
		if (bs_mode == BS_CONTROL)
			goto do_control_char;

		/*
		 * A better test is needed here so we don't
		 * backspace over part of the printed
		 * representation of a binary character.
		 */
		if (   curr <= lmargin
		    || column <= lmargin
		    || (attr[curr - 1] & (AT_ANSI|AT_BINARY)))
			/* Nothing to back up over: show the backspace itself. */
			STORE_PRCHAR('\b', pos);
		else if (bs_mode == BS_NORMAL)
			STORE_CHAR(ch, AT_NORMAL, NULL, pos);
		else if (bs_mode == BS_SPECIAL)
			/* Remove the previous char; the next char overstrikes it. */
			overstrike = backc();

		return 0;
	}

	if (overstrike > 0)
	{
		/*
		 * Overstrike the character at the current position
		 * in the line buffer.  This will cause either
		 * underline (if a "_" is overstruck),
		 * bold (if an identical character is overstruck),
		 * or just deletion of the character in the buffer.
		 */
		overstrike = utf_mode ? -1 : 0;
		/* To be correct, this must be a base character.  */
		prev_ch = get_wchar(linebuf + curr);
		a = attr[curr];
		if (ch == prev_ch)
		{
			/*
			 * Overstriking a char with itself means make it bold.
			 * But overstriking an underscore with itself is
			 * ambiguous.  It could mean make it bold, or
			 * it could mean make it underlined.
			 * Use the previous overstrike to resolve it.
			 */
			if (ch == '_')
			{
				if ((a & (AT_BOLD|AT_UNDERLINE)) != AT_NORMAL)
					a |= (AT_BOLD|AT_UNDERLINE);
				else if (last_overstrike != AT_NORMAL)
					a |= last_overstrike;
				else
					a |= AT_BOLD;
			} else
				a |= AT_BOLD;
		} else if (ch == '_')
		{
			/*
			 * Underscore over an existing char: keep that char
			 * (its bytes are still in linebuf at curr) and
			 * underline it.
			 */
			a |= AT_UNDERLINE;
			ch = prev_ch;
			rep = linebuf + curr;
		} else if (prev_ch == '_')
		{
			/* New char over an underscore: underline the new char. */
			a |= AT_UNDERLINE;
		}
		/* Else we replace prev_ch, but we keep its attributes.  */
	} else if (overstrike < 0)
	{
		/* Only reached in UTF-8 mode, right after an overstrike. */
		if (   is_composing_char(ch)
		    || is_combining_char(get_wchar(linebuf + curr), ch))
			/* Continuation of the same overstrike.  */
			a = last_overstrike;
		else
			overstrike = 0;
	}

	if (ch == '\t')
	{
		/*
		 * Expand a tab into spaces.
		 */
		switch (bs_mode)
		{
		case BS_CONTROL:
			goto do_control_char;
		case BS_NORMAL:
		case BS_SPECIAL:
			STORE_TAB(a, pos);
			break;
		}
	} else if ((!utf_mode || is_ascii_char(ch)) && control_char((char)ch))
	{
	do_control_char:
		if (ctldisp == OPT_ON || (ctldisp == OPT_ONPLUS && ch == ESC))
		{
			/*
			 * Output as a normal character.
			 */
			STORE_CHAR(ch, AT_NORMAL, rep, pos);
		} else
		{
			STORE_PRCHAR((char) ch, pos);
		}
	} else if (utf_mode && ctldisp != OPT_ON && is_ubin_char(ch))
	{
		char *s;

		/* Show a binary Unicode char in its printable form. */
		s = prutfchar(ch);

		/* The whole representation must fit on this line. */
		if (column + (int) strlen(s) - 1 +
		    pwidth(' ', binattr, 0) + attr_ewidth(binattr) > sc_width)
			return (1);

		for ( ;  *s != 0;  s++)
			STORE_CHAR(*s, AT_BINARY, NULL, pos);
 	} else
	{
		STORE_CHAR(ch, a, rep, pos);
	}
 	return (0);
}
973
974/*
975 *
976 */
977	public int
978pflushmbc()
979{
980	int r = 0;
981
982	if (mbc_buf_len > 0)
983	{
984		/* Flush incomplete (truncated) sequence.  */
985		r = flush_mbc_buf(mbc_pos);
986		mbc_buf_len = 0;
987	}
988	return r;
989}
990
991/*
992 * Terminate the line in the line buffer.
993 */
994	public void
995pdone(endline)
996	int endline;
997{
998	(void) pflushmbc();
999
1000	if (pendc && (pendc != '\r' || !endline))
1001		/*
1002		 * If we had a pending character, put it in the buffer.
1003		 * But discard a pending CR if we are at end of line
1004		 * (that is, discard the CR in a CR/LF sequence).
1005		 */
1006		(void) do_append(pendc, NULL, pendpos);
1007
1008	/*
1009	 * Make sure we've shifted the line, if we need to.
1010	 */
1011	if (cshift < hshift)
1012		pshift(hshift - cshift);
1013
1014	if (ctldisp == OPT_ONPLUS && is_ansi_end('m'))
1015	{
1016		/* Switch to normal attribute at end of line. */
1017		char *p = "\033[m";
1018		for ( ;  *p != '\0';  p++)
1019		{
1020			linebuf[curr] = *p;
1021			attr[curr++] = AT_ANSI;
1022		}
1023	}
1024
1025	/*
1026	 * Add a newline if necessary,
1027	 * and append a '\0' to the end of the line.
1028	 */
1029	if (column < sc_width || !auto_wrap || ignaw || ctldisp == OPT_ON)
1030	{
1031		linebuf[curr] = '\n';
1032		attr[curr] = AT_NORMAL;
1033		curr++;
1034	}
1035	linebuf[curr] = '\0';
1036	attr[curr] = AT_NORMAL;
1037
1038#if HILITE_SEARCH
1039	if (status_col && line_matches > 0)
1040	{
1041		linebuf[0] = '*';
1042		attr[0] = AT_NORMAL|AT_HILITE;
1043	}
1044#endif
1045}
1046
1047/*
1048 * Get a character from the current line.
1049 * Return the character as the function return value,
1050 * and the character attribute in *ap.
1051 */
1052	public int
1053gline(i, ap)
1054	register int i;
1055	register int *ap;
1056{
1057	if (is_null_line)
1058	{
1059		/*
1060		 * If there is no current line, we pretend the line is
1061		 * either "~" or "", depending on the "twiddle" flag.
1062		 */
1063		if (twiddle)
1064		{
1065			if (i == 0)
1066			{
1067				*ap = AT_BOLD;
1068				return '~';
1069			}
1070			--i;
1071		}
1072		/* Make sure we're back to AT_NORMAL before the '\n'.  */
1073		*ap = AT_NORMAL;
1074		return i ? '\0' : '\n';
1075	}
1076
1077	*ap = attr[i];
1078	return (linebuf[i] & 0xFF);
1079}
1080
1081/*
1082 * Indicate that there is no current line.
1083 */
1084	public void
1085null_line()
1086{
1087	is_null_line = 1;
1088	cshift = 0;
1089}
1090
1091/*
1092 * Analogous to forw_line(), but deals with "raw lines":
1093 * lines which are not split for screen width.
1094 * {{ This is supposed to be more efficient than forw_line(). }}
1095 */
1096	public POSITION
1097forw_raw_line(curr_pos, linep)
1098	POSITION curr_pos;
1099	char **linep;
1100{
1101	register int n;
1102	register int c;
1103	POSITION new_pos;
1104
1105	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
1106		(c = ch_forw_get()) == EOI)
1107		return (NULL_POSITION);
1108
1109	n = 0;
1110	for (;;)
1111	{
1112		if (c == '\n' || c == EOI || ABORT_SIGS())
1113		{
1114			new_pos = ch_tell();
1115			break;
1116		}
1117		if (n >= size_linebuf-1)
1118		{
1119			if (expand_linebuf())
1120			{
1121				/*
1122				 * Overflowed the input buffer.
1123				 * Pretend the line ended here.
1124				 */
1125				new_pos = ch_tell() - 1;
1126				break;
1127			}
1128		}
1129		linebuf[n++] = c;
1130		c = ch_forw_get();
1131	}
1132	linebuf[n] = '\0';
1133	if (linep != NULL)
1134		*linep = linebuf;
1135	return (new_pos);
1136}
1137
1138/*
1139 * Analogous to back_line(), but deals with "raw lines".
1140 * {{ This is supposed to be more efficient than back_line(). }}
1141 */
1142	public POSITION
1143back_raw_line(curr_pos, linep)
1144	POSITION curr_pos;
1145	char **linep;
1146{
1147	register int n;
1148	register int c;
1149	POSITION new_pos;
1150
1151	if (curr_pos == NULL_POSITION || curr_pos <= ch_zero() ||
1152		ch_seek(curr_pos-1))
1153		return (NULL_POSITION);
1154
1155	n = size_linebuf;
1156	linebuf[--n] = '\0';
1157	for (;;)
1158	{
1159		c = ch_back_get();
1160		if (c == '\n' || ABORT_SIGS())
1161		{
1162			/*
1163			 * This is the newline ending the previous line.
1164			 * We have hit the beginning of the line.
1165			 */
1166			new_pos = ch_tell() + 1;
1167			break;
1168		}
1169		if (c == EOI)
1170		{
1171			/*
1172			 * We have hit the beginning of the file.
1173			 * This must be the first line in the file.
1174			 * This must, of course, be the beginning of the line.
1175			 */
1176			new_pos = ch_zero();
1177			break;
1178		}
1179		if (n <= 0)
1180		{
1181			int old_size_linebuf = size_linebuf;
1182			char *fm;
1183			char *to;
1184			if (expand_linebuf())
1185			{
1186				/*
1187				 * Overflowed the input buffer.
1188				 * Pretend the line ended here.
1189				 */
1190				new_pos = ch_tell() + 1;
1191				break;
1192			}
1193			/*
1194			 * Shift the data to the end of the new linebuf.
1195			 */
1196			for (fm = linebuf + old_size_linebuf - 1,
1197			      to = linebuf + size_linebuf - 1;
1198			     fm >= linebuf;  fm--, to--)
1199				*to = *fm;
1200			n = size_linebuf - old_size_linebuf;
1201		}
1202		linebuf[--n] = c;
1203	}
1204	if (linep != NULL)
1205		*linep = &linebuf[n];
1206	return (new_pos);
1207}
1208