1/*	$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $	*/
2
3/*-
4 * SPDX-License-Identifier: BSD-3-Clause
5 *
6 * Copyright (c) 1990, 1993, 1994
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Michael Rendell of the Memorial University of Newfoundland.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#ifndef lint
39__COPYRIGHT("@(#) Copyright (c) 1990, 1993, 1994\
40 The Regents of the University of California.  All rights reserved.");
41#endif /* not lint */
42
43#ifndef lint
44#if 0
45static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
46__FBSDID("$FreeBSD: head/usr.bin/col/col.c 366577 2020-10-09 15:27:37Z markj $")
47;
48
49#endif
50__RCSID("$NetBSD: col.c,v 1.20 2021/09/10 21:52:17 rillig Exp $");
51#endif /* not lint */
52
53#include <err.h>
54#include <errno.h>
55#include <inttypes.h>
56#include <limits.h>
57#include <locale.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <termios.h>
62#include <unistd.h>
63#include <wchar.h>
64#include <wctype.h>
65
/*
 * Input characters that col interprets itself rather than storing in a
 * line.  RLF, RHLF and FHLF are the characters that follow ESC in a
 * line-motion escape sequence.
 */
#define	BS	'\b'		/* backspace */
#define	TAB	'\t'		/* tab */
#define	SPACE	' '		/* space */
#define	NL	'\n'		/* newline */
#define	CR	'\r'		/* carriage return */
#define	ESC	'\033'		/* escape */
#define	SI	'\017'		/* shift in to normal character set */
#define	SO	'\016'		/* shift out to alternate character set */
#define	VT	'\013'		/* vertical tab (aka reverse line feed) */
#define	RLF	'7'		/* ESC-7 reverse line feed */
#define	RHLF	'8'		/* ESC-8 reverse half-line feed */
#define	FHLF	'9'		/* ESC-9 forward half-line feed */

/* build up at least this many lines before flushing them out */
#define	BUFFER_MARGIN		32
81
/* Character set tag; holds CS_NORMAL or CS_ALTERNATE. */
typedef char CSET;

/* One stored output character together with its position and attributes. */
typedef struct char_str {
#define	CS_NORMAL	1	/* selected by SI */
#define	CS_ALTERNATE	2	/* selected by SO */
	int		c_column;	/* column character is in */
	CSET		c_set;		/* character set (currently only 2) */
	wchar_t		c_char;		/* character in question */
	int		c_width;	/* width from wcwidth(); may be <= 0 */
} CHAR;
92
/* One buffered output line, kept on a doubly linked list. */
typedef struct line_str LINE;
struct line_str {
	CHAR	*l_line;		/* characters on the line */
	LINE	*l_prev;		/* previous line */
	LINE	*l_next;		/* next line */
	int	l_lsize;		/* # of CHAR slots allocated in l_line */
	int	l_line_len;		/* # of CHARs stored in l_line */
	int	l_needs_sort;		/* set if chars went in out of order */
	int	l_max_col;		/* max column in the line */
};
103
104static void	addto_lineno(int *, int);
105static LINE	*alloc_line(void);
106static void	dowarn(int);
107static void	flush_line(LINE *);
108static void	flush_lines(int);
109static void	flush_blanks(void);
110static void	free_line(LINE *);
111__dead static void	usage(void);
112
static CSET	last_set;		/* char_set of last char printed */
static LINE	*lines;			/* first buffered line not yet flushed */
static int	compress_spaces;	/* if doing space -> tab conversion */
static int	fine;			/* if `fine' resolution (half lines) */
static int	max_bufd_lines;		/* max # of half lines to keep in memory */
static int	nblank_lines;		/* # half line feeds after last flushed line */
static int	no_backspaces;		/* if not to output any backspaces */
static int	pass_unknown_seqs;	/* pass unknown control sequences */
121
/*
 * Write one wide character to stdout; terminate the program with a
 * "write error" diagnostic if putwchar() reports failure.
 */
#define	PUTC(ch) \
	do {					\
		if (putwchar(ch) == WEOF)	\
			errx(EXIT_FAILURE, "write error");	\
	} while (0)
127
128int
129main(int argc, char **argv)
130{
131	wint_t ch;
132	CHAR *c;
133	CSET cur_set;			/* current character set */
134	LINE *l;			/* current line */
135	int extra_lines;		/* # of lines above first line */
136	int cur_col;			/* current column */
137	int cur_line;			/* line number of current position */
138	int max_line;			/* max value of cur_line */
139	int this_line;			/* line l points to */
140	int nflushd_lines;		/* number of lines that were flushed */
141	int adjust, opt, warned, width;
142	int e;
143
144	(void)setlocale(LC_CTYPE, "");
145
146	max_bufd_lines = 256;
147	compress_spaces = 1;		/* compress spaces into tabs */
148	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
149		switch (opt) {
150		case 'b':		/* do not output backspaces */
151			no_backspaces = 1;
152			break;
153		case 'f':		/* allow half forward line feeds */
154			fine = 1;
155			break;
156		case 'h':		/* compress spaces into tabs */
157			compress_spaces = 1;
158			break;
159		case 'l':		/* buffered line count */
160			max_bufd_lines = (int)strtoi(optarg, NULL, 0, 1,
161			    (INT_MAX - BUFFER_MARGIN) / 2, &e) * 2;
162			if (e)
163				errc(EXIT_FAILURE, e, "bad -l argument `%s'",
164				    optarg);
165			break;
166		case 'p':		/* pass unknown control sequences */
167			pass_unknown_seqs = 1;
168			break;
169		case 'x':		/* do not compress spaces into tabs */
170			compress_spaces = 0;
171			break;
172		case '?':
173		default:
174			usage();
175		}
176
177	if (optind != argc)
178		usage();
179
180	adjust = cur_col = extra_lines = warned = 0;
181	cur_line = max_line = nflushd_lines = this_line = 0;
182	cur_set = last_set = CS_NORMAL;
183	lines = l = alloc_line();
184
185	while ((ch = getwchar()) != WEOF) {
186		if (!iswgraph(ch)) {
187			switch (ch) {
188			case BS:		/* can't go back further */
189				if (cur_col == 0)
190					continue;
191				--cur_col;
192				continue;
193			case CR:
194				cur_col = 0;
195				continue;
196			case ESC:		/* just ignore EOF */
197				switch(getwchar()) {
198				/*
199				 * In the input stream, accept both the
200				 * XPG5 sequences ESC-digit and the
201				 * traditional BSD sequences ESC-ctrl.
202				 */
203				case '\007':
204					/* FALLTHROUGH */
205				case RLF:
206					addto_lineno(&cur_line, -2);
207					break;
208				case '\010':
209					/* FALLTHROUGH */
210				case RHLF:
211					addto_lineno(&cur_line, -1);
212					break;
213				case '\011':
214					/* FALLTHROUGH */
215				case FHLF:
216					addto_lineno(&cur_line, 1);
217					if (cur_line > max_line)
218						max_line = cur_line;
219				}
220				continue;
221			case NL:
222				addto_lineno(&cur_line, 2);
223				if (cur_line > max_line)
224					max_line = cur_line;
225				cur_col = 0;
226				continue;
227			case SPACE:
228				++cur_col;
229				continue;
230			case SI:
231				cur_set = CS_NORMAL;
232				continue;
233			case SO:
234				cur_set = CS_ALTERNATE;
235				continue;
236			case TAB:		/* adjust column */
237				cur_col |= 7;
238				++cur_col;
239				continue;
240			case VT:
241				addto_lineno(&cur_line, -2);
242				continue;
243			}
244			if (iswspace(ch)) {
245				if ((width = wcwidth(ch)) > 0)
246					cur_col += width;
247				continue;
248			}
249			if (!pass_unknown_seqs)
250				continue;
251		}
252
253		/* Must stuff ch in a line - are we at the right one? */
254		if (cur_line + adjust != this_line) {
255			LINE *lnew;
256
257			/* round up to next line */
258			adjust = !fine && (cur_line & 1);
259
260			if (cur_line + adjust < this_line) {
261				while (cur_line + adjust < this_line &&
262				    l->l_prev != NULL) {
263					l = l->l_prev;
264					this_line--;
265				}
266				if (cur_line + adjust < this_line) {
267					if (nflushd_lines == 0) {
268						/*
269						 * Allow backup past first
270						 * line if nothing has been
271						 * flushed yet.
272						 */
273						while (cur_line + adjust
274						    < this_line) {
275							lnew = alloc_line();
276							l->l_prev = lnew;
277							lnew->l_next = l;
278							l = lines = lnew;
279							extra_lines++;
280							this_line--;
281						}
282					} else {
283						if (!warned++)
284							dowarn(cur_line);
285						cur_line = this_line - adjust;
286					}
287				}
288			} else {
289				/* may need to allocate here */
290				while (cur_line + adjust > this_line) {
291					if (l->l_next == NULL) {
292						l->l_next = alloc_line();
293						l->l_next->l_prev = l;
294					}
295					l = l->l_next;
296					this_line++;
297				}
298			}
299			if (this_line > nflushd_lines &&
300			    this_line - nflushd_lines >=
301			    max_bufd_lines + BUFFER_MARGIN) {
302				if (extra_lines) {
303					flush_lines(extra_lines);
304					extra_lines = 0;
305				}
306				flush_lines(this_line - nflushd_lines -
307				    max_bufd_lines);
308				nflushd_lines = this_line - max_bufd_lines;
309			}
310		}
311		/* grow line's buffer? */
312		if (l->l_line_len + 1 >= l->l_lsize) {
313			int need;
314
315			need = l->l_lsize ? l->l_lsize * 2 : 90;
316			if ((l->l_line = realloc(l->l_line,
317			    (unsigned)need * sizeof(CHAR))) == NULL)
318				err(EXIT_FAILURE, NULL);
319			l->l_lsize = need;
320		}
321		c = &l->l_line[l->l_line_len++];
322		c->c_char = ch;
323		c->c_set = cur_set;
324		c->c_column = cur_col;
325		c->c_width = wcwidth(ch);
326		/*
327		 * If things are put in out of order, they will need sorting
328		 * when it is flushed.
329		 */
330		if (cur_col < l->l_max_col)
331			l->l_needs_sort = 1;
332		else
333			l->l_max_col = cur_col;
334		if (c->c_width > 0)
335			cur_col += c->c_width;
336	}
337	if (ferror(stdin))
338		err(EXIT_FAILURE, NULL);
339	if (extra_lines) {
340		/*
341		 * Extra lines only exist if no lines have been flushed
342		 * yet. This means that 'lines' must point to line zero
343		 * after we flush the extra lines.
344		 */
345		flush_lines(extra_lines);
346		l = lines;
347		this_line = 0;
348	}
349
350	/* goto the last line that had a character on it */
351	for (; l->l_next; l = l->l_next)
352		this_line++;
353	flush_lines(this_line - nflushd_lines + 1);
354
355	/* make sure we leave things in a sane state */
356	if (last_set != CS_NORMAL)
357		PUTC(SI);
358
359	/* flush out the last few blank lines */
360	if (max_line >= this_line)
361		nblank_lines = max_line - this_line + (max_line & 1);
362	if (nblank_lines == 0)
363		/* end with a newline even if the source doesn't */
364		nblank_lines = 2;
365	flush_blanks();
366	exit(EXIT_SUCCESS);
367}
368
369/*
370 * Prints the first 'nflush' lines. Printed lines are freed.
371 * After this function returns, 'lines' points to the first
372 * of the remaining lines, and 'nblank_lines' will have the
373 * number of half line feeds between the final flushed line
374 * and the first remaining line.
375 */
376static void
377flush_lines(int nflush)
378{
379	LINE *l;
380
381	while (--nflush >= 0) {
382		l = lines;
383		lines = l->l_next;
384		if (l->l_line) {
385			flush_blanks();
386			flush_line(l);
387			free(l->l_line);
388		}
389		if (l->l_next)
390			nblank_lines++;
391		free_line(l);
392	}
393	if (lines)
394		lines->l_prev = NULL;
395}
396
397/*
398 * Print a number of newline/half newlines.
399 * nblank_lines is the number of half line feeds.
400 */
401static void
402flush_blanks(void)
403{
404	int half, i, nb;
405
406	half = 0;
407	nb = nblank_lines;
408	if (nb & 1) {
409		if (fine)
410			half = 1;
411		else
412			nb++;
413	}
414	nb /= 2;
415	for (i = nb; --i >= 0;)
416		PUTC('\n');
417	if (half) {
418		PUTC(ESC);
419		PUTC(FHLF);
420		if (!nb)
421			PUTC('\r');
422	}
423	nblank_lines = 0;
424}
425
426/*
427 * Write a line to stdout taking care of space to tab conversion (-h flag)
428 * and character set shifts.
429 */
430static void
431flush_line(LINE *l)
432{
433	CHAR *c, *endc;
434	int i, j, nchars, last_col, save, this_col, tot;
435
436	last_col = 0;
437	nchars = l->l_line_len;
438
439	if (l->l_needs_sort) {
440		static CHAR *sorted;
441		static int count_size, *count, sorted_size;
442
443		/*
444		 * Do an O(n) sort on l->l_line by column being careful to
445		 * preserve the order of characters in the same column.
446		 */
447		if (l->l_lsize > sorted_size) {
448			sorted_size = l->l_lsize;
449			if ((sorted = realloc(sorted,
450			    sizeof(CHAR) * (size_t)sorted_size)) == NULL)
451				err(EXIT_FAILURE, NULL);
452		}
453		if (l->l_max_col >= count_size) {
454			count_size = l->l_max_col + 1;
455			if ((count = realloc(count,
456			    sizeof(int) * (size_t)count_size)) == NULL)
457				err(EXIT_FAILURE, NULL);
458		}
459		memset(count, 0, sizeof(int) * (size_t)l->l_max_col + 1);
460		for (i = nchars, c = l->l_line; --i >= 0; c++)
461			count[c->c_column]++;
462
463		/*
464		 * calculate running total (shifted down by 1) to use as
465		 * indices into new line.
466		 */
467		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
468			save = count[i];
469			count[i] = tot;
470			tot += save;
471		}
472
473		for (i = nchars, c = l->l_line; --i >= 0; c++)
474			sorted[count[c->c_column]++] = *c;
475		c = sorted;
476	} else
477		c = l->l_line;
478	while (nchars > 0) {
479		this_col = c->c_column;
480		endc = c;
481		do {
482			++endc;
483		} while (--nchars > 0 && this_col == endc->c_column);
484
485		/* if -b only print last character */
486		if (no_backspaces) {
487			c = endc - 1;
488			if (nchars > 0 &&
489			    this_col + c->c_width > endc->c_column)
490				continue;
491		}
492
493		if (this_col > last_col) {
494			int nspace = this_col - last_col;
495
496			if (compress_spaces && nspace > 1) {
497				while (1) {
498					int tab_col, tab_size;
499
500					tab_col = (last_col + 8) & ~7;
501					if (tab_col > this_col)
502						break;
503					tab_size = tab_col - last_col;
504					if (tab_size == 1)
505						PUTC(' ');
506					else
507						PUTC('\t');
508					nspace -= tab_size;
509					last_col = tab_col;
510				}
511			}
512			while (--nspace >= 0)
513				PUTC(' ');
514			last_col = this_col;
515		}
516
517		for (;;) {
518			if (c->c_set != last_set) {
519				switch (c->c_set) {
520				case CS_NORMAL:
521					PUTC(SI);
522					break;
523				case CS_ALTERNATE:
524					PUTC(SO);
525				}
526				last_set = c->c_set;
527			}
528			PUTC(c->c_char);
529			if ((c + 1) < endc)
530				for (j = 0; j < c->c_width; j++)
531					PUTC('\b');
532			if (++c >= endc)
533				break;
534		}
535		last_col += (c - 1)->c_width;
536	}
537}
538
/*
 * Add 'offset' to the line number '*lno', refusing to leave the int range.
 *
 * A positive offset must leave the result strictly below INT_MAX so that
 * the caller can still add its 0/1 rounding adjustment; a non-positive
 * offset must not take the result below INT_MIN.  On violation the
 * program exits with a diagnostic.
 */
void
addto_lineno(int *lno, int offset)
{
	const int cur = *lno;

	if (offset > 0 && cur >= INT_MAX - offset)
		errx(EXIT_FAILURE, "too many lines");
	if (offset <= 0 && cur < INT_MIN - offset)
		errx(EXIT_FAILURE, "too many reverse line feeds");

	*lno = cur + offset;
}
555
556#define	NALLOC 64
557
558static LINE *line_freelist;
559
560static LINE *
561alloc_line(void)
562{
563	LINE *l;
564	int i;
565
566	if (!line_freelist) {
567		if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
568			err(EXIT_FAILURE, NULL);
569		line_freelist = l;
570		for (i = 1; i < NALLOC; i++, l++)
571			l->l_next = l + 1;
572		l->l_next = NULL;
573	}
574	l = line_freelist;
575	line_freelist = l->l_next;
576
577	memset(l, 0, sizeof(LINE));
578	return (l);
579}
580
581static void
582free_line(LINE *l)
583{
584
585	l->l_next = line_freelist;
586	line_freelist = l;
587}
588
/*
 * Print the command synopsis on stderr and exit with a failure status.
 */
static void
usage(void)
{
	(void)fprintf(stderr, "Usage: %s [-bfhpx] [-l nline]\n",
	    getprogname());
	exit(EXIT_FAILURE);
}
596
/*
 * Warn on stderr that a reverse line motion could not be honoured.
 * 'line' is the line number that was requested: a negative value means
 * the motion went above the first line, anything else means the target
 * line has already been written out.
 */
static void
dowarn(int line)
{
	const char *why;

	if (line < 0)
		why = "past first line";
	else
		why = "-- line already flushed";
	warnx("warning: can't back up %s", why);
}
604