1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1990, 1993, 1994
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Michael Rendell of the Memorial University of Newfoundland.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 *    may be used to endorse or promote products derived from this software
20 *    without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35#ifndef lint
36static const char copyright[] =
37"@(#) Copyright (c) 1990, 1993, 1994\n\
38	The Regents of the University of California.  All rights reserved.\n";
39#endif
40
41#if 0
42#ifndef lint
43static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
44#endif
45#endif
46
47#include <sys/cdefs.h>
48__FBSDID("$FreeBSD$");
49
50#include <sys/capsicum.h>
51
52#include <capsicum_helpers.h>
53#include <err.h>
54#include <errno.h>
55#include <locale.h>
56#include <stdio.h>
57#include <stdlib.h>
58#include <string.h>
59#include <termios.h>
60#include <unistd.h>
61#include <wchar.h>
62#include <wctype.h>
63
64#define	BS	'\b'		/* backspace */
65#define	TAB	'\t'		/* tab */
66#define	SPACE	' '		/* space */
67#define	NL	'\n'		/* newline */
68#define	CR	'\r'		/* carriage return */
69#define	ESC	'\033'		/* escape */
70#define	SI	'\017'		/* shift in to normal character set */
71#define	SO	'\016'		/* shift out to alternate character set */
72#define	VT	'\013'		/* vertical tab (aka reverse line feed) */
73#define	RLF	'7'		/* ESC-7 reverse line feed */
74#define	RHLF	'8'		/* ESC-8 reverse half-line feed */
75#define	FHLF	'9'		/* ESC-9 forward half-line feed */
76
77/* build up at least this many lines before flushing them out */
78#define	BUFFER_MARGIN		32
79
80typedef char CSET;
81
82typedef struct char_str {
83#define	CS_NORMAL	1
84#define	CS_ALTERNATE	2
85	short		c_column;	/* column character is in */
86	CSET		c_set;		/* character set (currently only 2) */
87	wchar_t		c_char;		/* character in question */
88	int		c_width;	/* character width */
89} CHAR;
90
91typedef struct line_str LINE;
92struct line_str {
93	CHAR	*l_line;		/* characters on the line */
94	LINE	*l_prev;		/* previous line */
95	LINE	*l_next;		/* next line */
96	int	l_lsize;		/* allocated sizeof l_line */
97	int	l_line_len;		/* strlen(l_line) */
98	int	l_needs_sort;		/* set if chars went in out of order */
99	int	l_max_col;		/* max column in the line */
100};
101
102static void	addto_lineno(int *, int);
103static LINE	*alloc_line(void);
104static void	dowarn(int);
105static void	flush_line(LINE *);
106static void	flush_lines(int);
107static void	flush_blanks(void);
108static void	free_line(LINE *);
109static void	usage(void);
110
111static CSET	last_set;		/* char_set of last char printed */
112static LINE	*lines;
113static int	compress_spaces;	/* if doing space -> tab conversion */
114static int	fine;			/* if `fine' resolution (half lines) */
115static int	max_bufd_lines;		/* max # of half lines to keep in memory */
116static int	nblank_lines;		/* # blanks after last flushed line */
117static int	no_backspaces;		/* if not to output any backspaces */
118static int	pass_unknown_seqs;	/* pass unknown control sequences */
119
120#define	PUTC(ch) \
121	do {					\
122		if (putwchar(ch) == WEOF)	\
123			errx(1, "write error");	\
124	} while (0)
125
126int
127main(int argc, char **argv)
128{
129	wint_t ch;
130	CHAR *c;
131	CSET cur_set;			/* current character set */
132	LINE *l;			/* current line */
133	int extra_lines;		/* # of lines above first line */
134	int cur_col;			/* current column */
135	int cur_line;			/* line number of current position */
136	int max_line;			/* max value of cur_line */
137	int this_line;			/* line l points to */
138	int nflushd_lines;		/* number of lines that were flushed */
139	int adjust, opt, warned, width;
140	const char *errstr;
141
142	(void)setlocale(LC_CTYPE, "");
143
144	if (caph_limit_stdio() == -1)
145		err(1, "unable to limit stdio");
146
147	if (caph_enter() < 0)
148		err(1, "unable to enter capability mode");
149
150	max_bufd_lines = 256;
151	compress_spaces = 1;		/* compress spaces into tabs */
152	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
153		switch (opt) {
154		case 'b':		/* do not output backspaces */
155			no_backspaces = 1;
156			break;
157		case 'f':		/* allow half forward line feeds */
158			fine = 1;
159			break;
160		case 'h':		/* compress spaces into tabs */
161			compress_spaces = 1;
162			break;
163		case 'l':		/* buffered line count */
164			max_bufd_lines = strtonum(optarg, 1,
165			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
166			if (errstr != NULL)
167				errx(1, "bad -l argument, %s: %s", errstr,
168					optarg);
169			break;
170		case 'p':		/* pass unknown control sequences */
171			pass_unknown_seqs = 1;
172			break;
173		case 'x':		/* do not compress spaces into tabs */
174			compress_spaces = 0;
175			break;
176		case '?':
177		default:
178			usage();
179		}
180
181	if (optind != argc)
182		usage();
183
184	adjust = cur_col = extra_lines = warned = 0;
185	cur_line = max_line = nflushd_lines = this_line = 0;
186	cur_set = last_set = CS_NORMAL;
187	lines = l = alloc_line();
188
189	while ((ch = getwchar()) != WEOF) {
190		if (!iswgraph(ch)) {
191			switch (ch) {
192			case BS:		/* can't go back further */
193				if (cur_col == 0)
194					continue;
195				--cur_col;
196				continue;
197			case CR:
198				cur_col = 0;
199				continue;
200			case ESC:		/* just ignore EOF */
201				switch(getwchar()) {
202				/*
203				 * In the input stream, accept both the
204				 * XPG5 sequences ESC-digit and the
205				 * traditional BSD sequences ESC-ctrl.
206				 */
207				case '\007':
208					/* FALLTHROUGH */
209				case RLF:
210					addto_lineno(&cur_line, -2);
211					break;
212				case '\010':
213					/* FALLTHROUGH */
214				case RHLF:
215					addto_lineno(&cur_line, -1);
216					break;
217				case '\011':
218					/* FALLTHROUGH */
219				case FHLF:
220					addto_lineno(&cur_line, 1);
221					if (cur_line > max_line)
222						max_line = cur_line;
223				}
224				continue;
225			case NL:
226				addto_lineno(&cur_line, 2);
227				if (cur_line > max_line)
228					max_line = cur_line;
229				cur_col = 0;
230				continue;
231			case SPACE:
232				++cur_col;
233				continue;
234			case SI:
235				cur_set = CS_NORMAL;
236				continue;
237			case SO:
238				cur_set = CS_ALTERNATE;
239				continue;
240			case TAB:		/* adjust column */
241				cur_col |= 7;
242				++cur_col;
243				continue;
244			case VT:
245				addto_lineno(&cur_line, -2);
246				continue;
247			}
248			if (iswspace(ch)) {
249				if ((width = wcwidth(ch)) > 0)
250					cur_col += width;
251				continue;
252			}
253			if (!pass_unknown_seqs)
254				continue;
255		}
256
257		/* Must stuff ch in a line - are we at the right one? */
258		if (cur_line + adjust != this_line) {
259			LINE *lnew;
260
261			/* round up to next line */
262			adjust = !fine && (cur_line & 1);
263
264			if (cur_line + adjust < this_line) {
265				while (cur_line + adjust < this_line &&
266				    l->l_prev != NULL) {
267					l = l->l_prev;
268					this_line--;
269				}
270				if (cur_line + adjust < this_line) {
271					if (nflushd_lines == 0) {
272						/*
273						 * Allow backup past first
274						 * line if nothing has been
275						 * flushed yet.
276						 */
277						while (cur_line + adjust
278						    < this_line) {
279							lnew = alloc_line();
280							l->l_prev = lnew;
281							lnew->l_next = l;
282							l = lines = lnew;
283							extra_lines++;
284							this_line--;
285						}
286					} else {
287						if (!warned++)
288							dowarn(cur_line);
289						cur_line = this_line - adjust;
290					}
291				}
292			} else {
293				/* may need to allocate here */
294				while (cur_line + adjust > this_line) {
295					if (l->l_next == NULL) {
296						l->l_next = alloc_line();
297						l->l_next->l_prev = l;
298					}
299					l = l->l_next;
300					this_line++;
301				}
302			}
303			if (this_line > nflushd_lines &&
304			    this_line - nflushd_lines >=
305			    max_bufd_lines + BUFFER_MARGIN) {
306				if (extra_lines) {
307					flush_lines(extra_lines);
308					extra_lines = 0;
309				}
310				flush_lines(this_line - nflushd_lines -
311				    max_bufd_lines);
312				nflushd_lines = this_line - max_bufd_lines;
313			}
314		}
315		/* grow line's buffer? */
316		if (l->l_line_len + 1 >= l->l_lsize) {
317			int need;
318
319			need = l->l_lsize ? l->l_lsize * 2 : 90;
320			if ((l->l_line = realloc(l->l_line,
321			    (unsigned)need * sizeof(CHAR))) == NULL)
322				err(1, NULL);
323			l->l_lsize = need;
324		}
325		c = &l->l_line[l->l_line_len++];
326		c->c_char = ch;
327		c->c_set = cur_set;
328		c->c_column = cur_col;
329		c->c_width = wcwidth(ch);
330		/*
331		 * If things are put in out of order, they will need sorting
332		 * when it is flushed.
333		 */
334		if (cur_col < l->l_max_col)
335			l->l_needs_sort = 1;
336		else
337			l->l_max_col = cur_col;
338		if (c->c_width > 0)
339			cur_col += c->c_width;
340	}
341	if (ferror(stdin))
342		err(1, NULL);
343	if (extra_lines) {
344		/*
345		 * Extra lines only exist if no lines have been flushed
346		 * yet. This means that 'lines' must point to line zero
347		 * after we flush the extra lines.
348		 */
349		flush_lines(extra_lines);
350		l = lines;
351		this_line = 0;
352	}
353
354	/* goto the last line that had a character on it */
355	for (; l->l_next; l = l->l_next)
356		this_line++;
357	flush_lines(this_line - nflushd_lines + 1);
358
359	/* make sure we leave things in a sane state */
360	if (last_set != CS_NORMAL)
361		PUTC(SI);
362
363	/* flush out the last few blank lines */
364	if (max_line >= this_line)
365		nblank_lines = max_line - this_line + (max_line & 1);
366	if (nblank_lines == 0)
367		/* end with a newline even if the source doesn't */
368		nblank_lines = 2;
369	flush_blanks();
370	exit(0);
371}
372
373/*
374 * Prints the first 'nflush' lines. Printed lines are freed.
375 * After this function returns, 'lines' points to the first
376 * of the remaining lines, and 'nblank_lines' will have the
377 * number of half line feeds between the final flushed line
378 * and the first remaining line.
379 */
380static void
381flush_lines(int nflush)
382{
383	LINE *l;
384
385	while (--nflush >= 0) {
386		l = lines;
387		lines = l->l_next;
388		if (l->l_line) {
389			flush_blanks();
390			flush_line(l);
391			free(l->l_line);
392		}
393		if (l->l_next)
394			nblank_lines++;
395		free_line(l);
396	}
397	if (lines)
398		lines->l_prev = NULL;
399}
400
401/*
402 * Print a number of newline/half newlines.
403 * nblank_lines is the number of half line feeds.
404 */
405static void
406flush_blanks(void)
407{
408	int half, i, nb;
409
410	half = 0;
411	nb = nblank_lines;
412	if (nb & 1) {
413		if (fine)
414			half = 1;
415		else
416			nb++;
417	}
418	nb /= 2;
419	for (i = nb; --i >= 0;)
420		PUTC('\n');
421	if (half) {
422		PUTC(ESC);
423		PUTC(FHLF);
424		if (!nb)
425			PUTC('\r');
426	}
427	nblank_lines = 0;
428}
429
430/*
431 * Write a line to stdout taking care of space to tab conversion (-h flag)
432 * and character set shifts.
433 */
434static void
435flush_line(LINE *l)
436{
437	CHAR *c, *endc;
438	int i, j, nchars, last_col, save, this_col, tot;
439
440	last_col = 0;
441	nchars = l->l_line_len;
442
443	if (l->l_needs_sort) {
444		static CHAR *sorted;
445		static int count_size, *count, sorted_size;
446
447		/*
448		 * Do an O(n) sort on l->l_line by column being careful to
449		 * preserve the order of characters in the same column.
450		 */
451		if (l->l_lsize > sorted_size) {
452			sorted_size = l->l_lsize;
453			if ((sorted = realloc(sorted,
454			    (unsigned)sizeof(CHAR) * sorted_size)) == NULL)
455				err(1, NULL);
456		}
457		if (l->l_max_col >= count_size) {
458			count_size = l->l_max_col + 1;
459			if ((count = realloc(count,
460			    (unsigned)sizeof(int) * count_size)) == NULL)
461				err(1, NULL);
462		}
463		memset(count, 0, sizeof(int) * l->l_max_col + 1);
464		for (i = nchars, c = l->l_line; --i >= 0; c++)
465			count[c->c_column]++;
466
467		/*
468		 * calculate running total (shifted down by 1) to use as
469		 * indices into new line.
470		 */
471		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
472			save = count[i];
473			count[i] = tot;
474			tot += save;
475		}
476
477		for (i = nchars, c = l->l_line; --i >= 0; c++)
478			sorted[count[c->c_column]++] = *c;
479		c = sorted;
480	} else
481		c = l->l_line;
482	while (nchars > 0) {
483		this_col = c->c_column;
484		endc = c;
485		do {
486			++endc;
487		} while (--nchars > 0 && this_col == endc->c_column);
488
489		/* if -b only print last character */
490		if (no_backspaces) {
491			c = endc - 1;
492			if (nchars > 0 &&
493			    this_col + c->c_width > endc->c_column)
494				continue;
495		}
496
497		if (this_col > last_col) {
498			int nspace = this_col - last_col;
499
500			if (compress_spaces && nspace > 1) {
501				while (1) {
502					int tab_col, tab_size;
503
504					tab_col = (last_col + 8) & ~7;
505					if (tab_col > this_col)
506						break;
507					tab_size = tab_col - last_col;
508					if (tab_size == 1)
509						PUTC(' ');
510					else
511						PUTC('\t');
512					nspace -= tab_size;
513					last_col = tab_col;
514				}
515			}
516			while (--nspace >= 0)
517				PUTC(' ');
518			last_col = this_col;
519		}
520
521		for (;;) {
522			if (c->c_set != last_set) {
523				switch (c->c_set) {
524				case CS_NORMAL:
525					PUTC(SI);
526					break;
527				case CS_ALTERNATE:
528					PUTC(SO);
529				}
530				last_set = c->c_set;
531			}
532			PUTC(c->c_char);
533			if ((c + 1) < endc)
534				for (j = 0; j < c->c_width; j++)
535					PUTC('\b');
536			if (++c >= endc)
537				break;
538		}
539		last_col += (c - 1)->c_width;
540	}
541}
542
543/*
544 * Increment or decrement a line number, checking for overflow.
545 * Stop one below INT_MAX such that the adjust variable is safe.
546 */
547void
548addto_lineno(int *lno, int offset)
549{
550	if (offset > 0) {
551		if (*lno >= INT_MAX - offset)
552			errx(1, "too many lines");
553	} else {
554		if (*lno < INT_MIN - offset)
555			errx(1, "too many reverse line feeds");
556	}
557	*lno += offset;
558}
559
560#define	NALLOC 64
561
562static LINE *line_freelist;
563
564static LINE *
565alloc_line(void)
566{
567	LINE *l;
568	int i;
569
570	if (!line_freelist) {
571		if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
572			err(1, NULL);
573		line_freelist = l;
574		for (i = 1; i < NALLOC; i++, l++)
575			l->l_next = l + 1;
576		l->l_next = NULL;
577	}
578	l = line_freelist;
579	line_freelist = l->l_next;
580
581	memset(l, 0, sizeof(LINE));
582	return (l);
583}
584
585static void
586free_line(LINE *l)
587{
588
589	l->l_next = line_freelist;
590	line_freelist = l;
591}
592
593static void
594usage(void)
595{
596
597	(void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n");
598	exit(1);
599}
600
601static void
602dowarn(int line)
603{
604
605	warnx("warning: can't back up %s",
606		line < 0 ? "past first line" : "-- line already flushed");
607}
608