1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Michael Rendell of the Memorial University of Newfoundland.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#ifndef lint
34static const char copyright[] =
35"@(#) Copyright (c) 1990, 1993, 1994\n\
36	The Regents of the University of California.  All rights reserved.\n";
37#endif
38
39#if 0
40#ifndef lint
41static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
42#endif
43#endif
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD$");
47
48#include <sys/capability.h>
49
50#include <err.h>
51#include <errno.h>
52#include <locale.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <termios.h>
57#include <unistd.h>
58#include <wchar.h>
59#include <wctype.h>
60
61#define	BS	'\b'		/* backspace */
62#define	TAB	'\t'		/* tab */
63#define	SPACE	' '		/* space */
64#define	NL	'\n'		/* newline */
65#define	CR	'\r'		/* carriage return */
66#define	ESC	'\033'		/* escape */
67#define	SI	'\017'		/* shift in to normal character set */
68#define	SO	'\016'		/* shift out to alternate character set */
69#define	VT	'\013'		/* vertical tab (aka reverse line feed) */
70#define	RLF	'7'		/* ESC-7 reverse line feed */
71#define	RHLF	'8'		/* ESC-8 reverse half-line feed */
72#define	FHLF	'9'		/* ESC-9 forward half-line feed */
73
74/* build up at least this many lines before flushing them out */
75#define	BUFFER_MARGIN		32
76
77typedef char CSET;
78
79typedef struct char_str {
80#define	CS_NORMAL	1
81#define	CS_ALTERNATE	2
82	short		c_column;	/* column character is in */
83	CSET		c_set;		/* character set (currently only 2) */
84	wchar_t		c_char;		/* character in question */
85	int		c_width;	/* character width */
86} CHAR;
87
88typedef struct line_str LINE;
89struct line_str {
90	CHAR	*l_line;		/* characters on the line */
91	LINE	*l_prev;		/* previous line */
92	LINE	*l_next;		/* next line */
93	int	l_lsize;		/* allocated sizeof l_line */
94	int	l_line_len;		/* strlen(l_line) */
95	int	l_needs_sort;		/* set if chars went in out of order */
96	int	l_max_col;		/* max column in the line */
97};
98
99static void	addto_lineno(int *, int);
100static LINE   *alloc_line(void);
101static void	dowarn(int);
102static void	flush_line(LINE *);
103static void	flush_lines(int);
104static void	flush_blanks(void);
105static void	free_line(LINE *);
106static void	usage(void);
107
108static CSET	last_set;		/* char_set of last char printed */
109static LINE    *lines;
110static int	compress_spaces;	/* if doing space -> tab conversion */
111static int	fine;			/* if `fine' resolution (half lines) */
112static int	max_bufd_lines;		/* max # of half lines to keep in memory */
113static int	nblank_lines;		/* # blanks after last flushed line */
114static int	no_backspaces;		/* if not to output any backspaces */
115static int	pass_unknown_seqs;	/* pass unknown control sequences */
116
117#define	PUTC(ch) \
118	do {					\
119		if (putwchar(ch) == WEOF)	\
120			errx(1, "write error");	\
121	} while (0)
122
123int
124main(int argc, char **argv)
125{
126	wint_t ch;
127	CHAR *c;
128	CSET cur_set;			/* current character set */
129	LINE *l;			/* current line */
130	int extra_lines;		/* # of lines above first line */
131	int cur_col;			/* current column */
132	int cur_line;			/* line number of current position */
133	int max_line;			/* max value of cur_line */
134	int this_line;			/* line l points to */
135	int nflushd_lines;		/* number of lines that were flushed */
136	int adjust, opt, warned, width;
137	const char *errstr;
138	cap_rights_t rights;
139	unsigned long cmd;
140
141	(void)setlocale(LC_CTYPE, "");
142
143	cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
144	if (cap_rights_limit(STDIN_FILENO, &rights) < 0 && errno != ENOSYS)
145		err(1, "unable to limit rights for stdin");
146	cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE, CAP_IOCTL);
147	if (cap_rights_limit(STDOUT_FILENO, &rights) < 0 && errno != ENOSYS)
148		err(1, "unable to limit rights for stdout");
149	cmd = TIOCGETA; /* required by isatty(3) in printf(3) */
150	if (cap_ioctls_limit(STDOUT_FILENO, &cmd, 1) < 0 && errno != ENOSYS)
151		err(1, "unable to limit ioctls for stdout");
152
153	if (cap_enter() < 0 && errno != ENOSYS)
154		err(1, "unable to enter capability mode");
155
156	max_bufd_lines = 256;
157	compress_spaces = 1;		/* compress spaces into tabs */
158	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
159		switch (opt) {
160		case 'b':		/* do not output backspaces */
161			no_backspaces = 1;
162			break;
163		case 'f':		/* allow half forward line feeds */
164			fine = 1;
165			break;
166		case 'h':		/* compress spaces into tabs */
167			compress_spaces = 1;
168			break;
169		case 'l':		/* buffered line count */
170			max_bufd_lines = strtonum(optarg, 1,
171			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
172			if (errstr != NULL)
173				errx(1, "bad -l argument, %s: %s", errstr,
174					optarg);
175			break;
176		case 'p':		/* pass unknown control sequences */
177			pass_unknown_seqs = 1;
178			break;
179		case 'x':		/* do not compress spaces into tabs */
180			compress_spaces = 0;
181			break;
182		case '?':
183		default:
184			usage();
185		}
186
187	if (optind != argc)
188		usage();
189
190	adjust = cur_col = extra_lines = warned = 0;
191	cur_line = max_line = nflushd_lines = this_line = 0;
192	cur_set = last_set = CS_NORMAL;
193	lines = l = alloc_line();
194
195	while ((ch = getwchar()) != WEOF) {
196		if (!iswgraph(ch)) {
197			switch (ch) {
198			case BS:		/* can't go back further */
199				if (cur_col == 0)
200					continue;
201				--cur_col;
202				continue;
203			case CR:
204				cur_col = 0;
205				continue;
206			case ESC:		/* just ignore EOF */
207				switch(getwchar()) {
208				/*
209				 * In the input stream, accept both the
210				 * XPG5 sequences ESC-digit and the
211				 * traditional BSD sequences ESC-ctrl.
212				 */
213				case '\007':
214					/* FALLTHROUGH */
215				case RLF:
216					addto_lineno(&cur_line, -2);
217					break;
218				case '\010':
219					/* FALLTHROUGH */
220				case RHLF:
221					addto_lineno(&cur_line, -1);
222					break;
223				case '\011':
224					/* FALLTHROUGH */
225				case FHLF:
226					addto_lineno(&cur_line, 1);
227					if (cur_line > max_line)
228						max_line = cur_line;
229				}
230				continue;
231			case NL:
232				addto_lineno(&cur_line, 2);
233				if (cur_line > max_line)
234					max_line = cur_line;
235				cur_col = 0;
236				continue;
237			case SPACE:
238				++cur_col;
239				continue;
240			case SI:
241				cur_set = CS_NORMAL;
242				continue;
243			case SO:
244				cur_set = CS_ALTERNATE;
245				continue;
246			case TAB:		/* adjust column */
247				cur_col |= 7;
248				++cur_col;
249				continue;
250			case VT:
251				addto_lineno(&cur_line, -2);
252				continue;
253			}
254			if (iswspace(ch)) {
255				if ((width = wcwidth(ch)) > 0)
256					cur_col += width;
257				continue;
258			}
259			if (!pass_unknown_seqs)
260				continue;
261		}
262
263		/* Must stuff ch in a line - are we at the right one? */
264		if (cur_line + adjust != this_line) {
265			LINE *lnew;
266
267			/* round up to next line */
268			adjust = !fine && (cur_line & 1);
269
270			if (cur_line + adjust < this_line) {
271				while (cur_line + adjust < this_line &&
272				    l->l_prev != NULL) {
273					l = l->l_prev;
274					this_line--;
275				}
276				if (cur_line + adjust < this_line) {
277					if (nflushd_lines == 0) {
278						/*
279						 * Allow backup past first
280						 * line if nothing has been
281						 * flushed yet.
282						 */
283						while (cur_line + adjust
284						    < this_line) {
285							lnew = alloc_line();
286							l->l_prev = lnew;
287							lnew->l_next = l;
288							l = lines = lnew;
289							extra_lines++;
290							this_line--;
291						}
292					} else {
293						if (!warned++)
294							dowarn(cur_line);
295						cur_line = this_line - adjust;
296					}
297				}
298			} else {
299				/* may need to allocate here */
300				while (cur_line + adjust > this_line) {
301					if (l->l_next == NULL) {
302						l->l_next = alloc_line();
303						l->l_next->l_prev = l;
304					}
305					l = l->l_next;
306					this_line++;
307				}
308			}
309			if (this_line > nflushd_lines &&
310			    this_line - nflushd_lines >=
311			    max_bufd_lines + BUFFER_MARGIN) {
312				if (extra_lines) {
313					flush_lines(extra_lines);
314					extra_lines = 0;
315				}
316				flush_lines(this_line - nflushd_lines -
317				    max_bufd_lines);
318				nflushd_lines = this_line - max_bufd_lines;
319			}
320		}
321		/* grow line's buffer? */
322		if (l->l_line_len + 1 >= l->l_lsize) {
323			int need;
324
325			need = l->l_lsize ? l->l_lsize * 2 : 90;
326			if ((l->l_line = realloc(l->l_line,
327			    (unsigned)need * sizeof(CHAR))) == NULL)
328				err(1, (char *)NULL);
329			l->l_lsize = need;
330		}
331		c = &l->l_line[l->l_line_len++];
332		c->c_char = ch;
333		c->c_set = cur_set;
334		c->c_column = cur_col;
335		c->c_width = wcwidth(ch);
336		/*
337		 * If things are put in out of order, they will need sorting
338		 * when it is flushed.
339		 */
340		if (cur_col < l->l_max_col)
341			l->l_needs_sort = 1;
342		else
343			l->l_max_col = cur_col;
344		if (c->c_width > 0)
345			cur_col += c->c_width;
346	}
347	if (ferror(stdin))
348		err(1, NULL);
349	if (extra_lines)
350		flush_lines(extra_lines);
351
352	/* goto the last line that had a character on it */
353	for (; l->l_next; l = l->l_next)
354		this_line++;
355	flush_lines(this_line - nflushd_lines + 1);
356
357	/* make sure we leave things in a sane state */
358	if (last_set != CS_NORMAL)
359		PUTC(SI);
360
361	/* flush out the last few blank lines */
362	if (max_line > this_line)
363		nblank_lines = max_line - this_line;
364	if (max_line & 1)
365		nblank_lines++;
366	flush_blanks();
367	exit(0);
368}
369
370static void
371flush_lines(int nflush)
372{
373	LINE *l;
374
375	while (--nflush >= 0) {
376		l = lines;
377		lines = l->l_next;
378		if (l->l_line) {
379			flush_blanks();
380			flush_line(l);
381		}
382		if (l->l_line || l->l_next)
383			nblank_lines++;
384		if (l->l_line)
385			(void)free(l->l_line);
386		free_line(l);
387	}
388	if (lines)
389		lines->l_prev = NULL;
390}
391
392/*
393 * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
394 * is the number of half line feeds, otherwise it is the number of whole line
395 * feeds.
396 */
397static void
398flush_blanks(void)
399{
400	int half, i, nb;
401
402	half = 0;
403	nb = nblank_lines;
404	if (nb & 1) {
405		if (fine)
406			half = 1;
407		else
408			nb++;
409	}
410	nb /= 2;
411	for (i = nb; --i >= 0;)
412		PUTC('\n');
413	if (half) {
414		PUTC(ESC);
415		PUTC(FHLF);
416		if (!nb)
417			PUTC('\r');
418	}
419	nblank_lines = 0;
420}
421
422/*
423 * Write a line to stdout taking care of space to tab conversion (-h flag)
424 * and character set shifts.
425 */
426static void
427flush_line(LINE *l)
428{
429	CHAR *c, *endc;
430	int i, j, nchars, last_col, save, this_col, tot;
431
432	last_col = 0;
433	nchars = l->l_line_len;
434
435	if (l->l_needs_sort) {
436		static CHAR *sorted;
437		static int count_size, *count, sorted_size;
438
439		/*
440		 * Do an O(n) sort on l->l_line by column being careful to
441		 * preserve the order of characters in the same column.
442		 */
443		if (l->l_lsize > sorted_size) {
444			sorted_size = l->l_lsize;
445			if ((sorted = realloc(sorted,
446			    (unsigned)sizeof(CHAR) * sorted_size)) == NULL)
447				err(1, (char *)NULL);
448		}
449		if (l->l_max_col >= count_size) {
450			count_size = l->l_max_col + 1;
451			if ((count = realloc(count,
452			    (unsigned)sizeof(int) * count_size)) == NULL)
453				err(1, (char *)NULL);
454		}
455		memset(count, 0, sizeof(int) * l->l_max_col + 1);
456		for (i = nchars, c = l->l_line; --i >= 0; c++)
457			count[c->c_column]++;
458
459		/*
460		 * calculate running total (shifted down by 1) to use as
461		 * indices into new line.
462		 */
463		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
464			save = count[i];
465			count[i] = tot;
466			tot += save;
467		}
468
469		for (i = nchars, c = l->l_line; --i >= 0; c++)
470			sorted[count[c->c_column]++] = *c;
471		c = sorted;
472	} else
473		c = l->l_line;
474	while (nchars > 0) {
475		this_col = c->c_column;
476		endc = c;
477		do {
478			++endc;
479		} while (--nchars > 0 && this_col == endc->c_column);
480
481		/* if -b only print last character */
482		if (no_backspaces) {
483			c = endc - 1;
484			if (nchars > 0 &&
485			    this_col + c->c_width > endc->c_column)
486				continue;
487		}
488
489		if (this_col > last_col) {
490			int nspace = this_col - last_col;
491
492			if (compress_spaces && nspace > 1) {
493				while (1) {
494					int tab_col, tab_size;
495
496					tab_col = (last_col + 8) & ~7;
497					if (tab_col > this_col)
498						break;
499					tab_size = tab_col - last_col;
500					if (tab_size == 1)
501						PUTC(' ');
502					else
503						PUTC('\t');
504					nspace -= tab_size;
505					last_col = tab_col;
506				}
507			}
508			while (--nspace >= 0)
509				PUTC(' ');
510			last_col = this_col;
511		}
512
513		for (;;) {
514			if (c->c_set != last_set) {
515				switch (c->c_set) {
516				case CS_NORMAL:
517					PUTC(SI);
518					break;
519				case CS_ALTERNATE:
520					PUTC(SO);
521				}
522				last_set = c->c_set;
523			}
524			PUTC(c->c_char);
525			if ((c + 1) < endc)
526				for (j = 0; j < c->c_width; j++)
527					PUTC('\b');
528			if (++c >= endc)
529				break;
530		}
531		last_col += (c - 1)->c_width;
532	}
533}
534
535/*
536 * Increment or decrement a line number, checking for overflow.
537 * Stop one below INT_MAX such that the adjust variable is safe.
538 */
539void
540addto_lineno(int *lno, int offset)
541{
542	if (offset > 0) {
543		if (*lno >= INT_MAX - offset)
544			errx(1, "too many lines");
545	} else {
546		if (*lno < INT_MIN - offset)
547			errx(1, "too many reverse line feeds");
548	}
549	*lno += offset;
550}
551
552#define	NALLOC 64
553
554static LINE *line_freelist;
555
556static LINE *
557alloc_line(void)
558{
559	LINE *l;
560	int i;
561
562	if (!line_freelist) {
563		if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
564			err(1, (char *)NULL);
565		line_freelist = l;
566		for (i = 1; i < NALLOC; i++, l++)
567			l->l_next = l + 1;
568		l->l_next = NULL;
569	}
570	l = line_freelist;
571	line_freelist = l->l_next;
572
573	memset(l, 0, sizeof(LINE));
574	return (l);
575}
576
577static void
578free_line(LINE *l)
579{
580
581	l->l_next = line_freelist;
582	line_freelist = l;
583}
584
585static void
586usage(void)
587{
588
589	(void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n");
590	exit(1);
591}
592
593static void
594dowarn(int line)
595{
596
597	warnx("warning: can't back up %s",
598		line < 0 ? "past first line" : "-- line already flushed");
599}
600