col.c revision 282669
1/*-
2 * Copyright (c) 1990, 1993, 1994
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Michael Rendell of the Memorial University of Newfoundland.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#ifndef lint
34static const char copyright[] =
35"@(#) Copyright (c) 1990, 1993, 1994\n\
36	The Regents of the University of California.  All rights reserved.\n";
37#endif
38
39#if 0
40#ifndef lint
41static char sccsid[] = "@(#)col.c	8.5 (Berkeley) 5/4/95";
42#endif
43#endif
44
45#include <sys/cdefs.h>
46__FBSDID("$FreeBSD: head/usr.bin/col/col.c 282669 2015-05-08 22:11:54Z bapt $");
47
48#include <sys/capsicum.h>
49
50#include <err.h>
51#include <errno.h>
52#include <locale.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <termios.h>
57#include <unistd.h>
58#include <wchar.h>
59#include <wctype.h>
60
61#define	BS	'\b'		/* backspace */
62#define	TAB	'\t'		/* tab */
63#define	SPACE	' '		/* space */
64#define	NL	'\n'		/* newline */
65#define	CR	'\r'		/* carriage return */
66#define	ESC	'\033'		/* escape */
67#define	SI	'\017'		/* shift in to normal character set */
68#define	SO	'\016'		/* shift out to alternate character set */
69#define	VT	'\013'		/* vertical tab (aka reverse line feed) */
70#define	RLF	'7'		/* ESC-7 reverse line feed */
71#define	RHLF	'8'		/* ESC-8 reverse half-line feed */
72#define	FHLF	'9'		/* ESC-9 forward half-line feed */
73
74/* build up at least this many lines before flushing them out */
75#define	BUFFER_MARGIN		32
76
77typedef char CSET;
78
79typedef struct char_str {
80#define	CS_NORMAL	1
81#define	CS_ALTERNATE	2
82	short		c_column;	/* column character is in */
83	CSET		c_set;		/* character set (currently only 2) */
84	wchar_t		c_char;		/* character in question */
85	int		c_width;	/* character width */
86} CHAR;
87
88typedef struct line_str LINE;
89struct line_str {
90	CHAR	*l_line;		/* characters on the line */
91	LINE	*l_prev;		/* previous line */
92	LINE	*l_next;		/* next line */
93	int	l_lsize;		/* allocated sizeof l_line */
94	int	l_line_len;		/* strlen(l_line) */
95	int	l_needs_sort;		/* set if chars went in out of order */
96	int	l_max_col;		/* max column in the line */
97};
98
99static void	addto_lineno(int *, int);
100static LINE   *alloc_line(void);
101static void	dowarn(int);
102static void	flush_line(LINE *);
103static void	flush_lines(int);
104static void	flush_blanks(void);
105static void	free_line(LINE *);
106static void	usage(void);
107
108static CSET	last_set;		/* char_set of last char printed */
109static LINE    *lines;
110static int	compress_spaces;	/* if doing space -> tab conversion */
111static int	fine;			/* if `fine' resolution (half lines) */
112static int	max_bufd_lines;		/* max # of half lines to keep in memory */
113static int	nblank_lines;		/* # blanks after last flushed line */
114static int	no_backspaces;		/* if not to output any backspaces */
115static int	pass_unknown_seqs;	/* pass unknown control sequences */
116
117#define	PUTC(ch) \
118	do {					\
119		if (putwchar(ch) == WEOF)	\
120			errx(1, "write error");	\
121	} while (0)
122
123int
124main(int argc, char **argv)
125{
126	wint_t ch;
127	CHAR *c;
128	CSET cur_set;			/* current character set */
129	LINE *l;			/* current line */
130	int extra_lines;		/* # of lines above first line */
131	int cur_col;			/* current column */
132	int cur_line;			/* line number of current position */
133	int max_line;			/* max value of cur_line */
134	int this_line;			/* line l points to */
135	int nflushd_lines;		/* number of lines that were flushed */
136	int adjust, opt, warned, width;
137	const char *errstr;
138	cap_rights_t rights;
139	unsigned long cmd;
140
141	(void)setlocale(LC_CTYPE, "");
142
143	cap_rights_init(&rights, CAP_FSTAT, CAP_READ);
144	if (cap_rights_limit(STDIN_FILENO, &rights) < 0 && errno != ENOSYS)
145		err(1, "unable to limit rights for stdin");
146	cap_rights_init(&rights, CAP_FSTAT, CAP_WRITE, CAP_IOCTL);
147	if (cap_rights_limit(STDOUT_FILENO, &rights) < 0 && errno != ENOSYS)
148		err(1, "unable to limit rights for stdout");
149	cmd = TIOCGETA; /* required by isatty(3) in printf(3) */
150	if (cap_ioctls_limit(STDOUT_FILENO, &cmd, 1) < 0 && errno != ENOSYS)
151		err(1, "unable to limit ioctls for stdout");
152
153	if (cap_enter() < 0 && errno != ENOSYS)
154		err(1, "unable to enter capability mode");
155
156	max_bufd_lines = 256;
157	compress_spaces = 1;		/* compress spaces into tabs */
158	while ((opt = getopt(argc, argv, "bfhl:px")) != -1)
159		switch (opt) {
160		case 'b':		/* do not output backspaces */
161			no_backspaces = 1;
162			break;
163		case 'f':		/* allow half forward line feeds */
164			fine = 1;
165			break;
166		case 'h':		/* compress spaces into tabs */
167			compress_spaces = 1;
168			break;
169		case 'l':		/* buffered line count */
170			max_bufd_lines = strtonum(optarg, 1,
171			    (INT_MAX - BUFFER_MARGIN) / 2, &errstr) * 2;
172			if (errstr != NULL)
173				errx(1, "bad -l argument, %s: %s", errstr,
174					optarg);
175			break;
176		case 'p':		/* pass unknown control sequences */
177			pass_unknown_seqs = 1;
178			break;
179		case 'x':		/* do not compress spaces into tabs */
180			compress_spaces = 0;
181			break;
182		case '?':
183		default:
184			usage();
185		}
186
187	if (optind != argc)
188		usage();
189
190	adjust = cur_col = extra_lines = warned = 0;
191	cur_line = max_line = nflushd_lines = this_line = 0;
192	cur_set = last_set = CS_NORMAL;
193	lines = l = alloc_line();
194
195	while ((ch = getwchar()) != WEOF) {
196		if (!iswgraph(ch)) {
197			switch (ch) {
198			case BS:		/* can't go back further */
199				if (cur_col == 0)
200					continue;
201				--cur_col;
202				continue;
203			case CR:
204				cur_col = 0;
205				continue;
206			case ESC:		/* just ignore EOF */
207				switch(getwchar()) {
208				case RLF:
209					addto_lineno(&cur_line, -2);
210					break;
211				case RHLF:
212					addto_lineno(&cur_line, -1);
213					break;
214				case FHLF:
215					addto_lineno(&cur_line, 1);
216					if (cur_line > max_line)
217						max_line = cur_line;
218				}
219				continue;
220			case NL:
221				addto_lineno(&cur_line, 2);
222				if (cur_line > max_line)
223					max_line = cur_line;
224				cur_col = 0;
225				continue;
226			case SPACE:
227				++cur_col;
228				continue;
229			case SI:
230				cur_set = CS_NORMAL;
231				continue;
232			case SO:
233				cur_set = CS_ALTERNATE;
234				continue;
235			case TAB:		/* adjust column */
236				cur_col |= 7;
237				++cur_col;
238				continue;
239			case VT:
240				addto_lineno(&cur_line, -2);
241				continue;
242			}
243			if (iswspace(ch)) {
244				if ((width = wcwidth(ch)) > 0)
245					cur_col += width;
246				continue;
247			}
248			if (!pass_unknown_seqs)
249				continue;
250		}
251
252		/* Must stuff ch in a line - are we at the right one? */
253		if (cur_line + adjust != this_line) {
254			LINE *lnew;
255
256			/* round up to next line */
257			adjust = !fine && (cur_line & 1);
258
259			if (cur_line + adjust < this_line) {
260				while (cur_line + adjust < this_line &&
261				    l->l_prev != NULL) {
262					l = l->l_prev;
263					this_line--;
264				}
265				if (cur_line + adjust < this_line) {
266					if (nflushd_lines == 0) {
267						/*
268						 * Allow backup past first
269						 * line if nothing has been
270						 * flushed yet.
271						 */
272						while (cur_line + adjust
273						    < this_line) {
274							lnew = alloc_line();
275							l->l_prev = lnew;
276							lnew->l_next = l;
277							l = lines = lnew;
278							extra_lines++;
279							this_line--;
280						}
281					} else {
282						if (!warned++)
283							dowarn(cur_line);
284						cur_line = this_line - adjust;
285					}
286				}
287			} else {
288				/* may need to allocate here */
289				while (cur_line + adjust > this_line) {
290					if (l->l_next == NULL) {
291						l->l_next = alloc_line();
292						l->l_next->l_prev = l;
293					}
294					l = l->l_next;
295					this_line++;
296				}
297			}
298			if (this_line > nflushd_lines &&
299			    this_line - nflushd_lines >=
300			    max_bufd_lines + BUFFER_MARGIN) {
301				if (extra_lines) {
302					flush_lines(extra_lines);
303					extra_lines = 0;
304				}
305				flush_lines(this_line - nflushd_lines -
306				    max_bufd_lines);
307				nflushd_lines = this_line - max_bufd_lines;
308			}
309		}
310		/* grow line's buffer? */
311		if (l->l_line_len + 1 >= l->l_lsize) {
312			int need;
313
314			need = l->l_lsize ? l->l_lsize * 2 : 90;
315			if ((l->l_line = realloc(l->l_line,
316			    (unsigned)need * sizeof(CHAR))) == NULL)
317				err(1, NULL);
318			l->l_lsize = need;
319		}
320		c = &l->l_line[l->l_line_len++];
321		c->c_char = ch;
322		c->c_set = cur_set;
323		c->c_column = cur_col;
324		c->c_width = wcwidth(ch);
325		/*
326		 * If things are put in out of order, they will need sorting
327		 * when it is flushed.
328		 */
329		if (cur_col < l->l_max_col)
330			l->l_needs_sort = 1;
331		else
332			l->l_max_col = cur_col;
333		if (c->c_width > 0)
334			cur_col += c->c_width;
335	}
336	if (ferror(stdin))
337		err(1, NULL);
338	if (extra_lines)
339		flush_lines(extra_lines);
340
341	/* goto the last line that had a character on it */
342	for (; l->l_next; l = l->l_next)
343		this_line++;
344	flush_lines(this_line - nflushd_lines + 1);
345
346	/* make sure we leave things in a sane state */
347	if (last_set != CS_NORMAL)
348		PUTC(SI);
349
350	/* flush out the last few blank lines */
351	if (max_line > this_line)
352		nblank_lines = max_line - this_line;
353	if (max_line & 1)
354		nblank_lines++;
355	flush_blanks();
356	exit(0);
357}
358
359static void
360flush_lines(int nflush)
361{
362	LINE *l;
363
364	while (--nflush >= 0) {
365		l = lines;
366		lines = l->l_next;
367		if (l->l_line) {
368			flush_blanks();
369			flush_line(l);
370		}
371		if (l->l_line || l->l_next)
372			nblank_lines++;
373		if (l->l_line)
374			(void)free(l->l_line);
375		free_line(l);
376	}
377	if (lines)
378		lines->l_prev = NULL;
379}
380
381/*
382 * Print a number of newline/half newlines.  If fine flag is set, nblank_lines
383 * is the number of half line feeds, otherwise it is the number of whole line
384 * feeds.
385 */
386static void
387flush_blanks(void)
388{
389	int half, i, nb;
390
391	half = 0;
392	nb = nblank_lines;
393	if (nb & 1) {
394		if (fine)
395			half = 1;
396		else
397			nb++;
398	}
399	nb /= 2;
400	for (i = nb; --i >= 0;)
401		PUTC('\n');
402	if (half) {
403		PUTC(ESC);
404		PUTC(FHLF);
405		if (!nb)
406			PUTC('\r');
407	}
408	nblank_lines = 0;
409}
410
411/*
412 * Write a line to stdout taking care of space to tab conversion (-h flag)
413 * and character set shifts.
414 */
415static void
416flush_line(LINE *l)
417{
418	CHAR *c, *endc;
419	int i, j, nchars, last_col, save, this_col, tot;
420
421	last_col = 0;
422	nchars = l->l_line_len;
423
424	if (l->l_needs_sort) {
425		static CHAR *sorted;
426		static int count_size, *count, sorted_size;
427
428		/*
429		 * Do an O(n) sort on l->l_line by column being careful to
430		 * preserve the order of characters in the same column.
431		 */
432		if (l->l_lsize > sorted_size) {
433			sorted_size = l->l_lsize;
434			if ((sorted = realloc(sorted,
435			    (unsigned)sizeof(CHAR) * sorted_size)) == NULL)
436				err(1, NULL);
437		}
438		if (l->l_max_col >= count_size) {
439			count_size = l->l_max_col + 1;
440			if ((count = realloc(count,
441			    (unsigned)sizeof(int) * count_size)) == NULL)
442				err(1, NULL);
443		}
444		memset(count, 0, sizeof(int) * l->l_max_col + 1);
445		for (i = nchars, c = l->l_line; --i >= 0; c++)
446			count[c->c_column]++;
447
448		/*
449		 * calculate running total (shifted down by 1) to use as
450		 * indices into new line.
451		 */
452		for (tot = 0, i = 0; i <= l->l_max_col; i++) {
453			save = count[i];
454			count[i] = tot;
455			tot += save;
456		}
457
458		for (i = nchars, c = l->l_line; --i >= 0; c++)
459			sorted[count[c->c_column]++] = *c;
460		c = sorted;
461	} else
462		c = l->l_line;
463	while (nchars > 0) {
464		this_col = c->c_column;
465		endc = c;
466		do {
467			++endc;
468		} while (--nchars > 0 && this_col == endc->c_column);
469
470		/* if -b only print last character */
471		if (no_backspaces) {
472			c = endc - 1;
473			if (nchars > 0 &&
474			    this_col + c->c_width > endc->c_column)
475				continue;
476		}
477
478		if (this_col > last_col) {
479			int nspace = this_col - last_col;
480
481			if (compress_spaces && nspace > 1) {
482				while (1) {
483					int tab_col, tab_size;
484
485					tab_col = (last_col + 8) & ~7;
486					if (tab_col > this_col)
487						break;
488					tab_size = tab_col - last_col;
489					if (tab_size == 1)
490						PUTC(' ');
491					else
492						PUTC('\t');
493					nspace -= tab_size;
494					last_col = tab_col;
495				}
496			}
497			while (--nspace >= 0)
498				PUTC(' ');
499			last_col = this_col;
500		}
501
502		for (;;) {
503			if (c->c_set != last_set) {
504				switch (c->c_set) {
505				case CS_NORMAL:
506					PUTC(SI);
507					break;
508				case CS_ALTERNATE:
509					PUTC(SO);
510				}
511				last_set = c->c_set;
512			}
513			PUTC(c->c_char);
514			if ((c + 1) < endc)
515				for (j = 0; j < c->c_width; j++)
516					PUTC('\b');
517			if (++c >= endc)
518				break;
519		}
520		last_col += (c - 1)->c_width;
521	}
522}
523
524/*
525 * Increment or decrement a line number, checking for overflow.
526 * Stop one below INT_MAX such that the adjust variable is safe.
527 */
528void
529addto_lineno(int *lno, int offset)
530{
531	if (offset > 0) {
532		if (*lno >= INT_MAX - offset)
533			errx(1, "too many lines");
534	} else {
535		if (*lno < INT_MIN - offset)
536			errx(1, "too many reverse line feeds");
537	}
538	*lno += offset;
539}
540
541#define	NALLOC 64
542
543static LINE *line_freelist;
544
545static LINE *
546alloc_line(void)
547{
548	LINE *l;
549	int i;
550
551	if (!line_freelist) {
552		if ((l = realloc(NULL, sizeof(LINE) * NALLOC)) == NULL)
553			err(1, NULL);
554		line_freelist = l;
555		for (i = 1; i < NALLOC; i++, l++)
556			l->l_next = l + 1;
557		l->l_next = NULL;
558	}
559	l = line_freelist;
560	line_freelist = l->l_next;
561
562	memset(l, 0, sizeof(LINE));
563	return (l);
564}
565
566static void
567free_line(LINE *l)
568{
569
570	l->l_next = line_freelist;
571	line_freelist = l;
572}
573
574static void
575usage(void)
576{
577
578	(void)fprintf(stderr, "usage: col [-bfhpx] [-l nline]\n");
579	exit(1);
580}
581
582static void
583dowarn(int line)
584{
585
586	warnx("warning: can't back up %s",
587		line < 0 ? "past first line" : "-- line already flushed");
588}
589