1/*	$OpenBSD: fold.c,v 1.18 2016/05/23 10:31:42 schwarze Exp $	*/
2/*	$NetBSD: fold.c,v 1.6 1995/09/01 01:42:44 jtc Exp $	*/
3
4/*-
5 * Copyright (c) 1990, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Kevin Ruddy.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#include <ctype.h>
37#include <err.h>
38#include <limits.h>
39#include <locale.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <wchar.h>
45
46#define	DEFLINEWIDTH	80
47
48static void fold(unsigned int);
49static int isu8cont(unsigned char);
50static __dead void usage(void);
51
52int count_bytes = 0;
53int split_words = 0;
54
55int
56main(int argc, char *argv[])
57{
58	int ch, lastch, newarg, prevoptind;
59	unsigned int width;
60	const char *errstr;
61
62	setlocale(LC_CTYPE, "");
63
64	if (pledge("stdio rpath", NULL) == -1)
65		err(1, "pledge");
66
67	width = 0;
68	lastch = '\0';
69	prevoptind = 1;
70	newarg = 1;
71	while ((ch = getopt(argc, argv, "0123456789bsw:")) != -1) {
72		switch (ch) {
73		case 'b':
74			count_bytes = 1;
75			break;
76		case 's':
77			split_words = 1;
78			break;
79		case 'w':
80			width = strtonum(optarg, 1, UINT_MAX, &errstr);
81			if (errstr != NULL)
82				errx(1, "illegal width value, %s: %s", errstr,
83					optarg);
84			break;
85		case '0': case '1': case '2': case '3': case '4':
86		case '5': case '6': case '7': case '8': case '9':
87			if (newarg)
88				width = 0;
89			else if (!isdigit(lastch))
90				usage();
91			if (width > UINT_MAX / 10 - 1)
92				errx(1, "illegal width value, too large");
93			width = (width * 10) + (ch - '0');
94			if (width < 1)
95				errx(1, "illegal width value, too small");
96			break;
97		default:
98			usage();
99		}
100		lastch = ch;
101		newarg = optind != prevoptind;
102		prevoptind = optind;
103	}
104	argv += optind;
105	argc -= optind;
106
107	if (width == 0)
108		width = DEFLINEWIDTH;
109
110	if (!*argv) {
111		if (pledge("stdio", NULL) == -1)
112			err(1, "pledge");
113		fold(width);
114	} else {
115		for (; *argv; ++argv) {
116			if (!freopen(*argv, "r", stdin))
117				err(1, "%s", *argv);
118			else
119				fold(width);
120		}
121	}
122	return 0;
123}
124
125/*
126 * Fold the contents of standard input to fit within WIDTH columns
127 * (or bytes) and write to standard output.
128 *
129 * If split_words is set, split the line at the last space character
130 * on the line.  This flag necessitates storing the line in a buffer
131 * until the current column > width, or a newline or EOF is read.
132 *
133 * The buffer can grow larger than WIDTH due to backspaces and carriage
134 * returns embedded in the input stream.
135 */
136static void
137fold(unsigned int max_width)
138{
139	static char	*buf = NULL;
140	static size_t	 bufsz = 2048;
141	char		*cp;	/* Current mb character. */
142	char		*np;	/* Next mb character. */
143	char		*sp;	/* To search for the last space. */
144	char		*nbuf;	/* For buffer reallocation. */
145	wchar_t		 wc;	/* Current wide character. */
146	int		 ch;	/* Last byte read. */
147	int		 len;	/* Bytes in the current mb character. */
148	unsigned int	 col;	/* Current display position. */
149	int		 width; /* Display width of wc. */
150
151	if (buf == NULL && (buf = malloc(bufsz)) == NULL)
152		err(1, NULL);
153
154	np = cp = buf;
155	ch = 0;
156	col = 0;
157
158	while (ch != EOF) {  /* Loop on input characters. */
159		while ((ch = getchar()) != EOF) {  /* Loop on input bytes. */
160			if (np + 1 == buf + bufsz) {
161				nbuf = reallocarray(buf, 2, bufsz);
162				if (nbuf == NULL)
163					err(1, NULL);
164				bufsz *= 2;
165				cp = nbuf + (cp - buf);
166				np = nbuf + (np - buf);
167				buf = nbuf;
168			}
169			*np++ = ch;
170
171			/*
172			 * Read up to and including the first byte of
173			 * the next character, such that we are sure
174			 * to have a complete character in the buffer.
175			 * There is no need to read more than five bytes
176			 * ahead, since UTF-8 characters are four bytes
177			 * long at most.
178			 */
179
180			if (np - cp > 4 || (np - cp > 1 && !isu8cont(ch)))
181				break;
182		}
183
184		while (cp < np) {  /* Loop on output characters. */
185
186			/* Handle end of line and backspace. */
187
188			if (*cp == '\n' || (*cp == '\r' && !count_bytes)) {
189				fwrite(buf, 1, ++cp - buf, stdout);
190				memmove(buf, cp, np - cp);
191				np = buf + (np - cp);
192				cp = buf;
193				col = 0;
194				continue;
195			}
196			if (*cp == '\b' && !count_bytes) {
197				if (col)
198					col--;
199				cp++;
200				continue;
201			}
202
203			/*
204			 * Measure display width.
205			 * Process the last byte only if
206			 * end of file was reached.
207			 */
208
209			if (np - cp > (ch != EOF)) {
210				len = 1;
211				width = 1;
212
213				if (*cp == '\t') {
214					if (count_bytes == 0)
215						width = 8 - (col & 7);
216				} else if ((len = mbtowc(&wc, cp,
217				    np - cp)) < 1)
218					len = 1;
219				else if (count_bytes)
220					width = len;
221				else if ((width = wcwidth(wc)) < 0)
222					width = 1;
223
224				col += width;
225				if (col <= max_width || cp == buf) {
226					cp += len;
227					continue;
228				}
229			}
230
231			/* Line break required. */
232
233			if (col > max_width) {
234				if (split_words) {
235					for (sp = cp; sp > buf; sp--) {
236						if (sp[-1] == ' ') {
237							cp = sp;
238							break;
239						}
240					}
241				}
242				fwrite(buf, 1, cp - buf, stdout);
243				putchar('\n');
244				memmove(buf, cp, np - cp);
245				np = buf + (np - cp);
246				cp = buf;
247				col = 0;
248				continue;
249			}
250
251			/* Need more input. */
252
253			break;
254		}
255	}
256	fwrite(buf, 1, np - buf, stdout);
257
258	if (ferror(stdin))
259		err(1, NULL);
260}
261
/*
 * Tell whether c is a UTF-8 continuation byte, that is, whether its
 * two highest bits are 10.  Returns 1 if so and 0 otherwise.
 * Always returns 0 when the current locale is a single-byte locale.
 */
static int
isu8cont(unsigned char c)
{
	if (MB_CUR_MAX <= 1)
		return 0;

	return (c >> 6) == 0x2;
}
267
268static __dead void
269usage(void)
270{
271	(void)fprintf(stderr, "usage: fold [-bs] [-w width] [file ...]\n");
272	exit(1);
273}
274