1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1987, 1991, 1993
5 *	The Regents of the University of California.  All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/capsicum.h>
33#include <sys/param.h>
34#include <sys/stat.h>
35
36#include <capsicum_helpers.h>
37#include <ctype.h>
38#include <errno.h>
39#include <fcntl.h>
40#include <locale.h>
41#include <stdbool.h>
42#include <stdint.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47#include <wchar.h>
48#include <wctype.h>
49#include <libxo/xo.h>
50
51#include <libcasper.h>
52#include <casper/cap_fileargs.h>
53
54static const char *stdin_filename = "stdin";
55
56static fileargs_t *fa;
57static uintmax_t tlinect, twordct, tcharct, tlongline;
58static bool doline, doword, dochar, domulti, dolongline;
59static volatile sig_atomic_t siginfo;
60static xo_handle_t *stderr_handle;
61
62static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
63		    uintmax_t charct, uintmax_t llct);
64static int	cnt(const char *);
65static void	usage(void);
66
67static void
68siginfo_handler(int sig __unused)
69{
70
71	siginfo = 1;
72}
73
74static void
75reset_siginfo(void)
76{
77
78	signal(SIGINFO, SIG_DFL);
79	siginfo = 0;
80}
81
82int
83main(int argc, char *argv[])
84{
85	int ch, errors, total;
86	cap_rights_t rights;
87
88	(void) setlocale(LC_CTYPE, "");
89
90	argc = xo_parse_args(argc, argv);
91	if (argc < 0)
92		exit(EXIT_FAILURE);
93
94	while ((ch = getopt(argc, argv, "clmwL")) != -1)
95		switch((char)ch) {
96		case 'l':
97			doline = true;
98			break;
99		case 'w':
100			doword = true;
101			break;
102		case 'c':
103			dochar = true;
104			domulti = false;
105			break;
106		case 'L':
107			dolongline = true;
108			break;
109		case 'm':
110			domulti = true;
111			dochar = false;
112			break;
113		case '?':
114		default:
115			usage();
116		}
117	argv += optind;
118	argc -= optind;
119
120	fa = fileargs_init(argc, argv, O_RDONLY, 0,
121	    cap_rights_init(&rights, CAP_READ, CAP_FSTAT), FA_OPEN);
122	if (fa == NULL)
123		xo_err(EXIT_FAILURE, "Unable to initialize casper");
124	caph_cache_catpages();
125	if (caph_limit_stdio() < 0)
126		xo_err(EXIT_FAILURE, "Unable to limit stdio");
127	if (caph_enter_casper() < 0)
128		xo_err(EXIT_FAILURE, "Unable to enter capability mode");
129
130	/* Wc's flags are on by default. */
131	if (!(doline || doword || dochar || domulti || dolongline))
132		doline = doword = dochar = true;
133
134	stderr_handle = xo_create_to_file(stderr, XO_STYLE_TEXT, 0);
135	xo_open_container("wc");
136	xo_open_list("file");
137
138	(void)signal(SIGINFO, siginfo_handler);
139	errors = 0;
140	total = 0;
141	if (argc == 0) {
142		xo_open_instance("file");
143		if (cnt(NULL) != 0)
144			++errors;
145		xo_close_instance("file");
146	} else {
147		while (argc--) {
148			xo_open_instance("file");
149			if (cnt(*argv++) != 0)
150				++errors;
151			xo_close_instance("file");
152			++total;
153		}
154	}
155
156	xo_close_list("file");
157
158	if (total > 1) {
159		xo_open_container("total");
160		show_cnt("total", tlinect, twordct, tcharct, tlongline);
161		xo_close_container("total");
162	}
163
164	fileargs_free(fa);
165	xo_close_container("wc");
166	if (xo_finish() < 0)
167		xo_err(EXIT_FAILURE, "stdout");
168	exit(errors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
169}
170
171static void
172show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
173    uintmax_t charct, uintmax_t llct)
174{
175	xo_handle_t *xop;
176
177	if (!siginfo)
178		xop = NULL;
179	else {
180		xop = stderr_handle;
181		siginfo = 0;
182	}
183
184	if (doline)
185		xo_emit_h(xop, " {:lines/%7ju/%ju}", linect);
186	if (doword)
187		xo_emit_h(xop, " {:words/%7ju/%ju}", wordct);
188	if (dochar || domulti)
189		xo_emit_h(xop, " {:characters/%7ju/%ju}", charct);
190	if (dolongline)
191		xo_emit_h(xop, " {:long-lines/%7ju/%ju}", llct);
192	if (file != stdin_filename)
193		xo_emit_h(xop, " {:filename/%s}\n", file);
194	else
195		xo_emit_h(xop, "\n");
196}
197
198static int
199cnt(const char *file)
200{
201	static char buf[MAXBSIZE];
202	struct stat sb;
203	mbstate_t mbs;
204	const char *p;
205	uintmax_t linect, wordct, charct, llct, tmpll;
206	ssize_t len;
207	size_t clen;
208	int fd;
209	wchar_t wch;
210	bool gotsp, warned;
211
212	linect = wordct = charct = llct = tmpll = 0;
213	if (file == NULL) {
214		fd = STDIN_FILENO;
215		file = stdin_filename;
216	} else if ((fd = fileargs_open(fa, file)) < 0) {
217		xo_warn("%s: open", file);
218		return (1);
219	}
220	if (doword || (domulti && MB_CUR_MAX != 1))
221		goto word;
222	/*
223	 * If all we need is the number of characters and it's a regular file,
224	 * just stat it.
225	 */
226	if (doline == 0 && dolongline == 0) {
227		if (fstat(fd, &sb)) {
228			xo_warn("%s: fstat", file);
229			(void)close(fd);
230			return (1);
231		}
232		/* pseudo-filesystems advertize a zero size */
233		if (S_ISREG(sb.st_mode) && sb.st_size > 0) {
234			reset_siginfo();
235			charct = sb.st_size;
236			show_cnt(file, linect, wordct, charct, llct);
237			tcharct += charct;
238			(void)close(fd);
239			return (0);
240		}
241	}
242	/*
243	 * For files we can't stat, or if we need line counting, slurp the
244	 * file.  Line counting is split out because it's a lot faster to get
245	 * lines than to get words, since the word count requires locale
246	 * handling.
247	 */
248	while ((len = read(fd, buf, sizeof(buf))) != 0) {
249		if (len < 0) {
250			xo_warn("%s: read", file);
251			(void)close(fd);
252			return (1);
253		}
254		if (siginfo)
255			show_cnt(file, linect, wordct, charct, llct);
256		charct += len;
257		if (doline || dolongline) {
258			for (p = buf; len > 0; --len, ++p) {
259				if (*p == '\n') {
260					if (tmpll > llct)
261						llct = tmpll;
262					tmpll = 0;
263					++linect;
264				} else {
265					tmpll++;
266				}
267			}
268		}
269	}
270	reset_siginfo();
271	if (doline)
272		tlinect += linect;
273	if (dochar)
274		tcharct += charct;
275	if (dolongline && llct > tlongline)
276		tlongline = llct;
277	show_cnt(file, linect, wordct, charct, llct);
278	(void)close(fd);
279	return (0);
280
281	/* Do it the hard way... */
282word:	gotsp = true;
283	warned = false;
284	memset(&mbs, 0, sizeof(mbs));
285	while ((len = read(fd, buf, sizeof(buf))) != 0) {
286		if (len < 0) {
287			xo_warn("%s: read", file);
288			(void)close(fd);
289			return (1);
290		}
291		p = buf;
292		while (len > 0) {
293			if (siginfo)
294				show_cnt(file, linect, wordct, charct, llct);
295			if (!domulti || MB_CUR_MAX == 1) {
296				clen = 1;
297				wch = (unsigned char)*p;
298			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 0) {
299				clen = 1;
300			} else if (clen == (size_t)-1) {
301				if (!warned) {
302					errno = EILSEQ;
303					xo_warn("%s", file);
304					warned = true;
305				}
306				memset(&mbs, 0, sizeof(mbs));
307				clen = 1;
308				wch = (unsigned char)*p;
309			} else if (clen == (size_t)-2) {
310				break;
311			}
312			charct++;
313			if (wch != L'\n')
314				tmpll++;
315			len -= clen;
316			p += clen;
317			if (wch == L'\n') {
318				if (tmpll > llct)
319					llct = tmpll;
320				tmpll = 0;
321				++linect;
322			}
323			if (iswspace(wch)) {
324				gotsp = true;
325			} else if (gotsp) {
326				gotsp = false;
327				++wordct;
328			}
329		}
330	}
331	reset_siginfo();
332	if (domulti && MB_CUR_MAX > 1) {
333		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
334			xo_warn("%s", file);
335	}
336	if (doline)
337		tlinect += linect;
338	if (doword)
339		twordct += wordct;
340	if (dochar || domulti)
341		tcharct += charct;
342	if (dolongline && llct > tlongline)
343		tlongline = llct;
344	show_cnt(file, linect, wordct, charct, llct);
345	(void)close(fd);
346	return (0);
347}
348
/*
 * Report the command synopsis with xo_error() and terminate the program
 * with EXIT_FAILURE.  Does not return.
 */
static void
usage(void)
{

	xo_error("usage: wc [-Lclmw] [file ...]\n");
	exit(EXIT_FAILURE);
}
355