wc.c revision 208210
/*
 * Copyright (c) 1980, 1987, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
33272343Sngie
/* Copyright notice string; left out of the build when lint is defined. */
#ifndef lint
static const char copyright[] =
    "@(#) Copyright (c) 1980, 1987, 1991, 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

/* SCCS identification string; disabled with #if 0. */
#if 0
#ifndef lint
static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
#endif /* not lint */
#endif
45272343Sngie
46272343Sngie#include <sys/cdefs.h>
47272343Sngie__FBSDID("$FreeBSD: head/usr.bin/wc/wc.c 208210 2010-05-17 19:13:49Z pjd $");
48272343Sngie
49272343Sngie#include <sys/param.h>
50272343Sngie#include <sys/stat.h>
51272343Sngie
52272343Sngie#include <ctype.h>
53272343Sngie#include <err.h>
54272343Sngie#include <errno.h>
55272343Sngie#include <fcntl.h>
56272343Sngie#include <locale.h>
57272343Sngie#include <stdint.h>
58272343Sngie#include <stdio.h>
59272343Sngie#include <stdlib.h>
60272343Sngie#include <string.h>
61272343Sngie#include <unistd.h>
62272343Sngie#include <wchar.h>
63272343Sngie#include <wctype.h>
64272343Sngie
/* Totals accumulated over every input; main() prints them as "total". */
uintmax_t tlinect;
uintmax_t twordct;
uintmax_t tcharct;
uintmax_t tlongline;

/* Column selectors set by the -l, -w, -c, -m and -L options. */
int doline;
int doword;
int dochar;
int domulti;
int dolongline;

/* Set to 1 by the SIGINFO handler; cleared by show_cnt() after reporting. */
static volatile sig_atomic_t siginfo;

static void	usage(void);
static int	cnt(const char *);
static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
73272343Sngie
74272343Sngiestatic void
75272343Sngiesiginfo_handler(int sig __unused)
76272343Sngie{
77272343Sngie
78272343Sngie	siginfo = 1;
79272343Sngie}
80272343Sngie
81272343Sngieint
82272343Sngiemain(int argc, char *argv[])
83272343Sngie{
84272343Sngie	int ch, errors, total;
85272343Sngie
86272343Sngie	(void) setlocale(LC_CTYPE, "");
87272343Sngie
88272343Sngie	while ((ch = getopt(argc, argv, "clmwL")) != -1)
89272343Sngie		switch((char)ch) {
90272343Sngie		case 'l':
91272343Sngie			doline = 1;
92272343Sngie			break;
93272343Sngie		case 'w':
94272343Sngie			doword = 1;
95272343Sngie			break;
96272343Sngie		case 'c':
97272343Sngie			dochar = 1;
98272343Sngie			domulti = 0;
99272343Sngie			break;
100272343Sngie		case 'L':
101272343Sngie			dolongline = 1;
102272343Sngie			break;
103272343Sngie		case 'm':
104272343Sngie			domulti = 1;
105272343Sngie			dochar = 0;
106272343Sngie			break;
107272343Sngie		case '?':
108272343Sngie		default:
109272343Sngie			usage();
110272343Sngie		}
111272343Sngie	argv += optind;
112272343Sngie	argc -= optind;
113272343Sngie
114272343Sngie	(void)signal(SIGINFO, siginfo_handler);
115272343Sngie
116272343Sngie	/* Wc's flags are on by default. */
117272343Sngie	if (doline + doword + dochar + domulti + dolongline == 0)
118272343Sngie		doline = doword = dochar = 1;
119272343Sngie
120272343Sngie	errors = 0;
121272343Sngie	total = 0;
122272343Sngie	if (!*argv) {
123272343Sngie		if (cnt((char *)NULL) != 0)
124272343Sngie			++errors;
125272343Sngie	} else {
126272343Sngie		do {
127272343Sngie			if (cnt(*argv) != 0)
128272343Sngie				++errors;
129272343Sngie			++total;
130272343Sngie		} while(*++argv);
131272343Sngie	}
132272343Sngie
133272343Sngie	if (total > 1)
134272343Sngie		show_cnt("total", tlinect, twordct, tcharct, tlongline);
135272343Sngie	exit(errors == 0 ? 0 : 1);
136}
137
138static void
139show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
140    uintmax_t charct, uintmax_t llct)
141{
142	FILE *out;
143
144	if (!siginfo)
145		out = stdout;
146	else {
147		out = stderr;
148		siginfo = 0;
149	}
150
151	if (doline)
152		(void)fprintf(out, " %7ju", linect);
153	if (doword)
154		(void)fprintf(out, " %7ju", wordct);
155	if (dochar || domulti)
156		(void)fprintf(out, " %7ju", charct);
157	if (dolongline)
158		(void)fprintf(out, " %7ju", llct);
159	if (file != NULL)
160		(void)fprintf(out, " %s\n", file);
161	else
162		(void)fprintf(out, "\n");
163}
164
165static int
166cnt(const char *file)
167{
168	struct stat sb;
169	uintmax_t linect, wordct, charct, llct, tmpll;
170	int fd, len, warned;
171	size_t clen;
172	short gotsp;
173	u_char *p;
174	u_char buf[MAXBSIZE];
175	wchar_t wch;
176	mbstate_t mbs;
177
178	linect = wordct = charct = llct = tmpll = 0;
179	if (file == NULL)
180		fd = STDIN_FILENO;
181	else {
182		if ((fd = open(file, O_RDONLY, 0)) < 0) {
183			warn("%s: open", file);
184			return (1);
185		}
186		if (doword || (domulti && MB_CUR_MAX != 1))
187			goto word;
188		/*
189		 * Line counting is split out because it's a lot faster to get
190		 * lines than to get words, since the word count requires some
191		 * logic.
192		 */
193		if (doline) {
194			while ((len = read(fd, buf, MAXBSIZE))) {
195				if (len == -1) {
196					warn("%s: read", file);
197					(void)close(fd);
198					return (1);
199				}
200				if (siginfo) {
201					show_cnt(file, linect, wordct, charct,
202					    llct);
203				}
204				charct += len;
205				for (p = buf; len--; ++p)
206					if (*p == '\n') {
207						if (tmpll > llct)
208							llct = tmpll;
209						tmpll = 0;
210						++linect;
211					} else
212						tmpll++;
213			}
214			tlinect += linect;
215			if (dochar)
216				tcharct += charct;
217			if (dolongline) {
218				if (llct > tlongline)
219					tlongline = llct;
220			}
221			show_cnt(file, linect, wordct, charct, llct);
222			(void)close(fd);
223			return (0);
224		}
225		/*
226		 * If all we need is the number of characters and it's a
227		 * regular file, just stat the puppy.
228		 */
229		if (dochar || domulti) {
230			if (fstat(fd, &sb)) {
231				warn("%s: fstat", file);
232				(void)close(fd);
233				return (1);
234			}
235			if (S_ISREG(sb.st_mode)) {
236				charct = sb.st_size;
237				show_cnt(file, linect, wordct, charct, llct);
238				tcharct += charct;
239				(void)close(fd);
240				return (0);
241			}
242		}
243	}
244
245	/* Do it the hard way... */
246word:	gotsp = 1;
247	warned = 0;
248	memset(&mbs, 0, sizeof(mbs));
249	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
250		if (len == -1) {
251			warn("%s: read", file != NULL ? file : "stdin");
252			(void)close(fd);
253			return (1);
254		}
255		p = buf;
256		while (len > 0) {
257			if (siginfo)
258				show_cnt(file, linect, wordct, charct, llct);
259			if (!domulti || MB_CUR_MAX == 1) {
260				clen = 1;
261				wch = (unsigned char)*p;
262			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
263			    (size_t)-1) {
264				if (!warned) {
265					errno = EILSEQ;
266					warn("%s",
267					    file != NULL ? file : "stdin");
268					warned = 1;
269				}
270				memset(&mbs, 0, sizeof(mbs));
271				clen = 1;
272				wch = (unsigned char)*p;
273			} else if (clen == (size_t)-2)
274				break;
275			else if (clen == 0)
276				clen = 1;
277			charct++;
278			if (wch != L'\n')
279				tmpll++;
280			len -= clen;
281			p += clen;
282			if (wch == L'\n') {
283				if (tmpll > llct)
284					llct = tmpll;
285				tmpll = 0;
286				++linect;
287			}
288			if (iswspace(wch))
289				gotsp = 1;
290			else if (gotsp) {
291				gotsp = 0;
292				++wordct;
293			}
294		}
295	}
296	if (domulti && MB_CUR_MAX > 1)
297		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
298			warn("%s", file != NULL ? file : "stdin");
299	if (doline)
300		tlinect += linect;
301	if (doword)
302		twordct += wordct;
303	if (dochar || domulti)
304		tcharct += charct;
305	if (dolongline) {
306		if (llct > tlongline)
307			tlongline = llct;
308	}
309	show_cnt(file, linect, wordct, charct, llct);
310	(void)close(fd);
311	return (0);
312}
313
/*
 * Write the command synopsis to standard error and exit with status 1.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
320