wc.c revision 208210
1/*
2 * Copyright (c) 1980, 1987, 1991, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 *    must display the following acknowledgement:
15 *	This product includes software developed by the University of
16 *	California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 *    may be used to endorse or promote products derived from this software
19 *    without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#ifndef lint
35static const char copyright[] =
36"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
37	The Regents of the University of California.  All rights reserved.\n";
38#endif /* not lint */
39
40#if 0
41#ifndef lint
42static char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
43#endif /* not lint */
44#endif
45
46#include <sys/cdefs.h>
47__FBSDID("$FreeBSD: head/usr.bin/wc/wc.c 208210 2010-05-17 19:13:49Z pjd $");
48
49#include <sys/param.h>
50#include <sys/stat.h>
51
52#include <ctype.h>
53#include <err.h>
54#include <errno.h>
55#include <fcntl.h>
56#include <locale.h>
57#include <stdint.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62#include <wchar.h>
63#include <wctype.h>
64
/*
 * Running totals across all inputs; printed as the "total" line by main()
 * when more than one file was processed.  tlongline holds the longest line
 * seen in any input rather than a sum.
 */
static uintmax_t tlinect, twordct, tcharct, tlongline;

/* Option flags set by main(): -l, -w, -c, -m and -L respectively. */
static int doline, doword, dochar, domulti, dolongline;

/* Set to 1 by siginfo_handler(); tested and cleared by show_cnt(). */
static volatile sig_atomic_t siginfo;
68
69static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
70		    uintmax_t charct, uintmax_t llct);
71static int	cnt(const char *);
72static void	usage(void);
73
74static void
75siginfo_handler(int sig __unused)
76{
77
78	siginfo = 1;
79}
80
81int
82main(int argc, char *argv[])
83{
84	int ch, errors, total;
85
86	(void) setlocale(LC_CTYPE, "");
87
88	while ((ch = getopt(argc, argv, "clmwL")) != -1)
89		switch((char)ch) {
90		case 'l':
91			doline = 1;
92			break;
93		case 'w':
94			doword = 1;
95			break;
96		case 'c':
97			dochar = 1;
98			domulti = 0;
99			break;
100		case 'L':
101			dolongline = 1;
102			break;
103		case 'm':
104			domulti = 1;
105			dochar = 0;
106			break;
107		case '?':
108		default:
109			usage();
110		}
111	argv += optind;
112	argc -= optind;
113
114	(void)signal(SIGINFO, siginfo_handler);
115
116	/* Wc's flags are on by default. */
117	if (doline + doword + dochar + domulti + dolongline == 0)
118		doline = doword = dochar = 1;
119
120	errors = 0;
121	total = 0;
122	if (!*argv) {
123		if (cnt((char *)NULL) != 0)
124			++errors;
125	} else {
126		do {
127			if (cnt(*argv) != 0)
128				++errors;
129			++total;
130		} while(*++argv);
131	}
132
133	if (total > 1)
134		show_cnt("total", tlinect, twordct, tcharct, tlongline);
135	exit(errors == 0 ? 0 : 1);
136}
137
138static void
139show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
140    uintmax_t charct, uintmax_t llct)
141{
142	FILE *out;
143
144	if (!siginfo)
145		out = stdout;
146	else {
147		out = stderr;
148		siginfo = 0;
149	}
150
151	if (doline)
152		(void)fprintf(out, " %7ju", linect);
153	if (doword)
154		(void)fprintf(out, " %7ju", wordct);
155	if (dochar || domulti)
156		(void)fprintf(out, " %7ju", charct);
157	if (dolongline)
158		(void)fprintf(out, " %7ju", llct);
159	if (file != NULL)
160		(void)fprintf(out, " %s\n", file);
161	else
162		(void)fprintf(out, "\n");
163}
164
165static int
166cnt(const char *file)
167{
168	struct stat sb;
169	uintmax_t linect, wordct, charct, llct, tmpll;
170	int fd, len, warned;
171	size_t clen;
172	short gotsp;
173	u_char *p;
174	u_char buf[MAXBSIZE];
175	wchar_t wch;
176	mbstate_t mbs;
177
178	linect = wordct = charct = llct = tmpll = 0;
179	if (file == NULL)
180		fd = STDIN_FILENO;
181	else {
182		if ((fd = open(file, O_RDONLY, 0)) < 0) {
183			warn("%s: open", file);
184			return (1);
185		}
186		if (doword || (domulti && MB_CUR_MAX != 1))
187			goto word;
188		/*
189		 * Line counting is split out because it's a lot faster to get
190		 * lines than to get words, since the word count requires some
191		 * logic.
192		 */
193		if (doline) {
194			while ((len = read(fd, buf, MAXBSIZE))) {
195				if (len == -1) {
196					warn("%s: read", file);
197					(void)close(fd);
198					return (1);
199				}
200				if (siginfo) {
201					show_cnt(file, linect, wordct, charct,
202					    llct);
203				}
204				charct += len;
205				for (p = buf; len--; ++p)
206					if (*p == '\n') {
207						if (tmpll > llct)
208							llct = tmpll;
209						tmpll = 0;
210						++linect;
211					} else
212						tmpll++;
213			}
214			tlinect += linect;
215			if (dochar)
216				tcharct += charct;
217			if (dolongline) {
218				if (llct > tlongline)
219					tlongline = llct;
220			}
221			show_cnt(file, linect, wordct, charct, llct);
222			(void)close(fd);
223			return (0);
224		}
225		/*
226		 * If all we need is the number of characters and it's a
227		 * regular file, just stat the puppy.
228		 */
229		if (dochar || domulti) {
230			if (fstat(fd, &sb)) {
231				warn("%s: fstat", file);
232				(void)close(fd);
233				return (1);
234			}
235			if (S_ISREG(sb.st_mode)) {
236				charct = sb.st_size;
237				show_cnt(file, linect, wordct, charct, llct);
238				tcharct += charct;
239				(void)close(fd);
240				return (0);
241			}
242		}
243	}
244
245	/* Do it the hard way... */
246word:	gotsp = 1;
247	warned = 0;
248	memset(&mbs, 0, sizeof(mbs));
249	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
250		if (len == -1) {
251			warn("%s: read", file != NULL ? file : "stdin");
252			(void)close(fd);
253			return (1);
254		}
255		p = buf;
256		while (len > 0) {
257			if (siginfo)
258				show_cnt(file, linect, wordct, charct, llct);
259			if (!domulti || MB_CUR_MAX == 1) {
260				clen = 1;
261				wch = (unsigned char)*p;
262			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
263			    (size_t)-1) {
264				if (!warned) {
265					errno = EILSEQ;
266					warn("%s",
267					    file != NULL ? file : "stdin");
268					warned = 1;
269				}
270				memset(&mbs, 0, sizeof(mbs));
271				clen = 1;
272				wch = (unsigned char)*p;
273			} else if (clen == (size_t)-2)
274				break;
275			else if (clen == 0)
276				clen = 1;
277			charct++;
278			if (wch != L'\n')
279				tmpll++;
280			len -= clen;
281			p += clen;
282			if (wch == L'\n') {
283				if (tmpll > llct)
284					llct = tmpll;
285				tmpll = 0;
286				++linect;
287			}
288			if (iswspace(wch))
289				gotsp = 1;
290			else if (gotsp) {
291				gotsp = 0;
292				++wordct;
293			}
294		}
295	}
296	if (domulti && MB_CUR_MAX > 1)
297		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
298			warn("%s", file != NULL ? file : "stdin");
299	if (doline)
300		tlinect += linect;
301	if (doword)
302		twordct += wordct;
303	if (dochar || domulti)
304		tcharct += charct;
305	if (dolongline) {
306		if (llct > tlongline)
307			tlongline = llct;
308	}
309	show_cnt(file, linect, wordct, charct, llct);
310	(void)close(fd);
311	return (0);
312}
313
/*
 * Write the usage synopsis to stderr and terminate with exit status 1.
 * Never returns.
 */
static void
usage(void)
{

	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
320