wc.c revision 227201
1218792Snp/*
2237436Snp * Copyright (c) 1980, 1987, 1991, 1993
3218792Snp *	The Regents of the University of California.  All rights reserved.
4218792Snp *
5218792Snp * Redistribution and use in source and binary forms, with or without
6218792Snp * modification, are permitted provided that the following conditions
7218792Snp * are met:
8218792Snp * 1. Redistributions of source code must retain the above copyright
9218792Snp *    notice, this list of conditions and the following disclaimer.
10218792Snp * 2. Redistributions in binary form must reproduce the above copyright
11218792Snp *    notice, this list of conditions and the following disclaimer in the
12218792Snp *    documentation and/or other materials provided with the distribution.
13218792Snp * 4. Neither the name of the University nor the names of its contributors
14218792Snp *    may be used to endorse or promote products derived from this software
15218792Snp *    without specific prior written permission.
16218792Snp *
17218792Snp * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18218792Snp * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19218792Snp * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20218792Snp * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21218792Snp * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22218792Snp * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23218792Snp * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24218792Snp * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25218792Snp * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26218792Snp * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27218792Snp * SUCH DAMAGE.
28218792Snp */
29218792Snp
30237263Snp#ifndef lint
31237263Snpstatic const char copyright[] =
32218792Snp"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\
33218792Snp	The Regents of the University of California.  All rights reserved.\n";
34218792Snp#endif /* not lint */
35228561Snp
36218792Snp#if 0
37220649Snp#ifndef lint
38246385Snpstatic char sccsid[] = "@(#)wc.c	8.1 (Berkeley) 6/6/93";
39246385Snp#endif /* not lint */
40246385Snp#endif
41246385Snp
42246385Snp#include <sys/cdefs.h>
43246385Snp__FBSDID("$FreeBSD: head/usr.bin/wc/wc.c 227201 2011-11-06 08:19:00Z ed $");
44218792Snp
45218792Snp#include <sys/param.h>
46218792Snp#include <sys/stat.h>
47218792Snp
48218792Snp#include <ctype.h>
49218792Snp#include <err.h>
50218792Snp#include <errno.h>
51218792Snp#include <fcntl.h>
52218792Snp#include <locale.h>
53218792Snp#include <stdint.h>
54218792Snp#include <stdio.h>
55218792Snp#include <stdlib.h>
56218792Snp#include <string.h>
57218792Snp#include <unistd.h>
58218792Snp#include <wchar.h>
59218792Snp#include <wctype.h>
60218792Snp
/* Aggregates over all inputs, printed by main() on the "total" line. */
static uintmax_t tlinect;
static uintmax_t twordct;
static uintmax_t tcharct;
static uintmax_t tlongline;

/* Column selectors; set from the command-line options in main(). */
static int doline;
static int doword;
static int dochar;
static int domulti;
static int dolongline;

/* Raised by siginfo_handler(); show_cnt() clears it after reporting. */
static volatile sig_atomic_t siginfo;

static int	cnt(const char *);
static void	show_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
		    uintmax_t charct, uintmax_t llct);
static void	usage(void);
69218792Snp
70218792Snpstatic void
71218792Snpsiginfo_handler(int sig __unused)
72218792Snp{
73218792Snp
74218792Snp	siginfo = 1;
75218792Snp}
76218792Snp
77218792Snpint
78218792Snpmain(int argc, char *argv[])
79218792Snp{
80218792Snp	int ch, errors, total;
81218792Snp
82218792Snp	(void) setlocale(LC_CTYPE, "");
83218792Snp
84218792Snp	while ((ch = getopt(argc, argv, "clmwL")) != -1)
85218792Snp		switch((char)ch) {
86218792Snp		case 'l':
87218792Snp			doline = 1;
88218792Snp			break;
89218792Snp		case 'w':
90218792Snp			doword = 1;
91218792Snp			break;
92218792Snp		case 'c':
93218792Snp			dochar = 1;
94218792Snp			domulti = 0;
95218792Snp			break;
96218792Snp		case 'L':
97218792Snp			dolongline = 1;
98218792Snp			break;
99218792Snp		case 'm':
100218792Snp			domulti = 1;
101218792Snp			dochar = 0;
102218792Snp			break;
103218792Snp		case '?':
104218792Snp		default:
105218792Snp			usage();
106218792Snp		}
107218792Snp	argv += optind;
108218792Snp	argc -= optind;
109218792Snp
110218792Snp	(void)signal(SIGINFO, siginfo_handler);
111218792Snp
112218792Snp	/* Wc's flags are on by default. */
113218792Snp	if (doline + doword + dochar + domulti + dolongline == 0)
114218792Snp		doline = doword = dochar = 1;
115218792Snp
116218792Snp	errors = 0;
117218792Snp	total = 0;
118218792Snp	if (!*argv) {
119218792Snp		if (cnt((char *)NULL) != 0)
120218792Snp			++errors;
121218792Snp	} else {
122218792Snp		do {
123218792Snp			if (cnt(*argv) != 0)
124218792Snp				++errors;
125218792Snp			++total;
126218792Snp		} while(*++argv);
127218792Snp	}
128218792Snp
129218792Snp	if (total > 1)
130218792Snp		show_cnt("total", tlinect, twordct, tcharct, tlongline);
131218792Snp	exit(errors == 0 ? 0 : 1);
132218792Snp}
133218792Snp
134218792Snpstatic void
135218792Snpshow_cnt(const char *file, uintmax_t linect, uintmax_t wordct,
136218792Snp    uintmax_t charct, uintmax_t llct)
137218792Snp{
138218792Snp	FILE *out;
139218792Snp
140218792Snp	if (!siginfo)
141218792Snp		out = stdout;
142218792Snp	else {
143237436Snp		out = stderr;
144237436Snp		siginfo = 0;
145237436Snp	}
146237436Snp
147237436Snp	if (doline)
148237436Snp		(void)fprintf(out, " %7ju", linect);
149237436Snp	if (doword)
150237436Snp		(void)fprintf(out, " %7ju", wordct);
151237436Snp	if (dochar || domulti)
152237436Snp		(void)fprintf(out, " %7ju", charct);
153237436Snp	if (dolongline)
154237436Snp		(void)fprintf(out, " %7ju", llct);
155237436Snp	if (file != NULL)
156237436Snp		(void)fprintf(out, " %s\n", file);
157247355Snp	else
158247355Snp		(void)fprintf(out, "\n");
159247355Snp}
160247355Snp
161247355Snpstatic int
162247355Snpcnt(const char *file)
163247355Snp{
164247355Snp	struct stat sb;
165247355Snp	uintmax_t linect, wordct, charct, llct, tmpll;
166247355Snp	int fd, len, warned;
167247355Snp	size_t clen;
168247355Snp	short gotsp;
169247355Snp	u_char *p;
170247355Snp	u_char buf[MAXBSIZE];
171247355Snp	wchar_t wch;
172247355Snp	mbstate_t mbs;
173247355Snp
174247355Snp	linect = wordct = charct = llct = tmpll = 0;
175247355Snp	if (file == NULL)
176247355Snp		fd = STDIN_FILENO;
177247355Snp	else {
178247355Snp		if ((fd = open(file, O_RDONLY, 0)) < 0) {
179250090Snp			warn("%s: open", file);
180247355Snp			return (1);
181247355Snp		}
182247355Snp		if (doword || (domulti && MB_CUR_MAX != 1))
183247355Snp			goto word;
184247355Snp		/*
185218792Snp		 * Line counting is split out because it's a lot faster to get
186218792Snp		 * lines than to get words, since the word count requires some
187218792Snp		 * logic.
188218792Snp		 */
189218792Snp		if (doline) {
190218792Snp			while ((len = read(fd, buf, MAXBSIZE))) {
191218792Snp				if (len == -1) {
192218792Snp					warn("%s: read", file);
193218792Snp					(void)close(fd);
194218792Snp					return (1);
195218792Snp				}
196218792Snp				if (siginfo) {
197218792Snp					show_cnt(file, linect, wordct, charct,
198218792Snp					    llct);
199218792Snp				}
200218792Snp				charct += len;
201218792Snp				for (p = buf; len--; ++p)
202218792Snp					if (*p == '\n') {
203218792Snp						if (tmpll > llct)
204218792Snp							llct = tmpll;
205218792Snp						tmpll = 0;
206218792Snp						++linect;
207218792Snp					} else
208218792Snp						tmpll++;
209218792Snp			}
210218792Snp			tlinect += linect;
211218792Snp			if (dochar)
212218792Snp				tcharct += charct;
213218792Snp			if (dolongline) {
214218792Snp				if (llct > tlongline)
215218792Snp					tlongline = llct;
216218792Snp			}
217218792Snp			show_cnt(file, linect, wordct, charct, llct);
218218792Snp			(void)close(fd);
219218792Snp			return (0);
220218792Snp		}
221218792Snp		/*
222218792Snp		 * If all we need is the number of characters and it's a
223218792Snp		 * regular file, just stat the puppy.
224218792Snp		 */
225218792Snp		if (dochar || domulti) {
226218792Snp			if (fstat(fd, &sb)) {
227218792Snp				warn("%s: fstat", file);
228218792Snp				(void)close(fd);
229218792Snp				return (1);
230218792Snp			}
231218792Snp			if (S_ISREG(sb.st_mode)) {
232218792Snp				charct = sb.st_size;
233218792Snp				show_cnt(file, linect, wordct, charct, llct);
234218792Snp				tcharct += charct;
235218792Snp				(void)close(fd);
236218792Snp				return (0);
237218792Snp			}
238218792Snp		}
239228561Snp	}
240218792Snp
241218792Snp	/* Do it the hard way... */
242218792Snpword:	gotsp = 1;
243218792Snp	warned = 0;
244218792Snp	memset(&mbs, 0, sizeof(mbs));
245218792Snp	while ((len = read(fd, buf, MAXBSIZE)) != 0) {
246218792Snp		if (len == -1) {
247218792Snp			warn("%s: read", file != NULL ? file : "stdin");
248218792Snp			(void)close(fd);
249218792Snp			return (1);
250218792Snp		}
251218792Snp		p = buf;
252218792Snp		while (len > 0) {
253218792Snp			if (siginfo)
254218792Snp				show_cnt(file, linect, wordct, charct, llct);
255218792Snp			if (!domulti || MB_CUR_MAX == 1) {
256218792Snp				clen = 1;
257218792Snp				wch = (unsigned char)*p;
258218792Snp			} else if ((clen = mbrtowc(&wch, p, len, &mbs)) ==
259218792Snp			    (size_t)-1) {
260218792Snp				if (!warned) {
261218792Snp					errno = EILSEQ;
262286271Snp					warn("%s",
263286271Snp					    file != NULL ? file : "stdin");
264218792Snp					warned = 1;
265218792Snp				}
266218792Snp				memset(&mbs, 0, sizeof(mbs));
267218792Snp				clen = 1;
268218792Snp				wch = (unsigned char)*p;
269218792Snp			} else if (clen == (size_t)-2)
270218792Snp				break;
271218792Snp			else if (clen == 0)
272218792Snp				clen = 1;
273218792Snp			charct++;
274218792Snp			if (wch != L'\n')
275218792Snp				tmpll++;
276218792Snp			len -= clen;
277218792Snp			p += clen;
278218792Snp			if (wch == L'\n') {
279218792Snp				if (tmpll > llct)
280218792Snp					llct = tmpll;
281218792Snp				tmpll = 0;
282218792Snp				++linect;
283218792Snp			}
284218792Snp			if (iswspace(wch))
285218792Snp				gotsp = 1;
286218792Snp			else if (gotsp) {
287218792Snp				gotsp = 0;
288218792Snp				++wordct;
289286271Snp			}
290286271Snp		}
291218792Snp	}
292218792Snp	if (domulti && MB_CUR_MAX > 1)
293218792Snp		if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned)
294218792Snp			warn("%s", file != NULL ? file : "stdin");
295218792Snp	if (doline)
296218792Snp		tlinect += linect;
297218792Snp	if (doword)
298218792Snp		twordct += wordct;
299218792Snp	if (dochar || domulti)
300218792Snp		tcharct += charct;
301218792Snp	if (dolongline) {
302247355Snp		if (llct > tlongline)
303247355Snp			tlongline = llct;
304247355Snp	}
305247355Snp	show_cnt(file, linect, wordct, charct, llct);
306247355Snp	(void)close(fd);
307218792Snp	return (0);
308218792Snp}
309247355Snp
/*
 * usage --
 *	Write the command synopsis to stderr and terminate with exit
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fputs("usage: wc [-Lclmw] [file ...]\n", stderr);
	exit(1);
}
316218792Snp