wc.c revision 208210
1272343Sngie/* 2272343Sngie * Copyright (c) 1980, 1987, 1991, 1993 3272343Sngie * The Regents of the University of California. All rights reserved. 4272343Sngie * 5272343Sngie * Redistribution and use in source and binary forms, with or without 6272343Sngie * modification, are permitted provided that the following conditions 7272343Sngie * are met: 8272343Sngie * 1. Redistributions of source code must retain the above copyright 9272343Sngie * notice, this list of conditions and the following disclaimer. 10272343Sngie * 2. Redistributions in binary form must reproduce the above copyright 11272343Sngie * notice, this list of conditions and the following disclaimer in the 12272343Sngie * documentation and/or other materials provided with the distribution. 13272343Sngie * 3. All advertising materials mentioning features or use of this software 14272343Sngie * must display the following acknowledgement: 15272343Sngie * This product includes software developed by the University of 16272343Sngie * California, Berkeley and its contributors. 17272343Sngie * 4. Neither the name of the University nor the names of its contributors 18272343Sngie * may be used to endorse or promote products derived from this software 19272343Sngie * without specific prior written permission. 20272343Sngie * 21272343Sngie * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22272343Sngie * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23272343Sngie * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24272343Sngie * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25272343Sngie * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26272343Sngie * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27272343Sngie * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28272343Sngie * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29272343Sngie * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30272343Sngie * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31272343Sngie * SUCH DAMAGE. 32272343Sngie */ 33272343Sngie 34272343Sngie#ifndef lint 35272343Sngiestatic const char copyright[] = 36272343Sngie"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\ 37272343Sngie The Regents of the University of California. All rights reserved.\n"; 38272343Sngie#endif /* not lint */ 39272343Sngie 40272343Sngie#if 0 41272343Sngie#ifndef lint 42272343Sngiestatic char sccsid[] = "@(#)wc.c 8.1 (Berkeley) 6/6/93"; 43272343Sngie#endif /* not lint */ 44272343Sngie#endif 45272343Sngie 46272343Sngie#include <sys/cdefs.h> 47272343Sngie__FBSDID("$FreeBSD: head/usr.bin/wc/wc.c 208210 2010-05-17 19:13:49Z pjd $"); 48272343Sngie 49272343Sngie#include <sys/param.h> 50272343Sngie#include <sys/stat.h> 51272343Sngie 52272343Sngie#include <ctype.h> 53272343Sngie#include <err.h> 54272343Sngie#include <errno.h> 55272343Sngie#include <fcntl.h> 56272343Sngie#include <locale.h> 57272343Sngie#include <stdint.h> 58272343Sngie#include <stdio.h> 59272343Sngie#include <stdlib.h> 60272343Sngie#include <string.h> 61272343Sngie#include <unistd.h> 62272343Sngie#include <wchar.h> 63272343Sngie#include <wctype.h> 64272343Sngie 65272343Sngieuintmax_t tlinect, twordct, tcharct, tlongline; 66272343Sngieint doline, doword, dochar, domulti, dolongline; 67272343Sngiestatic volatile sig_atomic_t siginfo; 68272343Sngie 69272343Sngiestatic void show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, 70272343Sngie uintmax_t charct, uintmax_t llct); 71272343Sngiestatic int cnt(const char *); 72272343Sngiestatic void usage(void); 73272343Sngie 74272343Sngiestatic void 75272343Sngiesiginfo_handler(int sig __unused) 76272343Sngie{ 77272343Sngie 78272343Sngie siginfo = 1; 79272343Sngie} 80272343Sngie 81272343Sngieint 82272343Sngiemain(int argc, char *argv[]) 83272343Sngie{ 84272343Sngie int ch, errors, total; 85272343Sngie 86272343Sngie (void) setlocale(LC_CTYPE, ""); 87272343Sngie 88272343Sngie while ((ch = getopt(argc, argv, "clmwL")) != -1) 89272343Sngie switch((char)ch) { 90272343Sngie case 'l': 91272343Sngie doline = 1; 92272343Sngie break; 93272343Sngie case 'w': 94272343Sngie doword = 1; 95272343Sngie break; 96272343Sngie case 'c': 97272343Sngie dochar = 1; 98272343Sngie domulti = 0; 99272343Sngie break; 100272343Sngie case 'L': 101272343Sngie dolongline = 1; 102272343Sngie break; 103272343Sngie case 'm': 104272343Sngie domulti = 1; 105272343Sngie dochar = 0; 106272343Sngie break; 107272343Sngie case '?': 108272343Sngie default: 109272343Sngie usage(); 110272343Sngie } 111272343Sngie argv += optind; 112272343Sngie argc -= optind; 113272343Sngie 114272343Sngie (void)signal(SIGINFO, siginfo_handler); 115272343Sngie 116272343Sngie /* Wc's flags are on by default. */ 117272343Sngie if (doline + doword + dochar + domulti + dolongline == 0) 118272343Sngie doline = doword = dochar = 1; 119272343Sngie 120272343Sngie errors = 0; 121272343Sngie total = 0; 122272343Sngie if (!*argv) { 123272343Sngie if (cnt((char *)NULL) != 0) 124272343Sngie ++errors; 125272343Sngie } else { 126272343Sngie do { 127272343Sngie if (cnt(*argv) != 0) 128272343Sngie ++errors; 129272343Sngie ++total; 130272343Sngie } while(*++argv); 131272343Sngie } 132272343Sngie 133272343Sngie if (total > 1) 134272343Sngie show_cnt("total", tlinect, twordct, tcharct, tlongline); 135272343Sngie exit(errors == 0 ? 0 : 1); 136} 137 138static void 139show_cnt(const char *file, uintmax_t linect, uintmax_t wordct, 140 uintmax_t charct, uintmax_t llct) 141{ 142 FILE *out; 143 144 if (!siginfo) 145 out = stdout; 146 else { 147 out = stderr; 148 siginfo = 0; 149 } 150 151 if (doline) 152 (void)fprintf(out, " %7ju", linect); 153 if (doword) 154 (void)fprintf(out, " %7ju", wordct); 155 if (dochar || domulti) 156 (void)fprintf(out, " %7ju", charct); 157 if (dolongline) 158 (void)fprintf(out, " %7ju", llct); 159 if (file != NULL) 160 (void)fprintf(out, " %s\n", file); 161 else 162 (void)fprintf(out, "\n"); 163} 164 165static int 166cnt(const char *file) 167{ 168 struct stat sb; 169 uintmax_t linect, wordct, charct, llct, tmpll; 170 int fd, len, warned; 171 size_t clen; 172 short gotsp; 173 u_char *p; 174 u_char buf[MAXBSIZE]; 175 wchar_t wch; 176 mbstate_t mbs; 177 178 linect = wordct = charct = llct = tmpll = 0; 179 if (file == NULL) 180 fd = STDIN_FILENO; 181 else { 182 if ((fd = open(file, O_RDONLY, 0)) < 0) { 183 warn("%s: open", file); 184 return (1); 185 } 186 if (doword || (domulti && MB_CUR_MAX != 1)) 187 goto word; 188 /* 189 * Line counting is split out because it's a lot faster to get 190 * lines than to get words, since the word count requires some 191 * logic. 192 */ 193 if (doline) { 194 while ((len = read(fd, buf, MAXBSIZE))) { 195 if (len == -1) { 196 warn("%s: read", file); 197 (void)close(fd); 198 return (1); 199 } 200 if (siginfo) { 201 show_cnt(file, linect, wordct, charct, 202 llct); 203 } 204 charct += len; 205 for (p = buf; len--; ++p) 206 if (*p == '\n') { 207 if (tmpll > llct) 208 llct = tmpll; 209 tmpll = 0; 210 ++linect; 211 } else 212 tmpll++; 213 } 214 tlinect += linect; 215 if (dochar) 216 tcharct += charct; 217 if (dolongline) { 218 if (llct > tlongline) 219 tlongline = llct; 220 } 221 show_cnt(file, linect, wordct, charct, llct); 222 (void)close(fd); 223 return (0); 224 } 225 /* 226 * If all we need is the number of characters and it's a 227 * regular file, just stat the puppy. 228 */ 229 if (dochar || domulti) { 230 if (fstat(fd, &sb)) { 231 warn("%s: fstat", file); 232 (void)close(fd); 233 return (1); 234 } 235 if (S_ISREG(sb.st_mode)) { 236 charct = sb.st_size; 237 show_cnt(file, linect, wordct, charct, llct); 238 tcharct += charct; 239 (void)close(fd); 240 return (0); 241 } 242 } 243 } 244 245 /* Do it the hard way... */ 246word: gotsp = 1; 247 warned = 0; 248 memset(&mbs, 0, sizeof(mbs)); 249 while ((len = read(fd, buf, MAXBSIZE)) != 0) { 250 if (len == -1) { 251 warn("%s: read", file != NULL ? file : "stdin"); 252 (void)close(fd); 253 return (1); 254 } 255 p = buf; 256 while (len > 0) { 257 if (siginfo) 258 show_cnt(file, linect, wordct, charct, llct); 259 if (!domulti || MB_CUR_MAX == 1) { 260 clen = 1; 261 wch = (unsigned char)*p; 262 } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 263 (size_t)-1) { 264 if (!warned) { 265 errno = EILSEQ; 266 warn("%s", 267 file != NULL ? file : "stdin"); 268 warned = 1; 269 } 270 memset(&mbs, 0, sizeof(mbs)); 271 clen = 1; 272 wch = (unsigned char)*p; 273 } else if (clen == (size_t)-2) 274 break; 275 else if (clen == 0) 276 clen = 1; 277 charct++; 278 if (wch != L'\n') 279 tmpll++; 280 len -= clen; 281 p += clen; 282 if (wch == L'\n') { 283 if (tmpll > llct) 284 llct = tmpll; 285 tmpll = 0; 286 ++linect; 287 } 288 if (iswspace(wch)) 289 gotsp = 1; 290 else if (gotsp) { 291 gotsp = 0; 292 ++wordct; 293 } 294 } 295 } 296 if (domulti && MB_CUR_MAX > 1) 297 if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) 298 warn("%s", file != NULL ? file : "stdin"); 299 if (doline) 300 tlinect += linect; 301 if (doword) 302 twordct += wordct; 303 if (dochar || domulti) 304 tcharct += charct; 305 if (dolongline) { 306 if (llct > tlongline) 307 tlongline = llct; 308 } 309 show_cnt(file, linect, wordct, charct, llct); 310 (void)close(fd); 311 return (0); 312} 313 314static void 315usage(void) 316{ 317 (void)fprintf(stderr, "usage: wc [-Lclmw] [file ...]\n"); 318 exit(1); 319} 320