1/* 2 * Copyright (c) 1980, 1987, 1991, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34#ifndef lint 35static const char copyright[] = 36"@(#) Copyright (c) 1980, 1987, 1991, 1993\n\ 37 The Regents of the University of California. All rights reserved.\n"; 38#endif /* not lint */ 39 40#if 0 41#ifndef lint 42static char sccsid[] = "@(#)wc.c 8.1 (Berkeley) 6/6/93"; 43#endif /* not lint */ 44#endif 45 46#include <sys/cdefs.h> 47__FBSDID("$FreeBSD: src/usr.bin/wc/wc.c,v 1.21 2004/12/27 22:27:56 josef Exp $"); 48 49#include <sys/param.h> 50#include <sys/mount.h> 51#include <sys/stat.h> 52 53#include <ctype.h> 54#include <err.h> 55#include <errno.h> 56#include <fcntl.h> 57#include <locale.h> 58#include <stdint.h> 59#include <stdio.h> 60#include <stdlib.h> 61#include <string.h> 62#include <unistd.h> 63#include <wchar.h> 64#include <wctype.h> 65 66/* We allocte this much memory statically, and use it as a fallback for 67 malloc failure, or statfs failure. So it should be small, but not 68 "too small" */ 69#define SMALL_BUF_SIZE (1024 * 8) 70 71uintmax_t tlinect, twordct, tcharct; 72int doline, doword, dochar, domulti; 73 74static int cnt(const char *); 75static void usage(void); 76 77int 78main(int argc, char *argv[]) 79{ 80 int ch, errors, total; 81 82 (void) setlocale(LC_CTYPE, ""); 83 84 while ((ch = getopt(argc, argv, "clmw")) != -1) 85 switch((char)ch) { 86 case 'l': 87 doline = 1; 88 break; 89 case 'w': 90 doword = 1; 91 break; 92 case 'c': 93 dochar = 1; 94 domulti = 0; 95 break; 96 case 'm': 97 domulti = 1; 98 dochar = 0; 99 break; 100 case '?': 101 default: 102 usage(); 103 } 104 argv += optind; 105 argc -= optind; 106 107 /* Wc's flags are on by default. */ 108 if (doline + doword + dochar + domulti == 0) 109 doline = doword = dochar = 1; 110 111 errors = 0; 112 total = 0; 113 if (!*argv) { 114 if (cnt((char *)NULL) != 0) 115 ++errors; 116 else 117 (void)printf("\n"); 118 } 119 else do { 120 if (cnt(*argv) != 0) 121 ++errors; 122 else 123 (void)printf(" %s\n", *argv); 124 ++total; 125 } while(*++argv); 126 127 if (total > 1) { 128 if (doline) 129 (void)printf(" %7ju", tlinect); 130 if (doword) 131 (void)printf(" %7ju", twordct); 132 if (dochar || domulti) 133 (void)printf(" %7ju", tcharct); 134 (void)printf(" total\n"); 135 } 136 exit(errors == 0 ? 0 : 1); 137} 138 139static int 140cnt(const char *file) 141{ 142 struct stat sb; 143 struct statfs fsb; 144 uintmax_t linect, wordct, charct; 145 int fd, len, warned; 146 int stat_ret; 147 size_t clen; 148 short gotsp; 149 u_char *p; 150 static u_char small_buf[SMALL_BUF_SIZE]; 151 static u_char *buf = small_buf; 152 static off_t buf_size = SMALL_BUF_SIZE; 153 wchar_t wch; 154 mbstate_t mbs; 155 156 linect = wordct = charct = 0; 157 if (file == NULL) { 158 file = "stdin"; 159 fd = STDIN_FILENO; 160 } else { 161 if ((fd = open(file, O_RDONLY, 0)) < 0) { 162 warn("%s: open", file); 163 return (1); 164 } 165 } 166 167 if (fstatfs(fd, &fsb)) { 168 fsb.f_iosize = SMALL_BUF_SIZE; 169 } 170 if (fsb.f_iosize != buf_size) { 171 if (buf != small_buf) { 172 free(buf); 173 } 174 if (fsb.f_iosize == SMALL_BUF_SIZE || !(buf = malloc(fsb.f_iosize))) { 175 buf = small_buf; 176 buf_size = SMALL_BUF_SIZE; 177 } else { 178 buf_size = fsb.f_iosize; 179 } 180 } 181 182 if (doword || (domulti && MB_CUR_MAX != 1)) 183 goto word; 184 /* 185 * Line counting is split out because it's a lot faster to get 186 * lines than to get words, since the word count requires some 187 * logic. 188 */ 189 if (doline) { 190 while ((len = read(fd, buf, buf_size))) { 191 if (len == -1) { 192 warn("%s: read", file); 193 (void)close(fd); 194 return (1); 195 } 196 charct += len; 197 for (p = buf; len--; ++p) 198 if (*p == '\n') 199 ++linect; 200 } 201 tlinect += linect; 202 (void)printf(" %7ju", linect); 203 if (dochar) { 204 tcharct += charct; 205 (void)printf(" %7ju", charct); 206 } 207 (void)close(fd); 208 return (0); 209 } 210 /* 211 * If all we need is the number of characters and it's a 212 * regular file, just stat the puppy. 213 */ 214 if (dochar || domulti) { 215 if (fstat(fd, &sb)) { 216 warn("%s: fstat", file); 217 (void)close(fd); 218 return (1); 219 } 220 if (S_ISREG(sb.st_mode)) { 221 (void)printf(" %7lld", (long long)sb.st_size); 222 tcharct += sb.st_size; 223 (void)close(fd); 224 return (0); 225 } 226 } 227 228 /* Do it the hard way... */ 229word: gotsp = 1; 230 warned = 0; 231 memset(&mbs, 0, sizeof(mbs)); 232 while ((len = read(fd, buf, buf_size)) != 0) { 233 if (len == -1) { 234 warn("%s: read", file); 235 (void)close(fd); 236 return (1); 237 } 238 p = buf; 239 while (len > 0) { 240 if (!domulti || MB_CUR_MAX == 1) { 241 clen = 1; 242 wch = (unsigned char)*p; 243 } else if ((clen = mbrtowc(&wch, p, len, &mbs)) == 244 (size_t)-1) { 245 if (!warned) { 246 errno = EILSEQ; 247 warn("%s", file); 248 warned = 1; 249 } 250 memset(&mbs, 0, sizeof(mbs)); 251 clen = 1; 252 wch = (unsigned char)*p; 253 } else if (clen == (size_t)-2) 254 break; 255 else if (clen == 0) 256 clen = 1; 257 charct++; 258 len -= clen; 259 p += clen; 260 if (wch == L'\n') 261 ++linect; 262 if (iswspace(wch)) 263 gotsp = 1; 264 else if (gotsp) { 265 gotsp = 0; 266 ++wordct; 267 } 268 } 269 } 270 if (domulti && MB_CUR_MAX > 1) 271 if (mbrtowc(NULL, NULL, 0, &mbs) == (size_t)-1 && !warned) 272 warn("%s", file); 273 if (doline) { 274 tlinect += linect; 275 (void)printf(" %7ju", linect); 276 } 277 if (doword) { 278 twordct += wordct; 279 (void)printf(" %7ju", wordct); 280 } 281 if (dochar || domulti) { 282 tcharct += charct; 283 (void)printf(" %7ju", charct); 284 } 285 (void)close(fd); 286 return (0); 287} 288 289static void 290usage() 291{ 292 (void)fprintf(stderr, "usage: wc [-clmw] [file ...]\n"); 293 exit(1); 294} 295