1/* $NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $ */ 2 3/* 4 * Copyright (c) 1980, 1987, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33#ifndef lint 34__COPYRIGHT("@(#) Copyright (c) 1980, 1987, 1991, 1993\ 35 The Regents of the University of California. All rights reserved."); 36#endif /* not lint */ 37 38#ifndef lint 39#if 0 40static char sccsid[] = "@(#)wc.c 8.2 (Berkeley) 5/2/95"; 41#else 42__RCSID("$NetBSD: wc.c,v 1.34 2010/02/19 11:15:23 tron Exp $"); 43#endif 44#endif /* not lint */ 45 46/* wc line, word, char count and optionally longest line. */ 47 48#include <sys/param.h> 49#include <sys/file.h> 50#include <sys/stat.h> 51 52#include <ctype.h> 53#include <fcntl.h> 54#include <err.h> 55#include <errno.h> 56#include <locale.h> 57#include <stdbool.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62#include <wchar.h> 63#include <wctype.h> 64 65#ifdef NO_QUAD 66typedef u_long wc_count_t; 67# define WCFMT " %7lu" 68# define WCCAST unsigned long 69#else 70typedef u_quad_t wc_count_t; 71# define WCFMT " %7llu" 72# define WCCAST unsigned long long 73#endif 74 75static wc_count_t tlinect, twordct, tcharct, tlongest; 76static bool doline, doword, dobyte, dochar, dolongest; 77static int rval = 0; 78 79static void cnt(const char *); 80static void print_counts(wc_count_t, wc_count_t, wc_count_t, wc_count_t, 81 const char *); 82__dead static void usage(void); 83static size_t do_mb(wchar_t *, const char *, size_t, mbstate_t *, 84 size_t *, const char *); 85 86int 87main(int argc, char *argv[]) 88{ 89 int ch; 90 91 setlocale(LC_ALL, ""); 92 93 while ((ch = getopt(argc, argv, "lwcmL")) != -1) 94 switch (ch) { 95 case 'l': 96 doline = true; 97 break; 98 case 'w': 99 doword = true; 100 break; 101 case 'm': 102 dochar = true; 103 dobyte = 0; 104 break; 105 case 'c': 106 dochar = 0; 107 dobyte = true; 108 break; 109 case 'L': 110 dolongest = true; 111 break; 112 case '?': 113 default: 114 usage(); 115 } 116 argv += optind; 117 argc -= optind; 118 119 /* Wc's flags are on by default. */ 120 if (!(doline || doword || dobyte || dochar || dolongest)) 121 doline = doword = dobyte = true; 122 123 if (*argv == NULL) { 124 cnt(NULL); 125 } else { 126 bool dototal = (argc > 1); 127 128 do { 129 cnt(*argv); 130 } while(*++argv); 131 132 if (dototal) { 133 print_counts(tlinect, twordct, tcharct, tlongest, 134 "total"); 135 } 136 } 137 138 exit(rval); 139} 140 141static size_t 142do_mb(wchar_t *wc, const char *p, size_t len, mbstate_t *st, 143 size_t *retcnt, const char *file) 144{ 145 size_t r; 146 size_t c = 0; 147 148 do { 149 r = mbrtowc(wc, p, len, st); 150 if (r == (size_t)-1) { 151 warnx("%s: invalid byte sequence", file); 152 rval = 1; 153 154 /* XXX skip 1 byte */ 155 len--; 156 p++; 157 memset(st, 0, sizeof(*st)); 158 continue; 159 } else if (r == (size_t)-2) 160 break; 161 else if (r == 0) 162 r = 1; 163 c++; 164 if (wc) 165 wc++; 166 len -= r; 167 p += r; 168 } while (len > 0); 169 170 *retcnt = c; 171 172 return (r); 173} 174 175static void 176cnt(const char *file) 177{ 178 u_char buf[MAXBSIZE]; 179 wchar_t wbuf[MAXBSIZE]; 180 struct stat sb; 181 wc_count_t charct, linect, wordct, longest; 182 mbstate_t st; 183 u_char *C; 184 wchar_t *WC; 185 const char *name; /* filename or <stdin> */ 186 size_t r = 0; 187 int fd, len = 0; 188 189 linect = wordct = charct = longest = 0; 190 if (file != NULL) { 191 if ((fd = open(file, O_RDONLY, 0)) < 0) { 192 warn("%s", file); 193 rval = 1; 194 return; 195 } 196 name = file; 197 } else { 198 fd = STDIN_FILENO; 199 name = "<stdin>"; 200 } 201 202 if (dochar || doword || dolongest) 203 (void)memset(&st, 0, sizeof(st)); 204 205 if (!(doword || dolongest)) { 206 /* 207 * line counting is split out because it's a lot 208 * faster to get lines than to get words, since 209 * the word count requires some logic. 210 */ 211 if (doline || dochar) { 212 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 213 if (dochar) { 214 size_t wlen; 215 216 r = do_mb(0, (char *)buf, (size_t)len, 217 &st, &wlen, name); 218 charct += wlen; 219 } else if (dobyte) 220 charct += len; 221 if (doline) { 222 for (C = buf; len--; ++C) { 223 if (*C == '\n') 224 ++linect; 225 } 226 } 227 } 228 } 229 230 /* 231 * if all we need is the number of characters and 232 * it's a directory or a regular or linked file, just 233 * stat the puppy. We avoid testing for it not being 234 * a special device in case someone adds a new type 235 * of inode. 236 */ 237 else if (dobyte) { 238 if (fstat(fd, &sb)) { 239 warn("%s", name); 240 rval = 1; 241 } else { 242 if (S_ISREG(sb.st_mode) || 243 S_ISLNK(sb.st_mode) || 244 S_ISDIR(sb.st_mode)) { 245 charct = sb.st_size; 246 } else { 247 while ((len = 248 read(fd, buf, MAXBSIZE)) > 0) 249 charct += len; 250 } 251 } 252 } 253 } else { 254 /* do it the hard way... */ 255 wc_count_t linelen; 256 bool gotsp; 257 258 linelen = 0; 259 gotsp = true; 260 while ((len = read(fd, buf, MAXBSIZE)) > 0) { 261 size_t wlen; 262 263 r = do_mb(wbuf, (char *)buf, (size_t)len, &st, &wlen, 264 name); 265 if (dochar) { 266 charct += wlen; 267 } else if (dobyte) { 268 charct += len; 269 } 270 for (WC = wbuf; wlen--; ++WC) { 271 if (iswspace(*WC)) { 272 gotsp = true; 273 if (*WC == L'\n') { 274 ++linect; 275 if (linelen > longest) 276 longest = linelen; 277 linelen = 0; 278 } else { 279 linelen++; 280 } 281 } else { 282 /* 283 * This line implements the POSIX 284 * spec, i.e. a word is a "maximal 285 * string of characters delimited by 286 * whitespace." Notice nothing was 287 * said about a character being 288 * printing or non-printing. 289 */ 290 if (gotsp) { 291 gotsp = false; 292 ++wordct; 293 } 294 295 linelen++; 296 } 297 } 298 } 299 } 300 301 if (len == -1) { 302 warn("%s", name); 303 rval = 1; 304 } 305 if (dochar && r == (size_t)-2) { 306 warnx("%s: incomplete multibyte character", name); 307 rval = 1; 308 } 309 310 print_counts(linect, wordct, charct, longest, file); 311 312 /* 313 * don't bother checkint doline, doword, or dobyte --- speeds 314 * up the common case 315 */ 316 tlinect += linect; 317 twordct += wordct; 318 tcharct += charct; 319 if (dolongest && longest > tlongest) 320 tlongest = longest; 321 322 if (close(fd)) { 323 warn("%s", name); 324 rval = 1; 325 } 326} 327 328static void 329print_counts(wc_count_t lines, wc_count_t words, wc_count_t chars, 330 wc_count_t longest, const char *name) 331{ 332 333 if (doline) 334 (void)printf(WCFMT, (WCCAST)lines); 335 if (doword) 336 (void)printf(WCFMT, (WCCAST)words); 337 if (dobyte || dochar) 338 (void)printf(WCFMT, (WCCAST)chars); 339 if (dolongest) 340 (void)printf(WCFMT, (WCCAST)longest); 341 342 if (name != NULL) 343 (void)printf(" %s\n", name); 344 else 345 (void)putchar('\n'); 346} 347 348static void 349usage(void) 350{ 351 352 (void)fprintf(stderr, "usage: wc [-c | -m] [-Llw] [file ...]\n"); 353 exit(1); 354} 355