1/* vi: set sw=4 ts=4: */ 2/* 3 * wc implementation for busybox 4 * 5 * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> 6 * 7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 8 */ 9 10/* BB_AUDIT SUSv3 compliant. */ 11/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ 12 13/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) 14 * 15 * Rewritten to fix a number of problems and do some size optimizations. 16 * Problems in the previous busybox implementation (besides bloat) included: 17 * 1) broken 'wc -c' optimization (read note below) 18 * 2) broken handling of '-' args 19 * 3) no checking of ferror on EOF returns 20 * 4) isprint() wasn't considered when word counting. 21 * 22 * NOTES: 23 * 24 * The previous busybox wc attempted an optimization using stat for the 25 * case of counting chars only. I omitted that because it was broken. 26 * It didn't take into account the possibility of input coming from a 27 * pipe, or input from a file with file pointer not at the beginning. 28 * 29 * To implement such a speed optimization correctly, not only do you 30 * need the size, but also the file position. Note also that the 31 * file position may be past the end of file. Consider the example 32 * (adapted from example in gnu wc.c) 33 * 34 * echo hello > /tmp/testfile && 35 * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile 36 * 37 * for which 'wc -c' should output '0'. 38 */ 39#include "libbb.h" 40#include "unicode.h" 41 42#if !ENABLE_LOCALE_SUPPORT 43# undef isprint 44# undef isspace 45# define isprint(c) ((unsigned)((c) - 0x20) <= (0x7e - 0x20)) 46# define isspace(c) ((c) == ' ') 47#endif 48 49#if ENABLE_FEATURE_WC_LARGE 50# define COUNT_T unsigned long long 51# define COUNT_FMT "llu" 52#else 53# define COUNT_T unsigned 54# define COUNT_FMT "u" 55#endif 56 57/* We support -m even when UNICODE_SUPPORT is off, 58 * we just don't advertise it in help text, 59 * since it is the same as -c in this case. 60 */ 61 62//usage:#define wc_trivial_usage 63//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..." 64//usage: 65//usage:#define wc_full_usage "\n\n" 66//usage: "Count lines, words, and bytes for each FILE (or stdin)\n" 67//usage: "\nOptions:" 68//usage: "\n -c Count bytes" 69//usage: IF_UNICODE_SUPPORT( 70//usage: "\n -m Count characters" 71//usage: ) 72//usage: "\n -l Count newlines" 73//usage: "\n -w Count words" 74//usage: "\n -L Print longest line length" 75//usage: 76//usage:#define wc_example_usage 77//usage: "$ wc /etc/passwd\n" 78//usage: " 31 46 1365 /etc/passwd\n" 79 80/* Order is important if we want to be compatible with 81 * column order in "wc -cmlwL" output: 82 */ 83enum { 84 WC_LINES = 0, 85 WC_WORDS = 1, 86 WC_UNICHARS = 2, 87 WC_CHARS = 3, 88 WC_LENGTH = 4, 89 NUM_WCS = 5, 90}; 91 92int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; 93int wc_main(int argc UNUSED_PARAM, char **argv) 94{ 95 const char *arg; 96 const char *start_fmt = " %9"COUNT_FMT + 1; 97 const char *fname_fmt = " %s\n"; 98 COUNT_T *pcounts; 99 COUNT_T counts[NUM_WCS]; 100 COUNT_T totals[NUM_WCS]; 101 int num_files; 102 smallint status = EXIT_SUCCESS; 103 unsigned print_type; 104 105 init_unicode(); 106 107 print_type = getopt32(argv, "lwcmL"); 108 109 if (print_type == 0) { 110 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); 111 } 112 113 argv += optind; 114 if (!argv[0]) { 115 *--argv = (char *) bb_msg_standard_input; 116 fname_fmt = "\n"; 117 } 118 if (!argv[1]) { /* zero or one filename? */ 119 if (!((print_type-1) & print_type)) /* exactly one option? */ 120 start_fmt = "%"COUNT_FMT; 121 } 122 123 memset(totals, 0, sizeof(totals)); 124 125 pcounts = counts; 126 127 num_files = 0; 128 while ((arg = *argv++) != NULL) { 129 FILE *fp; 130 const char *s; 131 unsigned u; 132 unsigned linepos; 133 smallint in_word; 134 135 ++num_files; 136 fp = fopen_or_warn_stdin(arg); 137 if (!fp) { 138 status = EXIT_FAILURE; 139 continue; 140 } 141 142 memset(counts, 0, sizeof(counts)); 143 linepos = 0; 144 in_word = 0; 145 146 while (1) { 147 int c; 148 /* Our -w doesn't match GNU wc exactly... oh well */ 149 150 c = getc(fp); 151 if (c == EOF) { 152 if (ferror(fp)) { 153 bb_simple_perror_msg(arg); 154 status = EXIT_FAILURE; 155 } 156 goto DO_EOF; /* Treat an EOF as '\r'. */ 157 } 158 159 /* Cater for -c and -m */ 160 ++counts[WC_CHARS]; 161 if (unicode_status != UNICODE_ON /* every byte is a new char */ 162 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */ 163 ) { 164 ++counts[WC_UNICHARS]; 165 } 166 167 if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */ 168 ++linepos; 169 if (!isspace(c)) { 170 in_word = 1; 171 continue; 172 } 173 } else if ((unsigned)(c - 9) <= 4) { 174 /* \t 9 175 * \n 10 176 * \v 11 177 * \f 12 178 * \r 13 179 */ 180 if (c == '\t') { 181 linepos = (linepos | 7) + 1; 182 } else { /* '\n', '\r', '\f', or '\v' */ 183 DO_EOF: 184 if (linepos > counts[WC_LENGTH]) { 185 counts[WC_LENGTH] = linepos; 186 } 187 if (c == '\n') { 188 ++counts[WC_LINES]; 189 } 190 if (c != '\v') { 191 linepos = 0; 192 } 193 } 194 } else { 195 continue; 196 } 197 198 counts[WC_WORDS] += in_word; 199 in_word = 0; 200 if (c == EOF) { 201 break; 202 } 203 } 204 205 fclose_if_not_stdin(fp); 206 207 if (totals[WC_LENGTH] < counts[WC_LENGTH]) { 208 totals[WC_LENGTH] = counts[WC_LENGTH]; 209 } 210 totals[WC_LENGTH] -= counts[WC_LENGTH]; 211 212 OUTPUT: 213 /* coreutils wc tries hard to print pretty columns 214 * (saves results for all files, finds max col len etc...) 215 * we won't try that hard, it will bloat us too much */ 216 s = start_fmt; 217 u = 0; 218 do { 219 if (print_type & (1 << u)) { 220 printf(s, pcounts[u]); 221 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ 222 } 223 totals[u] += pcounts[u]; 224 } while (++u < NUM_WCS); 225 printf(fname_fmt, arg); 226 } 227 228 /* If more than one file was processed, we want the totals. To save some 229 * space, we set the pcounts ptr to the totals array. This has the side 230 * effect of trashing the totals array after outputting it, but that's 231 * irrelavent since we no longer need it. */ 232 if (num_files > 1) { 233 num_files = 0; /* Make sure we don't get here again. */ 234 arg = "total"; 235 pcounts = totals; 236 --argv; 237 goto OUTPUT; 238 } 239 240 fflush_stdout_and_exit(status); 241} 242