1/* vi: set sw=4 ts=4: */ 2/* 3 * wc implementation for busybox 4 * 5 * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> 6 * 7 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 8 */ 9 10/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ 11/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ 12 13/* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) 14 * 15 * Rewritten to fix a number of problems and do some size optimizations. 16 * Problems in the previous busybox implementation (besides bloat) included: 17 * 1) broken 'wc -c' optimization (read note below) 18 * 2) broken handling of '-' args 19 * 3) no checking of ferror on EOF returns 20 * 4) isprint() wasn't considered when word counting. 21 * 22 * TODO: 23 * 24 * When locale support is enabled, count multibyte chars in the '-m' case. 25 * 26 * NOTES: 27 * 28 * The previous busybox wc attempted an optimization using stat for the 29 * case of counting chars only. I omitted that because it was broken. 30 * It didn't take into account the possibility of input coming from a 31 * pipe, or input from a file with file pointer not at the beginning. 32 * 33 * To implement such a speed optimization correctly, not only do you 34 * need the size, but also the file position. Note also that the 35 * file position may be past the end of file. Consider the example 36 * (adapted from example in gnu wc.c) 37 * 38 * echo hello > /tmp/testfile && 39 * (dd ibs=1k skip=1 count=0 &> /dev/null; wc -c) < /tmp/testfile 40 * 41 * for which 'wc -c' should output '0'. 42 */ 43 44#include "libbb.h" 45 46#if ENABLE_LOCALE_SUPPORT 47#define isspace_given_isprint(c) isspace(c) 48#else 49#undef isspace 50#undef isprint 51#define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9)))) 52#define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20)) 53#define isspace_given_isprint(c) ((c) == ' ') 54#endif 55 56#if ENABLE_FEATURE_WC_LARGE 57#define COUNT_T unsigned long long 58#define COUNT_FMT "llu" 59#else 60#define COUNT_T unsigned 61#define COUNT_FMT "u" 62#endif 63 64enum { 65 WC_LINES = 0, 66 WC_WORDS = 1, 67 WC_CHARS = 2, 68 WC_LENGTH = 3 69}; 70 71int wc_main(int argc, char **argv); 72int wc_main(int argc, char **argv) 73{ 74 FILE *fp; 75 const char *s, *arg; 76 const char *start_fmt = "%9"COUNT_FMT; 77 const char *fname_fmt = " %s\n"; 78 COUNT_T *pcounts; 79 COUNT_T counts[4]; 80 COUNT_T totals[4]; 81 unsigned linepos; 82 unsigned u; 83 int num_files = 0; 84 int c; 85 smallint status = EXIT_SUCCESS; 86 smallint in_word; 87 unsigned print_type; 88 89 print_type = getopt32(argv, "lwcL"); 90 91 if (print_type == 0) { 92 print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); 93 } 94 95 argv += optind; 96 if (!argv[0]) { 97 *--argv = (char *) bb_msg_standard_input; 98 fname_fmt = "\n"; 99 if (!((print_type-1) & print_type)) /* exactly one option? */ 100 start_fmt = "%"COUNT_FMT; 101 } 102 103 memset(totals, 0, sizeof(totals)); 104 105 pcounts = counts; 106 107 while ((arg = *argv++) != 0) { 108 ++num_files; 109 fp = fopen_or_warn_stdin(arg); 110 if (!fp) { 111 status = EXIT_FAILURE; 112 continue; 113 } 114 115 memset(counts, 0, sizeof(counts)); 116 linepos = 0; 117 in_word = 0; 118 119 do { 120 /* Our -w doesn't match GNU wc exactly... oh well */ 121 122 ++counts[WC_CHARS]; 123 c = getc(fp); 124 if (isprint(c)) { 125 ++linepos; 126 if (!isspace_given_isprint(c)) { 127 in_word = 1; 128 continue; 129 } 130 } else if (((unsigned int)(c - 9)) <= 4) { 131 /* \t 9 132 * \n 10 133 * \v 11 134 * \f 12 135 * \r 13 136 */ 137 if (c == '\t') { 138 linepos = (linepos | 7) + 1; 139 } else { /* '\n', '\r', '\f', or '\v' */ 140 DO_EOF: 141 if (linepos > counts[WC_LENGTH]) { 142 counts[WC_LENGTH] = linepos; 143 } 144 if (c == '\n') { 145 ++counts[WC_LINES]; 146 } 147 if (c != '\v') { 148 linepos = 0; 149 } 150 } 151 } else if (c == EOF) { 152 if (ferror(fp)) { 153 bb_perror_msg("%s", arg); 154 status = EXIT_FAILURE; 155 } 156 --counts[WC_CHARS]; 157 goto DO_EOF; /* Treat an EOF as '\r'. */ 158 } else { 159 continue; 160 } 161 162 counts[WC_WORDS] += in_word; 163 in_word = 0; 164 if (c == EOF) { 165 break; 166 } 167 } while (1); 168 169 if (totals[WC_LENGTH] < counts[WC_LENGTH]) { 170 totals[WC_LENGTH] = counts[WC_LENGTH]; 171 } 172 totals[WC_LENGTH] -= counts[WC_LENGTH]; 173 174 fclose_if_not_stdin(fp); 175 176 OUTPUT: 177 /* coreutils wc tries hard to print pretty columns 178 * (saves results for all files, find max col len etc...) 179 * we won't try that hard, it will bloat us too much */ 180 s = start_fmt; 181 u = 0; 182 do { 183 if (print_type & (1 << u)) { 184 printf(s, pcounts[u]); 185 s = " %9"COUNT_FMT; /* Ok... restore the leading space. */ 186 } 187 totals[u] += pcounts[u]; 188 } while (++u < 4); 189 printf(fname_fmt, arg); 190 } 191 192 /* If more than one file was processed, we want the totals. To save some 193 * space, we set the pcounts ptr to the totals array. This has the side 194 * effect of trashing the totals array after outputting it, but that's 195 * irrelavent since we no longer need it. */ 196 if (num_files > 1) { 197 num_files = 0; /* Make sure we don't get here again. */ 198 arg = "total"; 199 pcounts = totals; 200 --argv; 201 goto OUTPUT; 202 } 203 204 fflush_stdout_and_exit(status); 205} 206