cut.c revision 131183
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42#endif /* not lint */ 43#include <sys/cdefs.h> 44__FBSDID("$FreeBSD: head/usr.bin/cut/cut.c 131183 2004-06-27 10:35:28Z tjr $"); 45 46#include <ctype.h> 47#include <err.h> 48#include <limits.h> 49#include <locale.h> 50#include <stdio.h> 51#include <stdlib.h> 52#include <string.h> 53#include <unistd.h> 54#include <wchar.h> 55 56int bflag; 57int cflag; 58char dchar; 59int dflag; 60int fflag; 61int nflag; 62int sflag; 63 64void b_n_cut(FILE *, const char *); 65void c_cut(FILE *, const char *); 66void f_cut(FILE *, const char *); 67void get_list(char *); 68void needpos(size_t); 69static void usage(void); 70 71int 72main(int argc, char *argv[]) 73{ 74 FILE *fp; 75 void (*fcn)(FILE *, const char *); 76 int ch, rval; 77 78 setlocale(LC_ALL, ""); 79 80 fcn = NULL; 81 dchar = '\t'; /* default delimiter is \t */ 82 83 /* 84 * Since we don't support multi-byte characters, the -c and -b 85 * options are equivalent. 86 */ 87 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 88 switch(ch) { 89 case 'b': 90 fcn = c_cut; 91 get_list(optarg); 92 bflag = 1; 93 break; 94 case 'c': 95 fcn = c_cut; 96 get_list(optarg); 97 cflag = 1; 98 break; 99 case 'd': 100 dchar = *optarg; 101 dflag = 1; 102 break; 103 case 'f': 104 get_list(optarg); 105 fcn = f_cut; 106 fflag = 1; 107 break; 108 case 's': 109 sflag = 1; 110 break; 111 case 'n': 112 nflag = 1; 113 break; 114 case '?': 115 default: 116 usage(); 117 } 118 argc -= optind; 119 argv += optind; 120 121 if (fflag) { 122 if (bflag || cflag || nflag) 123 usage(); 124 } else if (!(bflag || cflag) || dflag || sflag) 125 usage(); 126 else if (!bflag && nflag) 127 usage(); 128 129 if (nflag) 130 fcn = b_n_cut; 131 132 rval = 0; 133 if (*argv) 134 for (; *argv; ++argv) { 135 if (strcmp(*argv, "-") == 0) 136 fcn(stdin, "stdin"); 137 else { 138 if (!(fp = fopen(*argv, "r"))) { 139 warn("%s", *argv); 140 rval = 1; 141 continue; 142 } 143 fcn(fp, *argv); 144 (void)fclose(fp); 145 } 146 } 147 else 148 fcn(stdin, "stdin"); 149 exit(rval); 150} 151 152size_t autostart, autostop, maxval; 153 154char *positions; 155 156void 157get_list(char *list) 158{ 159 size_t setautostart, start, stop; 160 char *pos; 161 char *p; 162 163 /* 164 * set a byte in the positions array to indicate if a field or 165 * column is to be selected; use +1, it's 1-based, not 0-based. 166 * This parser is less restrictive than the Draft 9 POSIX spec. 167 * POSIX doesn't allow lists that aren't in increasing order or 168 * overlapping lists. We also handle "-3-5" although there's no 169 * real reason too. 170 */ 171 for (; (p = strsep(&list, ", \t")) != NULL;) { 172 setautostart = start = stop = 0; 173 if (*p == '-') { 174 ++p; 175 setautostart = 1; 176 } 177 if (isdigit((unsigned char)*p)) { 178 start = stop = strtol(p, &p, 10); 179 if (setautostart && start > autostart) 180 autostart = start; 181 } 182 if (*p == '-') { 183 if (isdigit((unsigned char)p[1])) 184 stop = strtol(p + 1, &p, 10); 185 if (*p == '-') { 186 ++p; 187 if (!autostop || autostop > stop) 188 autostop = stop; 189 } 190 } 191 if (*p) 192 errx(1, "[-cf] list: illegal list value"); 193 if (!stop || !start) 194 errx(1, "[-cf] list: values may not include zero"); 195 if (maxval < stop) { 196 maxval = stop; 197 needpos(maxval + 1); 198 } 199 for (pos = positions + start; start++ <= stop; *pos++ = 1); 200 } 201 202 /* overlapping ranges */ 203 if (autostop && maxval > autostop) { 204 maxval = autostop; 205 needpos(maxval + 1); 206 } 207 208 /* set autostart */ 209 if (autostart) 210 memset(positions + 1, '1', autostart); 211} 212 213void 214needpos(size_t n) 215{ 216 static size_t npos; 217 size_t oldnpos; 218 219 /* Grow the positions array to at least the specified size. */ 220 if (n > npos) { 221 oldnpos = npos; 222 if (npos == 0) 223 npos = n; 224 while (n > npos) 225 npos *= 2; 226 if ((positions = realloc(positions, npos)) == NULL) 227 err(1, "realloc"); 228 memset((char *)positions + oldnpos, 0, npos - oldnpos); 229 } 230} 231 232/* 233 * Cut based on byte positions, taking care not to split multibyte characters. 234 * Although this function also handles the case where -n is not specified, 235 * c_cut() ought to be much faster. 236 */ 237void 238b_n_cut(FILE *fp, const char *fname) 239{ 240 size_t col, i, lbuflen; 241 char *lbuf; 242 int canwrite, clen, warned; 243 mbstate_t mbs; 244 245 memset(&mbs, 0, sizeof(mbs)); 246 warned = 0; 247 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 248 for (col = 0; lbuflen > 0; col += clen) { 249 if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 250 if (!warned) { 251 warn("%s", fname); 252 warned = 1; 253 } 254 memset(&mbs, 0, sizeof(mbs)); 255 clen = 1; 256 } 257 if (clen == 0 || *lbuf == '\n') 258 break; 259 if (col < maxval && !positions[1 + col]) { 260 /* 261 * Print the character if (1) after an initial 262 * segment of un-selected bytes, the rest of 263 * it is selected, and (2) the last byte is 264 * selected. 265 */ 266 i = col; 267 while (i < col + clen && i < maxval && 268 !positions[1 + i]) 269 i++; 270 canwrite = i < col + clen; 271 for (; i < col + clen && i < maxval; i++) 272 canwrite &= positions[1 + i]; 273 if (canwrite) 274 fwrite(lbuf, 1, clen, stdout); 275 } else { 276 /* 277 * Print the character if all of it has 278 * been selected. 279 */ 280 canwrite = 1; 281 for (i = col; i < col + clen; i++) 282 if ((i >= maxval && !autostop) || 283 (i < maxval && !positions[1 + i])) { 284 canwrite = 0; 285 break; 286 } 287 if (canwrite) 288 fwrite(lbuf, 1, clen, stdout); 289 } 290 lbuf += clen; 291 lbuflen -= clen; 292 } 293 if (lbuflen > 0) 294 putchar('\n'); 295 } 296} 297 298void 299c_cut(FILE *fp, const char *fname __unused) 300{ 301 int ch, col; 302 char *pos; 303 304 ch = 0; 305 for (;;) { 306 pos = positions + 1; 307 for (col = maxval; col; --col) { 308 if ((ch = getc(fp)) == EOF) 309 return; 310 if (ch == '\n') 311 break; 312 if (*pos++) 313 (void)putchar(ch); 314 } 315 if (ch != '\n') { 316 if (autostop) 317 while ((ch = getc(fp)) != EOF && ch != '\n') 318 (void)putchar(ch); 319 else 320 while ((ch = getc(fp)) != EOF && ch != '\n'); 321 } 322 (void)putchar('\n'); 323 } 324} 325 326void 327f_cut(FILE *fp, const char *fname __unused) 328{ 329 int ch, field, isdelim; 330 char *pos, *p, sep; 331 int output; 332 char *lbuf, *mlbuf; 333 size_t lbuflen; 334 335 mlbuf = NULL; 336 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 337 /* Assert EOL has a newline. */ 338 if (*(lbuf + lbuflen - 1) != '\n') { 339 /* Can't have > 1 line with no trailing newline. */ 340 mlbuf = malloc(lbuflen + 1); 341 if (mlbuf == NULL) 342 err(1, "malloc"); 343 memcpy(mlbuf, lbuf, lbuflen); 344 *(mlbuf + lbuflen) = '\n'; 345 lbuf = mlbuf; 346 } 347 output = 0; 348 for (isdelim = 0, p = lbuf;; ++p) { 349 ch = *p; 350 /* this should work if newline is delimiter */ 351 if (ch == sep) 352 isdelim = 1; 353 if (ch == '\n') { 354 if (!isdelim && !sflag) 355 (void)fwrite(lbuf, lbuflen, 1, stdout); 356 break; 357 } 358 } 359 if (!isdelim) 360 continue; 361 362 pos = positions + 1; 363 for (field = maxval, p = lbuf; field; --field, ++pos) { 364 if (*pos) { 365 if (output++) 366 (void)putchar(sep); 367 while ((ch = *p++) != '\n' && ch != sep) 368 (void)putchar(ch); 369 } else { 370 while ((ch = *p++) != '\n' && ch != sep) 371 continue; 372 } 373 if (ch == '\n') 374 break; 375 } 376 if (ch != '\n') { 377 if (autostop) { 378 if (output) 379 (void)putchar(sep); 380 for (; (ch = *p) != '\n'; ++p) 381 (void)putchar(ch); 382 } else 383 for (; (ch = *p) != '\n'; ++p); 384 } 385 (void)putchar('\n'); 386 } 387 if (mlbuf != NULL) 388 free(mlbuf); 389} 390 391static void 392usage(void) 393{ 394 (void)fprintf(stderr, "%s\n%s\n%s\n", 395 "usage: cut -b list [-n] [file ...]", 396 " cut -c list [file ...]", 397 " cut -f list [-s] [-d delim] [file ...]"); 398 exit(1); 399} 400