cut.c revision 131196
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42#endif /* not lint */ 43#include <sys/cdefs.h> 44__FBSDID("$FreeBSD: head/usr.bin/cut/cut.c 131196 2004-06-27 15:27:15Z tjr $"); 45 46#include <ctype.h> 47#include <err.h> 48#include <limits.h> 49#include <locale.h> 50#include <stdio.h> 51#include <stdlib.h> 52#include <string.h> 53#include <unistd.h> 54#include <wchar.h> 55 56int bflag; 57int cflag; 58char dchar; 59int dflag; 60int fflag; 61int nflag; 62int sflag; 63 64int b_cut(FILE *, const char *); 65int b_n_cut(FILE *, const char *); 66int c_cut(FILE *, const char *); 67int f_cut(FILE *, const char *); 68void get_list(char *); 69void needpos(size_t); 70static void usage(void); 71 72int 73main(int argc, char *argv[]) 74{ 75 FILE *fp; 76 int (*fcn)(FILE *, const char *); 77 int ch, rval; 78 79 setlocale(LC_ALL, ""); 80 81 fcn = NULL; 82 dchar = '\t'; /* default delimiter is \t */ 83 84 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 85 switch(ch) { 86 case 'b': 87 get_list(optarg); 88 bflag = 1; 89 break; 90 case 'c': 91 get_list(optarg); 92 cflag = 1; 93 break; 94 case 'd': 95 dchar = *optarg; 96 dflag = 1; 97 break; 98 case 'f': 99 get_list(optarg); 100 fflag = 1; 101 break; 102 case 's': 103 sflag = 1; 104 break; 105 case 'n': 106 nflag = 1; 107 break; 108 case '?': 109 default: 110 usage(); 111 } 112 argc -= optind; 113 argv += optind; 114 115 if (fflag) { 116 if (bflag || cflag || nflag) 117 usage(); 118 } else if (!(bflag || cflag) || dflag || sflag) 119 usage(); 120 else if (!bflag && nflag) 121 usage(); 122 123 if (fflag) 124 fcn = f_cut; 125 else if (cflag) 126 fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; 127 else if (bflag) 128 fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; 129 130 rval = 0; 131 if (*argv) 132 for (; *argv; ++argv) { 133 if (strcmp(*argv, "-") == 0) 134 rval |= fcn(stdin, "stdin"); 135 else { 136 if (!(fp = fopen(*argv, "r"))) { 137 warn("%s", *argv); 138 rval = 1; 139 continue; 140 } 141 fcn(fp, *argv); 142 (void)fclose(fp); 143 } 144 } 145 else 146 rval = fcn(stdin, "stdin"); 147 exit(rval); 148} 149 150size_t autostart, autostop, maxval; 151 152char *positions; 153 154void 155get_list(char *list) 156{ 157 size_t setautostart, start, stop; 158 char *pos; 159 char *p; 160 161 /* 162 * set a byte in the positions array to indicate if a field or 163 * column is to be selected; use +1, it's 1-based, not 0-based. 164 * Numbers and number ranges may be overlapping, repeated, and in 165 * any order. We handle "-3-5" although there's no real reason too. 166 */ 167 for (; (p = strsep(&list, ", \t")) != NULL;) { 168 setautostart = start = stop = 0; 169 if (*p == '-') { 170 ++p; 171 setautostart = 1; 172 } 173 if (isdigit((unsigned char)*p)) { 174 start = stop = strtol(p, &p, 10); 175 if (setautostart && start > autostart) 176 autostart = start; 177 } 178 if (*p == '-') { 179 if (isdigit((unsigned char)p[1])) 180 stop = strtol(p + 1, &p, 10); 181 if (*p == '-') { 182 ++p; 183 if (!autostop || autostop > stop) 184 autostop = stop; 185 } 186 } 187 if (*p) 188 errx(1, "[-cf] list: illegal list value"); 189 if (!stop || !start) 190 errx(1, "[-cf] list: values may not include zero"); 191 if (maxval < stop) { 192 maxval = stop; 193 needpos(maxval + 1); 194 } 195 for (pos = positions + start; start++ <= stop; *pos++ = 1); 196 } 197 198 /* overlapping ranges */ 199 if (autostop && maxval > autostop) { 200 maxval = autostop; 201 needpos(maxval + 1); 202 } 203 204 /* set autostart */ 205 if (autostart) 206 memset(positions + 1, '1', autostart); 207} 208 209void 210needpos(size_t n) 211{ 212 static size_t npos; 213 size_t oldnpos; 214 215 /* Grow the positions array to at least the specified size. */ 216 if (n > npos) { 217 oldnpos = npos; 218 if (npos == 0) 219 npos = n; 220 while (n > npos) 221 npos *= 2; 222 if ((positions = realloc(positions, npos)) == NULL) 223 err(1, "realloc"); 224 memset((char *)positions + oldnpos, 0, npos - oldnpos); 225 } 226} 227 228int 229b_cut(FILE *fp, const char *fname) 230{ 231 int ch, col; 232 char *pos; 233 234 ch = 0; 235 for (;;) { 236 pos = positions + 1; 237 for (col = maxval; col; --col) { 238 if ((ch = getc(fp)) == EOF) 239 return (0); 240 if (ch == '\n') 241 break; 242 if (*pos++) 243 (void)putchar(ch); 244 } 245 if (ch != '\n') { 246 if (autostop) 247 while ((ch = getc(fp)) != EOF && ch != '\n') 248 (void)putchar(ch); 249 else 250 while ((ch = getc(fp)) != EOF && ch != '\n'); 251 } 252 (void)putchar('\n'); 253 } 254 return (0); 255} 256 257/* 258 * Cut based on byte positions, taking care not to split multibyte characters. 259 * Although this function also handles the case where -n is not specified, 260 * b_cut() ought to be much faster. 261 */ 262int 263b_n_cut(FILE *fp, const char *fname) 264{ 265 size_t col, i, lbuflen; 266 char *lbuf; 267 int canwrite, clen, warned; 268 mbstate_t mbs; 269 270 memset(&mbs, 0, sizeof(mbs)); 271 warned = 0; 272 while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 273 for (col = 0; lbuflen > 0; col += clen) { 274 if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 275 if (!warned) { 276 warn("%s", fname); 277 warned = 1; 278 } 279 memset(&mbs, 0, sizeof(mbs)); 280 clen = 1; 281 } 282 if (clen == 0 || *lbuf == '\n') 283 break; 284 if (col < maxval && !positions[1 + col]) { 285 /* 286 * Print the character if (1) after an initial 287 * segment of un-selected bytes, the rest of 288 * it is selected, and (2) the last byte is 289 * selected. 290 */ 291 i = col; 292 while (i < col + clen && i < maxval && 293 !positions[1 + i]) 294 i++; 295 canwrite = i < col + clen; 296 for (; i < col + clen && i < maxval; i++) 297 canwrite &= positions[1 + i]; 298 if (canwrite) 299 fwrite(lbuf, 1, clen, stdout); 300 } else { 301 /* 302 * Print the character if all of it has 303 * been selected. 304 */ 305 canwrite = 1; 306 for (i = col; i < col + clen; i++) 307 if ((i >= maxval && !autostop) || 308 (i < maxval && !positions[1 + i])) { 309 canwrite = 0; 310 break; 311 } 312 if (canwrite) 313 fwrite(lbuf, 1, clen, stdout); 314 } 315 lbuf += clen; 316 lbuflen -= clen; 317 } 318 if (lbuflen > 0) 319 putchar('\n'); 320 } 321 return (warned); 322} 323 324int 325c_cut(FILE *fp, const char *fname) 326{ 327 wint_t ch; 328 int col; 329 char *pos; 330 331 ch = 0; 332 for (;;) { 333 pos = positions + 1; 334 for (col = maxval; col; --col) { 335 if ((ch = getwc(fp)) == WEOF) 336 goto out; 337 if (ch == '\n') 338 break; 339 if (*pos++) 340 (void)putwchar(ch); 341 } 342 if (ch != '\n') { 343 if (autostop) 344 while ((ch = getwc(fp)) != WEOF && ch != '\n') 345 (void)putwchar(ch); 346 else 347 while ((ch = getwc(fp)) != WEOF && ch != '\n'); 348 } 349 (void)putwchar('\n'); 350 } 351out: 352 if (ferror(fp)) { 353 warn("%s", fname); 354 return (1); 355 } 356 return (0); 357} 358 359int 360f_cut(FILE *fp, const char *fname __unused) 361{ 362 int ch, field, isdelim; 363 char *pos, *p, sep; 364 int output; 365 char *lbuf, *mlbuf; 366 size_t lbuflen; 367 368 mlbuf = NULL; 369 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 370 /* Assert EOL has a newline. */ 371 if (*(lbuf + lbuflen - 1) != '\n') { 372 /* Can't have > 1 line with no trailing newline. */ 373 mlbuf = malloc(lbuflen + 1); 374 if (mlbuf == NULL) 375 err(1, "malloc"); 376 memcpy(mlbuf, lbuf, lbuflen); 377 *(mlbuf + lbuflen) = '\n'; 378 lbuf = mlbuf; 379 } 380 output = 0; 381 for (isdelim = 0, p = lbuf;; ++p) { 382 ch = *p; 383 /* this should work if newline is delimiter */ 384 if (ch == sep) 385 isdelim = 1; 386 if (ch == '\n') { 387 if (!isdelim && !sflag) 388 (void)fwrite(lbuf, lbuflen, 1, stdout); 389 break; 390 } 391 } 392 if (!isdelim) 393 continue; 394 395 pos = positions + 1; 396 for (field = maxval, p = lbuf; field; --field, ++pos) { 397 if (*pos) { 398 if (output++) 399 (void)putchar(sep); 400 while ((ch = *p++) != '\n' && ch != sep) 401 (void)putchar(ch); 402 } else { 403 while ((ch = *p++) != '\n' && ch != sep) 404 continue; 405 } 406 if (ch == '\n') 407 break; 408 } 409 if (ch != '\n') { 410 if (autostop) { 411 if (output) 412 (void)putchar(sep); 413 for (; (ch = *p) != '\n'; ++p) 414 (void)putchar(ch); 415 } else 416 for (; (ch = *p) != '\n'; ++p); 417 } 418 (void)putchar('\n'); 419 } 420 if (mlbuf != NULL) 421 free(mlbuf); 422 return (0); 423} 424 425static void 426usage(void) 427{ 428 (void)fprintf(stderr, "%s\n%s\n%s\n", 429 "usage: cut -b list [-n] [file ...]", 430 " cut -c list [file ...]", 431 " cut -f list [-s] [-d delim] [file ...]"); 432 exit(1); 433} 434