/* cut.c revision 75930 */
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1989, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * This code is derived from software contributed to Berkeley by 61590Srgrimes * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 71590Srgrimes * 81590Srgrimes * Redistribution and use in source and binary forms, with or without 91590Srgrimes * modification, are permitted provided that the following conditions 101590Srgrimes * are met: 111590Srgrimes * 1. Redistributions of source code must retain the above copyright 121590Srgrimes * notice, this list of conditions and the following disclaimer. 131590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141590Srgrimes * notice, this list of conditions and the following disclaimer in the 151590Srgrimes * documentation and/or other materials provided with the distribution. 161590Srgrimes * 3. All advertising materials mentioning features or use of this software 171590Srgrimes * must display the following acknowledgement: 181590Srgrimes * This product includes software developed by the University of 191590Srgrimes * California, Berkeley and its contributors. 201590Srgrimes * 4. Neither the name of the University nor the names of its contributors 211590Srgrimes * may be used to endorse or promote products derived from this software 221590Srgrimes * without specific prior written permission. 231590Srgrimes * 241590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 251590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 261590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 271590Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 281590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 291590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 301590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 311590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 321590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 331590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 341590Srgrimes * SUCH DAMAGE. 351590Srgrimes */ 361590Srgrimes 371590Srgrimes#ifndef lint 3841568Sarchiestatic const char copyright[] = 391590Srgrimes"@(#) Copyright (c) 1989, 1993\n\ 401590Srgrimes The Regents of the University of California. All rights reserved.\n"; 4141568Sarchiestatic const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 4271725Swillstatic const char rcsid[] = 4371725Swill "$FreeBSD: head/usr.bin/cut/cut.c 75930 2001-04-25 05:42:53Z dd $"; 441590Srgrimes#endif /* not lint */ 451590Srgrimes 461590Srgrimes#include <ctype.h> 4727098Scharnier#include <err.h> 481590Srgrimes#include <limits.h> 4943531Seivind#include <locale.h> 501590Srgrimes#include <stdio.h> 511590Srgrimes#include <stdlib.h> 521590Srgrimes#include <string.h> 5323693Speter#include <unistd.h> 541590Srgrimes 551590Srgrimesint cflag; 561590Srgrimeschar dchar; 571590Srgrimesint dflag; 581590Srgrimesint fflag; 591590Srgrimesint sflag; 601590Srgrimes 6171726Swillvoid c_cut (FILE *, const char *); 6271726Swillvoid f_cut (FILE *, const char *); 6371726Swillvoid get_list (char *); 6471726Swillint main (int, char **); 6571726Swillstatic void usage (void); 661590Srgrimes 671590Srgrimesint 681590Srgrimesmain(argc, argv) 691590Srgrimes int argc; 701590Srgrimes char *argv[]; 711590Srgrimes{ 721590Srgrimes FILE *fp; 7371726Swill void (*fcn) (FILE *, const char *) = NULL; 741590Srgrimes int ch; 751590Srgrimes 
7643533Seivind fcn = NULL; 7743531Seivind setlocale (LC_ALL, ""); 7843531Seivind 791590Srgrimes dchar = '\t'; /* default delimiter is \t */ 801590Srgrimes 8143531Seivind /* Since we don't support multi-byte characters, the -c and -b 8243531Seivind options are equivalent, and the -n option is meaningless. */ 8343532Seivind while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 841590Srgrimes switch(ch) { 8543531Seivind case 'b': 861590Srgrimes case 'c': 871590Srgrimes fcn = c_cut; 881590Srgrimes get_list(optarg); 891590Srgrimes cflag = 1; 901590Srgrimes break; 911590Srgrimes case 'd': 921590Srgrimes dchar = *optarg; 931590Srgrimes dflag = 1; 941590Srgrimes break; 951590Srgrimes case 'f': 961590Srgrimes get_list(optarg); 971590Srgrimes fcn = f_cut; 981590Srgrimes fflag = 1; 991590Srgrimes break; 1001590Srgrimes case 's': 1011590Srgrimes sflag = 1; 1021590Srgrimes break; 10343531Seivind case 'n': 10443531Seivind break; 1051590Srgrimes case '?': 1061590Srgrimes default: 1071590Srgrimes usage(); 1081590Srgrimes } 1091590Srgrimes argc -= optind; 1101590Srgrimes argv += optind; 1111590Srgrimes 1121590Srgrimes if (fflag) { 1131590Srgrimes if (cflag) 1141590Srgrimes usage(); 1151590Srgrimes } else if (!cflag || dflag || sflag) 1161590Srgrimes usage(); 1171590Srgrimes 1181590Srgrimes if (*argv) 1191590Srgrimes for (; *argv; ++argv) { 1201590Srgrimes if (!(fp = fopen(*argv, "r"))) 12127098Scharnier err(1, "%s", *argv); 1221590Srgrimes fcn(fp, *argv); 1231590Srgrimes (void)fclose(fp); 1241590Srgrimes } 1251590Srgrimes else 1261590Srgrimes fcn(stdin, "stdin"); 1271590Srgrimes exit(0); 1281590Srgrimes} 1291590Srgrimes 13071725Swillsize_t autostart, autostop, maxval; 1311590Srgrimes 1321590Srgrimeschar positions[_POSIX2_LINE_MAX + 1]; 1331590Srgrimes 1341590Srgrimesvoid 1351590Srgrimesget_list(list) 1361590Srgrimes char *list; 1371590Srgrimes{ 13871725Swill size_t setautostart, start, stop; 13943533Seivind char *pos; 1401590Srgrimes char *p; 1411590Srgrimes 1421590Srgrimes /* 
1431590Srgrimes * set a byte in the positions array to indicate if a field or 1441590Srgrimes * column is to be selected; use +1, it's 1-based, not 0-based. 1451590Srgrimes * This parser is less restrictive than the Draft 9 POSIX spec. 1461590Srgrimes * POSIX doesn't allow lists that aren't in increasing order or 1471590Srgrimes * overlapping lists. We also handle "-3-5" although there's no 1481590Srgrimes * real reason too. 1491590Srgrimes */ 15043533Seivind for (; (p = strsep(&list, ", \t")) != NULL;) { 1511590Srgrimes setautostart = start = stop = 0; 1521590Srgrimes if (*p == '-') { 1531590Srgrimes ++p; 1541590Srgrimes setautostart = 1; 1551590Srgrimes } 15643533Seivind if (isdigit((unsigned char)*p)) { 1571590Srgrimes start = stop = strtol(p, &p, 10); 1581590Srgrimes if (setautostart && start > autostart) 1591590Srgrimes autostart = start; 1601590Srgrimes } 1611590Srgrimes if (*p == '-') { 16243533Seivind if (isdigit((unsigned char)p[1])) 1631590Srgrimes stop = strtol(p + 1, &p, 10); 1641590Srgrimes if (*p == '-') { 1651590Srgrimes ++p; 1661590Srgrimes if (!autostop || autostop > stop) 1671590Srgrimes autostop = stop; 1681590Srgrimes } 1691590Srgrimes } 1701590Srgrimes if (*p) 17127098Scharnier errx(1, "[-cf] list: illegal list value"); 1721590Srgrimes if (!stop || !start) 17327098Scharnier errx(1, "[-cf] list: values may not include zero"); 1741590Srgrimes if (stop > _POSIX2_LINE_MAX) 17527098Scharnier errx(1, "[-cf] list: %d too large (max %d)", 1761590Srgrimes stop, _POSIX2_LINE_MAX); 1771590Srgrimes if (maxval < stop) 1781590Srgrimes maxval = stop; 1791590Srgrimes for (pos = positions + start; start++ <= stop; *pos++ = 1); 1801590Srgrimes } 1811590Srgrimes 1821590Srgrimes /* overlapping ranges */ 1831590Srgrimes if (autostop && maxval > autostop) 1841590Srgrimes maxval = autostop; 1851590Srgrimes 1861590Srgrimes /* set autostart */ 1871590Srgrimes if (autostart) 1881590Srgrimes memset(positions + 1, '1', autostart); 1891590Srgrimes} 1901590Srgrimes 
1911590Srgrimes/* ARGSUSED */ 1921590Srgrimesvoid 1931590Srgrimesc_cut(fp, fname) 1941590Srgrimes FILE *fp; 19571725Swill const char *fname; 1961590Srgrimes{ 19743533Seivind int ch, col; 19843533Seivind char *pos; 19971725Swill fname = NULL; 2001590Srgrimes 20143533Seivind ch = 0; 2021590Srgrimes for (;;) { 2031590Srgrimes pos = positions + 1; 2041590Srgrimes for (col = maxval; col; --col) { 2051590Srgrimes if ((ch = getc(fp)) == EOF) 2061590Srgrimes return; 2071590Srgrimes if (ch == '\n') 2081590Srgrimes break; 2091590Srgrimes if (*pos++) 2101590Srgrimes (void)putchar(ch); 2111590Srgrimes } 21243533Seivind if (ch != '\n') { 2131590Srgrimes if (autostop) 2141590Srgrimes while ((ch = getc(fp)) != EOF && ch != '\n') 2151590Srgrimes (void)putchar(ch); 2161590Srgrimes else 2171590Srgrimes while ((ch = getc(fp)) != EOF && ch != '\n'); 21843533Seivind } 2191590Srgrimes (void)putchar('\n'); 2201590Srgrimes } 2211590Srgrimes} 2221590Srgrimes 2231590Srgrimesvoid 2241590Srgrimesf_cut(fp, fname) 2251590Srgrimes FILE *fp; 22671725Swill const char *fname; 2271590Srgrimes{ 22843533Seivind int ch, field, isdelim; 22943533Seivind char *pos, *p, sep; 2301590Srgrimes int output; 23175930Sdd char *lbuf, *mlbuf = NULL; 23275930Sdd size_t lbuflen; 2331590Srgrimes 23475930Sdd for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 23575930Sdd /* Assert EOL has a newline. */ 23675930Sdd if (*(lbuf + lbuflen - 1) != '\n') { 23775930Sdd /* Can't have > 1 line with no trailing newline. 
*/ 23875930Sdd mlbuf = malloc(lbuflen + 1); 23975930Sdd if (mlbuf == NULL) 24075930Sdd err(1, "malloc"); 24175930Sdd memcpy(mlbuf, lbuf, lbuflen); 24275930Sdd *(mlbuf + lbuflen) = '\n'; 24375930Sdd lbuf = mlbuf; 24475930Sdd } 2457200Sache output = 0; 2461590Srgrimes for (isdelim = 0, p = lbuf;; ++p) { 24775930Sdd ch = *p; 2481590Srgrimes /* this should work if newline is delimiter */ 2491590Srgrimes if (ch == sep) 2501590Srgrimes isdelim = 1; 2511590Srgrimes if (ch == '\n') { 2521590Srgrimes if (!isdelim && !sflag) 25375930Sdd (void)fwrite(lbuf, lbuflen, 1, stdout); 2541590Srgrimes break; 2551590Srgrimes } 2561590Srgrimes } 2571590Srgrimes if (!isdelim) 2581590Srgrimes continue; 2591590Srgrimes 2601590Srgrimes pos = positions + 1; 2611590Srgrimes for (field = maxval, p = lbuf; field; --field, ++pos) { 2621590Srgrimes if (*pos) { 2631590Srgrimes if (output++) 2641590Srgrimes (void)putchar(sep); 2651590Srgrimes while ((ch = *p++) != '\n' && ch != sep) 2661590Srgrimes (void)putchar(ch); 26743533Seivind } else { 26843533Seivind while ((ch = *p++) != '\n' && ch != sep) 26943533Seivind continue; 27043533Seivind } 2711590Srgrimes if (ch == '\n') 2721590Srgrimes break; 2731590Srgrimes } 27443533Seivind if (ch != '\n') { 2751590Srgrimes if (autostop) { 2761590Srgrimes if (output) 2771590Srgrimes (void)putchar(sep); 2781590Srgrimes for (; (ch = *p) != '\n'; ++p) 2791590Srgrimes (void)putchar(ch); 2801590Srgrimes } else 2811590Srgrimes for (; (ch = *p) != '\n'; ++p); 28243533Seivind } 2831590Srgrimes (void)putchar('\n'); 2841590Srgrimes } 28575930Sdd if (mlbuf != NULL) 28675930Sdd free(mlbuf); 2871590Srgrimes} 2881590Srgrimes 28927098Scharnierstatic void 2901590Srgrimesusage() 2911590Srgrimes{ 29243531Seivind (void)fprintf(stderr, "%s\n%s\n%s\n", 29343531Seivind "usage: cut -b list [-n] [file ...]", 29443531Seivind " cut -c list [file ...]", 29527098Scharnier " cut -f list [-s] [-d delim] [file ...]"); 2961590Srgrimes exit(1); 2971590Srgrimes} 298