/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95";
#endif /* not lint */

#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int	cflag;		/* -b or -c was given */
char	dchar;		/* field delimiter used by -f */
int	dflag;		/* -d was given */
int	fflag;		/* -f was given */
int	sflag;		/* -s: suppress lines that contain no delimiter */

void	c_cut(FILE *fp, char *fname);
void	f_cut(FILE *fp, char *fname);
void	get_list(char *list);
int	main(int argc, char **argv);
static void	usage(void);

/*
 * cut -- select columns (-b/-c) or delimiter-separated fields (-f) from
 * each line of the named files, or of standard input when none are named.
 * Exits 0 on success and 1 on a usage or open error.
 */
int
main(int argc, char *argv[])
{
	FILE *fp;
	void (*fcn)(FILE *, char *) = NULL;
	int ch;

	setlocale(LC_ALL, "");

	dchar = '\t';			/* default delimiter is \t */

	/*
	 * Since we don't support multi-byte characters, the -c and -b
	 * options are equivalent, and the -n option is meaningless.
	 */
	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) {
		switch (ch) {
		case 'b':
		case 'c':
			fcn = c_cut;
			get_list(optarg);
			cflag = 1;
			break;
		case 'd':
			dchar = *optarg;
			dflag = 1;
			break;
		case 'f':
			get_list(optarg);
			fcn = f_cut;
			fflag = 1;
			break;
		case 's':
			sflag = 1;
			break;
		case 'n':
			break;
		case '?':
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	/*
	 * Exactly one of -f or -b/-c must be chosen, and -d/-s only make
	 * sense together with -f.  After this check fcn is never NULL.
	 */
	if (fflag) {
		if (cflag)
			usage();
	} else if (!cflag || dflag || sflag)
		usage();

	if (*argv) {
		for (; *argv; ++argv) {
			if ((fp = fopen(*argv, "r")) == NULL)
				err(1, "%s", *argv);
			fcn(fp, *argv);
			(void)fclose(fp);
		}
	} else
		fcn(stdin, "stdin");
	exit(0);
}

/*
 * Selection state built by get_list():
 *   autostart - largest N seen in a leading "-N" item (select 1..N), or 0
 *   autostop  - smallest N seen in an open-ended "N-" item (select N..end
 *               of line), or 0
 *   maxval    - highest position that has to be examined one by one
 */
int autostart, autostop, maxval;

/* positions[i] is non-zero when column/field i (1-based) is selected. */
char positions[_POSIX2_LINE_MAX + 1];

/*
 * Parse a -b/-c/-f list and record the selection in positions[],
 * autostart, autostop and maxval.  May be called more than once; the
 * selections accumulate.  The list string is modified by strsep().
 * Exits with an error message on a malformed list, a zero value, or a
 * value larger than _POSIX2_LINE_MAX.
 */
void
get_list(char *list)
{
	int setautostart, openended;
	long start, stop, i;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	while ((p = strsep(&list, ", \t")) != NULL) {
		setautostart = openended = 0;
		start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		/*
		 * Values are kept as long: strtol() saturates at LONG_MAX
		 * on overflow, so an absurdly large number is caught by
		 * the range checks below instead of being truncated into
		 * a small or negative int.
		 */
		if (isdigit((unsigned char)*p))
			start = stop = strtol(p, &p, 10);
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				++p;
				openended = 1;
			}
		}
		if (*p)
			errx(1, "[-cf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-cf] list: values may not include zero");
		/*
		 * Both ends must fit in positions[]: start feeds autostart,
		 * which is later used as a memset() length.
		 */
		if (start > _POSIX2_LINE_MAX)
			errx(1, "[-cf] list: %ld too large (max %d)",
			    start, _POSIX2_LINE_MAX);
		if (stop > _POSIX2_LINE_MAX)
			errx(1, "[-cf] list: %ld too large (max %d)",
			    stop, _POSIX2_LINE_MAX);

		if (setautostart && start > autostart)
			autostart = (int)start;
		if (openended && (!autostop || autostop > stop))
			autostop = (int)stop;
		if (maxval < stop)
			maxval = (int)stop;
		/* A decreasing range (start > stop) selects nothing. */
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop)
		maxval = autostop;

	/* set autostart */
	if (autostart)
		memset(positions + 1, 1, (size_t)autostart);
}

/*
 * Column mode (-b/-c): copy the selected character positions of every
 * line of fp to stdout.  When an open-ended range was given (autostop),
 * everything past maxval is copied as well.  Every output line is
 * newline-terminated, including a final input line that lacks one.
 * fname is unused; it exists so c_cut and f_cut share a signature.
 */
void
c_cut(FILE *fp, char *fname)
{
	int ch, col;
	char *pos;

	(void)fname;
	ch = 0;
	for (;;) {
		pos = positions + 1;
		for (col = maxval; col; --col) {
			if ((ch = getc(fp)) == EOF) {
				/* EOF inside an unterminated last line. */
				if (col != maxval)
					(void)putchar('\n');
				return;
			}
			if (ch == '\n')
				break;
			if (*pos++)
				(void)putchar(ch);
		}
		if (ch != '\n') {
			/* Line is longer than maxval: copy or skip the rest. */
			if (autostop)
				while ((ch = getc(fp)) != EOF && ch != '\n')
					(void)putchar(ch);
			else
				while ((ch = getc(fp)) != EOF && ch != '\n')
					continue;
		}
		(void)putchar('\n');
	}
}

/*
 * Field mode (-f): split every line of fp on dchar and copy the selected
 * fields to stdout, re-joined with dchar.  Lines without any delimiter
 * are passed through unchanged unless -s was given.  A line longer than
 * _POSIX2_LINE_MAX bytes is a fatal error reported against fname; a final
 * line without a trailing newline is accepted and newline-terminated.
 */
void
f_cut(FILE *fp, char *fname)
{
	int ch, field, isdelim;
	char *pos, *p, sep;
	int output;
	/* One spare byte so a missing final newline can be appended. */
	char lbuf[_POSIX2_LINE_MAX + 2];

	for (sep = dchar; fgets(lbuf, _POSIX2_LINE_MAX + 1, fp);) {
		output = 0;
		for (isdelim = 0, p = lbuf;; ++p) {
			if (!(ch = *p)) {
				/*
				 * No newline in the buffer.  If fgets()
				 * stopped because it hit end of file this is
				 * just an unterminated last line; otherwise
				 * the line did not fit.
				 */
				if (!feof(fp))
					errx(1, "%s: line too long.", fname);
				p[0] = '\n';
				p[1] = '\0';
				ch = '\n';
			}
			/* this should work if newline is delimiter */
			if (ch == sep)
				isdelim = 1;
			if (ch == '\n') {
				if (!isdelim && !sflag)
					(void)printf("%s", lbuf);
				break;
			}
		}
		if (!isdelim)
			continue;

		pos = positions + 1;
		for (field = maxval, p = lbuf; field; --field, ++pos) {
			if (*pos) {
				if (output++)
					(void)putchar(sep);
				while ((ch = *p++) != '\n' && ch != sep)
					(void)putchar(ch);
			} else {
				while ((ch = *p++) != '\n' && ch != sep)
					continue;
			}
			if (ch == '\n')
				break;
		}
		if (ch != '\n') {
			/* Fields remain past maxval: copy or skip them. */
			if (autostop) {
				if (output)
					(void)putchar(sep);
				for (; (ch = *p) != '\n'; ++p)
					(void)putchar(ch);
			} else
				for (; (ch = *p) != '\n'; ++p)
					continue;
		}
		(void)putchar('\n');
	}
}

/* Print the synopsis to stderr and exit with status 1. */
static void
usage(void)
{
	(void)fprintf(stderr, "%s\n%s\n%s\n",
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]");
	exit(1);
}