cut.c revision 97218
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42static const char rcsid[] = 43 "$FreeBSD: head/usr.bin/cut/cut.c 97218 2002-05-24 06:03:12Z tjr $"; 44#endif /* not lint */ 45 46#include <ctype.h> 47#include <err.h> 48#include <limits.h> 49#include <locale.h> 50#include <stdio.h> 51#include <stdlib.h> 52#include <string.h> 53#include <unistd.h> 54 55int cflag; 56char dchar; 57int dflag; 58int fflag; 59int sflag; 60 61void c_cut (FILE *, const char *); 62void f_cut (FILE *, const char *); 63void get_list (char *); 64int main (int, char **); 65static void usage (void); 66 67int 68main(argc, argv) 69 int argc; 70 char *argv[]; 71{ 72 FILE *fp; 73 void (*fcn) (FILE *, const char *) = NULL; 74 int ch, rval; 75 76 fcn = NULL; 77 setlocale (LC_ALL, ""); 78 79 dchar = '\t'; /* default delimiter is \t */ 80 81 /* Since we don't support multi-byte characters, the -c and -b 82 options are equivalent, and the -n option is meaningless. */ 83 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 84 switch(ch) { 85 case 'b': 86 case 'c': 87 fcn = c_cut; 88 get_list(optarg); 89 cflag = 1; 90 break; 91 case 'd': 92 dchar = *optarg; 93 dflag = 1; 94 break; 95 case 'f': 96 get_list(optarg); 97 fcn = f_cut; 98 fflag = 1; 99 break; 100 case 's': 101 sflag = 1; 102 break; 103 case 'n': 104 break; 105 case '?': 106 default: 107 usage(); 108 } 109 argc -= optind; 110 argv += optind; 111 112 if (fflag) { 113 if (cflag) 114 usage(); 115 } else if (!cflag || dflag || sflag) 116 usage(); 117 118 rval = 0; 119 if (*argv) 120 for (; *argv; ++argv) { 121 if (!(fp = fopen(*argv, "r"))) { 122 warn("%s", *argv); 123 rval = 1; 124 continue; 125 } 126 fcn(fp, *argv); 127 (void)fclose(fp); 128 } 129 else 130 fcn(stdin, "stdin"); 131 exit(rval); 132} 133 134size_t autostart, autostop, maxval; 135 136char positions[_POSIX2_LINE_MAX + 1]; 137 138void 139get_list(list) 140 char *list; 141{ 142 size_t setautostart, start, stop; 143 char *pos; 144 char *p; 145 146 /* 147 * set a byte in the positions array to indicate if a field or 148 * column is to be selected; use +1, it's 1-based, not 0-based. 149 * This parser is less restrictive than the Draft 9 POSIX spec. 150 * POSIX doesn't allow lists that aren't in increasing order or 151 * overlapping lists. We also handle "-3-5" although there's no 152 * real reason too. 153 */ 154 for (; (p = strsep(&list, ", \t")) != NULL;) { 155 setautostart = start = stop = 0; 156 if (*p == '-') { 157 ++p; 158 setautostart = 1; 159 } 160 if (isdigit((unsigned char)*p)) { 161 start = stop = strtol(p, &p, 10); 162 if (setautostart && start > autostart) 163 autostart = start; 164 } 165 if (*p == '-') { 166 if (isdigit((unsigned char)p[1])) 167 stop = strtol(p + 1, &p, 10); 168 if (*p == '-') { 169 ++p; 170 if (!autostop || autostop > stop) 171 autostop = stop; 172 } 173 } 174 if (*p) 175 errx(1, "[-cf] list: illegal list value"); 176 if (!stop || !start) 177 errx(1, "[-cf] list: values may not include zero"); 178 if (stop > _POSIX2_LINE_MAX) 179 errx(1, "[-cf] list: %ld too large (max %d)", 180 (long)stop, _POSIX2_LINE_MAX); 181 if (maxval < stop) 182 maxval = stop; 183 for (pos = positions + start; start++ <= stop; *pos++ = 1); 184 } 185 186 /* overlapping ranges */ 187 if (autostop && maxval > autostop) 188 maxval = autostop; 189 190 /* set autostart */ 191 if (autostart) 192 memset(positions + 1, '1', autostart); 193} 194 195/* ARGSUSED */ 196void 197c_cut(fp, fname) 198 FILE *fp; 199 const char *fname; 200{ 201 int ch, col; 202 char *pos; 203 fname = NULL; 204 205 ch = 0; 206 for (;;) { 207 pos = positions + 1; 208 for (col = maxval; col; --col) { 209 if ((ch = getc(fp)) == EOF) 210 return; 211 if (ch == '\n') 212 break; 213 if (*pos++) 214 (void)putchar(ch); 215 } 216 if (ch != '\n') { 217 if (autostop) 218 while ((ch = getc(fp)) != EOF && ch != '\n') 219 (void)putchar(ch); 220 else 221 while ((ch = getc(fp)) != EOF && ch != '\n'); 222 } 223 (void)putchar('\n'); 224 } 225} 226 227void 228f_cut(fp, fname) 229 FILE *fp; 230 const char *fname __unused; 231{ 232 int ch, field, isdelim; 233 char *pos, *p, sep; 234 int output; 235 char *lbuf, *mlbuf = NULL; 236 size_t lbuflen; 237 238 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 239 /* Assert EOL has a newline. */ 240 if (*(lbuf + lbuflen - 1) != '\n') { 241 /* Can't have > 1 line with no trailing newline. */ 242 mlbuf = malloc(lbuflen + 1); 243 if (mlbuf == NULL) 244 err(1, "malloc"); 245 memcpy(mlbuf, lbuf, lbuflen); 246 *(mlbuf + lbuflen) = '\n'; 247 lbuf = mlbuf; 248 } 249 output = 0; 250 for (isdelim = 0, p = lbuf;; ++p) { 251 ch = *p; 252 /* this should work if newline is delimiter */ 253 if (ch == sep) 254 isdelim = 1; 255 if (ch == '\n') { 256 if (!isdelim && !sflag) 257 (void)fwrite(lbuf, lbuflen, 1, stdout); 258 break; 259 } 260 } 261 if (!isdelim) 262 continue; 263 264 pos = positions + 1; 265 for (field = maxval, p = lbuf; field; --field, ++pos) { 266 if (*pos) { 267 if (output++) 268 (void)putchar(sep); 269 while ((ch = *p++) != '\n' && ch != sep) 270 (void)putchar(ch); 271 } else { 272 while ((ch = *p++) != '\n' && ch != sep) 273 continue; 274 } 275 if (ch == '\n') 276 break; 277 } 278 if (ch != '\n') { 279 if (autostop) { 280 if (output) 281 (void)putchar(sep); 282 for (; (ch = *p) != '\n'; ++p) 283 (void)putchar(ch); 284 } else 285 for (; (ch = *p) != '\n'; ++p); 286 } 287 (void)putchar('\n'); 288 } 289 if (mlbuf != NULL) 290 free(mlbuf); 291} 292 293static void 294usage() 295{ 296 (void)fprintf(stderr, "%s\n%s\n%s\n", 297 "usage: cut -b list [-n] [file ...]", 298 " cut -c list [file ...]", 299 " cut -f list [-s] [-d delim] [file ...]"); 300 exit(1); 301} 302