cut.c revision 97328
1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 42static const char rcsid[] = 43 "$FreeBSD: head/usr.bin/cut/cut.c 97328 2002-05-27 02:01:25Z tjr $"; 44#endif /* not lint */ 45 46#include <ctype.h> 47#include <err.h> 48#include <limits.h> 49#include <locale.h> 50#include <stdio.h> 51#include <stdlib.h> 52#include <string.h> 53#include <unistd.h> 54 55int cflag; 56char dchar; 57int dflag; 58int fflag; 59int sflag; 60 61void c_cut (FILE *, const char *); 62void f_cut (FILE *, const char *); 63void get_list (char *); 64int main (int, char **); 65void needpos(size_t); 66static void usage (void); 67 68int 69main(argc, argv) 70 int argc; 71 char *argv[]; 72{ 73 FILE *fp; 74 void (*fcn) (FILE *, const char *) = NULL; 75 int ch, rval; 76 77 fcn = NULL; 78 setlocale (LC_ALL, ""); 79 80 dchar = '\t'; /* default delimiter is \t */ 81 82 /* Since we don't support multi-byte characters, the -c and -b 83 options are equivalent, and the -n option is meaningless. */ 84 while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1) 85 switch(ch) { 86 case 'b': 87 case 'c': 88 fcn = c_cut; 89 get_list(optarg); 90 cflag = 1; 91 break; 92 case 'd': 93 dchar = *optarg; 94 dflag = 1; 95 break; 96 case 'f': 97 get_list(optarg); 98 fcn = f_cut; 99 fflag = 1; 100 break; 101 case 's': 102 sflag = 1; 103 break; 104 case 'n': 105 break; 106 case '?': 107 default: 108 usage(); 109 } 110 argc -= optind; 111 argv += optind; 112 113 if (fflag) { 114 if (cflag) 115 usage(); 116 } else if (!cflag || dflag || sflag) 117 usage(); 118 119 rval = 0; 120 if (*argv) 121 for (; *argv; ++argv) { 122 if (strcmp(*argv, "-") == 0) 123 fcn(stdin, "stdin"); 124 else { 125 if (!(fp = fopen(*argv, "r"))) { 126 warn("%s", *argv); 127 rval = 1; 128 continue; 129 } 130 fcn(fp, *argv); 131 (void)fclose(fp); 132 } 133 } 134 else 135 fcn(stdin, "stdin"); 136 exit(rval); 137} 138 139size_t autostart, autostop, maxval; 140 141char *positions; 142 143void 144get_list(list) 145 char *list; 146{ 147 size_t setautostart, start, stop; 148 char *pos; 149 char *p; 150 151 /* 152 * set a byte in the positions array to indicate if a field or 153 * column is to be selected; use +1, it's 1-based, not 0-based. 154 * This parser is less restrictive than the Draft 9 POSIX spec. 155 * POSIX doesn't allow lists that aren't in increasing order or 156 * overlapping lists. We also handle "-3-5" although there's no 157 * real reason too. 158 */ 159 for (; (p = strsep(&list, ", \t")) != NULL;) { 160 setautostart = start = stop = 0; 161 if (*p == '-') { 162 ++p; 163 setautostart = 1; 164 } 165 if (isdigit((unsigned char)*p)) { 166 start = stop = strtol(p, &p, 10); 167 if (setautostart && start > autostart) 168 autostart = start; 169 } 170 if (*p == '-') { 171 if (isdigit((unsigned char)p[1])) 172 stop = strtol(p + 1, &p, 10); 173 if (*p == '-') { 174 ++p; 175 if (!autostop || autostop > stop) 176 autostop = stop; 177 } 178 } 179 if (*p) 180 errx(1, "[-cf] list: illegal list value"); 181 if (!stop || !start) 182 errx(1, "[-cf] list: values may not include zero"); 183 if (maxval < stop) { 184 maxval = stop; 185 needpos(maxval + 1); 186 } 187 for (pos = positions + start; start++ <= stop; *pos++ = 1); 188 } 189 190 /* overlapping ranges */ 191 if (autostop && maxval > autostop) { 192 maxval = autostop; 193 needpos(maxval + 1); 194 } 195 196 /* set autostart */ 197 if (autostart) 198 memset(positions + 1, '1', autostart); 199} 200 201void 202needpos(size_t n) 203{ 204 static size_t npos; 205 size_t oldnpos; 206 207 /* Grow the positions array to at least the specified size. */ 208 if (n > npos) { 209 oldnpos = npos; 210 if (npos == 0) 211 npos = n; 212 while (n > npos) 213 npos *= 2; 214 if ((positions = realloc(positions, npos)) == NULL) 215 err(1, "realloc"); 216 memset((char *)positions + oldnpos, 0, npos - oldnpos); 217 } 218} 219 220/* ARGSUSED */ 221void 222c_cut(fp, fname) 223 FILE *fp; 224 const char *fname; 225{ 226 int ch, col; 227 char *pos; 228 fname = NULL; 229 230 ch = 0; 231 for (;;) { 232 pos = positions + 1; 233 for (col = maxval; col; --col) { 234 if ((ch = getc(fp)) == EOF) 235 return; 236 if (ch == '\n') 237 break; 238 if (*pos++) 239 (void)putchar(ch); 240 } 241 if (ch != '\n') { 242 if (autostop) 243 while ((ch = getc(fp)) != EOF && ch != '\n') 244 (void)putchar(ch); 245 else 246 while ((ch = getc(fp)) != EOF && ch != '\n'); 247 } 248 (void)putchar('\n'); 249 } 250} 251 252void 253f_cut(fp, fname) 254 FILE *fp; 255 const char *fname __unused; 256{ 257 int ch, field, isdelim; 258 char *pos, *p, sep; 259 int output; 260 char *lbuf, *mlbuf = NULL; 261 size_t lbuflen; 262 263 for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) { 264 /* Assert EOL has a newline. */ 265 if (*(lbuf + lbuflen - 1) != '\n') { 266 /* Can't have > 1 line with no trailing newline. */ 267 mlbuf = malloc(lbuflen + 1); 268 if (mlbuf == NULL) 269 err(1, "malloc"); 270 memcpy(mlbuf, lbuf, lbuflen); 271 *(mlbuf + lbuflen) = '\n'; 272 lbuf = mlbuf; 273 } 274 output = 0; 275 for (isdelim = 0, p = lbuf;; ++p) { 276 ch = *p; 277 /* this should work if newline is delimiter */ 278 if (ch == sep) 279 isdelim = 1; 280 if (ch == '\n') { 281 if (!isdelim && !sflag) 282 (void)fwrite(lbuf, lbuflen, 1, stdout); 283 break; 284 } 285 } 286 if (!isdelim) 287 continue; 288 289 pos = positions + 1; 290 for (field = maxval, p = lbuf; field; --field, ++pos) { 291 if (*pos) { 292 if (output++) 293 (void)putchar(sep); 294 while ((ch = *p++) != '\n' && ch != sep) 295 (void)putchar(ch); 296 } else { 297 while ((ch = *p++) != '\n' && ch != sep) 298 continue; 299 } 300 if (ch == '\n') 301 break; 302 } 303 if (ch != '\n') { 304 if (autostop) { 305 if (output) 306 (void)putchar(sep); 307 for (; (ch = *p) != '\n'; ++p) 308 (void)putchar(ch); 309 } else 310 for (; (ch = *p) != '\n'; ++p); 311 } 312 (void)putchar('\n'); 313 } 314 if (mlbuf != NULL) 315 free(mlbuf); 316} 317 318static void 319usage() 320{ 321 (void)fprintf(stderr, "%s\n%s\n%s\n", 322 "usage: cut -b list [-n] [file ...]", 323 " cut -c list [file ...]", 324 " cut -f list [-s] [-d delim] [file ...]"); 325 exit(1); 326} 327