1/* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * This code is derived from software contributed to Berkeley by 6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. All advertising materials mentioning features or use of this software 17 * must display the following acknowledgement: 18 * This product includes software developed by the University of 19 * California, Berkeley and its contributors. 20 * 4. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37#ifndef lint 38static const char copyright[] = 39"@(#) Copyright (c) 1989, 1993\n\ 40 The Regents of the University of California. All rights reserved.\n"; 41#endif /* not lint */ 42 43#ifndef lint 44static const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 45#endif /* not lint */ 46 47#include <config.h> 48 49#include <ctype.h> 50#include <stdio.h> 51#include <errno.h> 52 53#include "bashansi.h" 54 55#ifdef HAVE_LIMITS_H 56# include <limits.h> 57#endif 58 59#ifdef HAVE_UNISTD_H 60# include <unistd.h> 61#endif 62 63#include "builtins.h" 64#include "shell.h" 65#include "bashgetopt.h" 66 67#if !defined (errno) 68extern int errno; 69#endif 70 71#if !defined (_POSIX2_LINE_MAX) 72# define _POSIX2_LINE_MAX 2048 73#endif 74 75static int cflag; 76static char dchar; 77static int dflag; 78static int fflag; 79static int sflag; 80 81static int autostart, autostop, maxval; 82static char positions[_POSIX2_LINE_MAX + 1]; 83 84static int c_cut __P((FILE *, char *)); 85static int f_cut __P((FILE *, char *)); 86static int get_list __P((char *)); 87static char *_cut_strsep __P((char **, const char *)); 88 89int 90cut_builtin(list) 91 WORD_LIST *list; 92{ 93 FILE *fp; 94 int (*fcn) __P((FILE *, char *)) = NULL; 95 int ch; 96 97 fcn = NULL; 98 dchar = '\t'; /* default delimiter is \t */ 99 100 /* Since we don't support multi-byte characters, the -c and -b 101 options are equivalent, and the -n option is meaningless. */ 102 reset_internal_getopt (); 103 while ((ch = internal_getopt (list, "b:c:d:f:sn")) != -1) 104 switch(ch) { 105 case 'b': 106 case 'c': 107 fcn = c_cut; 108 if (get_list(list_optarg) < 0) 109 return (EXECUTION_FAILURE); 110 cflag = 1; 111 break; 112 case 'd': 113 dchar = *list_optarg; 114 dflag = 1; 115 break; 116 case 'f': 117 fcn = f_cut; 118 if (get_list(list_optarg) < 0) 119 return (EXECUTION_FAILURE); 120 fflag = 1; 121 break; 122 case 's': 123 sflag = 1; 124 break; 125 case 'n': 126 break; 127 case '?': 128 default: 129 builtin_usage(); 130 return (EX_USAGE); 131 } 132 133 list = loptend; 134 135 if (fflag) { 136 if (cflag) { 137 builtin_usage(); 138 return (EX_USAGE); 139 } 140 } else if (!cflag || dflag || sflag) { 141 builtin_usage(); 142 return (EX_USAGE); 143 } 144 145 if (list) { 146 while (list) { 147 fp = fopen(list->word->word, "r"); 148 if (fp == 0) { 149 builtin_error("%s", list->word->word); 150 return (EXECUTION_FAILURE); 151 } 152 ch = (*fcn)(fp, list->word->word); 153 (void)fclose(fp); 154 if (ch < 0) 155 return (EXECUTION_FAILURE); 156 list = list->next; 157 } 158 } else { 159 ch = (*fcn)(stdin, "stdin"); 160 if (ch < 0) 161 return (EXECUTION_FAILURE); 162 } 163 164 return (EXECUTION_SUCCESS); 165} 166 167static int 168get_list(list) 169 char *list; 170{ 171 int setautostart, start, stop; 172 char *pos; 173 char *p; 174 175 /* 176 * set a byte in the positions array to indicate if a field or 177 * column is to be selected; use +1, it's 1-based, not 0-based. 178 * This parser is less restrictive than the Draft 9 POSIX spec. 179 * POSIX doesn't allow lists that aren't in increasing order or 180 * overlapping lists. We also handle "-3-5" although there's no 181 * real reason too. 182 */ 183 for (; (p = _cut_strsep(&list, ", \t")) != NULL;) { 184 setautostart = start = stop = 0; 185 if (*p == '-') { 186 ++p; 187 setautostart = 1; 188 } 189 if (isdigit((unsigned char)*p)) { 190 start = stop = strtol(p, &p, 10); 191 if (setautostart && start > autostart) 192 autostart = start; 193 } 194 if (*p == '-') { 195 if (isdigit((unsigned char)p[1])) 196 stop = strtol(p + 1, &p, 10); 197 if (*p == '-') { 198 ++p; 199 if (!autostop || autostop > stop) 200 autostop = stop; 201 } 202 } 203 if (*p) { 204 builtin_error("[-cf] list: illegal list value"); 205 return -1; 206 } 207 if (!stop || !start) { 208 builtin_error("[-cf] list: values may not include zero"); 209 return -1; 210 } 211 if (stop > _POSIX2_LINE_MAX) { 212 builtin_error("[-cf] list: %d too large (max %d)", 213 stop, _POSIX2_LINE_MAX); 214 return -1; 215 } 216 if (maxval < stop) 217 maxval = stop; 218 for (pos = positions + start; start++ <= stop; *pos++ = 1); 219 } 220 221 /* overlapping ranges */ 222 if (autostop && maxval > autostop) 223 maxval = autostop; 224 225 /* set autostart */ 226 if (autostart) 227 memset(positions + 1, '1', autostart); 228 229 return 0; 230} 231 232/* ARGSUSED */ 233static int 234c_cut(fp, fname) 235 FILE *fp; 236 char *fname; 237{ 238 int ch, col; 239 char *pos; 240 241 ch = 0; 242 for (;;) { 243 pos = positions + 1; 244 for (col = maxval; col; --col) { 245 if ((ch = getc(fp)) == EOF) 246 return; 247 if (ch == '\n') 248 break; 249 if (*pos++) 250 (void)putchar(ch); 251 } 252 if (ch != '\n') { 253 if (autostop) 254 while ((ch = getc(fp)) != EOF && ch != '\n') 255 (void)putchar(ch); 256 else 257 while ((ch = getc(fp)) != EOF && ch != '\n'); 258 } 259 (void)putchar('\n'); 260 } 261 return (0); 262} 263 264static int 265f_cut(fp, fname) 266 FILE *fp; 267 char *fname; 268{ 269 int ch, field, isdelim; 270 char *pos, *p, sep; 271 int output; 272 char lbuf[_POSIX2_LINE_MAX + 1]; 273 274 for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) { 275 output = 0; 276 for (isdelim = 0, p = lbuf;; ++p) { 277 if (!(ch = *p)) { 278 builtin_error("%s: line too long.", fname); 279 return -1; 280 } 281 /* this should work if newline is delimiter */ 282 if (ch == sep) 283 isdelim = 1; 284 if (ch == '\n') { 285 if (!isdelim && !sflag) 286 (void)printf("%s", lbuf); 287 break; 288 } 289 } 290 if (!isdelim) 291 continue; 292 293 pos = positions + 1; 294 for (field = maxval, p = lbuf; field; --field, ++pos) { 295 if (*pos) { 296 if (output++) 297 (void)putchar(sep); 298 while ((ch = *p++) != '\n' && ch != sep) 299 (void)putchar(ch); 300 } else { 301 while ((ch = *p++) != '\n' && ch != sep) 302 continue; 303 } 304 if (ch == '\n') 305 break; 306 } 307 if (ch != '\n') { 308 if (autostop) { 309 if (output) 310 (void)putchar(sep); 311 for (; (ch = *p) != '\n'; ++p) 312 (void)putchar(ch); 313 } else 314 for (; (ch = *p) != '\n'; ++p); 315 } 316 (void)putchar('\n'); 317 } 318 return (0); 319} 320 321/* 322 * Get next token from string *stringp, where tokens are possibly-empty 323 * strings separated by characters from delim. 324 * 325 * Writes NULs into the string at *stringp to end tokens. 326 * delim need not remain constant from call to call. 327 * On return, *stringp points past the last NUL written (if there might 328 * be further tokens), or is NULL (if there are definitely no more tokens). 329 * 330 * If *stringp is NULL, strsep returns NULL. 331 */ 332static char * 333_cut_strsep(stringp, delim) 334 register char **stringp; 335 register const char *delim; 336{ 337 register char *s; 338 register const char *spanp; 339 register int c, sc; 340 char *tok; 341 342 if ((s = *stringp) == NULL) 343 return (NULL); 344 for (tok = s;;) { 345 c = *s++; 346 spanp = delim; 347 do { 348 if ((sc = *spanp++) == c) { 349 if (c == 0) 350 s = NULL; 351 else 352 s[-1] = 0; 353 *stringp = s; 354 return (tok); 355 } 356 } while (sc != 0); 357 } 358 /* NOTREACHED */ 359} 360 361static char *cut_doc[] = { 362 "Select portions of each line (as specified by LIST) from each FILE", 363 "(by default, the standard input), and write them to the standard output.", 364 "Items specified by LIST are either column positions or fields delimited", 365 "by a special character. Column numbering starts at 1.", 366 (char *)0 367}; 368 369struct builtin cut_struct = { 370 "cut", 371 cut_builtin, 372 BUILTIN_ENABLED, 373 cut_doc, 374 "cut -b list [-n] [file ...] OR cut -c list [file ...] OR cut -f list [-s] [-d delim] [file ...]", 375 0 376}; 377