11590Srgrimes/* 21590Srgrimes * Copyright (c) 1989, 1993 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * This code is derived from software contributed to Berkeley by 61590Srgrimes * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue. 71590Srgrimes * 81590Srgrimes * Redistribution and use in source and binary forms, with or without 91590Srgrimes * modification, are permitted provided that the following conditions 101590Srgrimes * are met: 111590Srgrimes * 1. Redistributions of source code must retain the above copyright 121590Srgrimes * notice, this list of conditions and the following disclaimer. 131590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 141590Srgrimes * notice, this list of conditions and the following disclaimer in the 151590Srgrimes * documentation and/or other materials provided with the distribution. 161590Srgrimes * 4. Neither the name of the University nor the names of its contributors 171590Srgrimes * may be used to endorse or promote products derived from this software 181590Srgrimes * without specific prior written permission. 191590Srgrimes * 201590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 211590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 221590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 231590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 241590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 251590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 261590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 271590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 281590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 291590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 301590Srgrimes * SUCH DAMAGE. 311590Srgrimes */ 321590Srgrimes 331590Srgrimes#ifndef lint 3441568Sarchiestatic const char copyright[] = 351590Srgrimes"@(#) Copyright (c) 1989, 1993\n\ 361590Srgrimes The Regents of the University of California. All rights reserved.\n"; 3741568Sarchiestatic const char sccsid[] = "@(#)cut.c 8.3 (Berkeley) 5/4/95"; 381590Srgrimes#endif /* not lint */ 3999115Sobrien#include <sys/cdefs.h> 4099115Sobrien__FBSDID("$FreeBSD$"); 411590Srgrimes 421590Srgrimes#include <ctype.h> 4327098Scharnier#include <err.h> 44131197Stjr#include <errno.h> 451590Srgrimes#include <limits.h> 4643531Seivind#include <locale.h> 471590Srgrimes#include <stdio.h> 481590Srgrimes#include <stdlib.h> 491590Srgrimes#include <string.h> 5023693Speter#include <unistd.h> 51131183Stjr#include <wchar.h> 521590Srgrimes 53227162Sedstatic int bflag; 54227162Sedstatic int cflag; 55227162Sedstatic wchar_t dchar; 56227162Sedstatic char dcharmb[MB_LEN_MAX + 1]; 57227162Sedstatic int dflag; 58227162Sedstatic int fflag; 59227162Sedstatic int nflag; 60227162Sedstatic int sflag; 61243320Seadlerstatic int wflag; 621590Srgrimes 63227162Sedstatic size_t autostart, autostop, maxval; 64227162Sedstatic char * positions; 65131226Stjr 66227162Sedstatic int b_cut(FILE *, const char *); 67227162Sedstatic int b_n_cut(FILE *, const char *); 68227162Sedstatic int c_cut(FILE *, const char *); 69227162Sedstatic int f_cut(FILE *, const char *); 70227162Sedstatic void get_list(char *); 71243474Sandrewstatic int is_delim(wchar_t); 72227162Sedstatic void needpos(size_t); 73227162Sedstatic void usage(void); 741590Srgrimes 751590Srgrimesint 76102944Sdwmalonemain(int argc, char *argv[]) 771590Srgrimes{ 781590Srgrimes FILE *fp; 79131194Stjr int (*fcn)(FILE *, const char *); 8097218Stjr int ch, rval; 81131197Stjr size_t n; 821590Srgrimes 8398012Stjr setlocale(LC_ALL, ""); 8498012Stjr 8543533Seivind fcn = NULL; 861590Srgrimes dchar = '\t'; /* default delimiter is \t */ 87131197Stjr strcpy(dcharmb, "\t"); 881590Srgrimes 89243320Seadler while ((ch = getopt(argc, argv, "b:c:d:f:snw")) != -1) 901590Srgrimes switch(ch) { 9143531Seivind case 'b': 9298035Stjr get_list(optarg); 9398035Stjr bflag = 1; 9498035Stjr break; 951590Srgrimes case 'c': 961590Srgrimes get_list(optarg); 971590Srgrimes cflag = 1; 981590Srgrimes break; 991590Srgrimes case 'd': 100131197Stjr n = mbrtowc(&dchar, optarg, MB_LEN_MAX, NULL); 101131197Stjr if (dchar == '\0' || n != strlen(optarg)) 102131197Stjr errx(1, "bad delimiter"); 103131197Stjr strcpy(dcharmb, optarg); 1041590Srgrimes dflag = 1; 1051590Srgrimes break; 1061590Srgrimes case 'f': 1071590Srgrimes get_list(optarg); 1081590Srgrimes fflag = 1; 1091590Srgrimes break; 1101590Srgrimes case 's': 1111590Srgrimes sflag = 1; 1121590Srgrimes break; 11343531Seivind case 'n': 11498035Stjr nflag = 1; 11543531Seivind break; 116243320Seadler case 'w': 117243320Seadler wflag = 1; 118243320Seadler break; 1191590Srgrimes case '?': 1201590Srgrimes default: 1211590Srgrimes usage(); 1221590Srgrimes } 1231590Srgrimes argc -= optind; 1241590Srgrimes argv += optind; 1251590Srgrimes 1261590Srgrimes if (fflag) { 127243320Seadler if (bflag || cflag || nflag || (wflag && dflag)) 1281590Srgrimes usage(); 129243320Seadler } else if (!(bflag || cflag) || dflag || sflag || wflag) 1301590Srgrimes usage(); 13198035Stjr else if (!bflag && nflag) 13298035Stjr usage(); 1331590Srgrimes 134131194Stjr if (fflag) 135131194Stjr fcn = f_cut; 136131194Stjr else if (cflag) 137131194Stjr fcn = MB_CUR_MAX > 1 ? c_cut : b_cut; 138131194Stjr else if (bflag) 139131194Stjr fcn = nflag && MB_CUR_MAX > 1 ? b_n_cut : b_cut; 14098035Stjr 14197218Stjr rval = 0; 1421590Srgrimes if (*argv) 1431590Srgrimes for (; *argv; ++argv) { 14497237Stjr if (strcmp(*argv, "-") == 0) 145131194Stjr rval |= fcn(stdin, "stdin"); 14697237Stjr else { 14797237Stjr if (!(fp = fopen(*argv, "r"))) { 14897237Stjr warn("%s", *argv); 14997237Stjr rval = 1; 15097237Stjr continue; 15197237Stjr } 15297237Stjr fcn(fp, *argv); 15397237Stjr (void)fclose(fp); 15497218Stjr } 1551590Srgrimes } 1561590Srgrimes else 157131194Stjr rval = fcn(stdin, "stdin"); 15897218Stjr exit(rval); 1591590Srgrimes} 1601590Srgrimes 161227162Sedstatic void 162102944Sdwmaloneget_list(char *list) 1631590Srgrimes{ 16471725Swill size_t setautostart, start, stop; 16543533Seivind char *pos; 1661590Srgrimes char *p; 1671590Srgrimes 1681590Srgrimes /* 1691590Srgrimes * set a byte in the positions array to indicate if a field or 1701590Srgrimes * column is to be selected; use +1, it's 1-based, not 0-based. 171131196Stjr * Numbers and number ranges may be overlapping, repeated, and in 172236866Skevlo * any order. We handle "-3-5" although there's no real reason to. 1731590Srgrimes */ 17443533Seivind for (; (p = strsep(&list, ", \t")) != NULL;) { 1751590Srgrimes setautostart = start = stop = 0; 1761590Srgrimes if (*p == '-') { 1771590Srgrimes ++p; 1781590Srgrimes setautostart = 1; 1791590Srgrimes } 18043533Seivind if (isdigit((unsigned char)*p)) { 1811590Srgrimes start = stop = strtol(p, &p, 10); 1821590Srgrimes if (setautostart && start > autostart) 1831590Srgrimes autostart = start; 1841590Srgrimes } 1851590Srgrimes if (*p == '-') { 18643533Seivind if (isdigit((unsigned char)p[1])) 1871590Srgrimes stop = strtol(p + 1, &p, 10); 1881590Srgrimes if (*p == '-') { 1891590Srgrimes ++p; 1901590Srgrimes if (!autostop || autostop > stop) 1911590Srgrimes autostop = stop; 1921590Srgrimes } 1931590Srgrimes } 1941590Srgrimes if (*p) 195236866Skevlo errx(1, "[-bcf] list: illegal list value"); 1961590Srgrimes if (!stop || !start) 197236866Skevlo errx(1, "[-bcf] list: values may not include zero"); 19897234Stjr if (maxval < stop) { 1991590Srgrimes maxval = stop; 20097234Stjr needpos(maxval + 1); 20197234Stjr } 2021590Srgrimes for (pos = positions + start; start++ <= stop; *pos++ = 1); 2031590Srgrimes } 2041590Srgrimes 2051590Srgrimes /* overlapping ranges */ 20697234Stjr if (autostop && maxval > autostop) { 2071590Srgrimes maxval = autostop; 20897234Stjr needpos(maxval + 1); 20997234Stjr } 2101590Srgrimes 2111590Srgrimes /* set autostart */ 2121590Srgrimes if (autostart) 2131590Srgrimes memset(positions + 1, '1', autostart); 2141590Srgrimes} 2151590Srgrimes 216227162Sedstatic void 21797234Stjrneedpos(size_t n) 21897234Stjr{ 21997234Stjr static size_t npos; 22097328Stjr size_t oldnpos; 22197234Stjr 22297234Stjr /* Grow the positions array to at least the specified size. */ 22397234Stjr if (n > npos) { 22497328Stjr oldnpos = npos; 22597234Stjr if (npos == 0) 22697234Stjr npos = n; 22797234Stjr while (n > npos) 22897234Stjr npos *= 2; 22997234Stjr if ((positions = realloc(positions, npos)) == NULL) 23097234Stjr err(1, "realloc"); 23197328Stjr memset((char *)positions + oldnpos, 0, npos - oldnpos); 23297234Stjr } 23397234Stjr} 23497234Stjr 235227162Sedstatic int 236131201Stjrb_cut(FILE *fp, const char *fname __unused) 237131194Stjr{ 238131194Stjr int ch, col; 239131194Stjr char *pos; 240131194Stjr 241131194Stjr ch = 0; 242131194Stjr for (;;) { 243131194Stjr pos = positions + 1; 244131194Stjr for (col = maxval; col; --col) { 245131194Stjr if ((ch = getc(fp)) == EOF) 246131194Stjr return (0); 247131194Stjr if (ch == '\n') 248131194Stjr break; 249131194Stjr if (*pos++) 250131194Stjr (void)putchar(ch); 251131194Stjr } 252131194Stjr if (ch != '\n') { 253131194Stjr if (autostop) 254131194Stjr while ((ch = getc(fp)) != EOF && ch != '\n') 255131194Stjr (void)putchar(ch); 256131194Stjr else 257131194Stjr while ((ch = getc(fp)) != EOF && ch != '\n'); 258131194Stjr } 259131194Stjr (void)putchar('\n'); 260131194Stjr } 261131194Stjr return (0); 262131194Stjr} 263131194Stjr 26498035Stjr/* 26598035Stjr * Cut based on byte positions, taking care not to split multibyte characters. 26698035Stjr * Although this function also handles the case where -n is not specified, 267131194Stjr * b_cut() ought to be much faster. 26898035Stjr */ 269227162Sedstatic int 270102944Sdwmaloneb_n_cut(FILE *fp, const char *fname) 27198035Stjr{ 27298035Stjr size_t col, i, lbuflen; 27398035Stjr char *lbuf; 27498035Stjr int canwrite, clen, warned; 275131183Stjr mbstate_t mbs; 27698035Stjr 277131183Stjr memset(&mbs, 0, sizeof(mbs)); 27898035Stjr warned = 0; 27998035Stjr while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 28098035Stjr for (col = 0; lbuflen > 0; col += clen) { 281131183Stjr if ((clen = mbrlen(lbuf, lbuflen, &mbs)) < 0) { 28298035Stjr if (!warned) { 28398035Stjr warn("%s", fname); 28498035Stjr warned = 1; 28598035Stjr } 286131183Stjr memset(&mbs, 0, sizeof(mbs)); 28798035Stjr clen = 1; 28898035Stjr } 28998035Stjr if (clen == 0 || *lbuf == '\n') 29098035Stjr break; 29198035Stjr if (col < maxval && !positions[1 + col]) { 29298035Stjr /* 29398035Stjr * Print the character if (1) after an initial 29498035Stjr * segment of un-selected bytes, the rest of 29598035Stjr * it is selected, and (2) the last byte is 29698035Stjr * selected. 29798035Stjr */ 29898035Stjr i = col; 29998035Stjr while (i < col + clen && i < maxval && 30098035Stjr !positions[1 + i]) 30198035Stjr i++; 30298035Stjr canwrite = i < col + clen; 30398035Stjr for (; i < col + clen && i < maxval; i++) 30498035Stjr canwrite &= positions[1 + i]; 30598035Stjr if (canwrite) 30698035Stjr fwrite(lbuf, 1, clen, stdout); 30798035Stjr } else { 30898035Stjr /* 30998035Stjr * Print the character if all of it has 31098035Stjr * been selected. 31198035Stjr */ 31298035Stjr canwrite = 1; 31398035Stjr for (i = col; i < col + clen; i++) 31498035Stjr if ((i >= maxval && !autostop) || 31598035Stjr (i < maxval && !positions[1 + i])) { 31698035Stjr canwrite = 0; 31798035Stjr break; 31898035Stjr } 31998035Stjr if (canwrite) 32098035Stjr fwrite(lbuf, 1, clen, stdout); 32198035Stjr } 32298035Stjr lbuf += clen; 32398035Stjr lbuflen -= clen; 32498035Stjr } 32598035Stjr if (lbuflen > 0) 32698035Stjr putchar('\n'); 32798035Stjr } 328131194Stjr return (warned); 32998035Stjr} 33098035Stjr 331227162Sedstatic int 332131194Stjrc_cut(FILE *fp, const char *fname) 3331590Srgrimes{ 334131194Stjr wint_t ch; 335131194Stjr int col; 33643533Seivind char *pos; 3371590Srgrimes 33843533Seivind ch = 0; 3391590Srgrimes for (;;) { 3401590Srgrimes pos = positions + 1; 3411590Srgrimes for (col = maxval; col; --col) { 342131194Stjr if ((ch = getwc(fp)) == WEOF) 343131194Stjr goto out; 3441590Srgrimes if (ch == '\n') 3451590Srgrimes break; 3461590Srgrimes if (*pos++) 347131194Stjr (void)putwchar(ch); 3481590Srgrimes } 34943533Seivind if (ch != '\n') { 3501590Srgrimes if (autostop) 351131194Stjr while ((ch = getwc(fp)) != WEOF && ch != '\n') 352131194Stjr (void)putwchar(ch); 3531590Srgrimes else 354131194Stjr while ((ch = getwc(fp)) != WEOF && ch != '\n'); 35543533Seivind } 356131194Stjr (void)putwchar('\n'); 3571590Srgrimes } 358131194Stjrout: 359131194Stjr if (ferror(fp)) { 360131194Stjr warn("%s", fname); 361131194Stjr return (1); 362131194Stjr } 363131194Stjr return (0); 3641590Srgrimes} 3651590Srgrimes 366227162Sedstatic int 367243474Sandrewis_delim(wchar_t ch) 368243320Seadler{ 369243320Seadler if (wflag) { 370243320Seadler if (ch == ' ' || ch == '\t') 371243320Seadler return 1; 372243320Seadler } else { 373243320Seadler if (ch == dchar) 374243320Seadler return 1; 375243320Seadler } 376243320Seadler return 0; 377243320Seadler} 378243320Seadler 379243320Seadlerstatic int 380131197Stjrf_cut(FILE *fp, const char *fname) 3811590Srgrimes{ 382131197Stjr wchar_t ch; 383131197Stjr int field, i, isdelim; 384131197Stjr char *pos, *p; 3851590Srgrimes int output; 38698012Stjr char *lbuf, *mlbuf; 387137250Stjr size_t clen, lbuflen, reallen; 3881590Srgrimes 38998012Stjr mlbuf = NULL; 390243320Seadler while ((lbuf = fgetln(fp, &lbuflen)) != NULL) { 391137250Stjr reallen = lbuflen; 39275930Sdd /* Assert EOL has a newline. */ 39375930Sdd if (*(lbuf + lbuflen - 1) != '\n') { 39475930Sdd /* Can't have > 1 line with no trailing newline. */ 39575930Sdd mlbuf = malloc(lbuflen + 1); 39675930Sdd if (mlbuf == NULL) 39775930Sdd err(1, "malloc"); 39875930Sdd memcpy(mlbuf, lbuf, lbuflen); 39975930Sdd *(mlbuf + lbuflen) = '\n'; 40075930Sdd lbuf = mlbuf; 401137250Stjr reallen++; 40275930Sdd } 4037200Sache output = 0; 404131197Stjr for (isdelim = 0, p = lbuf;; p += clen) { 405137250Stjr clen = mbrtowc(&ch, p, lbuf + reallen - p, NULL); 406131197Stjr if (clen == (size_t)-1 || clen == (size_t)-2) { 407131197Stjr warnc(EILSEQ, "%s", fname); 408131197Stjr free(mlbuf); 409131197Stjr return (1); 410131197Stjr } 411131197Stjr if (clen == 0) 412131197Stjr clen = 1; 4131590Srgrimes /* this should work if newline is delimiter */ 414243320Seadler if (is_delim(ch)) 4151590Srgrimes isdelim = 1; 4161590Srgrimes if (ch == '\n') { 4171590Srgrimes if (!isdelim && !sflag) 41875930Sdd (void)fwrite(lbuf, lbuflen, 1, stdout); 4191590Srgrimes break; 4201590Srgrimes } 4211590Srgrimes } 4221590Srgrimes if (!isdelim) 4231590Srgrimes continue; 4241590Srgrimes 4251590Srgrimes pos = positions + 1; 4261590Srgrimes for (field = maxval, p = lbuf; field; --field, ++pos) { 427131197Stjr if (*pos && output++) 428131197Stjr for (i = 0; dcharmb[i] != '\0'; i++) 429131197Stjr putchar(dcharmb[i]); 430131197Stjr for (;;) { 431137250Stjr clen = mbrtowc(&ch, p, lbuf + reallen - p, 432131197Stjr NULL); 433131197Stjr if (clen == (size_t)-1 || clen == (size_t)-2) { 434131197Stjr warnc(EILSEQ, "%s", fname); 435131197Stjr free(mlbuf); 436131197Stjr return (1); 437131197Stjr } 438131197Stjr if (clen == 0) 439131197Stjr clen = 1; 440131197Stjr p += clen; 441243320Seadler if (ch == '\n' || is_delim(ch)) { 442243320Seadler /* compress whitespace */ 443243320Seadler if (wflag && ch != '\n') 444243320Seadler while (is_delim(*p)) 445243320Seadler p++; 446131197Stjr break; 447243320Seadler } 448131197Stjr if (*pos) 449131197Stjr for (i = 0; i < (int)clen; i++) 450131197Stjr putchar(p[i - clen]); 45143533Seivind } 4521590Srgrimes if (ch == '\n') 4531590Srgrimes break; 4541590Srgrimes } 45543533Seivind if (ch != '\n') { 4561590Srgrimes if (autostop) { 4571590Srgrimes if (output) 458131197Stjr for (i = 0; dcharmb[i] != '\0'; i++) 459131197Stjr putchar(dcharmb[i]); 4601590Srgrimes for (; (ch = *p) != '\n'; ++p) 4611590Srgrimes (void)putchar(ch); 4621590Srgrimes } else 4631590Srgrimes for (; (ch = *p) != '\n'; ++p); 46443533Seivind } 4651590Srgrimes (void)putchar('\n'); 4661590Srgrimes } 467131197Stjr free(mlbuf); 468131194Stjr return (0); 4691590Srgrimes} 4701590Srgrimes 47127098Scharnierstatic void 472102944Sdwmaloneusage(void) 4731590Srgrimes{ 47443531Seivind (void)fprintf(stderr, "%s\n%s\n%s\n", 47543531Seivind "usage: cut -b list [-n] [file ...]", 47643531Seivind " cut -c list [file ...]", 477243320Seadler " cut -f list [-s] [-w | -d delim] [file ...]"); 4781590Srgrimes exit(1); 4791590Srgrimes} 480