cut.c revision 99115
122347Spst/*
222347Spst * Copyright (c) 1989, 1993
329964Sache *	The Regents of the University of California.  All rights reserved.
429964Sache *
522347Spst * This code is derived from software contributed to Berkeley by
622347Spst * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
722347Spst *
822347Spst * Redistribution and use in source and binary forms, with or without
922347Spst * modification, are permitted provided that the following conditions
1022347Spst * are met:
1122347Spst * 1. Redistributions of source code must retain the above copyright
1222347Spst *    notice, this list of conditions and the following disclaimer.
1322347Spst * 2. Redistributions in binary form must reproduce the above copyright
1422347Spst *    notice, this list of conditions and the following disclaimer in the
1522347Spst *    documentation and/or other materials provided with the distribution.
1622347Spst * 3. All advertising materials mentioning features or use of this software
1729964Sache *    must display the following acknowledgement:
1829964Sache *	This product includes software developed by the University of
1929964Sache *	California, Berkeley and its contributors.
2029964Sache * 4. Neither the name of the University nor the names of its contributors
2129964Sache *    may be used to endorse or promote products derived from this software
2229964Sache *    without specific prior written permission.
2329964Sache *
2429964Sache * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
2522347Spst * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2622347Spst * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2722347Spst * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2822347Spst * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2922347Spst * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
3022347Spst * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
3122347Spst * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
3222347Spst * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
3322347Spst * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3422347Spst * SUCH DAMAGE.
3522347Spst */
3622347Spst
3722347Spst#ifndef lint
3822347Spststatic const char copyright[] =
3922347Spst"@(#) Copyright (c) 1989, 1993\n\
4022347Spst	The Regents of the University of California.  All rights reserved.\n";
4122347Spststatic const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
4222347Spst#endif /* not lint */
4322347Spst#include <sys/cdefs.h>
4422347Spst__FBSDID("$FreeBSD: head/usr.bin/cut/cut.c 99115 2002-06-30 05:34:21Z obrien $");
4529964Sache
4629964Sache#include <ctype.h>
4729964Sache#include <err.h>
4829964Sache#include <limits.h>
4929964Sache#include <locale.h>
5029964Sache#include <stdio.h>
5129964Sache#include <stdlib.h>
5229964Sache#include <string.h>
5329964Sache#include <unistd.h>
5429964Sache
5529964Sacheint	bflag;
5629964Sacheint	cflag;
5729964Sachechar	dchar;
5829964Sacheint	dflag;
5929964Sacheint	fflag;
6029964Sacheint	nflag;
6129964Sacheint	sflag;
6229964Sache
6329964Sachevoid	b_n_cut(FILE *, const char *);
6429964Sachevoid	c_cut(FILE *, const char *);
6529964Sachevoid	f_cut(FILE *, const char *);
6629964Sachevoid	get_list(char *);
6729964Sacheint	main(int, char **);
6829964Sachevoid	needpos(size_t);
6929964Sachestatic 	void usage(void);
7029964Sache
7129964Sacheint
7229964Sachemain(argc, argv)
7322347Spst	int argc;
7429964Sache	char *argv[];
7529964Sache{
7622347Spst	FILE *fp;
7722347Spst	void (*fcn)(FILE *, const char *);
7822347Spst	int ch, rval;
7922347Spst
8022347Spst	setlocale(LC_ALL, "");
8122347Spst
8222347Spst	fcn = NULL;
8322347Spst	dchar = '\t';			/* default delimiter is \t */
8422347Spst
8522347Spst	/*
8622347Spst	 * Since we don't support multi-byte characters, the -c and -b
8722347Spst	 * options are equivalent.
8822347Spst	 */
8922347Spst	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
9022347Spst		switch(ch) {
9122347Spst		case 'b':
9222347Spst			fcn = c_cut;
9322347Spst			get_list(optarg);
9422347Spst			bflag = 1;
9522347Spst			break;
9622347Spst		case 'c':
9722347Spst			fcn = c_cut;
9822347Spst			get_list(optarg);
9922347Spst			cflag = 1;
10022347Spst			break;
10122347Spst		case 'd':
10222347Spst			dchar = *optarg;
10322347Spst			dflag = 1;
10422347Spst			break;
10522347Spst		case 'f':
10622347Spst			get_list(optarg);
10722347Spst			fcn = f_cut;
10822347Spst			fflag = 1;
10922347Spst			break;
11022347Spst		case 's':
11122347Spst			sflag = 1;
11222347Spst			break;
11322347Spst		case 'n':
11422347Spst			nflag = 1;
11522347Spst			break;
11622347Spst		case '?':
11722347Spst		default:
11822347Spst			usage();
11922347Spst		}
12022347Spst	argc -= optind;
12122347Spst	argv += optind;
12222347Spst
12322347Spst	if (fflag) {
12422347Spst		if (bflag || cflag || nflag)
12522347Spst			usage();
12622347Spst	} else if (!(bflag || cflag) || dflag || sflag)
12722347Spst		usage();
12822347Spst	else if (!bflag && nflag)
12922347Spst		usage();
13022347Spst
13122347Spst	if (nflag)
13222347Spst		fcn = b_n_cut;
13322347Spst
13422347Spst	rval = 0;
13522347Spst	if (*argv)
13622347Spst		for (; *argv; ++argv) {
13722347Spst			if (strcmp(*argv, "-") == 0)
13822347Spst				fcn(stdin, "stdin");
13922347Spst			else {
14022347Spst				if (!(fp = fopen(*argv, "r"))) {
14122347Spst					warn("%s", *argv);
14222347Spst					rval = 1;
14322347Spst					continue;
14422347Spst				}
14522347Spst				fcn(fp, *argv);
14622347Spst				(void)fclose(fp);
14722347Spst			}
14822347Spst		}
14922347Spst	else
15022347Spst		fcn(stdin, "stdin");
15122347Spst	exit(rval);
15222347Spst}
15322347Spst
/*
 * Selection state filled in by get_list() and read by the cutting routines.
 */
size_t autostart;	/* largest start given with a leading '-'; 0 if none */
size_t autostop;	/* smallest open-ended ("N-") stop value; 0 if none */
size_t maxval;		/* highest position the cutters look up in positions */

/* Selection map, indexed from 1: a non-zero byte marks a selected position. */
char *positions;
15722347Spst
15822347Spstvoid
15922347Spstget_list(list)
16022347Spst	char *list;
16122347Spst{
16222347Spst	size_t setautostart, start, stop;
16322347Spst	char *pos;
16422347Spst	char *p;
16522347Spst
16622347Spst	/*
16722347Spst	 * set a byte in the positions array to indicate if a field or
16822347Spst	 * column is to be selected; use +1, it's 1-based, not 0-based.
16922347Spst	 * This parser is less restrictive than the Draft 9 POSIX spec.
17022347Spst	 * POSIX doesn't allow lists that aren't in increasing order or
17122347Spst	 * overlapping lists.  We also handle "-3-5" although there's no
17222347Spst	 * real reason too.
17322347Spst	 */
17422347Spst	for (; (p = strsep(&list, ", \t")) != NULL;) {
17522347Spst		setautostart = start = stop = 0;
17622347Spst		if (*p == '-') {
17722347Spst			++p;
17822347Spst			setautostart = 1;
17922347Spst		}
18022347Spst		if (isdigit((unsigned char)*p)) {
18122347Spst			start = stop = strtol(p, &p, 10);
18222347Spst			if (setautostart && start > autostart)
18322347Spst				autostart = start;
18422347Spst		}
18522347Spst		if (*p == '-') {
18622347Spst			if (isdigit((unsigned char)p[1]))
18722347Spst				stop = strtol(p + 1, &p, 10);
18822347Spst			if (*p == '-') {
18922347Spst				++p;
19022347Spst				if (!autostop || autostop > stop)
19122347Spst					autostop = stop;
19222347Spst			}
19322347Spst		}
19422347Spst		if (*p)
19522347Spst			errx(1, "[-cf] list: illegal list value");
19622347Spst		if (!stop || !start)
19722347Spst			errx(1, "[-cf] list: values may not include zero");
19822347Spst		if (maxval < stop) {
19922347Spst			maxval = stop;
20022347Spst			needpos(maxval + 1);
20122347Spst		}
20222347Spst		for (pos = positions + start; start++ <= stop; *pos++ = 1);
20322347Spst	}
20422347Spst
20522347Spst	/* overlapping ranges */
20622347Spst	if (autostop && maxval > autostop) {
20722347Spst		maxval = autostop;
20822347Spst		needpos(maxval + 1);
20922347Spst	}
21022347Spst
21122347Spst	/* set autostart */
21222347Spst	if (autostart)
21322347Spst		memset(positions + 1, '1', autostart);
21422347Spst}
21522347Spst
21622347Spstvoid
21722347Spstneedpos(size_t n)
21822347Spst{
21922347Spst	static size_t npos;
22022347Spst	size_t oldnpos;
22122347Spst
22222347Spst	/* Grow the positions array to at least the specified size. */
22322347Spst	if (n > npos) {
22422347Spst		oldnpos = npos;
22522347Spst		if (npos == 0)
22622347Spst			npos = n;
22722347Spst		while (n > npos)
22822347Spst			npos *= 2;
22922347Spst		if ((positions = realloc(positions, npos)) == NULL)
23022347Spst			err(1, "realloc");
23122347Spst		memset((char *)positions + oldnpos, 0, npos - oldnpos);
23222347Spst	}
23322347Spst}
23422347Spst
23522347Spst/*
23622347Spst * Cut based on byte positions, taking care not to split multibyte characters.
23722347Spst * Although this function also handles the case where -n is not specified,
23822347Spst * c_cut() ought to be much faster.
23922347Spst */
24022347Spstvoid
24122347Spstb_n_cut(fp, fname)
24222347Spst	FILE *fp;
24322347Spst	const char *fname;
24422347Spst{
24522347Spst	size_t col, i, lbuflen;
24622347Spst	char *lbuf;
24722347Spst	int canwrite, clen, warned;
24822347Spst
24922347Spst	warned = 0;
25022347Spst	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
25122347Spst		for (col = 0; lbuflen > 0; col += clen) {
25222347Spst			if ((clen = mblen(lbuf, lbuflen)) < 0) {
25322347Spst				if (!warned) {
25422347Spst					warn("%s", fname);
25522347Spst					warned = 1;
25622347Spst				}
25722347Spst				clen = 1;
25822347Spst			}
25922347Spst			if (clen == 0 || *lbuf == '\n')
26022347Spst				break;
26122347Spst			if (col < maxval && !positions[1 + col]) {
26222347Spst				/*
26322347Spst				 * Print the character if (1) after an initial
26422347Spst				 * segment of un-selected bytes, the rest of
26522347Spst				 * it is selected, and (2) the last byte is
26622347Spst				 * selected.
26722347Spst				 */
26822347Spst				i = col;
26922347Spst				while (i < col + clen && i < maxval &&
27022347Spst				    !positions[1 + i])
27129964Sache					i++;
27229964Sache				canwrite = i < col + clen;
27329964Sache				for (; i < col + clen && i < maxval; i++)
27429964Sache					canwrite &= positions[1 + i];
27529964Sache				if (canwrite)
27629964Sache					fwrite(lbuf, 1, clen, stdout);
27729964Sache			} else {
27829964Sache				/*
27929964Sache				 * Print the character if all of it has
28029964Sache				 * been selected.
28129964Sache				 */
28222347Spst				canwrite = 1;
28322347Spst				for (i = col; i < col + clen; i++)
28422347Spst					if ((i >= maxval && !autostop) ||
28522347Spst					    (i < maxval && !positions[1 + i])) {
28622347Spst						canwrite = 0;
28722347Spst						break;
28822347Spst					}
28922347Spst				if (canwrite)
29022347Spst					fwrite(lbuf, 1, clen, stdout);
29122347Spst			}
29222347Spst			lbuf += clen;
29322347Spst			lbuflen -= clen;
29422347Spst		}
29522347Spst		if (lbuflen > 0)
29622347Spst			putchar('\n');
29722347Spst	}
29822347Spst}
29922347Spst
30022347Spstvoid
30122347Spstc_cut(fp, fname)
30222347Spst	FILE *fp;
30322347Spst	const char *fname __unused;
30422347Spst{
30522347Spst	int ch, col;
30622347Spst	char *pos;
30722347Spst
30822347Spst	ch = 0;
30922347Spst	for (;;) {
31022347Spst		pos = positions + 1;
31122347Spst		for (col = maxval; col; --col) {
31222347Spst			if ((ch = getc(fp)) == EOF)
31329964Sache				return;
31429964Sache			if (ch == '\n')
31529964Sache				break;
31629964Sache			if (*pos++)
31729964Sache				(void)putchar(ch);
31829964Sache		}
31929964Sache		if (ch != '\n') {
32029964Sache			if (autostop)
32129964Sache				while ((ch = getc(fp)) != EOF && ch != '\n')
32229964Sache					(void)putchar(ch);
32329964Sache			else
32429964Sache				while ((ch = getc(fp)) != EOF && ch != '\n');
32529964Sache		}
32629964Sache		(void)putchar('\n');
32729964Sache	}
32829964Sache}
32929964Sache
33022347Spstvoid
33129964Sachef_cut(fp, fname)
33229964Sache	FILE *fp;
33329964Sache	const char *fname __unused;
33429964Sache{
33529964Sache	int ch, field, isdelim;
33629964Sache	char *pos, *p, sep;
33729964Sache	int output;
33829964Sache	char *lbuf, *mlbuf;
33929964Sache	size_t lbuflen;
34029964Sache
34129964Sache	mlbuf = NULL;
34229964Sache	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
34329964Sache		/* Assert EOL has a newline. */
34429964Sache		if (*(lbuf + lbuflen - 1) != '\n') {
34529964Sache			/* Can't have > 1 line with no trailing newline. */
34622347Spst			mlbuf = malloc(lbuflen + 1);
34729964Sache			if (mlbuf == NULL)
34829964Sache				err(1, "malloc");
34929964Sache			memcpy(mlbuf, lbuf, lbuflen);
35029964Sache			*(mlbuf + lbuflen) = '\n';
35129964Sache			lbuf = mlbuf;
35229964Sache		}
35329964Sache		output = 0;
35429964Sache		for (isdelim = 0, p = lbuf;; ++p) {
35529964Sache			ch = *p;
35629964Sache			/* this should work if newline is delimiter */
35729964Sache			if (ch == sep)
35829964Sache				isdelim = 1;
35929964Sache			if (ch == '\n') {
36029964Sache				if (!isdelim && !sflag)
36129964Sache					(void)fwrite(lbuf, lbuflen, 1, stdout);
36229964Sache				break;
36329964Sache			}
36429964Sache		}
36529964Sache		if (!isdelim)
36629964Sache			continue;
36729964Sache
36829964Sache		pos = positions + 1;
36929964Sache		for (field = maxval, p = lbuf; field; --field, ++pos) {
37029964Sache			if (*pos) {
37129964Sache				if (output++)
37229964Sache					(void)putchar(sep);
37329964Sache				while ((ch = *p++) != '\n' && ch != sep)
37429964Sache					(void)putchar(ch);
37529964Sache			} else {
37629964Sache				while ((ch = *p++) != '\n' && ch != sep)
37729964Sache					continue;
37829964Sache			}
37929964Sache			if (ch == '\n')
38029964Sache				break;
38129964Sache		}
38229964Sache		if (ch != '\n') {
38322347Spst			if (autostop) {
38422347Spst				if (output)
38522347Spst					(void)putchar(sep);
38622347Spst				for (; (ch = *p) != '\n'; ++p)
38722347Spst					(void)putchar(ch);
38822347Spst			} else
38922347Spst				for (; (ch = *p) != '\n'; ++p);
39022347Spst		}
39122347Spst		(void)putchar('\n');
39222347Spst	}
39322347Spst	if (mlbuf != NULL)
39422347Spst		free(mlbuf);
39522347Spst}
39622347Spst
/*
 * Print the three accepted command forms to stderr and exit with status 1.
 * Does not return.
 */
static void
usage(void)
{
	const char *bytes_form = "usage: cut -b list [-n] [file ...]";
	const char *chars_form = "       cut -c list [file ...]";
	const char *fields_form = "       cut -f list [-s] [-d delim] [file ...]";

	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    bytes_form, chars_form, fields_form);
	exit(1);
}
40622347Spst