cut.c revision 102944
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1989, 1993\n\
40	The Regents of the University of California.  All rights reserved.\n";
41static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
42#endif /* not lint */
43#include <sys/cdefs.h>
44__FBSDID("$FreeBSD: head/usr.bin/cut/cut.c 102944 2002-09-04 23:29:10Z dwmalone $");
45
46#include <ctype.h>
47#include <err.h>
48#include <limits.h>
49#include <locale.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54
/*
 * Command-line state.  Each *flag is set to 1 by main() when the
 * corresponding option letter is seen; dchar holds the field delimiter
 * used by f_cut() (main() initialises it to a tab).
 */
int	bflag;			/* -b list */
int	cflag;			/* -c list */
char	dchar;			/* -d delim (first byte of the argument) */
int	dflag;			/* -d was given */
int	fflag;			/* -f list */
int	nflag;			/* -n */
int	sflag;			/* -s */

/* Per-mode cut routines; each reads fp and writes to stdout. */
void	b_n_cut(FILE *fp, const char *fname);
void	c_cut(FILE *fp, const char *fname);
void	f_cut(FILE *fp, const char *fname);
/* List parsing and storage for the selected positions. */
void	get_list(char *list);
void	needpos(size_t n);
static 	void usage(void);
69
70int
71main(int argc, char *argv[])
72{
73	FILE *fp;
74	void (*fcn)(FILE *, const char *);
75	int ch, rval;
76
77	setlocale(LC_ALL, "");
78
79	fcn = NULL;
80	dchar = '\t';			/* default delimiter is \t */
81
82	/*
83	 * Since we don't support multi-byte characters, the -c and -b
84	 * options are equivalent.
85	 */
86	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
87		switch(ch) {
88		case 'b':
89			fcn = c_cut;
90			get_list(optarg);
91			bflag = 1;
92			break;
93		case 'c':
94			fcn = c_cut;
95			get_list(optarg);
96			cflag = 1;
97			break;
98		case 'd':
99			dchar = *optarg;
100			dflag = 1;
101			break;
102		case 'f':
103			get_list(optarg);
104			fcn = f_cut;
105			fflag = 1;
106			break;
107		case 's':
108			sflag = 1;
109			break;
110		case 'n':
111			nflag = 1;
112			break;
113		case '?':
114		default:
115			usage();
116		}
117	argc -= optind;
118	argv += optind;
119
120	if (fflag) {
121		if (bflag || cflag || nflag)
122			usage();
123	} else if (!(bflag || cflag) || dflag || sflag)
124		usage();
125	else if (!bflag && nflag)
126		usage();
127
128	if (nflag)
129		fcn = b_n_cut;
130
131	rval = 0;
132	if (*argv)
133		for (; *argv; ++argv) {
134			if (strcmp(*argv, "-") == 0)
135				fcn(stdin, "stdin");
136			else {
137				if (!(fp = fopen(*argv, "r"))) {
138					warn("%s", *argv);
139					rval = 1;
140					continue;
141				}
142				fcn(fp, *argv);
143				(void)fclose(fp);
144			}
145		}
146	else
147		fcn(stdin, "stdin");
148	exit(rval);
149}
150
/*
 * Selection state built by get_list() and consulted by the cut routines.
 *
 *   positions	byte map indexed by 1-based column/field number; a
 *		non-zero entry marks that position as selected.
 *   maxval	highest position the cut routines look up in positions.
 *   autostart	largest N seen in an item with a leading '-' ("-N");
 *		positions 1..N are selected.  0 if no such item was seen.
 *   autostop	smallest N seen in an item with a trailing '-' ("N-");
 *		the cut routines copy everything past maxval when it is
 *		non-zero.  0 if no such item was seen.
 */
size_t autostart, autostop, maxval;

char *positions;

/*
 * Parse a -b/-c/-f list (items separated by commas, blanks or tabs)
 * and record the selected positions in the globals above.
 *
 * list is modified in place (strsep() writes NULs over the separators).
 * May be called more than once; selections accumulate.  Exits via
 * errx() on a malformed item or on a zero position.
 */
void
get_list(char *list)
{
	size_t setautostart, start, stop, top, i;
	char *p;

	/*
	 * set a byte in the positions array to indicate if a field or
	 * column is to be selected; use +1, it's 1-based, not 0-based.
	 * This parser is less restrictive than the Draft 9 POSIX spec.
	 * POSIX doesn't allow lists that aren't in increasing order or
	 * overlapping lists.  We also handle "-3-5" although there's no
	 * real reason to.
	 */
	for (; (p = strsep(&list, ", \t")) != NULL;) {
		setautostart = start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		if (isdigit((unsigned char)*p)) {
			start = stop = strtol(p, &p, 10);
			if (setautostart && start > autostart)
				autostart = start;
		}
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			if (*p == '-') {
				++p;
				if (!autostop || autostop > stop)
					autostop = stop;
			}
		}
		if (*p)
			errx(1, "[-bcf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-bcf] list: values may not include zero");

		/*
		 * The array must cover the larger of stop and, for a "-N"
		 * item, N itself: a reversed item such as "-5-2" has
		 * stop < start, yet the autostart fill at the bottom of
		 * this function still writes entries 1..5.
		 */
		top = stop;
		if (setautostart && start > top)
			top = start;
		if (maxval < top) {
			maxval = top;
			needpos(maxval + 1);
		}

		/*
		 * Index rather than walk a pointer from positions + start:
		 * for a reversed range that pointer would lie outside the
		 * array even though nothing is stored through it.
		 */
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop) {
		maxval = autostop;
		needpos(maxval + 1);
	}

	/* set autostart: mark 1..autostart with the same value (1) as above */
	if (autostart)
		memset(positions + 1, 1, autostart);
}

/*
 * Grow the positions array so that it holds at least n entries.
 * Newly added entries are zeroed; existing ones are preserved.  The
 * array never shrinks.  Exits via err() if memory cannot be obtained.
 */
void
needpos(size_t n)
{
	static size_t npos;		/* current number of entries */
	size_t newsize;
	char *grown;

	if (n <= npos)
		return;

	/* First allocation is exactly n; afterwards double until it fits. */
	newsize = (npos == 0) ? n : npos;
	while (newsize < n) {
		if (newsize > (size_t)-1 / 2) {
			/* Doubling would wrap; settle for exactly n. */
			newsize = n;
			break;
		}
		newsize *= 2;
	}

	if ((grown = realloc(positions, newsize)) == NULL)
		err(1, "realloc");
	memset(grown + npos, 0, newsize - npos);
	positions = grown;
	npos = newsize;
}
230
231/*
232 * Cut based on byte positions, taking care not to split multibyte characters.
233 * Although this function also handles the case where -n is not specified,
234 * c_cut() ought to be much faster.
235 */
236void
237b_n_cut(FILE *fp, const char *fname)
238{
239	size_t col, i, lbuflen;
240	char *lbuf;
241	int canwrite, clen, warned;
242
243	warned = 0;
244	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
245		for (col = 0; lbuflen > 0; col += clen) {
246			if ((clen = mblen(lbuf, lbuflen)) < 0) {
247				if (!warned) {
248					warn("%s", fname);
249					warned = 1;
250				}
251				clen = 1;
252			}
253			if (clen == 0 || *lbuf == '\n')
254				break;
255			if (col < maxval && !positions[1 + col]) {
256				/*
257				 * Print the character if (1) after an initial
258				 * segment of un-selected bytes, the rest of
259				 * it is selected, and (2) the last byte is
260				 * selected.
261				 */
262				i = col;
263				while (i < col + clen && i < maxval &&
264				    !positions[1 + i])
265					i++;
266				canwrite = i < col + clen;
267				for (; i < col + clen && i < maxval; i++)
268					canwrite &= positions[1 + i];
269				if (canwrite)
270					fwrite(lbuf, 1, clen, stdout);
271			} else {
272				/*
273				 * Print the character if all of it has
274				 * been selected.
275				 */
276				canwrite = 1;
277				for (i = col; i < col + clen; i++)
278					if ((i >= maxval && !autostop) ||
279					    (i < maxval && !positions[1 + i])) {
280						canwrite = 0;
281						break;
282					}
283				if (canwrite)
284					fwrite(lbuf, 1, clen, stdout);
285			}
286			lbuf += clen;
287			lbuflen -= clen;
288		}
289		if (lbuflen > 0)
290			putchar('\n');
291	}
292}
293
294void
295c_cut(FILE *fp, const char *fname __unused)
296{
297	int ch, col;
298	char *pos;
299
300	ch = 0;
301	for (;;) {
302		pos = positions + 1;
303		for (col = maxval; col; --col) {
304			if ((ch = getc(fp)) == EOF)
305				return;
306			if (ch == '\n')
307				break;
308			if (*pos++)
309				(void)putchar(ch);
310		}
311		if (ch != '\n') {
312			if (autostop)
313				while ((ch = getc(fp)) != EOF && ch != '\n')
314					(void)putchar(ch);
315			else
316				while ((ch = getc(fp)) != EOF && ch != '\n');
317		}
318		(void)putchar('\n');
319	}
320}
321
322void
323f_cut(FILE *fp, const char *fname __unused)
324{
325	int ch, field, isdelim;
326	char *pos, *p, sep;
327	int output;
328	char *lbuf, *mlbuf;
329	size_t lbuflen;
330
331	mlbuf = NULL;
332	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
333		/* Assert EOL has a newline. */
334		if (*(lbuf + lbuflen - 1) != '\n') {
335			/* Can't have > 1 line with no trailing newline. */
336			mlbuf = malloc(lbuflen + 1);
337			if (mlbuf == NULL)
338				err(1, "malloc");
339			memcpy(mlbuf, lbuf, lbuflen);
340			*(mlbuf + lbuflen) = '\n';
341			lbuf = mlbuf;
342		}
343		output = 0;
344		for (isdelim = 0, p = lbuf;; ++p) {
345			ch = *p;
346			/* this should work if newline is delimiter */
347			if (ch == sep)
348				isdelim = 1;
349			if (ch == '\n') {
350				if (!isdelim && !sflag)
351					(void)fwrite(lbuf, lbuflen, 1, stdout);
352				break;
353			}
354		}
355		if (!isdelim)
356			continue;
357
358		pos = positions + 1;
359		for (field = maxval, p = lbuf; field; --field, ++pos) {
360			if (*pos) {
361				if (output++)
362					(void)putchar(sep);
363				while ((ch = *p++) != '\n' && ch != sep)
364					(void)putchar(ch);
365			} else {
366				while ((ch = *p++) != '\n' && ch != sep)
367					continue;
368			}
369			if (ch == '\n')
370				break;
371		}
372		if (ch != '\n') {
373			if (autostop) {
374				if (output)
375					(void)putchar(sep);
376				for (; (ch = *p) != '\n'; ++p)
377					(void)putchar(ch);
378			} else
379				for (; (ch = *p) != '\n'; ++p);
380		}
381		(void)putchar('\n');
382	}
383	if (mlbuf != NULL)
384		free(mlbuf);
385}
386
/*
 * Print the three accepted invocation forms on stderr and exit with
 * status 1.  Does not return.
 */
static void
usage(void)
{
	static const char *const forms[] = {
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]"
	};

	(void)fprintf(stderr, "%s\n%s\n%s\n", forms[0], forms[1], forms[2]);
	exit(1);
}
396