cut.c revision 98035
1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1989, 1993\n\
40	The Regents of the University of California.  All rights reserved.\n";
41static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
42static const char rcsid[] =
43  "$FreeBSD: head/usr.bin/cut/cut.c 98035 2002-06-08 07:27:21Z tjr $";
44#endif /* not lint */
45
46#include <ctype.h>
47#include <err.h>
48#include <limits.h>
49#include <locale.h>
50#include <stdio.h>
51#include <stdlib.h>
52#include <string.h>
53#include <unistd.h>
54
55int	bflag;
56int	cflag;
57char	dchar;
58int	dflag;
59int	fflag;
60int	nflag;
61int	sflag;
62
63void	b_n_cut(FILE *, const char *);
64void	c_cut(FILE *, const char *);
65void	f_cut(FILE *, const char *);
66void	get_list(char *);
67int	main(int, char **);
68void	needpos(size_t);
69static 	void usage(void);
70
71int
72main(argc, argv)
73	int argc;
74	char *argv[];
75{
76	FILE *fp;
77	void (*fcn)(FILE *, const char *);
78	int ch, rval;
79
80	setlocale(LC_ALL, "");
81
82	fcn = NULL;
83	dchar = '\t';			/* default delimiter is \t */
84
85	/*
86	 * Since we don't support multi-byte characters, the -c and -b
87	 * options are equivalent.
88	 */
89	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
90		switch(ch) {
91		case 'b':
92			fcn = c_cut;
93			get_list(optarg);
94			bflag = 1;
95			break;
96		case 'c':
97			fcn = c_cut;
98			get_list(optarg);
99			cflag = 1;
100			break;
101		case 'd':
102			dchar = *optarg;
103			dflag = 1;
104			break;
105		case 'f':
106			get_list(optarg);
107			fcn = f_cut;
108			fflag = 1;
109			break;
110		case 's':
111			sflag = 1;
112			break;
113		case 'n':
114			nflag = 1;
115			break;
116		case '?':
117		default:
118			usage();
119		}
120	argc -= optind;
121	argv += optind;
122
123	if (fflag) {
124		if (bflag || cflag || nflag)
125			usage();
126	} else if (!(bflag || cflag) || dflag || sflag)
127		usage();
128	else if (!bflag && nflag)
129		usage();
130
131	if (nflag)
132		fcn = b_n_cut;
133
134	rval = 0;
135	if (*argv)
136		for (; *argv; ++argv) {
137			if (strcmp(*argv, "-") == 0)
138				fcn(stdin, "stdin");
139			else {
140				if (!(fp = fopen(*argv, "r"))) {
141					warn("%s", *argv);
142					rval = 1;
143					continue;
144				}
145				fcn(fp, *argv);
146				(void)fclose(fp);
147			}
148		}
149	else
150		fcn(stdin, "stdin");
151	exit(rval);
152}
153
/*
 * Selection state filled in by get_list() and read by the cutting routines.
 *
 * autostart	largest N given as a leading "-N"; positions 1..N are selected
 * autostop	smallest M given as an open-ended "M-"; when non-zero,
 *		everything beyond maxval counts as selected
 * maxval	highest position that has to be looked up in positions[]
 */
size_t autostart, autostop, maxval;

/* Byte map indexed by 1-based position; non-zero means "selected". */
char *positions;

/*
 * Parse a -b/-c/-f list and record the selected positions.
 *
 * list is split in place on commas, blanks and tabs.  Each item is one of
 * "N", "N-M", "N-", "-M", or the odd but tolerated "-N-M".  Positions are
 * 1-based, which is why index 0 of the map is never used.
 *
 * This parser is less restrictive than the Draft 9 POSIX spec: lists need
 * not be in increasing order and may overlap.  A decreasing range such as
 * "5-3" is accepted and selects nothing.
 *
 * A malformed item or a zero value terminates the program through errx().
 */
void
get_list(char *list)
{
	size_t i, setautostart, start, stop;
	char *p;

	for (; (p = strsep(&list, ", \t")) != NULL;) {
		setautostart = start = stop = 0;
		if (*p == '-') {
			++p;
			setautostart = 1;
		}
		if (isdigit((unsigned char)*p)) {
			start = stop = strtol(p, &p, 10);
			if (setautostart && start > autostart)
				autostart = start;
		}
		if (*p == '-') {
			if (isdigit((unsigned char)p[1]))
				stop = strtol(p + 1, &p, 10);
			/* Still on a '-': the item is open-ended. */
			if (*p == '-') {
				++p;
				if (!autostop || autostop > stop)
					autostop = stop;
			}
		}
		/* The same list syntax serves -b, -c and -f. */
		if (*p)
			errx(1, "[-bcf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-bcf] list: values may not include zero");
		if (maxval < stop) {
			maxval = stop;
			needpos(maxval + 1);
		}
		/*
		 * Index rather than walk a pointer: for a decreasing range
		 * start may lie beyond the end of the map, and no pointer
		 * to that position should ever be formed.
		 */
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/*
	 * Overlapping ranges: everything after an open-ended range is
	 * selected anyway, so the map need not be consulted past autostop.
	 */
	if (autostop && maxval > autostop) {
		maxval = autostop;
		needpos(maxval + 1);
	}

	/* A leading "-N" selects positions 1 through N. */
	if (autostart)
		memset(positions + 1, 1, autostart);
}
215
216void
217needpos(size_t n)
218{
219	static size_t npos;
220	size_t oldnpos;
221
222	/* Grow the positions array to at least the specified size. */
223	if (n > npos) {
224		oldnpos = npos;
225		if (npos == 0)
226			npos = n;
227		while (n > npos)
228			npos *= 2;
229		if ((positions = realloc(positions, npos)) == NULL)
230			err(1, "realloc");
231		memset((char *)positions + oldnpos, 0, npos - oldnpos);
232	}
233}
234
235/*
236 * Cut based on byte positions, taking care not to split multibyte characters.
237 * Although this function also handles the case where -n is not specified,
238 * c_cut() ought to be much faster.
239 */
240void
241b_n_cut(fp, fname)
242	FILE *fp;
243	const char *fname;
244{
245	size_t col, i, lbuflen;
246	char *lbuf;
247	int canwrite, clen, warned;
248
249	warned = 0;
250	while ((lbuf = fgetln(fp, &lbuflen)) != NULL) {
251		for (col = 0; lbuflen > 0; col += clen) {
252			if ((clen = mblen(lbuf, lbuflen)) < 0) {
253				if (!warned) {
254					warn("%s", fname);
255					warned = 1;
256				}
257				clen = 1;
258			}
259			if (clen == 0 || *lbuf == '\n')
260				break;
261			if (col < maxval && !positions[1 + col]) {
262				/*
263				 * Print the character if (1) after an initial
264				 * segment of un-selected bytes, the rest of
265				 * it is selected, and (2) the last byte is
266				 * selected.
267				 */
268				i = col;
269				while (i < col + clen && i < maxval &&
270				    !positions[1 + i])
271					i++;
272				canwrite = i < col + clen;
273				for (; i < col + clen && i < maxval; i++)
274					canwrite &= positions[1 + i];
275				if (canwrite)
276					fwrite(lbuf, 1, clen, stdout);
277			} else {
278				/*
279				 * Print the character if all of it has
280				 * been selected.
281				 */
282				canwrite = 1;
283				for (i = col; i < col + clen; i++)
284					if ((i >= maxval && !autostop) ||
285					    (i < maxval && !positions[1 + i])) {
286						canwrite = 0;
287						break;
288					}
289				if (canwrite)
290					fwrite(lbuf, 1, clen, stdout);
291			}
292			lbuf += clen;
293			lbuflen -= clen;
294		}
295		if (lbuflen > 0)
296			putchar('\n');
297	}
298}
299
300void
301c_cut(fp, fname)
302	FILE *fp;
303	const char *fname __unused;
304{
305	int ch, col;
306	char *pos;
307
308	ch = 0;
309	for (;;) {
310		pos = positions + 1;
311		for (col = maxval; col; --col) {
312			if ((ch = getc(fp)) == EOF)
313				return;
314			if (ch == '\n')
315				break;
316			if (*pos++)
317				(void)putchar(ch);
318		}
319		if (ch != '\n') {
320			if (autostop)
321				while ((ch = getc(fp)) != EOF && ch != '\n')
322					(void)putchar(ch);
323			else
324				while ((ch = getc(fp)) != EOF && ch != '\n');
325		}
326		(void)putchar('\n');
327	}
328}
329
330void
331f_cut(fp, fname)
332	FILE *fp;
333	const char *fname __unused;
334{
335	int ch, field, isdelim;
336	char *pos, *p, sep;
337	int output;
338	char *lbuf, *mlbuf;
339	size_t lbuflen;
340
341	mlbuf = NULL;
342	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
343		/* Assert EOL has a newline. */
344		if (*(lbuf + lbuflen - 1) != '\n') {
345			/* Can't have > 1 line with no trailing newline. */
346			mlbuf = malloc(lbuflen + 1);
347			if (mlbuf == NULL)
348				err(1, "malloc");
349			memcpy(mlbuf, lbuf, lbuflen);
350			*(mlbuf + lbuflen) = '\n';
351			lbuf = mlbuf;
352		}
353		output = 0;
354		for (isdelim = 0, p = lbuf;; ++p) {
355			ch = *p;
356			/* this should work if newline is delimiter */
357			if (ch == sep)
358				isdelim = 1;
359			if (ch == '\n') {
360				if (!isdelim && !sflag)
361					(void)fwrite(lbuf, lbuflen, 1, stdout);
362				break;
363			}
364		}
365		if (!isdelim)
366			continue;
367
368		pos = positions + 1;
369		for (field = maxval, p = lbuf; field; --field, ++pos) {
370			if (*pos) {
371				if (output++)
372					(void)putchar(sep);
373				while ((ch = *p++) != '\n' && ch != sep)
374					(void)putchar(ch);
375			} else {
376				while ((ch = *p++) != '\n' && ch != sep)
377					continue;
378			}
379			if (ch == '\n')
380				break;
381		}
382		if (ch != '\n') {
383			if (autostop) {
384				if (output)
385					(void)putchar(sep);
386				for (; (ch = *p) != '\n'; ++p)
387					(void)putchar(ch);
388			} else
389				for (; (ch = *p) != '\n'; ++p);
390		}
391		(void)putchar('\n');
392	}
393	if (mlbuf != NULL)
394		free(mlbuf);
395}
396
/*
 * Print the three accepted command-line forms to standard error and
 * terminate with exit status 1.  Does not return.
 */
static void
usage(void)
{
	const char *bytes_form = "usage: cut -b list [-n] [file ...]";
	const char *chars_form = "       cut -c list [file ...]";
	const char *fields_form =
	    "       cut -f list [-s] [-d delim] [file ...]";

	(void)fprintf(stderr, "%s\n%s\n%s\n",
	    bytes_form, chars_form, fields_form);
	exit(1);
}
406