cut.c revision 43533
1290650Shselasky/*
2347819Shselasky * Copyright (c) 1989, 1993
3290650Shselasky *	The Regents of the University of California.  All rights reserved.
4290650Shselasky *
5290650Shselasky * This code is derived from software contributed to Berkeley by
6290650Shselasky * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7290650Shselasky *
8290650Shselasky * Redistribution and use in source and binary forms, with or without
9290650Shselasky * modification, are permitted provided that the following conditions
10290650Shselasky * are met:
11290650Shselasky * 1. Redistributions of source code must retain the above copyright
12290650Shselasky *    notice, this list of conditions and the following disclaimer.
13290650Shselasky * 2. Redistributions in binary form must reproduce the above copyright
14290650Shselasky *    notice, this list of conditions and the following disclaimer in the
15290650Shselasky *    documentation and/or other materials provided with the distribution.
16290650Shselasky * 3. All advertising materials mentioning features or use of this software
17290650Shselasky *    must display the following acknowledgement:
18290650Shselasky *	This product includes software developed by the University of
19290650Shselasky *	California, Berkeley and its contributors.
20290650Shselasky * 4. Neither the name of the University nor the names of its contributors
21290650Shselasky *    may be used to endorse or promote products derived from this software
22290650Shselasky *    without specific prior written permission.
23290650Shselasky *
24290650Shselasky * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25290650Shselasky * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26290650Shselasky * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27290650Shselasky * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28290650Shselasky * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29290650Shselasky * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30290650Shselasky * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31290650Shselasky * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32290650Shselasky * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33290650Shselasky * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34290650Shselasky * SUCH DAMAGE.
35290650Shselasky */
36347802Shselasky
#ifndef lint
/*
 * Identification strings embedded in the object file; both are omitted
 * when the source is processed with "lint" defined.
 */
static const char copyright[] =
	"@(#) Copyright (c) 1989, 1993\n"
	"\tThe Regents of the University of California.  All rights reserved.\n";

static const char sccsid[] = "@(#)cut.c\t8.3 (Berkeley) 5/4/95";
#endif /* not lint */
46290650Shselasky
47329200Shselasky#include <ctype.h>
48290650Shselasky#include <err.h>
49341948Shselasky#include <errno.h>
50341948Shselasky#include <limits.h>
51290650Shselasky#include <locale.h>
52290650Shselasky#include <stdio.h>
53290650Shselasky#include <stdlib.h>
54290650Shselasky#include <string.h>
55347839Shselasky#include <unistd.h>
56347847Shselasky
/*
 * Command-line state filled in by main() while parsing options.
 *
 *	cflag	set when -b or -c was given (byte/character mode)
 *	dflag	set when -d was given
 *	fflag	set when -f was given (field mode)
 *	sflag	set when -s was given; f_cut() then drops lines that
 *		contain no delimiter instead of copying them through
 *	dchar	field delimiter used by f_cut(); main() defaults it to
 *		a tab and -d replaces it with the first byte of its argument
 */
int	cflag, dflag, fflag, sflag;
char	dchar;
62347835Shselasky
/*
 * Forward declarations for the functions defined in this file.
 *
 * Written as ordinary ISO C prototypes: the old __P(()) wrapper only
 * exists where <sys/cdefs.h> provides it, and it expands to exactly
 * these prototypes on any ISO C compiler.
 */
void	c_cut(FILE *fp, char *fname);
void	f_cut(FILE *fp, char *fname);
void	get_list(char *list);
int	main(int argc, char *argv[]);
static	void usage(void);
68347835Shselasky
69347835Shselaskyint
70347835Shselaskymain(argc, argv)
71290650Shselasky	int argc;
72347819Shselasky	char *argv[];
73347819Shselasky{
74347819Shselasky	FILE *fp;
75347819Shselasky	void (*fcn) __P((FILE *, char *)) = NULL;
76347819Shselasky	int ch;
77290650Shselasky
78290650Shselasky	fcn = NULL;
79290650Shselasky	setlocale (LC_ALL, "");
80290650Shselasky
81290650Shselasky	dchar = '\t';			/* default delimiter is \t */
82290650Shselasky
83290650Shselasky	/* Since we don't support multi-byte characters, the -c and -b
84290650Shselasky	   options are equivalent, and the -n option is meaningless. */
85290650Shselasky	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
86290650Shselasky		switch(ch) {
87290650Shselasky		case 'b':
88290650Shselasky		case 'c':
89329209Shselasky			fcn = c_cut;
90329209Shselasky			get_list(optarg);
91329209Shselasky			cflag = 1;
92329209Shselasky			break;
93329209Shselasky		case 'd':
94290650Shselasky			dchar = *optarg;
95290650Shselasky			dflag = 1;
96290650Shselasky			break;
97290650Shselasky		case 'f':
98290650Shselasky			get_list(optarg);
99290650Shselasky			fcn = f_cut;
100290650Shselasky			fflag = 1;
101290650Shselasky			break;
102290650Shselasky		case 's':
103290650Shselasky			sflag = 1;
104290650Shselasky			break;
105290650Shselasky		case 'n':
106290650Shselasky			break;
107290650Shselasky		case '?':
108290650Shselasky		default:
109290650Shselasky			usage();
110290650Shselasky		}
111290650Shselasky	argc -= optind;
112290650Shselasky	argv += optind;
113290650Shselasky
114290650Shselasky	if (fflag) {
115290650Shselasky		if (cflag)
116290650Shselasky			usage();
117290650Shselasky	} else if (!cflag || dflag || sflag)
118290650Shselasky		usage();
119290650Shselasky
120290650Shselasky	if (*argv)
121290650Shselasky		for (; *argv; ++argv) {
122290650Shselasky			if (!(fp = fopen(*argv, "r")))
123290650Shselasky				err(1, "%s", *argv);
124290650Shselasky			fcn(fp, *argv);
125290650Shselasky			(void)fclose(fp);
126290650Shselasky		}
127290650Shselasky	else
128290650Shselasky		fcn(stdin, "stdin");
129290650Shselasky	exit(0);
130290650Shselasky}
131290650Shselasky
/*
 * Selection state built by get_list() and consumed by c_cut()/f_cut().
 */

/* largest N seen in a leading "-N" item; positions 1..N get selected */
int	autostart;

/* smallest N seen in an open-ended "N-" item, or 0 if there was none */
int	autostop;

/* highest position the cutters have to examine one by one */
int	maxval;

/*
 * positions[i] is non-zero when column/field i is selected; positions
 * are 1-based, so slot 0 is never consulted.
 */
char	positions[_POSIX2_LINE_MAX + 1];
135290650Shselasky
136290650Shselaskyvoid
137290650Shselaskyget_list(list)
138290650Shselasky	char *list;
139290650Shselasky{
140290650Shselasky	int setautostart, start, stop;
141290650Shselasky	char *pos;
142290650Shselasky	char *p;
143290650Shselasky
144290650Shselasky	/*
145290650Shselasky	 * set a byte in the positions array to indicate if a field or
146290650Shselasky	 * column is to be selected; use +1, it's 1-based, not 0-based.
147290650Shselasky	 * This parser is less restrictive than the Draft 9 POSIX spec.
148290650Shselasky	 * POSIX doesn't allow lists that aren't in increasing order or
149290650Shselasky	 * overlapping lists.  We also handle "-3-5" although there's no
150290650Shselasky	 * real reason too.
151290650Shselasky	 */
152290650Shselasky	for (; (p = strsep(&list, ", \t")) != NULL;) {
153290650Shselasky		setautostart = start = stop = 0;
154290650Shselasky		if (*p == '-') {
155290650Shselasky			++p;
156290650Shselasky			setautostart = 1;
157290650Shselasky		}
158290650Shselasky		if (isdigit((unsigned char)*p)) {
159290650Shselasky			start = stop = strtol(p, &p, 10);
160290650Shselasky			if (setautostart && start > autostart)
161290650Shselasky				autostart = start;
162290650Shselasky		}
163290650Shselasky		if (*p == '-') {
164290650Shselasky			if (isdigit((unsigned char)p[1]))
165290650Shselasky				stop = strtol(p + 1, &p, 10);
166290650Shselasky			if (*p == '-') {
167290650Shselasky				++p;
168290650Shselasky				if (!autostop || autostop > stop)
169290650Shselasky					autostop = stop;
170290650Shselasky			}
171290650Shselasky		}
172290650Shselasky		if (*p)
173290650Shselasky			errx(1, "[-cf] list: illegal list value");
174290650Shselasky		if (!stop || !start)
175353224Shselasky			errx(1, "[-cf] list: values may not include zero");
176290650Shselasky		if (stop > _POSIX2_LINE_MAX)
177290650Shselasky			errx(1, "[-cf] list: %d too large (max %d)",
178290650Shselasky			    stop, _POSIX2_LINE_MAX);
179290650Shselasky		if (maxval < stop)
180353224Shselasky			maxval = stop;
181290650Shselasky		for (pos = positions + start; start++ <= stop; *pos++ = 1);
182290650Shselasky	}
183353224Shselasky
184290650Shselasky	/* overlapping ranges */
185290650Shselasky	if (autostop && maxval > autostop)
186290650Shselasky		maxval = autostop;
187290650Shselasky
188290650Shselasky	/* set autostart */
189290650Shselasky	if (autostart)
190353224Shselasky		memset(positions + 1, '1', autostart);
191290650Shselasky}
192290650Shselasky
193353224Shselasky/* ARGSUSED */
194290650Shselaskyvoid
195290650Shselaskyc_cut(fp, fname)
196290650Shselasky	FILE *fp;
197290650Shselasky	char *fname;
198290650Shselasky{
199290650Shselasky	int ch, col;
200290650Shselasky	char *pos;
201290650Shselasky
202347862Shselasky	ch = 0;
203347862Shselasky	for (;;) {
204347862Shselasky		pos = positions + 1;
205347862Shselasky		for (col = maxval; col; --col) {
206347862Shselasky			if ((ch = getc(fp)) == EOF)
207347862Shselasky				return;
208347862Shselasky			if (ch == '\n')
209347862Shselasky				break;
210347862Shselasky			if (*pos++)
211347862Shselasky				(void)putchar(ch);
212347862Shselasky		}
213347862Shselasky		if (ch != '\n') {
214347862Shselasky			if (autostop)
215347862Shselasky				while ((ch = getc(fp)) != EOF && ch != '\n')
216347862Shselasky					(void)putchar(ch);
217331580Shselasky			else
218331580Shselasky				while ((ch = getc(fp)) != EOF && ch != '\n');
219331580Shselasky		}
220331580Shselasky		(void)putchar('\n');
221331580Shselasky	}
222331580Shselasky}
223331580Shselasky
224331580Shselaskyvoid
225331580Shselaskyf_cut(fp, fname)
226331580Shselasky	FILE *fp;
227331580Shselasky	char *fname;
228331580Shselasky{
229331580Shselasky	int ch, field, isdelim;
230331580Shselasky	char *pos, *p, sep;
231331580Shselasky	int output;
232331580Shselasky	char lbuf[_POSIX2_LINE_MAX + 1];
233331580Shselasky
234331580Shselasky	for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) {
235331580Shselasky		output = 0;
236331580Shselasky		for (isdelim = 0, p = lbuf;; ++p) {
237331580Shselasky			if (!(ch = *p))
238331580Shselasky				errx(1, "%s: line too long.", fname);
239331580Shselasky			/* this should work if newline is delimiter */
240331580Shselasky			if (ch == sep)
241331580Shselasky				isdelim = 1;
242331580Shselasky			if (ch == '\n') {
243331580Shselasky				if (!isdelim && !sflag)
244331580Shselasky					(void)printf("%s", lbuf);
245290650Shselasky				break;
246290650Shselasky			}
247353224Shselasky		}
248290650Shselasky		if (!isdelim)
249290650Shselasky			continue;
250290650Shselasky
251353224Shselasky		pos = positions + 1;
252290650Shselasky		for (field = maxval, p = lbuf; field; --field, ++pos) {
253290650Shselasky			if (*pos) {
254290650Shselasky				if (output++)
255290650Shselasky					(void)putchar(sep);
256290650Shselasky				while ((ch = *p++) != '\n' && ch != sep)
257353224Shselasky					(void)putchar(ch);
258290650Shselasky			} else {
259290650Shselasky				while ((ch = *p++) != '\n' && ch != sep)
260290650Shselasky					continue;
261290650Shselasky			}
262290650Shselasky			if (ch == '\n')
263290650Shselasky				break;
264290650Shselasky		}
265290650Shselasky		if (ch != '\n') {
266290650Shselasky			if (autostop) {
267290650Shselasky				if (output)
268290650Shselasky					(void)putchar(sep);
269290650Shselasky				for (; (ch = *p) != '\n'; ++p)
270290650Shselasky					(void)putchar(ch);
271290650Shselasky			} else
272338554Shselasky				for (; (ch = *p) != '\n'; ++p);
273337112Shselasky		}
274290650Shselasky		(void)putchar('\n');
275290650Shselasky	}
276337112Shselasky}
277337112Shselasky
/*
 * Write the three-line synopsis to standard error and exit with
 * status 1.  Does not return.
 */
static void
usage()
{
	static const char *const synopsis[] = {
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]"
	};
	size_t i;

	for (i = 0; i < sizeof(synopsis) / sizeof(synopsis[0]); i++)
		(void)fprintf(stderr, "%s\n", synopsis[i]);
	exit(1);
}
287290650Shselasky