1/*
2 * Copyright (c) 1989, 1993
3 *	The Regents of the University of California.  All rights reserved.
4 *
5 * This code is derived from software contributed to Berkeley by
6 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 *    must display the following acknowledgement:
18 *	This product includes software developed by the University of
19 *	California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1989, 1993\n\
40	The Regents of the University of California.  All rights reserved.\n";
41#endif /* not lint */
42
43#ifndef lint
44static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
45#endif /* not lint */
46
47#include <config.h>
48
49#include <ctype.h>
50#include <stdio.h>
51#include <errno.h>
52
53#include "bashansi.h"
54
55#ifdef HAVE_LIMITS_H
56#  include <limits.h>
57#endif
58
59#ifdef HAVE_UNISTD_H
60#  include <unistd.h>
61#endif
62
63#include "builtins.h"
64#include "shell.h"
65#include "bashgetopt.h"
66
67#if !defined (errno)
68extern int	errno;
69#endif
70
71#if !defined (_POSIX2_LINE_MAX)
72#  define _POSIX2_LINE_MAX 2048
73#endif
74
75static int	cflag;
76static char	dchar;
77static int	dflag;
78static int	fflag;
79static int	sflag;
80
81static int autostart, autostop, maxval;
82static char positions[_POSIX2_LINE_MAX + 1];
83
84static int	c_cut __P((FILE *, char *));
85static int	f_cut __P((FILE *, char *));
86static int	get_list __P((char *));
87static char	*_cut_strsep __P((char **, const char *));
88
89int
90cut_builtin(list)
91	WORD_LIST *list;
92{
93	FILE *fp;
94	int (*fcn) __P((FILE *, char *)) = NULL;
95	int ch;
96
97	fcn = NULL;
98	dchar = '\t';			/* default delimiter is \t */
99
100	/* Since we don't support multi-byte characters, the -c and -b
101	   options are equivalent, and the -n option is meaningless. */
102	reset_internal_getopt ();
103	while ((ch = internal_getopt (list, "b:c:d:f:sn")) != -1)
104		switch(ch) {
105		case 'b':
106		case 'c':
107			fcn = c_cut;
108			if (get_list(list_optarg) < 0)
109				return (EXECUTION_FAILURE);
110			cflag = 1;
111			break;
112		case 'd':
113			dchar = *list_optarg;
114			dflag = 1;
115			break;
116		case 'f':
117			fcn = f_cut;
118			if (get_list(list_optarg) < 0)
119				return (EXECUTION_FAILURE);
120			fflag = 1;
121			break;
122		case 's':
123			sflag = 1;
124			break;
125		case 'n':
126			break;
127		case '?':
128		default:
129			builtin_usage();
130			return (EX_USAGE);
131		}
132
133	list = loptend;
134
135	if (fflag) {
136		if (cflag) {
137			builtin_usage();
138			return (EX_USAGE);
139		}
140	} else if (!cflag || dflag || sflag) {
141		builtin_usage();
142		return (EX_USAGE);
143	}
144
145	if (list) {
146		while (list) {
147			fp = fopen(list->word->word, "r");
148			if (fp == 0) {
149				builtin_error("%s", list->word->word);
150				return (EXECUTION_FAILURE);
151			}
152			ch = (*fcn)(fp, list->word->word);
153			(void)fclose(fp);
154			if (ch < 0)
155				return (EXECUTION_FAILURE);
156			list = list->next;
157		}
158	} else {
159		ch = (*fcn)(stdin, "stdin");
160		if (ch < 0)
161			return (EXECUTION_FAILURE);
162	}
163
164	return (EXECUTION_SUCCESS);
165}
166
167static int
168get_list(list)
169	char *list;
170{
171	int setautostart, start, stop;
172	char *pos;
173	char *p;
174
175	/*
176	 * set a byte in the positions array to indicate if a field or
177	 * column is to be selected; use +1, it's 1-based, not 0-based.
178	 * This parser is less restrictive than the Draft 9 POSIX spec.
179	 * POSIX doesn't allow lists that aren't in increasing order or
180	 * overlapping lists.  We also handle "-3-5" although there's no
181	 * real reason too.
182	 */
183	for (; (p = _cut_strsep(&list, ", \t")) != NULL;) {
184		setautostart = start = stop = 0;
185		if (*p == '-') {
186			++p;
187			setautostart = 1;
188		}
189		if (isdigit((unsigned char)*p)) {
190			start = stop = strtol(p, &p, 10);
191			if (setautostart && start > autostart)
192				autostart = start;
193		}
194		if (*p == '-') {
195			if (isdigit((unsigned char)p[1]))
196				stop = strtol(p + 1, &p, 10);
197			if (*p == '-') {
198				++p;
199				if (!autostop || autostop > stop)
200					autostop = stop;
201			}
202		}
203		if (*p) {
204			builtin_error("[-cf] list: illegal list value");
205			return -1;
206		}
207		if (!stop || !start) {
208			builtin_error("[-cf] list: values may not include zero");
209			return -1;
210		}
211		if (stop > _POSIX2_LINE_MAX) {
212			builtin_error("[-cf] list: %d too large (max %d)",
213				       stop, _POSIX2_LINE_MAX);
214			return -1;
215		}
216		if (maxval < stop)
217			maxval = stop;
218		for (pos = positions + start; start++ <= stop; *pos++ = 1);
219	}
220
221	/* overlapping ranges */
222	if (autostop && maxval > autostop)
223		maxval = autostop;
224
225	/* set autostart */
226	if (autostart)
227		memset(positions + 1, '1', autostart);
228
229	return 0;
230}
231
232/* ARGSUSED */
233static int
234c_cut(fp, fname)
235	FILE *fp;
236	char *fname;
237{
238	int ch, col;
239	char *pos;
240
241	ch = 0;
242	for (;;) {
243		pos = positions + 1;
244		for (col = maxval; col; --col) {
245			if ((ch = getc(fp)) == EOF)
246				return;
247			if (ch == '\n')
248				break;
249			if (*pos++)
250				(void)putchar(ch);
251		}
252		if (ch != '\n') {
253			if (autostop)
254				while ((ch = getc(fp)) != EOF && ch != '\n')
255					(void)putchar(ch);
256			else
257				while ((ch = getc(fp)) != EOF && ch != '\n');
258		}
259		(void)putchar('\n');
260	}
261	return (0);
262}
263
264static int
265f_cut(fp, fname)
266	FILE *fp;
267	char *fname;
268{
269	int ch, field, isdelim;
270	char *pos, *p, sep;
271	int output;
272	char lbuf[_POSIX2_LINE_MAX + 1];
273
274	for (sep = dchar; fgets(lbuf, sizeof(lbuf), fp);) {
275		output = 0;
276		for (isdelim = 0, p = lbuf;; ++p) {
277			if (!(ch = *p)) {
278				builtin_error("%s: line too long.", fname);
279				return -1;
280			}
281			/* this should work if newline is delimiter */
282			if (ch == sep)
283				isdelim = 1;
284			if (ch == '\n') {
285				if (!isdelim && !sflag)
286					(void)printf("%s", lbuf);
287				break;
288			}
289		}
290		if (!isdelim)
291			continue;
292
293		pos = positions + 1;
294		for (field = maxval, p = lbuf; field; --field, ++pos) {
295			if (*pos) {
296				if (output++)
297					(void)putchar(sep);
298				while ((ch = *p++) != '\n' && ch != sep)
299					(void)putchar(ch);
300			} else {
301				while ((ch = *p++) != '\n' && ch != sep)
302					continue;
303			}
304			if (ch == '\n')
305				break;
306		}
307		if (ch != '\n') {
308			if (autostop) {
309				if (output)
310					(void)putchar(sep);
311				for (; (ch = *p) != '\n'; ++p)
312					(void)putchar(ch);
313			} else
314				for (; (ch = *p) != '\n'; ++p);
315		}
316		(void)putchar('\n');
317	}
318	return (0);
319}
320
/*
 * Split the next token off the string at *stringp.  Tokens are the
 * (possibly empty) runs of characters between any of the characters
 * in delim; delim may differ from one call to the next.
 *
 * The delimiter that ends the token is overwritten with a NUL, and
 * *stringp is advanced to the character after it.  If the token runs
 * to the end of the string, *stringp is set to NULL, so the following
 * call returns NULL.
 *
 * Returns the start of the token, or NULL if *stringp is NULL.
 */
static char *
_cut_strsep(stringp, delim)
	char **stringp;
	const char *delim;
{
	char *start, *cur;
	const char *d;

	start = *stringp;
	if (start == NULL)
		return (NULL);

	for (cur = start; *cur != '\0'; cur++) {
		for (d = delim; *d != '\0'; d++) {
			if (*d == *cur) {
				/* end the token here; resume after it */
				*cur = '\0';
				*stringp = cur + 1;
				return (start);
			}
		}
	}

	/* Reached the end of the string: this was the last token. */
	*stringp = NULL;
	return (start);
}
360
/* Long help text for the builtin: one string per line, NULL-terminated. */
static char *cut_doc[] = {
	"Select portions of each line (as specified by LIST) from each FILE",
	"(by default, the standard input), and write them to the standard output.",
	"Items specified by LIST are either column positions or fields delimited",
	"by a special character.  Column numbering starts at 1.",
	(char *)NULL
};
368
369struct builtin cut_struct = {
370	"cut",
371	cut_builtin,
372	BUILTIN_ENABLED,
373	cut_doc,
374	"cut -b list [-n] [file ...] OR cut -c list [file ...] OR cut -f list [-s] [-d delim] [file ...]",
375	0
376};
377