/* cut.c revision 75930 */
/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Adam S. Moskowitz of Menlo Consulting and Marciano Pitargue.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef lint
/*
 * Identification strings embedded in the object file.  They are compiled
 * out when "lint" is defined.
 */
static const char copyright[] =
"@(#) Copyright (c) 1989, 1993\n\
	The Regents of the University of California.  All rights reserved.\n";
static const char sccsid[] = "@(#)cut.c	8.3 (Berkeley) 5/4/95";
static const char rcsid[] =
  "$FreeBSD: head/usr.bin/cut/cut.c 75930 2001-04-25 05:42:53Z dd $";
#endif /* not lint */

#include <ctype.h>
#include <err.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* Option state recorded by main() while it parses the command line. */
int	cflag;		/* -b or -c given: select by byte column */
char	dchar;		/* field delimiter for -f; main() defaults it to tab */
int	dflag;		/* -d given */
int	fflag;		/* -f given: select by field */
int	sflag;		/* -s given: f_cut() drops lines with no delimiter */

void	c_cut(FILE *fp, const char *fname);
void	f_cut(FILE *fp, const char *fname);
void	get_list(char *list);
int	main(int argc, char **argv);
static void	usage(void);

671590Srgrimesint
681590Srgrimesmain(argc, argv)
691590Srgrimes	int argc;
701590Srgrimes	char *argv[];
711590Srgrimes{
721590Srgrimes	FILE *fp;
7371726Swill	void (*fcn) (FILE *, const char *) = NULL;
741590Srgrimes	int ch;
751590Srgrimes
7643533Seivind	fcn = NULL;
7743531Seivind	setlocale (LC_ALL, "");
7843531Seivind
791590Srgrimes	dchar = '\t';			/* default delimiter is \t */
801590Srgrimes
8143531Seivind	/* Since we don't support multi-byte characters, the -c and -b
8243531Seivind	   options are equivalent, and the -n option is meaningless. */
8343532Seivind	while ((ch = getopt(argc, argv, "b:c:d:f:sn")) != -1)
841590Srgrimes		switch(ch) {
8543531Seivind		case 'b':
861590Srgrimes		case 'c':
871590Srgrimes			fcn = c_cut;
881590Srgrimes			get_list(optarg);
891590Srgrimes			cflag = 1;
901590Srgrimes			break;
911590Srgrimes		case 'd':
921590Srgrimes			dchar = *optarg;
931590Srgrimes			dflag = 1;
941590Srgrimes			break;
951590Srgrimes		case 'f':
961590Srgrimes			get_list(optarg);
971590Srgrimes			fcn = f_cut;
981590Srgrimes			fflag = 1;
991590Srgrimes			break;
1001590Srgrimes		case 's':
1011590Srgrimes			sflag = 1;
1021590Srgrimes			break;
10343531Seivind		case 'n':
10443531Seivind			break;
1051590Srgrimes		case '?':
1061590Srgrimes		default:
1071590Srgrimes			usage();
1081590Srgrimes		}
1091590Srgrimes	argc -= optind;
1101590Srgrimes	argv += optind;
1111590Srgrimes
1121590Srgrimes	if (fflag) {
1131590Srgrimes		if (cflag)
1141590Srgrimes			usage();
1151590Srgrimes	} else if (!cflag || dflag || sflag)
1161590Srgrimes		usage();
1171590Srgrimes
1181590Srgrimes	if (*argv)
1191590Srgrimes		for (; *argv; ++argv) {
1201590Srgrimes			if (!(fp = fopen(*argv, "r")))
12127098Scharnier				err(1, "%s", *argv);
1221590Srgrimes			fcn(fp, *argv);
1231590Srgrimes			(void)fclose(fp);
1241590Srgrimes		}
1251590Srgrimes	else
1261590Srgrimes		fcn(stdin, "stdin");
1271590Srgrimes	exit(0);
1281590Srgrimes}
1291590Srgrimes
/*
 * Selection state filled in by get_list():
 *	autostart  largest N given in a leading-dash item ("-N"); positions
 *		   1..N are all marked once the whole list has been parsed
 *	autostop   smallest stop value among items that end in '-'; zero
 *		   when the list has no open-ended item
 *	maxval	   highest position recorded, lowered to autostop when an
 *		   open-ended item starts below it
 */
size_t autostart, autostop, maxval;

/* positions[i] is non-zero when 1-based column/field i is selected. */
char positions[_POSIX2_LINE_MAX + 1];

/*
 * get_list --
 *	Parse a -b/-c/-f list and record the selection in positions[],
 *	autostart, autostop and maxval.
 *
 *	list is split in place (strsep) on commas, blanks and tabs.  Each
 *	item is N, N-M, N-, -N, or the tolerated oddity -N-M.  This parser is
 *	less restrictive than the Draft 9 POSIX spec, which does not allow
 *	lists that are out of order or that overlap.
 *
 *	Any malformed item, a zero, or a position above _POSIX2_LINE_MAX is
 *	fatal: errx(3) prints a diagnostic and exits with status 1.
 */
void
get_list(char *list)
{
	size_t i, start, stop;
	int leading_dash, open_ended;
	char *item;

	while ((item = strsep(&list, ", \t")) != NULL) {
		start = stop = 0;
		leading_dash = open_ended = 0;

		if (*item == '-') {
			++item;
			leading_dash = 1;
		}
		if (isdigit((unsigned char)*item))
			start = stop = strtoul(item, &item, 10);
		if (*item == '-') {
			if (isdigit((unsigned char)item[1]))
				stop = strtoul(item + 1, &item, 10);
			if (*item == '-') {
				++item;
				open_ended = 1;
			}
		}

		if (*item)
			errx(1, "[-cf] list: illegal list value");
		if (!stop || !start)
			errx(1, "[-cf] list: values may not include zero");
		/*
		 * Both ends must fit in positions[]: start feeds autostart,
		 * which sizes the memset below, so checking stop alone would
		 * let "-99999-3" write far past the end of the array.
		 */
		if (stop > _POSIX2_LINE_MAX || start > _POSIX2_LINE_MAX)
			errx(1, "[-cf] list: %zu too large (max %d)",
			    stop > start ? stop : start, _POSIX2_LINE_MAX);

		/* The item is valid; only now fold it into the global state. */
		if (leading_dash && start > autostart)
			autostart = start;
		if (open_ended && (!autostop || autostop > stop))
			autostop = stop;
		if (maxval < stop)
			maxval = stop;
		for (i = start; i <= stop; i++)
			positions[i] = 1;
	}

	/* overlapping ranges */
	if (autostop && maxval > autostop)
		maxval = autostop;

	/* set autostart */
	if (autostart)
		memset(positions + 1, 1, autostart);
}

1911590Srgrimes/* ARGSUSED */
1921590Srgrimesvoid
1931590Srgrimesc_cut(fp, fname)
1941590Srgrimes	FILE *fp;
19571725Swill	const char *fname;
1961590Srgrimes{
19743533Seivind	int ch, col;
19843533Seivind	char *pos;
19971725Swill	fname = NULL;
2001590Srgrimes
20143533Seivind	ch = 0;
2021590Srgrimes	for (;;) {
2031590Srgrimes		pos = positions + 1;
2041590Srgrimes		for (col = maxval; col; --col) {
2051590Srgrimes			if ((ch = getc(fp)) == EOF)
2061590Srgrimes				return;
2071590Srgrimes			if (ch == '\n')
2081590Srgrimes				break;
2091590Srgrimes			if (*pos++)
2101590Srgrimes				(void)putchar(ch);
2111590Srgrimes		}
21243533Seivind		if (ch != '\n') {
2131590Srgrimes			if (autostop)
2141590Srgrimes				while ((ch = getc(fp)) != EOF && ch != '\n')
2151590Srgrimes					(void)putchar(ch);
2161590Srgrimes			else
2171590Srgrimes				while ((ch = getc(fp)) != EOF && ch != '\n');
21843533Seivind		}
2191590Srgrimes		(void)putchar('\n');
2201590Srgrimes	}
2211590Srgrimes}
2221590Srgrimes
2231590Srgrimesvoid
2241590Srgrimesf_cut(fp, fname)
2251590Srgrimes	FILE *fp;
22671725Swill	const char *fname;
2271590Srgrimes{
22843533Seivind	int ch, field, isdelim;
22943533Seivind	char *pos, *p, sep;
2301590Srgrimes	int output;
23175930Sdd	char *lbuf, *mlbuf = NULL;
23275930Sdd	size_t lbuflen;
2331590Srgrimes
23475930Sdd	for (sep = dchar; (lbuf = fgetln(fp, &lbuflen)) != NULL;) {
23575930Sdd		/* Assert EOL has a newline. */
23675930Sdd		if (*(lbuf + lbuflen - 1) != '\n') {
23775930Sdd			/* Can't have > 1 line with no trailing newline. */
23875930Sdd			mlbuf = malloc(lbuflen + 1);
23975930Sdd			if (mlbuf == NULL)
24075930Sdd				err(1, "malloc");
24175930Sdd			memcpy(mlbuf, lbuf, lbuflen);
24275930Sdd			*(mlbuf + lbuflen) = '\n';
24375930Sdd			lbuf = mlbuf;
24475930Sdd		}
2457200Sache		output = 0;
2461590Srgrimes		for (isdelim = 0, p = lbuf;; ++p) {
24775930Sdd			ch = *p;
2481590Srgrimes			/* this should work if newline is delimiter */
2491590Srgrimes			if (ch == sep)
2501590Srgrimes				isdelim = 1;
2511590Srgrimes			if (ch == '\n') {
2521590Srgrimes				if (!isdelim && !sflag)
25375930Sdd					(void)fwrite(lbuf, lbuflen, 1, stdout);
2541590Srgrimes				break;
2551590Srgrimes			}
2561590Srgrimes		}
2571590Srgrimes		if (!isdelim)
2581590Srgrimes			continue;
2591590Srgrimes
2601590Srgrimes		pos = positions + 1;
2611590Srgrimes		for (field = maxval, p = lbuf; field; --field, ++pos) {
2621590Srgrimes			if (*pos) {
2631590Srgrimes				if (output++)
2641590Srgrimes					(void)putchar(sep);
2651590Srgrimes				while ((ch = *p++) != '\n' && ch != sep)
2661590Srgrimes					(void)putchar(ch);
26743533Seivind			} else {
26843533Seivind				while ((ch = *p++) != '\n' && ch != sep)
26943533Seivind					continue;
27043533Seivind			}
2711590Srgrimes			if (ch == '\n')
2721590Srgrimes				break;
2731590Srgrimes		}
27443533Seivind		if (ch != '\n') {
2751590Srgrimes			if (autostop) {
2761590Srgrimes				if (output)
2771590Srgrimes					(void)putchar(sep);
2781590Srgrimes				for (; (ch = *p) != '\n'; ++p)
2791590Srgrimes					(void)putchar(ch);
2801590Srgrimes			} else
2811590Srgrimes				for (; (ch = *p) != '\n'; ++p);
28243533Seivind		}
2831590Srgrimes		(void)putchar('\n');
2841590Srgrimes	}
28575930Sdd	if (mlbuf != NULL)
28675930Sdd		free(mlbuf);
2871590Srgrimes}
2881590Srgrimes
/*
 * usage --
 *	Print the three-line synopsis to standard error and exit with
 *	status 1.  Does not return.
 */
static void
usage(void)
{
	(void)fprintf(stderr, "%s\n%s\n%s\n",
		"usage: cut -b list [-n] [file ...]",
		"       cut -c list [file ...]",
		"       cut -f list [-s] [-d delim] [file ...]");
	exit(1);
}