split.c revision 68887
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 3. All advertising materials mentioning features or use of this software
141590Srgrimes *    must display the following acknowledgement:
151590Srgrimes *	This product includes software developed by the University of
161590Srgrimes *	California, Berkeley and its contributors.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
341590Srgrimes#ifndef lint
3528071Scharnierstatic const char copyright[] =
361590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\
371590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
381590Srgrimes#endif /* not lint */
391590Srgrimes
401590Srgrimes#ifndef lint
4128071Scharnier#if 0
421590Srgrimesstatic char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
4368887Sjwd#else
4468887Sjwdstatic const char rcsid[] =
4568887Sjwd  "$FreeBSD: head/usr.bin/split/split.c 68887 2000-11-19 01:44:20Z jwd $";
4628071Scharnier#endif
471590Srgrimes#endif /* not lint */
481590Srgrimes
491590Srgrimes#include <sys/param.h>
5043513Sarchie#include <sys/types.h>
511590Srgrimes
521590Srgrimes#include <ctype.h>
531590Srgrimes#include <err.h>
541590Srgrimes#include <fcntl.h>
551590Srgrimes#include <stdio.h>
561590Srgrimes#include <stdlib.h>
571590Srgrimes#include <string.h>
581590Srgrimes#include <unistd.h>
5943513Sarchie#include <regex.h>
6043513Sarchie#include <sysexits.h>
611590Srgrimes
/*
 * Program-wide state shared by main() and the split/newfile helpers.
 */
621590Srgrimes#define DEFLINE	1000			/* Default num lines per file. */
631590Srgrimes
6468887Sjwdsize_t	 bytecnt;			/* Byte count to split on. */
651590Srgrimeslong	 numlines;			/* Line count to split on. */
/* file_open is set by newfile() and cleared by split1() on an exact boundary. */
661590Srgrimesint	 file_open;			/* If a file open. */
671590Srgrimesint	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
681590Srgrimeschar	 bfr[MAXBSIZE];			/* I/O buffer. */
/* newfile() appends a two-letter suffix to fname in place. */
691590Srgrimeschar	 fname[MAXPATHLEN];		/* File name prefix. */
7043513Sarchieregex_t	 rgx;			/* Pattern compiled from the -p argument. */
7143513Sarchieint	 pflag;			/* Nonzero once -p has been given and rgx compiled. */
721590Srgrimes
/* Forward declarations; __P() wraps the prototype argument lists. */
731590Srgrimesvoid newfile __P((void));
741590Srgrimesvoid split1 __P((void));
751590Srgrimesvoid split2 __P((void));
7628071Scharnierstatic void usage __P((void));
771590Srgrimes
781590Srgrimesint
791590Srgrimesmain(argc, argv)
801590Srgrimes	int argc;
811590Srgrimes	char *argv[];
821590Srgrimes{
831590Srgrimes	int ch;
841590Srgrimes	char *ep, *p;
851590Srgrimes
8643513Sarchie	while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1)
871590Srgrimes		switch (ch) {
881590Srgrimes		case '0': case '1': case '2': case '3': case '4':
891590Srgrimes		case '5': case '6': case '7': case '8': case '9':
901590Srgrimes			/*
911590Srgrimes			 * Undocumented kludge: split was originally designed
921590Srgrimes			 * to take a number after a dash.
931590Srgrimes			 */
941590Srgrimes			if (numlines == 0) {
951590Srgrimes				p = argv[optind - 1];
961590Srgrimes				if (p[0] == '-' && p[1] == ch && !p[2])
971590Srgrimes					numlines = strtol(++p, &ep, 10);
981590Srgrimes				else
991590Srgrimes					numlines =
1001590Srgrimes					    strtol(argv[optind] + 1, &ep, 10);
1011590Srgrimes				if (numlines <= 0 || *ep)
10243513Sarchie					errx(EX_USAGE,
10343513Sarchie					    "%s: illegal line count", optarg);
1041590Srgrimes			}
1051590Srgrimes			break;
1061590Srgrimes		case '-':		/* Undocumented: historic stdin flag. */
1071590Srgrimes			if (ifd != -1)
1081590Srgrimes				usage();
1091590Srgrimes			ifd = 0;
1101590Srgrimes			break;
1111590Srgrimes		case 'b':		/* Byte count. */
11268887Sjwd			if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
11328071Scharnier			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
11443513Sarchie				errx(EX_USAGE,
11543513Sarchie				    "%s: illegal byte count", optarg);
1161590Srgrimes			if (*ep == 'k')
1171590Srgrimes				bytecnt *= 1024;
1181590Srgrimes			else if (*ep == 'm')
1191590Srgrimes				bytecnt *= 1048576;
1201590Srgrimes			break;
12143513Sarchie		case 'p' :      /* pattern matching. */
12243513Sarchie			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
12343513Sarchie				errx(EX_USAGE, "%s: illegal regexp", optarg);
12443513Sarchie			pflag = 1;
12543513Sarchie			break;
1261590Srgrimes		case 'l':		/* Line count. */
1271590Srgrimes			if (numlines != 0)
1281590Srgrimes				usage();
1299427Srgrimes			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
13043513Sarchie				errx(EX_USAGE,
13143513Sarchie				    "%s: illegal line count", optarg);
1321590Srgrimes			break;
1331590Srgrimes		default:
1341590Srgrimes			usage();
1351590Srgrimes		}
1361590Srgrimes	argv += optind;
1371590Srgrimes	argc -= optind;
1381590Srgrimes
1391590Srgrimes	if (*argv != NULL)
1401590Srgrimes		if (ifd == -1) {		/* Input file. */
1411590Srgrimes			if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
14243513Sarchie				err(EX_NOINPUT, "%s", *argv);
1431590Srgrimes			++argv;
1441590Srgrimes		}
1451590Srgrimes	if (*argv != NULL)			/* File name prefix. */
1461590Srgrimes		(void)strcpy(fname, *argv++);
1471590Srgrimes	if (*argv != NULL)
1481590Srgrimes		usage();
1491590Srgrimes
15043513Sarchie	if (pflag && (numlines != 0 || bytecnt != 0))
15143513Sarchie		usage();
15243513Sarchie
1531590Srgrimes	if (numlines == 0)
1541590Srgrimes		numlines = DEFLINE;
15543513Sarchie	else if (bytecnt != 0)
1561590Srgrimes		usage();
1571590Srgrimes
1581590Srgrimes	if (ifd == -1)				/* Stdin by default. */
1591590Srgrimes		ifd = 0;
1601590Srgrimes
1611590Srgrimes	if (bytecnt) {
1621590Srgrimes		split1();
1631590Srgrimes		exit (0);
1641590Srgrimes	}
1651590Srgrimes	split2();
16643513Sarchie	if (pflag)
16743513Sarchie		regfree(&rgx);
1681590Srgrimes	exit(0);
1691590Srgrimes}
1701590Srgrimes
1711590Srgrimes/*
1721590Srgrimes * split1 --
1731590Srgrimes *	Split the input by bytes.
1741590Srgrimes */
1751590Srgrimesvoid
1761590Srgrimessplit1()
1771590Srgrimes{
17868887Sjwd	size_t bcnt, dist, len;
1791590Srgrimes	char *C;
1801590Srgrimes
1811590Srgrimes	for (bcnt = 0;;)
18243513Sarchie		switch ((len = read(ifd, bfr, MAXBSIZE))) {
1831590Srgrimes		case 0:
1841590Srgrimes			exit(0);
1851590Srgrimes		case -1:
18643513Sarchie			err(EX_IOERR, "read");
1871590Srgrimes			/* NOTREACHED */
1881590Srgrimes		default:
18943513Sarchie			if (!file_open)
1901590Srgrimes				newfile();
1911590Srgrimes			if (bcnt + len >= bytecnt) {
1921590Srgrimes				dist = bytecnt - bcnt;
1931590Srgrimes				if (write(ofd, bfr, dist) != dist)
19443513Sarchie					err(EX_IOERR, "write");
1951590Srgrimes				len -= dist;
1961590Srgrimes				for (C = bfr + dist; len >= bytecnt;
1971590Srgrimes				    len -= bytecnt, C += bytecnt) {
1981590Srgrimes					newfile();
1991590Srgrimes					if (write(ofd,
2001590Srgrimes					    C, (int)bytecnt) != bytecnt)
20143513Sarchie						err(EX_IOERR, "write");
2021590Srgrimes				}
20343513Sarchie				if (len != 0) {
2041590Srgrimes					newfile();
2051590Srgrimes					if (write(ofd, C, len) != len)
20643513Sarchie						err(EX_IOERR, "write");
2071590Srgrimes				} else
2081590Srgrimes					file_open = 0;
2091590Srgrimes				bcnt = len;
2101590Srgrimes			} else {
2111590Srgrimes				bcnt += len;
2121590Srgrimes				if (write(ofd, bfr, len) != len)
21343513Sarchie					err(EX_IOERR, "write");
2141590Srgrimes			}
2151590Srgrimes		}
2161590Srgrimes}
2171590Srgrimes
2181590Srgrimes/*
2191590Srgrimes * split2 --
2201590Srgrimes *	Split the input by lines.
2211590Srgrimes */
2221590Srgrimesvoid
2231590Srgrimessplit2()
2241590Srgrimes{
22543513Sarchie	long lcnt = 0;
22643513Sarchie	FILE *infp;
2271590Srgrimes
22843513Sarchie	/* Stick a stream on top of input file descriptor */
22943513Sarchie	if ((infp = fdopen(ifd, "r")) == NULL)
23043513Sarchie		err(EX_NOINPUT, "fdopen");
23143513Sarchie
23243513Sarchie	/* Process input one line at a time */
23343513Sarchie	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
23443513Sarchie		const int len = strlen(bfr);
23543513Sarchie
23643513Sarchie		/* If line is too long to deal with, just write it out */
23743513Sarchie		if (bfr[len - 1] != '\n')
23843513Sarchie			goto writeit;
23943513Sarchie
24043513Sarchie		/* Check if we need to start a new file */
24143513Sarchie		if (pflag) {
24243513Sarchie			regmatch_t pmatch;
24343513Sarchie
24443513Sarchie			pmatch.rm_so = 0;
24543513Sarchie			pmatch.rm_eo = len - 1;
24643513Sarchie			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
2471590Srgrimes				newfile();
24843513Sarchie		} else if (lcnt++ == numlines) {
24943513Sarchie			newfile();
25043513Sarchie			lcnt = 1;
2511590Srgrimes		}
25243513Sarchie
25343513Sarchiewriteit:
25443513Sarchie		/* Open output file if needed */
25543513Sarchie		if (!file_open)
25643513Sarchie			newfile();
25743513Sarchie
25843513Sarchie		/* Write out line */
25943513Sarchie		if (write(ofd, bfr, len) != len)
26043513Sarchie			err(EX_IOERR, "write");
26143513Sarchie	}
26243513Sarchie
26343513Sarchie	/* EOF or error? */
26443513Sarchie	if (ferror(infp))
26543513Sarchie		err(EX_IOERR, "read");
26643513Sarchie	else
26743513Sarchie		exit(0);
2681590Srgrimes}
2691590Srgrimes
2701590Srgrimes/*
2711590Srgrimes * newfile --
2721590Srgrimes *	Open a new output file.
2731590Srgrimes */
2741590Srgrimesvoid
2751590Srgrimesnewfile()
2761590Srgrimes{
2771590Srgrimes	static long fnum;
2781590Srgrimes	static int defname;
2791590Srgrimes	static char *fpnt;
2801590Srgrimes
2811590Srgrimes	if (ofd == -1) {
2821590Srgrimes		if (fname[0] == '\0') {
2831590Srgrimes			fname[0] = 'x';
2841590Srgrimes			fpnt = fname + 1;
2851590Srgrimes			defname = 1;
2861590Srgrimes		} else {
2871590Srgrimes			fpnt = fname + strlen(fname);
2881590Srgrimes			defname = 0;
2891590Srgrimes		}
2901590Srgrimes		ofd = fileno(stdout);
2911590Srgrimes	}
2921590Srgrimes	/*
2931590Srgrimes	 * Hack to increase max files; original code wandered through
2941590Srgrimes	 * magic characters.  Maximum files is 3 * 26 * 26 == 2028
2951590Srgrimes	 */
2961590Srgrimes#define MAXFILES	676
2971590Srgrimes	if (fnum == MAXFILES) {
2981590Srgrimes		if (!defname || fname[0] == 'z')
29943513Sarchie			errx(EX_DATAERR, "too many files");
3001590Srgrimes		++fname[0];
3011590Srgrimes		fnum = 0;
3021590Srgrimes	}
3031590Srgrimes	fpnt[0] = fnum / 26 + 'a';
3041590Srgrimes	fpnt[1] = fnum % 26 + 'a';
3051590Srgrimes	++fnum;
3061590Srgrimes	if (!freopen(fname, "w", stdout))
30743513Sarchie		err(EX_IOERR, "%s", fname);
30843513Sarchie	file_open = 1;
3091590Srgrimes}
3101590Srgrimes
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage()
{
	(void)fputs(
"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n",
	    stderr);
	exit(EX_USAGE);
}
318