split.c revision 98253
11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 3. All advertising materials mentioning features or use of this software
141590Srgrimes *    must display the following acknowledgement:
151590Srgrimes *	This product includes software developed by the University of
161590Srgrimes *	California, Berkeley and its contributors.
171590Srgrimes * 4. Neither the name of the University nor the names of its contributors
181590Srgrimes *    may be used to endorse or promote products derived from this software
191590Srgrimes *    without specific prior written permission.
201590Srgrimes *
211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
241590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
311590Srgrimes * SUCH DAMAGE.
321590Srgrimes */
331590Srgrimes
3487765Smarkm#include <sys/cdefs.h>
3587765Smarkm__FBSDID("$FreeBSD: head/usr.bin/split/split.c 98253 2002-06-15 11:03:28Z jmallett $");
3687765Smarkm
#ifndef lint
/*
 * Identification strings embedded in the object file.  Both are compiled
 * out when `lint' is defined.
 */
static const char copyright[] =
    "@(#) Copyright (c) 1987, 1993, 1994\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
static const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
#endif
461590Srgrimes
#include <sys/param.h>

#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
581590Srgrimes
591590Srgrimes#define DEFLINE	1000			/* Default num lines per file. */
601590Srgrimes
6187765Smarkmint	 bytecnt;			/* Byte count to split on. */
621590Srgrimeslong	 numlines;			/* Line count to split on. */
631590Srgrimesint	 file_open;			/* If a file open. */
641590Srgrimesint	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
651590Srgrimeschar	 bfr[MAXBSIZE];			/* I/O buffer. */
661590Srgrimeschar	 fname[MAXPATHLEN];		/* File name prefix. */
6743513Sarchieregex_t	 rgx;
6843513Sarchieint	 pflag;
6990048Smikelong	 sufflen = 2;			/* File name suffix length. */
701590Srgrimes
7192922Simpvoid newfile(void);
7292922Simpvoid split1(void);
7392922Simpvoid split2(void);
7492922Simpstatic void usage(void);
751590Srgrimes
761590Srgrimesint
7798253Sjmallettmain(int argc, char **argv)
781590Srgrimes{
791590Srgrimes	int ch;
801590Srgrimes	char *ep, *p;
811590Srgrimes
8290048Smike	while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
831590Srgrimes		switch (ch) {
841590Srgrimes		case '0': case '1': case '2': case '3': case '4':
851590Srgrimes		case '5': case '6': case '7': case '8': case '9':
861590Srgrimes			/*
871590Srgrimes			 * Undocumented kludge: split was originally designed
881590Srgrimes			 * to take a number after a dash.
891590Srgrimes			 */
901590Srgrimes			if (numlines == 0) {
911590Srgrimes				p = argv[optind - 1];
921590Srgrimes				if (p[0] == '-' && p[1] == ch && !p[2])
931590Srgrimes					numlines = strtol(++p, &ep, 10);
941590Srgrimes				else
951590Srgrimes					numlines =
961590Srgrimes					    strtol(argv[optind] + 1, &ep, 10);
971590Srgrimes				if (numlines <= 0 || *ep)
9843513Sarchie					errx(EX_USAGE,
9943513Sarchie					    "%s: illegal line count", optarg);
1001590Srgrimes			}
1011590Srgrimes			break;
1021590Srgrimes		case '-':		/* Undocumented: historic stdin flag. */
1031590Srgrimes			if (ifd != -1)
1041590Srgrimes				usage();
1051590Srgrimes			ifd = 0;
1061590Srgrimes			break;
10790048Smike		case 'a':		/* Suffix length */
10890048Smike			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
10990048Smike				errx(EX_USAGE,
11090048Smike				    "%s: illegal suffix length", optarg);
11190048Smike			break;
1121590Srgrimes		case 'b':		/* Byte count. */
11368887Sjwd			if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
11428071Scharnier			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
11543513Sarchie				errx(EX_USAGE,
11643513Sarchie				    "%s: illegal byte count", optarg);
1171590Srgrimes			if (*ep == 'k')
1181590Srgrimes				bytecnt *= 1024;
1191590Srgrimes			else if (*ep == 'm')
1201590Srgrimes				bytecnt *= 1048576;
1211590Srgrimes			break;
12243513Sarchie		case 'p' :      /* pattern matching. */
12343513Sarchie			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
12443513Sarchie				errx(EX_USAGE, "%s: illegal regexp", optarg);
12543513Sarchie			pflag = 1;
12643513Sarchie			break;
1271590Srgrimes		case 'l':		/* Line count. */
1281590Srgrimes			if (numlines != 0)
1291590Srgrimes				usage();
1309427Srgrimes			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
13143513Sarchie				errx(EX_USAGE,
13243513Sarchie				    "%s: illegal line count", optarg);
1331590Srgrimes			break;
1341590Srgrimes		default:
1351590Srgrimes			usage();
1361590Srgrimes		}
1371590Srgrimes	argv += optind;
1381590Srgrimes	argc -= optind;
1391590Srgrimes
1401590Srgrimes	if (*argv != NULL)
1411590Srgrimes		if (ifd == -1) {		/* Input file. */
14297334Stjr			if (strcmp(*argv, "-") == 0)
14397334Stjr				ifd = STDIN_FILENO;
14497334Stjr			else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
14543513Sarchie				err(EX_NOINPUT, "%s", *argv);
1461590Srgrimes			++argv;
1471590Srgrimes		}
1481590Srgrimes	if (*argv != NULL)			/* File name prefix. */
14997332Stjr		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
15097332Stjr			errx(EX_USAGE, "file name prefix is too long");
1511590Srgrimes	if (*argv != NULL)
1521590Srgrimes		usage();
1531590Srgrimes
15490048Smike	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
15590048Smike		errx(EX_USAGE, "suffix is too long");
15643513Sarchie	if (pflag && (numlines != 0 || bytecnt != 0))
15743513Sarchie		usage();
15843513Sarchie
1591590Srgrimes	if (numlines == 0)
1601590Srgrimes		numlines = DEFLINE;
16143513Sarchie	else if (bytecnt != 0)
1621590Srgrimes		usage();
1631590Srgrimes
1641590Srgrimes	if (ifd == -1)				/* Stdin by default. */
1651590Srgrimes		ifd = 0;
1661590Srgrimes
1671590Srgrimes	if (bytecnt) {
1681590Srgrimes		split1();
1691590Srgrimes		exit (0);
1701590Srgrimes	}
1711590Srgrimes	split2();
17243513Sarchie	if (pflag)
17343513Sarchie		regfree(&rgx);
1741590Srgrimes	exit(0);
1751590Srgrimes}
1761590Srgrimes
1771590Srgrimes/*
1781590Srgrimes * split1 --
1791590Srgrimes *	Split the input by bytes.
1801590Srgrimes */
1811590Srgrimesvoid
18298253Sjmallettsplit1(void)
1831590Srgrimes{
18487765Smarkm	size_t bcnt;
1851590Srgrimes	char *C;
18687765Smarkm	int dist, len;
1871590Srgrimes
1881590Srgrimes	for (bcnt = 0;;)
18943513Sarchie		switch ((len = read(ifd, bfr, MAXBSIZE))) {
1901590Srgrimes		case 0:
1911590Srgrimes			exit(0);
1921590Srgrimes		case -1:
19343513Sarchie			err(EX_IOERR, "read");
1941590Srgrimes			/* NOTREACHED */
1951590Srgrimes		default:
19643513Sarchie			if (!file_open)
1971590Srgrimes				newfile();
19887765Smarkm			if (bcnt + len >= (u_int)bytecnt) {
1991590Srgrimes				dist = bytecnt - bcnt;
2001590Srgrimes				if (write(ofd, bfr, dist) != dist)
20143513Sarchie					err(EX_IOERR, "write");
2021590Srgrimes				len -= dist;
2031590Srgrimes				for (C = bfr + dist; len >= bytecnt;
2041590Srgrimes				    len -= bytecnt, C += bytecnt) {
2051590Srgrimes					newfile();
2061590Srgrimes					if (write(ofd,
20787765Smarkm					    C, bytecnt) != bytecnt)
20843513Sarchie						err(EX_IOERR, "write");
2091590Srgrimes				}
21043513Sarchie				if (len != 0) {
2111590Srgrimes					newfile();
2121590Srgrimes					if (write(ofd, C, len) != len)
21343513Sarchie						err(EX_IOERR, "write");
2141590Srgrimes				} else
2151590Srgrimes					file_open = 0;
2161590Srgrimes				bcnt = len;
2171590Srgrimes			} else {
2181590Srgrimes				bcnt += len;
2191590Srgrimes				if (write(ofd, bfr, len) != len)
22043513Sarchie					err(EX_IOERR, "write");
2211590Srgrimes			}
2221590Srgrimes		}
2231590Srgrimes}
2241590Srgrimes
2251590Srgrimes/*
2261590Srgrimes * split2 --
2271590Srgrimes *	Split the input by lines.
2281590Srgrimes */
2291590Srgrimesvoid
23098253Sjmallettsplit2(void)
2311590Srgrimes{
23243513Sarchie	long lcnt = 0;
23343513Sarchie	FILE *infp;
2341590Srgrimes
23543513Sarchie	/* Stick a stream on top of input file descriptor */
23643513Sarchie	if ((infp = fdopen(ifd, "r")) == NULL)
23743513Sarchie		err(EX_NOINPUT, "fdopen");
23843513Sarchie
23943513Sarchie	/* Process input one line at a time */
24043513Sarchie	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
24143513Sarchie		const int len = strlen(bfr);
24243513Sarchie
24343513Sarchie		/* If line is too long to deal with, just write it out */
24443513Sarchie		if (bfr[len - 1] != '\n')
24543513Sarchie			goto writeit;
24643513Sarchie
24743513Sarchie		/* Check if we need to start a new file */
24843513Sarchie		if (pflag) {
24943513Sarchie			regmatch_t pmatch;
25043513Sarchie
25143513Sarchie			pmatch.rm_so = 0;
25243513Sarchie			pmatch.rm_eo = len - 1;
25343513Sarchie			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
2541590Srgrimes				newfile();
25543513Sarchie		} else if (lcnt++ == numlines) {
25643513Sarchie			newfile();
25743513Sarchie			lcnt = 1;
2581590Srgrimes		}
25943513Sarchie
26043513Sarchiewriteit:
26143513Sarchie		/* Open output file if needed */
26243513Sarchie		if (!file_open)
26343513Sarchie			newfile();
26443513Sarchie
26543513Sarchie		/* Write out line */
26643513Sarchie		if (write(ofd, bfr, len) != len)
26743513Sarchie			err(EX_IOERR, "write");
26843513Sarchie	}
26943513Sarchie
27043513Sarchie	/* EOF or error? */
27143513Sarchie	if (ferror(infp))
27243513Sarchie		err(EX_IOERR, "read");
27343513Sarchie	else
27443513Sarchie		exit(0);
2751590Srgrimes}
2761590Srgrimes
2771590Srgrimes/*
2781590Srgrimes * newfile --
2791590Srgrimes *	Open a new output file.
2801590Srgrimes */
2811590Srgrimesvoid
28298253Sjmallettnewfile(void)
2831590Srgrimes{
28490048Smike	long i, maxfiles, tfnum;
2851590Srgrimes	static long fnum;
2861590Srgrimes	static int defname;
2871590Srgrimes	static char *fpnt;
2881590Srgrimes
2891590Srgrimes	if (ofd == -1) {
2901590Srgrimes		if (fname[0] == '\0') {
2911590Srgrimes			fname[0] = 'x';
2921590Srgrimes			fpnt = fname + 1;
2931590Srgrimes			defname = 1;
2941590Srgrimes		} else {
2951590Srgrimes			fpnt = fname + strlen(fname);
2961590Srgrimes			defname = 0;
2971590Srgrimes		}
2981590Srgrimes		ofd = fileno(stdout);
2991590Srgrimes	}
30090048Smike
30190048Smike	/* maxfiles = 26^sufflen, but don't use libm. */
30290048Smike	for (maxfiles = 1, i = 0; i < sufflen; i++)
30390048Smike		if ((maxfiles *= 26) <= 0)
30490048Smike			errx(EX_USAGE, "suffix is too long (max %ld)", i);
30590048Smike
3061590Srgrimes	/*
3071590Srgrimes	 * Hack to increase max files; original code wandered through
30890048Smike	 * magic characters.
3091590Srgrimes	 */
31090048Smike	if (fnum == maxfiles) {
3111590Srgrimes		if (!defname || fname[0] == 'z')
31243513Sarchie			errx(EX_DATAERR, "too many files");
3131590Srgrimes		++fname[0];
3141590Srgrimes		fnum = 0;
3151590Srgrimes	}
31690048Smike
31790048Smike	/* Generate suffix of sufflen letters */
31890048Smike	tfnum = fnum;
31990048Smike	i = sufflen - 1;
32090048Smike	do {
32190048Smike		fpnt[i] = tfnum % 26 + 'a';
32290048Smike		tfnum /= 26;
32390048Smike	} while (i-- > 0);
32490048Smike	fpnt[sufflen] = '\0';
32590048Smike
3261590Srgrimes	++fnum;
3271590Srgrimes	if (!freopen(fname, "w", stdout))
32843513Sarchie		err(EX_IOERR, "%s", fname);
32943513Sarchie	file_open = 1;
3301590Srgrimes}
3311590Srgrimes
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	(void)fputs(
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n",
	    stderr);
	(void)fputs(
"             [file [prefix]]\n",
	    stderr);
	exit(EX_USAGE);
}
341