150477Speter/*
235388Smjacob * Copyright (c) 1987, 1993, 1994
335388Smjacob *	The Regents of the University of California.  All rights reserved.
435388Smjacob *
566189Smjacob * Redistribution and use in source and binary forms, with or without
635388Smjacob * modification, are permitted provided that the following conditions
752347Smjacob * are met:
835388Smjacob * 1. Redistributions of source code must retain the above copyright
935388Smjacob *    notice, this list of conditions and the following disclaimer.
1035388Smjacob * 2. Redistributions in binary form must reproduce the above copyright
1135388Smjacob *    notice, this list of conditions and the following disclaimer in the
1235388Smjacob *    documentation and/or other materials provided with the distribution.
1335388Smjacob * 4. Neither the name of the University nor the names of its contributors
1466189Smjacob *    may be used to endorse or promote products derived from this software
1535388Smjacob *    without specific prior written permission.
1635388Smjacob *
1735388Smjacob * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
1835388Smjacob * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1935388Smjacob * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2035388Smjacob * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
2135388Smjacob * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2235388Smjacob * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2335388Smjacob * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2435388Smjacob * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2535388Smjacob * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2635388Smjacob * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2735388Smjacob * SUCH DAMAGE.
2835388Smjacob */
2935388Smjacob
3035388Smjacob#include <sys/cdefs.h>
3135388Smjacob__FBSDID("$FreeBSD$");
3235388Smjacob
3335388Smjacob#ifndef lint
3444819Smjacobstatic const char copyright[] =
3535388Smjacob"@(#) Copyright (c) 1987, 1993, 1994\n\
3653487Smjacob	The Regents of the University of California.  All rights reserved.\n";
3753487Smjacob#endif
3856004Smjacob
3935388Smjacob#ifndef lint
4053487Smjacobstatic const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
4135388Smjacob#endif
4235388Smjacob
4353487Smjacob#include <sys/param.h>
4453487Smjacob#include <sys/types.h>
4556004Smjacob#include <sys/stat.h>
4635388Smjacob
4753487Smjacob#include <ctype.h>
4835388Smjacob#include <err.h>
4942131Smjacob#include <errno.h>
5053487Smjacob#include <fcntl.h>
5153487Smjacob#include <inttypes.h>
5256004Smjacob#include <libutil.h>
5335388Smjacob#include <limits.h>
5453487Smjacob#include <locale.h>
5535388Smjacob#include <stdbool.h>
5664087Smjacob#include <stdint.h>
5793837Smjacob#include <stdio.h>
5839235Sgibbs#include <stdlib.h>
5935388Smjacob#include <string.h>
6043420Smjacob#include <unistd.h>
6135388Smjacob#include <regex.h>
6235388Smjacob#include <sysexits.h>
6335388Smjacob
6482689Smjacob#define DEFLINE	1000			/* Default num lines per file. */
6582689Smjacob
6674229Smjacobstatic off_t	 bytecnt;		/* Byte count to split on. */
6774229Smjacobstatic off_t	 chunks = 0;		/* Chunks count to split into. */
6874229Smjacobstatic long	 numlines;		/* Line count to split on. */
6974229Smjacobstatic int	 file_open;		/* If a file open. */
7082689Smjacobstatic int	 ifd = -1, ofd = -1;	/* Input/output file descriptors. */
7135388Smjacobstatic char	 bfr[MAXBSIZE];		/* I/O buffer. */
7282689Smjacobstatic char	 fname[MAXPATHLEN];	/* File name prefix. */
7374229Smjacobstatic regex_t	 rgx;
7474229Smjacobstatic int	 pflag;
7574229Smjacobstatic bool	 dflag;
7692893Smjacobstatic long	 sufflen = 2;		/* File name suffix length. */
7735388Smjacob
7835388Smjacobstatic void newfile(void);
7935388Smjacobstatic void split1(void);
8035388Smjacobstatic void split2(void);
8164087Smjacobstatic void split3(void);
8264087Smjacobstatic void usage(void);
8364087Smjacob
8482689Smjacobint
8582689Smjacobmain(int argc, char **argv)
8649909Smjacob{
8761772Smjacob	int ch;
8849909Smjacob	int error;
8982689Smjacob	char *ep, *p;
9082689Smjacob
9182689Smjacob	setlocale(LC_ALL, "");
9282689Smjacob
9382689Smjacob	dflag = false;
9482689Smjacob	while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1)
9553487Smjacob		switch (ch) {
9653487Smjacob		case '0': case '1': case '2': case '3': case '4':
9764087Smjacob		case '5': case '6': case '7': case '8': case '9':
9864087Smjacob			/*
9953487Smjacob			 * Undocumented kludge: split was originally designed
10082689Smjacob			 * to take a number after a dash.
10182689Smjacob			 */
10253487Smjacob			if (numlines == 0) {
10353487Smjacob				p = argv[optind - 1];
10453487Smjacob				if (p[0] == '-' && p[1] == ch && !p[2])
10553487Smjacob					numlines = strtol(++p, &ep, 10);
10653487Smjacob				else
10753487Smjacob					numlines =
10853487Smjacob					    strtol(argv[optind] + 1, &ep, 10);
10953487Smjacob				if (numlines <= 0 || *ep)
11053487Smjacob					errx(EX_USAGE,
11153487Smjacob					    "%s: illegal line count", optarg);
11253487Smjacob			}
11353487Smjacob			break;
11453487Smjacob		case 'a':		/* Suffix length */
11553487Smjacob			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
11653487Smjacob				errx(EX_USAGE,
11753487Smjacob				    "%s: illegal suffix length", optarg);
11853487Smjacob			break;
11953487Smjacob		case 'b':		/* Byte count. */
12053487Smjacob			errno = 0;
12153487Smjacob			error = expand_number(optarg, &bytecnt);
12253487Smjacob			if (error == -1)
12364087Smjacob				errx(EX_USAGE, "%s: offset too large", optarg);
12464087Smjacob			break;
12553487Smjacob		case 'd':		/* Decimal suffix */
12653487Smjacob			dflag = true;
12753487Smjacob			break;
12853487Smjacob		case 'l':		/* Line count. */
12953487Smjacob			if (numlines != 0)
13053487Smjacob				usage();
13153487Smjacob			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
13264087Smjacob				errx(EX_USAGE,
13364087Smjacob				    "%s: illegal line count", optarg);
13464087Smjacob			break;
13564087Smjacob		case 'n':		/* Chunks. */
13664087Smjacob			if (!isdigit((unsigned char)optarg[0]) ||
13764087Smjacob			    (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 ||
13864087Smjacob			    *ep != '\0') {
13964087Smjacob				errx(EX_USAGE, "%s: illegal number of chunks",
14064087Smjacob				     optarg);
14164087Smjacob			}
14264087Smjacob			break;
14364087Smjacob
14464087Smjacob		case 'p':		/* pattern matching. */
14564087Smjacob			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
14664087Smjacob				errx(EX_USAGE, "%s: illegal regexp", optarg);
14764087Smjacob			pflag = 1;
14864087Smjacob			break;
14939235Sgibbs		default:
15064087Smjacob			usage();
15164087Smjacob		}
15293706Smjacob	argv += optind;
15393706Smjacob	argc -= optind;
15493706Smjacob
15564087Smjacob	if (*argv != NULL) {			/* Input file. */
15664087Smjacob		if (strcmp(*argv, "-") == 0)
15793706Smjacob			ifd = STDIN_FILENO;
15839235Sgibbs		else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
15939235Sgibbs			err(EX_NOINPUT, "%s", *argv);
16039235Sgibbs		++argv;
16165140Smjacob	}
16239235Sgibbs	if (*argv != NULL)			/* File name prefix. */
16352347Smjacob		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
16465140Smjacob			errx(EX_USAGE, "file name prefix is too long");
16565140Smjacob	if (*argv != NULL)
16653487Smjacob		usage();
16787635Smjacob
16887635Smjacob	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
16987635Smjacob		errx(EX_USAGE, "suffix is too long");
17087635Smjacob	if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
17153487Smjacob		usage();
17235388Smjacob
17346968Smjacob	if (numlines == 0)
17435388Smjacob		numlines = DEFLINE;
17535388Smjacob	else if (bytecnt != 0 || chunks != 0)
17635388Smjacob		usage();
17746968Smjacob
17852347Smjacob	if (bytecnt && chunks)
17952347Smjacob		usage();
18035388Smjacob
18135388Smjacob	if (ifd == -1)				/* Stdin by default. */
18242461Smjacob		ifd = 0;
18343420Smjacob
18435388Smjacob	if (bytecnt) {
18545040Smjacob		split1();
18665140Smjacob		exit (0);
18735388Smjacob	} else if (chunks) {
18852347Smjacob		split3();
18952347Smjacob		exit (0);
19052347Smjacob	}
19135388Smjacob	split2();
19252347Smjacob	if (pflag)
19352347Smjacob		regfree(&rgx);
19452347Smjacob	exit(0);
19535388Smjacob}
19680582Smjacob
19780582Smjacob/*
19846968Smjacob * split1 --
19980582Smjacob *	Split the input by bytes.
20043420Smjacob */
20143420Smjacobstatic void
20280582Smjacobsplit1(void)
20380582Smjacob{
20480582Smjacob	off_t bcnt;
20580582Smjacob	char *C;
20680582Smjacob	ssize_t dist, len;
20780582Smjacob	int nfiles;
20880582Smjacob
20980582Smjacob	nfiles = 0;
21080582Smjacob
21135388Smjacob	for (bcnt = 0;;)
21246968Smjacob		switch ((len = read(ifd, bfr, MAXBSIZE))) {
21335388Smjacob		case 0:
21435388Smjacob			exit(0);
21535388Smjacob		case -1:
21635388Smjacob			err(EX_IOERR, "read");
21739235Sgibbs			/* NOTREACHED */
21839235Sgibbs		default:
21939235Sgibbs			if (!file_open) {
22039235Sgibbs				if (!chunks || (nfiles < chunks)) {
22139235Sgibbs					newfile();
22239235Sgibbs					nfiles++;
22339235Sgibbs				}
22439235Sgibbs			}
22565140Smjacob			if (bcnt + len >= bytecnt) {
22665140Smjacob				dist = bytecnt - bcnt;
22765140Smjacob				if (write(ofd, bfr, dist) != dist)
22843793Smjacob					err(EX_IOERR, "write");
22939235Sgibbs				len -= dist;
23035388Smjacob				for (C = bfr + dist; len >= bytecnt;
23139235Sgibbs				    len -= bytecnt, C += bytecnt) {
23245040Smjacob					if (!chunks || (nfiles < chunks)) {
23365140Smjacob					newfile();
23465140Smjacob						nfiles++;
23565140Smjacob					}
23665140Smjacob					if (write(ofd,
23752347Smjacob					    C, bytecnt) != bytecnt)
23852347Smjacob						err(EX_IOERR, "write");
23952347Smjacob				}
24052347Smjacob				if (len != 0) {
24135388Smjacob					if (!chunks || (nfiles < chunks)) {
24235388Smjacob					newfile();
24335388Smjacob						nfiles++;
24435388Smjacob					}
24552347Smjacob					if (write(ofd, C, len) != len)
24652347Smjacob						err(EX_IOERR, "write");
24752347Smjacob				} else
24852347Smjacob					file_open = 0;
24993837Smjacob				bcnt = len;
25093837Smjacob			} else {
25193837Smjacob				bcnt += len;
25293837Smjacob				if (write(ofd, bfr, len) != len)
25393837Smjacob					err(EX_IOERR, "write");
25435388Smjacob			}
25552347Smjacob		}
25684241Smjacob}
25772355Smjacob
25848484Smjacob/*
25977776Smjacob * split2 --
26048484Smjacob *	Split the input by lines.
26148484Smjacob */
26259454Smjacobstatic void
26348484Smjacobsplit2(void)
26472355Smjacob{
26539235Sgibbs	long lcnt = 0;
26639235Sgibbs	FILE *infp;
26772355Smjacob
26852347Smjacob	/* Stick a stream on top of input file descriptor */
26982841Smjacob	if ((infp = fdopen(ifd, "r")) == NULL)
27039235Sgibbs		err(EX_NOINPUT, "fdopen");
27152347Smjacob
27252347Smjacob	/* Process input one line at a time */
27352347Smjacob	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
27439235Sgibbs		const int len = strlen(bfr);
27539235Sgibbs
27648484Smjacob		/* If line is too long to deal with, just write it out */
27748484Smjacob		if (bfr[len - 1] != '\n')
27835388Smjacob			goto writeit;
27948484Smjacob
28048484Smjacob		/* Check if we need to start a new file */
28148484Smjacob		if (pflag) {
28248484Smjacob			regmatch_t pmatch;
28348484Smjacob
28448484Smjacob			pmatch.rm_so = 0;
28548484Smjacob			pmatch.rm_eo = len - 1;
28644819Smjacob			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
28748484Smjacob				newfile();
28893837Smjacob		} else if (lcnt++ == numlines) {
28993837Smjacob			newfile();
29093837Smjacob			lcnt = 1;
29193837Smjacob		}
29260221Smjacob
29393837Smjacobwriteit:
29477776Smjacob		/* Open output file if needed */
29571079Smjacob		if (!file_open)
29671079Smjacob			newfile();
29760221Smjacob
29860221Smjacob		/* Write out line */
29960221Smjacob		if (write(ofd, bfr, len) != len)
30052347Smjacob			err(EX_IOERR, "write");
30148484Smjacob	}
30248484Smjacob
30372355Smjacob	/* EOF or error? */
30444819Smjacob	if (ferror(infp))
30544819Smjacob		err(EX_IOERR, "read");
30635388Smjacob	else
30735388Smjacob		exit(0);
30848195Smjacob}
30982689Smjacob
31090752Smjacob/*
31190752Smjacob * split3 --
31290752Smjacob *	Split the input into specified number of chunks
31335388Smjacob */
31435388Smjacobstatic void
31548484Smjacobsplit3(void)
31648484Smjacob{
31748484Smjacob	struct stat sb;
31848484Smjacob
31948484Smjacob	if (fstat(ifd, &sb) == -1) {
32048484Smjacob		err(1, "stat");
32148484Smjacob		/* NOTREACHED */
32248484Smjacob	}
32335388Smjacob
32448484Smjacob	if (chunks > sb.st_size) {
32548484Smjacob		errx(1, "can't split into more than %d files",
32648484Smjacob		    (int)sb.st_size);
32772355Smjacob		/* NOTREACHED */
32872355Smjacob	}
32972355Smjacob
33077776Smjacob	bytecnt = sb.st_size / chunks;
33177776Smjacob	split1();
33277776Smjacob}
33335388Smjacob
33459454Smjacob
33559454Smjacob/*
33659454Smjacob * newfile --
33759454Smjacob *	Open a new output file.
33859454Smjacob */
33959454Smjacobstatic void
34042131Smjacobnewfile(void)
34135388Smjacob{
34235388Smjacob	long i, maxfiles, tfnum;
34365140Smjacob	static long fnum;
34435388Smjacob	static char *fpnt;
34535388Smjacob	char beg, end;
34635388Smjacob	int pattlen;
34735388Smjacob
34835388Smjacob	if (ofd == -1) {
34935388Smjacob		if (fname[0] == '\0') {
35053487Smjacob			fname[0] = 'x';
35135388Smjacob			fpnt = fname + 1;
35235388Smjacob		} else {
35335388Smjacob			fpnt = fname + strlen(fname);
35435388Smjacob		}
35553487Smjacob		ofd = fileno(stdout);
35653487Smjacob	}
35735388Smjacob
35835388Smjacob	if (dflag) {
35953487Smjacob		beg = '0';
36053487Smjacob		end = '9';
36153487Smjacob	}
36253487Smjacob	else {
36353487Smjacob		beg = 'a';
36453487Smjacob		end = 'z';
36561772Smjacob	}
36635388Smjacob	pattlen = end - beg + 1;
36771079Smjacob
36890224Smjacob	/* maxfiles = pattlen^sufflen, but don't use libm. */
36990224Smjacob	for (maxfiles = 1, i = 0; i < sufflen; i++)
37087635Smjacob		if (LONG_MAX / pattlen < maxfiles)
37187635Smjacob			errx(EX_USAGE, "suffix is too long (max %ld)", i);
37253487Smjacob		else
37353487Smjacob			maxfiles *= pattlen;
37465140Smjacob
37587635Smjacob	if (fnum == maxfiles)
37671079Smjacob		errx(EX_DATAERR, "too many files");
37771079Smjacob
37871079Smjacob	/* Generate suffix of sufflen letters */
37971079Smjacob	tfnum = fnum;
38082689Smjacob	i = sufflen - 1;
38182689Smjacob	do {
38282689Smjacob		fpnt[i] = tfnum % pattlen + beg;
38382689Smjacob		tfnum /= pattlen;
38482689Smjacob	} while (i-- > 0);
38569522Smjacob	fpnt[sufflen] = '\0';
38669522Smjacob
38769522Smjacob	++fnum;
38869522Smjacob	if (!freopen(fname, "w", stdout))
38969522Smjacob		err(EX_IOERR, "%s", fname);
39088855Smjacob	file_open = 1;
39188855Smjacob}
39288855Smjacob
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage(void)
{
	static const char synopsis[] =
"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n"
"       split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
"       split -n chunk_count [-a suffix_length] [file [prefix]]\n"
"       split -p pattern [-a suffix_length] [file [prefix]]\n";

	(void)fputs(synopsis, stderr);
	exit(EX_USAGE);
}
40387635Smjacob