11590Srgrimes/*
21590Srgrimes * Copyright (c) 1987, 1993, 1994
31590Srgrimes *	The Regents of the University of California.  All rights reserved.
41590Srgrimes *
51590Srgrimes * Redistribution and use in source and binary forms, with or without
61590Srgrimes * modification, are permitted provided that the following conditions
71590Srgrimes * are met:
81590Srgrimes * 1. Redistributions of source code must retain the above copyright
91590Srgrimes *    notice, this list of conditions and the following disclaimer.
101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright
111590Srgrimes *    notice, this list of conditions and the following disclaimer in the
121590Srgrimes *    documentation and/or other materials provided with the distribution.
131590Srgrimes * 4. Neither the name of the University nor the names of its contributors
141590Srgrimes *    may be used to endorse or promote products derived from this software
151590Srgrimes *    without specific prior written permission.
161590Srgrimes *
171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
201590Srgrimes * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
271590Srgrimes * SUCH DAMAGE.
281590Srgrimes */
291590Srgrimes
3087765Smarkm#include <sys/cdefs.h>
3187765Smarkm__FBSDID("$FreeBSD: releng/11.0/usr.bin/split/split.c 257884 2013-11-09 08:57:21Z eadler $");
3287765Smarkm
331590Srgrimes#ifndef lint
3428071Scharnierstatic const char copyright[] =
351590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\
361590Srgrimes	The Regents of the University of California.  All rights reserved.\n";
3787765Smarkm#endif
381590Srgrimes
391590Srgrimes#ifndef lint
4087765Smarkmstatic const char sccsid[] = "@(#)split.c	8.2 (Berkeley) 4/16/94";
4128071Scharnier#endif
421590Srgrimes
431590Srgrimes#include <sys/param.h>
44177256Sdas#include <sys/types.h>
45177256Sdas#include <sys/stat.h>
461590Srgrimes
471590Srgrimes#include <ctype.h>
481590Srgrimes#include <err.h>
49100024Stjr#include <errno.h>
501590Srgrimes#include <fcntl.h>
51100024Stjr#include <inttypes.h>
52257884Seadler#include <libutil.h>
53100024Stjr#include <limits.h>
54131978Stjr#include <locale.h>
55250432Seadler#include <stdbool.h>
56100024Stjr#include <stdint.h>
571590Srgrimes#include <stdio.h>
581590Srgrimes#include <stdlib.h>
591590Srgrimes#include <string.h>
601590Srgrimes#include <unistd.h>
6143513Sarchie#include <regex.h>
6243513Sarchie#include <sysexits.h>
631590Srgrimes
641590Srgrimes#define DEFLINE	1000			/* Default num lines per file. */
651590Srgrimes
66227183Sedstatic off_t	 bytecnt;		/* Byte count to split on. */
67227183Sedstatic off_t	 chunks = 0;		/* Chunks count to split into. */
68227183Sedstatic long	 numlines;		/* Line count to split on. */
69227183Sedstatic int	 file_open;		/* If a file open. */
70227183Sedstatic int	 ifd = -1, ofd = -1;	/* Input/output file descriptors. */
71227183Sedstatic char	 bfr[MAXBSIZE];		/* I/O buffer. */
72227183Sedstatic char	 fname[MAXPATHLEN];	/* File name prefix. */
73227183Sedstatic regex_t	 rgx;
74227183Sedstatic int	 pflag;
75250432Seadlerstatic bool	 dflag;
76227183Sedstatic long	 sufflen = 2;		/* File name suffix length. */
771590Srgrimes
78177256Sdasstatic void newfile(void);
79177256Sdasstatic void split1(void);
80177256Sdasstatic void split2(void);
81177256Sdasstatic void split3(void);
8292922Simpstatic void usage(void);
831590Srgrimes
/*
 * main --
 *	Parse the command line, open the input and hand off to the byte
 *	(-b), chunk (-n) or line/pattern splitter.
 *
 *	Bad arguments exit with EX_USAGE; an input file that cannot be
 *	opened exits with EX_NOINPUT.
 */
int
main(int argc, char **argv)
{
	uint64_t nbytes;
	unsigned long nchunks;
	int ch;
	char *ep, *p;

	setlocale(LC_ALL, "");

	dflag = false;
	while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1)
		switch (ch) {
		case '0': case '1': case '2': case '3': case '4':
		case '5': case '6': case '7': case '8': case '9':
			/*
			 * Undocumented kludge: split was originally designed
			 * to take a number after a dash.
			 */
			if (numlines != 0)
				break;
			/*
			 * Locate the digits.  If getopt() has already moved
			 * past the argument, it was a lone "-N"; otherwise
			 * the argument still being scanned holds them.
			 * Digit options take no argument, so optarg must not
			 * be used here, and argv[optind] has to be checked
			 * before it is dereferenced (e.g. "split -d5").
			 */
			p = argv[optind - 1];
			if (p[0] == '-' && p[1] == ch && p[2] == '\0')
				p++;
			else if (argv[optind] != NULL &&
			    argv[optind][0] == '-')
				p = argv[optind] + 1;
			else
				usage();
			numlines = strtol(p, &ep, 10);
			if (numlines <= 0 || *ep != '\0')
				errx(EX_USAGE, "%s: illegal line count", p);
			break;
		case 'a':		/* Suffix length */
			if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
				errx(EX_USAGE,
				    "%s: illegal suffix length", optarg);
			break;
		case 'b':		/* Byte count. */
			/*
			 * expand_number() writes a uint64_t, so parse into
			 * one and range-check before narrowing to off_t.
			 */
			if (expand_number(optarg, &nbytes) == -1) {
				if (errno == ERANGE)
					errx(EX_USAGE,
					    "%s: offset too large", optarg);
				errx(EX_USAGE,
				    "%s: illegal byte count", optarg);
			}
			/*
			 * Zero means "no -b" to the rest of the program and
			 * would silently select line mode; reject it.
			 */
			if (nbytes == 0)
				errx(EX_USAGE,
				    "%s: illegal byte count", optarg);
			if (nbytes > (uint64_t)OFF_MAX)
				errx(EX_USAGE, "%s: offset too large", optarg);
			bytecnt = (off_t)nbytes;
			break;
		case 'd':		/* Decimal suffix */
			dflag = true;
			break;
		case 'l':		/* Line count. */
			if (numlines != 0)
				usage();
			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
				errx(EX_USAGE,
				    "%s: illegal line count", optarg);
			break;
		case 'n':		/* Chunks. */
			/* Reject values that would not survive as an off_t. */
			errno = 0;
			if (!isdigit((unsigned char)optarg[0]) ||
			    (nchunks = strtoul(optarg, &ep, 10)) == 0 ||
			    *ep != '\0' || errno == ERANGE ||
			    (uintmax_t)nchunks > (uintmax_t)OFF_MAX) {
				errx(EX_USAGE, "%s: illegal number of chunks",
				     optarg);
			}
			chunks = (off_t)nchunks;
			break;

		case 'p':		/* pattern matching. */
			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
				errx(EX_USAGE, "%s: illegal regexp", optarg);
			pflag = 1;
			break;
		default:
			usage();
		}
	argv += optind;
	argc -= optind;

	if (*argv != NULL) {			/* Input file. */
		if (strcmp(*argv, "-") == 0)
			ifd = STDIN_FILENO;
		else if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
			err(EX_NOINPUT, "%s", *argv);
		++argv;
	}
	if (*argv != NULL) {			/* File name prefix. */
		if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
			errx(EX_USAGE, "file name prefix is too long");
	}
	if (*argv != NULL)
		usage();

	if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
		errx(EX_USAGE, "suffix is too long");

	/* -p, a line count, -b and -n are mutually exclusive. */
	if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0))
		usage();

	if (numlines == 0)
		numlines = DEFLINE;
	else if (bytecnt != 0 || chunks != 0)
		usage();

	if (bytecnt && chunks)
		usage();

	if (ifd == -1)				/* Stdin by default. */
		ifd = STDIN_FILENO;

	if (bytecnt) {
		split1();
		exit(0);
	} else if (chunks) {
		split3();
		exit(0);
	}
	split2();
	if (pflag)
		regfree(&rgx);
	exit(0);
}
1961590Srgrimes
1971590Srgrimes/*
1981590Srgrimes * split1 --
1991590Srgrimes *	Split the input by bytes.
2001590Srgrimes */
201177256Sdasstatic void
20298253Sjmallettsplit1(void)
2031590Srgrimes{
204100024Stjr	off_t bcnt;
2051590Srgrimes	char *C;
206100024Stjr	ssize_t dist, len;
207177256Sdas	int nfiles;
2081590Srgrimes
209177256Sdas	nfiles = 0;
210177256Sdas
2111590Srgrimes	for (bcnt = 0;;)
21243513Sarchie		switch ((len = read(ifd, bfr, MAXBSIZE))) {
2131590Srgrimes		case 0:
2141590Srgrimes			exit(0);
2151590Srgrimes		case -1:
21643513Sarchie			err(EX_IOERR, "read");
2171590Srgrimes			/* NOTREACHED */
2181590Srgrimes		default:
219177256Sdas			if (!file_open) {
220177256Sdas				if (!chunks || (nfiles < chunks)) {
221177256Sdas					newfile();
222177256Sdas					nfiles++;
223177256Sdas				}
224177256Sdas			}
225100024Stjr			if (bcnt + len >= bytecnt) {
2261590Srgrimes				dist = bytecnt - bcnt;
2271590Srgrimes				if (write(ofd, bfr, dist) != dist)
22843513Sarchie					err(EX_IOERR, "write");
2291590Srgrimes				len -= dist;
2301590Srgrimes				for (C = bfr + dist; len >= bytecnt;
2311590Srgrimes				    len -= bytecnt, C += bytecnt) {
232177256Sdas					if (!chunks || (nfiles < chunks)) {
2331590Srgrimes					newfile();
234177256Sdas						nfiles++;
235177256Sdas					}
2361590Srgrimes					if (write(ofd,
23787765Smarkm					    C, bytecnt) != bytecnt)
23843513Sarchie						err(EX_IOERR, "write");
2391590Srgrimes				}
24043513Sarchie				if (len != 0) {
241177256Sdas					if (!chunks || (nfiles < chunks)) {
2421590Srgrimes					newfile();
243177256Sdas						nfiles++;
244177256Sdas					}
2451590Srgrimes					if (write(ofd, C, len) != len)
24643513Sarchie						err(EX_IOERR, "write");
2471590Srgrimes				} else
2481590Srgrimes					file_open = 0;
2491590Srgrimes				bcnt = len;
2501590Srgrimes			} else {
2511590Srgrimes				bcnt += len;
2521590Srgrimes				if (write(ofd, bfr, len) != len)
25343513Sarchie					err(EX_IOERR, "write");
2541590Srgrimes			}
2551590Srgrimes		}
2561590Srgrimes}
2571590Srgrimes
2581590Srgrimes/*
2591590Srgrimes * split2 --
2601590Srgrimes *	Split the input by lines.
2611590Srgrimes */
262177256Sdasstatic void
26398253Sjmallettsplit2(void)
2641590Srgrimes{
26543513Sarchie	long lcnt = 0;
26643513Sarchie	FILE *infp;
2671590Srgrimes
26843513Sarchie	/* Stick a stream on top of input file descriptor */
26943513Sarchie	if ((infp = fdopen(ifd, "r")) == NULL)
27043513Sarchie		err(EX_NOINPUT, "fdopen");
27143513Sarchie
27243513Sarchie	/* Process input one line at a time */
27343513Sarchie	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
27443513Sarchie		const int len = strlen(bfr);
27543513Sarchie
27643513Sarchie		/* If line is too long to deal with, just write it out */
27743513Sarchie		if (bfr[len - 1] != '\n')
27843513Sarchie			goto writeit;
27943513Sarchie
28043513Sarchie		/* Check if we need to start a new file */
28143513Sarchie		if (pflag) {
28243513Sarchie			regmatch_t pmatch;
28343513Sarchie
28443513Sarchie			pmatch.rm_so = 0;
28543513Sarchie			pmatch.rm_eo = len - 1;
28643513Sarchie			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
2871590Srgrimes				newfile();
28843513Sarchie		} else if (lcnt++ == numlines) {
28943513Sarchie			newfile();
29043513Sarchie			lcnt = 1;
2911590Srgrimes		}
29243513Sarchie
29343513Sarchiewriteit:
29443513Sarchie		/* Open output file if needed */
29543513Sarchie		if (!file_open)
29643513Sarchie			newfile();
29743513Sarchie
29843513Sarchie		/* Write out line */
29943513Sarchie		if (write(ofd, bfr, len) != len)
30043513Sarchie			err(EX_IOERR, "write");
30143513Sarchie	}
30243513Sarchie
30343513Sarchie	/* EOF or error? */
30443513Sarchie	if (ferror(infp))
30543513Sarchie		err(EX_IOERR, "read");
30643513Sarchie	else
30743513Sarchie		exit(0);
3081590Srgrimes}
3091590Srgrimes
3101590Srgrimes/*
311177256Sdas * split3 --
312177256Sdas *	Split the input into specified number of chunks
313177256Sdas */
314177256Sdasstatic void
315177256Sdassplit3(void)
316177256Sdas{
317177256Sdas	struct stat sb;
318177256Sdas
319177256Sdas	if (fstat(ifd, &sb) == -1) {
320177256Sdas		err(1, "stat");
321177256Sdas		/* NOTREACHED */
322177256Sdas	}
323177256Sdas
324177256Sdas	if (chunks > sb.st_size) {
325177256Sdas		errx(1, "can't split into more than %d files",
326177256Sdas		    (int)sb.st_size);
327177256Sdas		/* NOTREACHED */
328177256Sdas	}
329177256Sdas
330177256Sdas	bytecnt = sb.st_size / chunks;
331177256Sdas	split1();
332177256Sdas}
333177256Sdas
334177256Sdas
335177256Sdas/*
3361590Srgrimes * newfile --
3371590Srgrimes *	Open a new output file.
3381590Srgrimes */
339177256Sdasstatic void
34098253Sjmallettnewfile(void)
3411590Srgrimes{
34290048Smike	long i, maxfiles, tfnum;
3431590Srgrimes	static long fnum;
3441590Srgrimes	static char *fpnt;
345250432Seadler	char beg, end;
346250432Seadler	int pattlen;
3471590Srgrimes
3481590Srgrimes	if (ofd == -1) {
3491590Srgrimes		if (fname[0] == '\0') {
3501590Srgrimes			fname[0] = 'x';
3511590Srgrimes			fpnt = fname + 1;
3521590Srgrimes		} else {
3531590Srgrimes			fpnt = fname + strlen(fname);
3541590Srgrimes		}
3551590Srgrimes		ofd = fileno(stdout);
3561590Srgrimes	}
35790048Smike
358250432Seadler	if (dflag) {
359250432Seadler		beg = '0';
360250432Seadler		end = '9';
361250432Seadler	}
362250432Seadler	else {
363250432Seadler		beg = 'a';
364250432Seadler		end = 'z';
365250432Seadler	}
366250432Seadler	pattlen = end - beg + 1;
367250432Seadler
368250432Seadler	/* maxfiles = pattlen^sufflen, but don't use libm. */
36990048Smike	for (maxfiles = 1, i = 0; i < sufflen; i++)
370250882Seadler		if (LONG_MAX / pattlen < maxfiles)
37190048Smike			errx(EX_USAGE, "suffix is too long (max %ld)", i);
372250882Seadler		else
373250882Seadler			maxfiles *= pattlen;
37490048Smike
375149345Stjr	if (fnum == maxfiles)
376149345Stjr		errx(EX_DATAERR, "too many files");
37790048Smike
37890048Smike	/* Generate suffix of sufflen letters */
37990048Smike	tfnum = fnum;
38090048Smike	i = sufflen - 1;
38190048Smike	do {
382250432Seadler		fpnt[i] = tfnum % pattlen + beg;
383250432Seadler		tfnum /= pattlen;
38490048Smike	} while (i-- > 0);
38590048Smike	fpnt[sufflen] = '\0';
38690048Smike
3871590Srgrimes	++fnum;
3881590Srgrimes	if (!freopen(fname, "w", stdout))
38943513Sarchie		err(EX_IOERR, "%s", fname);
39043513Sarchie	file_open = 1;
3911590Srgrimes}
3921590Srgrimes
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 *	Lists -d, which main() accepts in every mode.
 */
static void
usage(void)
{
	(void)fprintf(stderr,
"usage: split [-d] [-l line_count] [-a suffix_length] [file [prefix]]\n"
"       split [-d] -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n"
"       split [-d] -n chunk_count [-a suffix_length] [file [prefix]]\n"
"       split [-d] -p pattern [-a suffix_length] [file [prefix]]\n");
	exit(EX_USAGE);
}
403