1/*	$OpenBSD: split.c,v 1.23 2021/11/28 19:28:42 deraadt Exp $	*/
2/*	$NetBSD: split.c,v 1.5 1995/08/31 22:22:05 jtc Exp $	*/
3
4/*
5 * Copyright (c) 1987, 1993, 1994
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the University nor the names of its contributors
17 *    may be used to endorse or promote products derived from this software
18 *    without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/types.h>
34
35#include <ctype.h>
36#include <err.h>
37#include <fcntl.h>
38#include <limits.h>
39#include <stdio.h>
40#include <stdlib.h>
41#include <string.h>
42#include <unistd.h>
43#include <regex.h>
44
#define _MAXBSIZE (64 * 1024)		/* Size of the I/O buffer, in bytes. */

#define DEFLINE	1000			/* Default num lines per file. */

ssize_t	 bytecnt;			/* Byte count to split on. */
long	 numlines;			/* Line count to split on. */
int	 file_open;			/* If a file open. */
int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
char	 bfr[_MAXBSIZE];		/* I/O buffer. */
char	 fname[PATH_MAX];		/* File name prefix. */
regex_t	 rgx;				/* Compiled -p pattern. */
int	 pflag;				/* Set when -p was given. */
int	 sufflen = 2;			/* File name suffix length. */

void newfile(void);			/* Open the next output file. */
void split1(void);			/* Split the input by bytes. */
void split2(void);			/* Split by lines or by pattern. */
__dead void usage(void);		/* Print the synopsis and exit. */
63
64int
65main(int argc, char *argv[])
66{
67	int ch, scale;
68	char *ep, *p;
69	const char *errstr;
70
71	if (pledge("stdio rpath wpath cpath", NULL) == -1)
72		err(1, "pledge");
73
74	while ((ch = getopt(argc, argv, "0123456789a:b:l:p:-")) != -1)
75		switch (ch) {
76		case '0': case '1': case '2': case '3': case '4':
77		case '5': case '6': case '7': case '8': case '9':
78			/*
79			 * Undocumented kludge: split was originally designed
80			 * to take a number after a dash.
81			 */
82			if (numlines == 0) {
83				p = argv[optind - 1];
84				if (p[0] == '-' && p[1] == ch && !p[2])
85					numlines = strtol(++p, &ep, 10);
86				else
87					numlines =
88					    strtol(argv[optind] + 1, &ep, 10);
89				if (numlines <= 0 || *ep)
90					errx(1, "%s: illegal line count",
91					    optarg);
92			}
93			break;
94		case '-':		/* Undocumented: historic stdin flag. */
95			if (ifd != -1)
96				usage();
97			ifd = 0;
98			break;
99		case 'a':		/* suffix length. */
100			sufflen = strtonum(optarg, 1, NAME_MAX, &errstr);
101			if (errstr)
102				errx(1, "%s: %s", optarg, errstr);
103			break;
104		case 'b':		/* Byte count. */
105			if ((bytecnt = strtol(optarg, &ep, 10)) <= 0 ||
106			    (*ep != '\0' && *ep != 'k' && *ep != 'm'))
107				errx(1, "%s: illegal byte count", optarg);
108			if (*ep == 'k')
109				scale = 1024;
110			else if (*ep == 'm')
111				scale = 1048576;
112			else
113				scale = 1;
114			if (bytecnt > SSIZE_MAX / scale)
115				errx(1, "%s: byte count too large", optarg);
116			bytecnt *= scale;
117			break;
118		case 'p' :      /* pattern matching. */
119			if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
120				errx(1, "%s: illegal regexp", optarg);
121			pflag = 1;
122			break;
123		case 'l':		/* Line count. */
124			if (numlines != 0)
125				usage();
126			if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
127				errx(1, "%s: illegal line count", optarg);
128			break;
129		default:
130			usage();
131		}
132	argv += optind;
133	argc -= optind;
134
135	if (*argv != NULL)
136		if (ifd == -1) {		/* Input file. */
137			if ((ifd = open(*argv, O_RDONLY)) < 0)
138				err(1, "%s", *argv);
139			++argv;
140		}
141	if (*argv != NULL)			/* File name prefix. */
142		(void)strlcpy(fname, *argv++, sizeof(fname));
143	if (*argv != NULL)
144		usage();
145
146	if (strlen(fname) + sufflen >= sizeof(fname))
147		errx(1, "suffix is too long");
148	if (pflag && (numlines != 0 || bytecnt != 0))
149		usage();
150
151	if (numlines == 0)
152		numlines = DEFLINE;
153	else if (bytecnt != 0)
154		usage();
155
156	if (ifd == -1)				/* Stdin by default. */
157		ifd = 0;
158
159	if (bytecnt) {
160		split1();
161		exit (0);
162	}
163	split2();
164	if (pflag)
165		regfree(&rgx);
166	exit(0);
167}
168
169/*
170 * split1 --
171 *	Split the input by bytes.
172 */
173void
174split1(void)
175{
176	ssize_t bcnt, dist, len;
177	char *C;
178
179	for (bcnt = 0;;)
180		switch ((len = read(ifd, bfr, sizeof(bfr)))) {
181		case 0:
182			exit(0);
183		case -1:
184			err(1, "read");
185			/* NOTREACHED */
186		default:
187			if (!file_open)
188				newfile();
189			if (bcnt + len >= bytecnt) {
190				dist = bytecnt - bcnt;
191				if (write(ofd, bfr, dist) != dist)
192					err(1, "write");
193				len -= dist;
194				for (C = bfr + dist; len >= bytecnt;
195				    len -= bytecnt, C += bytecnt) {
196					newfile();
197					if (write(ofd, C, bytecnt) != bytecnt)
198						err(1, "write");
199				}
200				if (len != 0) {
201					newfile();
202					if (write(ofd, C, len) != len)
203						err(1, "write");
204				} else
205					file_open = 0;
206				bcnt = len;
207			} else {
208				bcnt += len;
209				if (write(ofd, bfr, len) != len)
210					err(1, "write");
211			}
212		}
213}
214
215/*
216 * split2 --
217 *	Split the input by lines.
218 */
219void
220split2(void)
221{
222	long lcnt = 0;
223	FILE *infp;
224
225	/* Stick a stream on top of input file descriptor */
226	if ((infp = fdopen(ifd, "r")) == NULL)
227		err(1, "fdopen");
228
229	/* Process input one line at a time */
230	while (fgets(bfr, sizeof(bfr), infp) != NULL) {
231		const int len = strlen(bfr);
232
233		if (len == 0)
234			continue;
235
236		/* If line is too long to deal with, just write it out */
237		if (bfr[len - 1] != '\n')
238			goto writeit;
239
240		/* Check if we need to start a new file */
241		if (pflag) {
242			regmatch_t pmatch;
243
244			pmatch.rm_so = 0;
245			pmatch.rm_eo = len - 1;
246			if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
247				newfile();
248		} else if (lcnt++ == numlines) {
249			newfile();
250			lcnt = 1;
251		}
252
253writeit:
254		/* Open output file if needed */
255		if (!file_open)
256			newfile();
257
258		/* Write out line */
259		if (write(ofd, bfr, len) != len)
260			err(1, "write");
261	}
262
263	/* EOF or error? */
264	if (ferror(infp))
265		err(1, "read");
266	else
267		exit(0);
268}
269
270/*
271 * newfile --
272 *	Open a new output file.
273 */
274void
275newfile(void)
276{
277	static char *suffix, *sufftail;
278	char *sptr;
279
280	if (ofd == -1) {
281		ofd = fileno(stdout);
282		if (*fname == '\0') {
283			*fname = 'x';	/* no name specified, use 'x' */
284			memset(fname + 1, 'a', sufflen);
285			suffix = fname;
286			sufflen++;	/* treat 'x' as part of suffix */
287		} else {
288			suffix = fname + strlen(fname);
289			memset(suffix, 'a', sufflen);
290		}
291		suffix[sufflen] = '\0';
292		sufftail = suffix + sufflen - 1;
293	} else {
294		for (sptr = sufftail; sptr >= suffix; sptr--) {
295			if (*sptr != 'z') {
296				(*sptr)++;
297				break;
298			} else
299				*sptr = 'a';
300		}
301		if (sptr < suffix)
302			errx(1, "too many files");
303	}
304
305	if (!freopen(fname, "w", stdout))
306		err(1, "%s", fname);
307	file_open = 1;
308}
309
310__dead void
311usage(void)
312{
313	extern char *__progname;
314
315	(void)fprintf(stderr, "usage: %s [-a suffix_length]\n"
316	    "             [-b byte_count[k|m] | -l line_count | -p pattern] "
317	    "[file [name]]\n", __progname);
318	exit(1);
319}
320