Deleted Added
full compact
split.c (92922) split.c (97332)
1/*
2 * Copyright (c) 1987, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
1/*
2 * Copyright (c) 1987, 1993, 1994
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD: head/usr.bin/split/split.c 92922 2002-03-22 01:42:45Z imp $");
35__FBSDID("$FreeBSD: head/usr.bin/split/split.c 97332 2002-05-27 04:59:46Z tjr $");
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1987, 1993, 1994\n\
40 The Regents of the University of California. All rights reserved.\n";
41#endif
42
43#ifndef lint
44static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
45#endif
46
47#include <sys/param.h>
48#include <sys/types.h>
49
50#include <ctype.h>
51#include <err.h>
52#include <fcntl.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <unistd.h>
57#include <regex.h>
58#include <sysexits.h>
59
60#define DEFLINE 1000 /* Default num lines per file. */
61
62int bytecnt; /* Byte count to split on (-b); nonzero selects split1(). */
63long numlines; /* Line count to split on (-l or -N); 0 until set. */
64int file_open; /* Set by newfile(); cleared when the next write needs a new file. */
65int ifd = -1, ofd = -1; /* Input/output file descriptors; -1 until assigned. */
66char bfr[MAXBSIZE]; /* I/O buffer. */
67char fname[MAXPATHLEN]; /* File name prefix; newfile() appends the suffix in place. */
68regex_t rgx; /* Pattern compiled from the -p argument. */
69int pflag; /* Set to 1 when -p was given. */
70long sufflen = 2; /* File name suffix length (-a). */
71
72void newfile(void);
73void split1(void);
74void split2(void);
75static void usage(void);
76
77int
78main(argc, argv)
79 int argc;
80 char *argv[];
81{
82 int ch;
83 char *ep, *p;
84
85 while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
86 switch (ch) {
87 case '0': case '1': case '2': case '3': case '4':
88 case '5': case '6': case '7': case '8': case '9':
89 /*
90 * Undocumented kludge: split was originally designed
91 * to take a number after a dash.
92 */
93 if (numlines == 0) {
94 p = argv[optind - 1];
95 if (p[0] == '-' && p[1] == ch && !p[2])
96 numlines = strtol(++p, &ep, 10);
97 else
98 numlines =
99 strtol(argv[optind] + 1, &ep, 10);
100 if (numlines <= 0 || *ep)
101 errx(EX_USAGE,
102 "%s: illegal line count", optarg);
103 }
104 break;
105 case '-': /* Undocumented: historic stdin flag. */
106 if (ifd != -1)
107 usage();
108 ifd = 0;
109 break;
110 case 'a': /* Suffix length */
111 if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
112 errx(EX_USAGE,
113 "%s: illegal suffix length", optarg);
114 break;
115 case 'b': /* Byte count. */
116 if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
117 (*ep != '\0' && *ep != 'k' && *ep != 'm'))
118 errx(EX_USAGE,
119 "%s: illegal byte count", optarg);
120 if (*ep == 'k')
121 bytecnt *= 1024;
122 else if (*ep == 'm')
123 bytecnt *= 1048576;
124 break;
125 case 'p' : /* pattern matching. */
126 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
127 errx(EX_USAGE, "%s: illegal regexp", optarg);
128 pflag = 1;
129 break;
130 case 'l': /* Line count. */
131 if (numlines != 0)
132 usage();
133 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
134 errx(EX_USAGE,
135 "%s: illegal line count", optarg);
136 break;
137 default:
138 usage();
139 }
140 argv += optind;
141 argc -= optind;
142
143 if (*argv != NULL)
144 if (ifd == -1) { /* Input file. */
145 if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
146 err(EX_NOINPUT, "%s", *argv);
147 ++argv;
148 }
149 if (*argv != NULL) /* File name prefix. */
36
37#ifndef lint
38static const char copyright[] =
39"@(#) Copyright (c) 1987, 1993, 1994\n\
40 The Regents of the University of California. All rights reserved.\n";
41#endif
42
43#ifndef lint
44static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
45#endif
46
47#include <sys/param.h>
48#include <sys/types.h>
49
50#include <ctype.h>
51#include <err.h>
52#include <fcntl.h>
53#include <stdio.h>
54#include <stdlib.h>
55#include <string.h>
56#include <unistd.h>
57#include <regex.h>
58#include <sysexits.h>
59
60#define DEFLINE 1000 /* Default num lines per file. */
61
62int bytecnt; /* Byte count to split on (-b); nonzero selects split1(). */
63long numlines; /* Line count to split on (-l or -N); 0 until set. */
64int file_open; /* Set by newfile(); cleared when the next write needs a new file. */
65int ifd = -1, ofd = -1; /* Input/output file descriptors; -1 until assigned. */
66char bfr[MAXBSIZE]; /* I/O buffer. */
67char fname[MAXPATHLEN]; /* File name prefix; newfile() appends the suffix in place. */
68regex_t rgx; /* Pattern compiled from the -p argument. */
69int pflag; /* Set to 1 when -p was given. */
70long sufflen = 2; /* File name suffix length (-a). */
71
72void newfile(void);
73void split1(void);
74void split2(void);
75static void usage(void);
76
77int
78main(argc, argv)
79 int argc;
80 char *argv[];
81{
82 int ch;
83 char *ep, *p;
84
85 while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1)
86 switch (ch) {
87 case '0': case '1': case '2': case '3': case '4':
88 case '5': case '6': case '7': case '8': case '9':
89 /*
90 * Undocumented kludge: split was originally designed
91 * to take a number after a dash.
92 */
93 if (numlines == 0) {
94 p = argv[optind - 1];
95 if (p[0] == '-' && p[1] == ch && !p[2])
96 numlines = strtol(++p, &ep, 10);
97 else
98 numlines =
99 strtol(argv[optind] + 1, &ep, 10);
100 if (numlines <= 0 || *ep)
101 errx(EX_USAGE,
102 "%s: illegal line count", optarg);
103 }
104 break;
105 case '-': /* Undocumented: historic stdin flag. */
106 if (ifd != -1)
107 usage();
108 ifd = 0;
109 break;
110 case 'a': /* Suffix length */
111 if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep)
112 errx(EX_USAGE,
113 "%s: illegal suffix length", optarg);
114 break;
115 case 'b': /* Byte count. */
116 if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 ||
117 (*ep != '\0' && *ep != 'k' && *ep != 'm'))
118 errx(EX_USAGE,
119 "%s: illegal byte count", optarg);
120 if (*ep == 'k')
121 bytecnt *= 1024;
122 else if (*ep == 'm')
123 bytecnt *= 1048576;
124 break;
125 case 'p' : /* pattern matching. */
126 if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0)
127 errx(EX_USAGE, "%s: illegal regexp", optarg);
128 pflag = 1;
129 break;
130 case 'l': /* Line count. */
131 if (numlines != 0)
132 usage();
133 if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep)
134 errx(EX_USAGE,
135 "%s: illegal line count", optarg);
136 break;
137 default:
138 usage();
139 }
140 argv += optind;
141 argc -= optind;
142
143 if (*argv != NULL)
144 if (ifd == -1) { /* Input file. */
145 if ((ifd = open(*argv, O_RDONLY, 0)) < 0)
146 err(EX_NOINPUT, "%s", *argv);
147 ++argv;
148 }
149 if (*argv != NULL) /* File name prefix. */
150 (void)strcpy(fname, *argv++);
150 if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname))
151 errx(EX_USAGE, "file name prefix is too long");
151 if (*argv != NULL)
152 usage();
153
154 if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
155 errx(EX_USAGE, "suffix is too long");
156 if (pflag && (numlines != 0 || bytecnt != 0))
157 usage();
158
159 if (numlines == 0)
160 numlines = DEFLINE;
161 else if (bytecnt != 0)
162 usage();
163
164 if (ifd == -1) /* Stdin by default. */
165 ifd = 0;
166
167 if (bytecnt) {
168 split1();
169 exit (0);
170 }
171 split2();
172 if (pflag)
173 regfree(&rgx);
174 exit(0);
175}
176
177/*
178 * split1 --
179 * Split the input by bytes.
180 */
181void
182split1()
183{
184 size_t bcnt;
185 char *C;
186 int dist, len;
187
188 for (bcnt = 0;;)
189 switch ((len = read(ifd, bfr, MAXBSIZE))) {
190 case 0:
191 exit(0);
192 case -1:
193 err(EX_IOERR, "read");
194 /* NOTREACHED */
195 default:
196 if (!file_open)
197 newfile();
198 if (bcnt + len >= (u_int)bytecnt) {
199 dist = bytecnt - bcnt;
200 if (write(ofd, bfr, dist) != dist)
201 err(EX_IOERR, "write");
202 len -= dist;
203 for (C = bfr + dist; len >= bytecnt;
204 len -= bytecnt, C += bytecnt) {
205 newfile();
206 if (write(ofd,
207 C, bytecnt) != bytecnt)
208 err(EX_IOERR, "write");
209 }
210 if (len != 0) {
211 newfile();
212 if (write(ofd, C, len) != len)
213 err(EX_IOERR, "write");
214 } else
215 file_open = 0;
216 bcnt = len;
217 } else {
218 bcnt += len;
219 if (write(ofd, bfr, len) != len)
220 err(EX_IOERR, "write");
221 }
222 }
223}
224
225/*
226 * split2 --
227 * Split the input by lines.
228 */
229void
230split2()
231{
232 long lcnt = 0;
233 FILE *infp;
234
235 /* Stick a stream on top of input file descriptor */
236 if ((infp = fdopen(ifd, "r")) == NULL)
237 err(EX_NOINPUT, "fdopen");
238
239 /* Process input one line at a time */
240 while (fgets(bfr, sizeof(bfr), infp) != NULL) {
241 const int len = strlen(bfr);
242
243 /* If line is too long to deal with, just write it out */
244 if (bfr[len - 1] != '\n')
245 goto writeit;
246
247 /* Check if we need to start a new file */
248 if (pflag) {
249 regmatch_t pmatch;
250
251 pmatch.rm_so = 0;
252 pmatch.rm_eo = len - 1;
253 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
254 newfile();
255 } else if (lcnt++ == numlines) {
256 newfile();
257 lcnt = 1;
258 }
259
260writeit:
261 /* Open output file if needed */
262 if (!file_open)
263 newfile();
264
265 /* Write out line */
266 if (write(ofd, bfr, len) != len)
267 err(EX_IOERR, "write");
268 }
269
270 /* EOF or error? */
271 if (ferror(infp))
272 err(EX_IOERR, "read");
273 else
274 exit(0);
275}
276
277/*
278 * newfile --
279 * Open a new output file.
280 */
281void
282newfile()
283{
284 long i, maxfiles, tfnum;
285 static long fnum;
286 static int defname;
287 static char *fpnt;
288
289 if (ofd == -1) {
290 if (fname[0] == '\0') {
291 fname[0] = 'x';
292 fpnt = fname + 1;
293 defname = 1;
294 } else {
295 fpnt = fname + strlen(fname);
296 defname = 0;
297 }
298 ofd = fileno(stdout);
299 }
300
301 /* maxfiles = 26^sufflen, but don't use libm. */
302 for (maxfiles = 1, i = 0; i < sufflen; i++)
303 if ((maxfiles *= 26) <= 0)
304 errx(EX_USAGE, "suffix is too long (max %ld)", i);
305
306 /*
307 * Hack to increase max files; original code wandered through
308 * magic characters.
309 */
310 if (fnum == maxfiles) {
311 if (!defname || fname[0] == 'z')
312 errx(EX_DATAERR, "too many files");
313 ++fname[0];
314 fnum = 0;
315 }
316
317 /* Generate suffix of sufflen letters */
318 tfnum = fnum;
319 i = sufflen - 1;
320 do {
321 fpnt[i] = tfnum % 26 + 'a';
322 tfnum /= 26;
323 } while (i-- > 0);
324 fpnt[sufflen] = '\0';
325
326 ++fnum;
327 if (!freopen(fname, "w", stdout))
328 err(EX_IOERR, "%s", fname);
329 file_open = 1;
330}
331
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage()
{
	static const char *const synopsis[] = {
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n",
" [file [prefix]]\n",
	};
	size_t n;

	for (n = 0; n < sizeof(synopsis) / sizeof(synopsis[0]); n++)
		(void)fputs(synopsis[n], stderr);
	exit(EX_USAGE);
}
152 if (*argv != NULL)
153 usage();
154
155 if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname))
156 errx(EX_USAGE, "suffix is too long");
157 if (pflag && (numlines != 0 || bytecnt != 0))
158 usage();
159
160 if (numlines == 0)
161 numlines = DEFLINE;
162 else if (bytecnt != 0)
163 usage();
164
165 if (ifd == -1) /* Stdin by default. */
166 ifd = 0;
167
168 if (bytecnt) {
169 split1();
170 exit (0);
171 }
172 split2();
173 if (pflag)
174 regfree(&rgx);
175 exit(0);
176}
177
178/*
179 * split1 --
180 * Split the input by bytes.
181 */
182void
183split1()
184{
185 size_t bcnt;
186 char *C;
187 int dist, len;
188
189 for (bcnt = 0;;)
190 switch ((len = read(ifd, bfr, MAXBSIZE))) {
191 case 0:
192 exit(0);
193 case -1:
194 err(EX_IOERR, "read");
195 /* NOTREACHED */
196 default:
197 if (!file_open)
198 newfile();
199 if (bcnt + len >= (u_int)bytecnt) {
200 dist = bytecnt - bcnt;
201 if (write(ofd, bfr, dist) != dist)
202 err(EX_IOERR, "write");
203 len -= dist;
204 for (C = bfr + dist; len >= bytecnt;
205 len -= bytecnt, C += bytecnt) {
206 newfile();
207 if (write(ofd,
208 C, bytecnt) != bytecnt)
209 err(EX_IOERR, "write");
210 }
211 if (len != 0) {
212 newfile();
213 if (write(ofd, C, len) != len)
214 err(EX_IOERR, "write");
215 } else
216 file_open = 0;
217 bcnt = len;
218 } else {
219 bcnt += len;
220 if (write(ofd, bfr, len) != len)
221 err(EX_IOERR, "write");
222 }
223 }
224}
225
226/*
227 * split2 --
228 * Split the input by lines.
229 */
230void
231split2()
232{
233 long lcnt = 0;
234 FILE *infp;
235
236 /* Stick a stream on top of input file descriptor */
237 if ((infp = fdopen(ifd, "r")) == NULL)
238 err(EX_NOINPUT, "fdopen");
239
240 /* Process input one line at a time */
241 while (fgets(bfr, sizeof(bfr), infp) != NULL) {
242 const int len = strlen(bfr);
243
244 /* If line is too long to deal with, just write it out */
245 if (bfr[len - 1] != '\n')
246 goto writeit;
247
248 /* Check if we need to start a new file */
249 if (pflag) {
250 regmatch_t pmatch;
251
252 pmatch.rm_so = 0;
253 pmatch.rm_eo = len - 1;
254 if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0)
255 newfile();
256 } else if (lcnt++ == numlines) {
257 newfile();
258 lcnt = 1;
259 }
260
261writeit:
262 /* Open output file if needed */
263 if (!file_open)
264 newfile();
265
266 /* Write out line */
267 if (write(ofd, bfr, len) != len)
268 err(EX_IOERR, "write");
269 }
270
271 /* EOF or error? */
272 if (ferror(infp))
273 err(EX_IOERR, "read");
274 else
275 exit(0);
276}
277
278/*
279 * newfile --
280 * Open a new output file.
281 */
282void
283newfile()
284{
285 long i, maxfiles, tfnum;
286 static long fnum;
287 static int defname;
288 static char *fpnt;
289
290 if (ofd == -1) {
291 if (fname[0] == '\0') {
292 fname[0] = 'x';
293 fpnt = fname + 1;
294 defname = 1;
295 } else {
296 fpnt = fname + strlen(fname);
297 defname = 0;
298 }
299 ofd = fileno(stdout);
300 }
301
302 /* maxfiles = 26^sufflen, but don't use libm. */
303 for (maxfiles = 1, i = 0; i < sufflen; i++)
304 if ((maxfiles *= 26) <= 0)
305 errx(EX_USAGE, "suffix is too long (max %ld)", i);
306
307 /*
308 * Hack to increase max files; original code wandered through
309 * magic characters.
310 */
311 if (fnum == maxfiles) {
312 if (!defname || fname[0] == 'z')
313 errx(EX_DATAERR, "too many files");
314 ++fname[0];
315 fnum = 0;
316 }
317
318 /* Generate suffix of sufflen letters */
319 tfnum = fnum;
320 i = sufflen - 1;
321 do {
322 fpnt[i] = tfnum % 26 + 'a';
323 tfnum /= 26;
324 } while (i-- > 0);
325 fpnt[sufflen] = '\0';
326
327 ++fnum;
328 if (!freopen(fname, "w", stdout))
329 err(EX_IOERR, "%s", fname);
330 file_open = 1;
331}
332
/*
 * usage --
 *	Print the command synopsis on stderr and exit with EX_USAGE.
 */
static void
usage()
{
	static const char *const synopsis[] = {
"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n",
" [file [prefix]]\n",
	};
	size_t n;

	for (n = 0; n < sizeof(synopsis) / sizeof(synopsis[0]); n++)
		(void)fputs(synopsis[n], stderr);
	exit(EX_USAGE);
}