11590Srgrimes/* 21590Srgrimes * Copyright (c) 1987, 1993, 1994 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 4. Neither the name of the University nor the names of its contributors 141590Srgrimes * may be used to endorse or promote products derived from this software 151590Srgrimes * without specific prior written permission. 161590Srgrimes * 171590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 181590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 191590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 201590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 211590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 221590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 231590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 241590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 251590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 261590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 271590Srgrimes * SUCH DAMAGE. 281590Srgrimes */ 291590Srgrimes 3087765Smarkm#include <sys/cdefs.h> 3187765Smarkm__FBSDID("$FreeBSD: releng/11.0/usr.bin/split/split.c 257884 2013-11-09 08:57:21Z eadler $"); 3287765Smarkm 331590Srgrimes#ifndef lint 3428071Scharnierstatic const char copyright[] = 351590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\ 361590Srgrimes The Regents of the University of California. All rights reserved.\n"; 3787765Smarkm#endif 381590Srgrimes 391590Srgrimes#ifndef lint 4087765Smarkmstatic const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 4128071Scharnier#endif 421590Srgrimes 431590Srgrimes#include <sys/param.h> 44177256Sdas#include <sys/types.h> 45177256Sdas#include <sys/stat.h> 461590Srgrimes 471590Srgrimes#include <ctype.h> 481590Srgrimes#include <err.h> 49100024Stjr#include <errno.h> 501590Srgrimes#include <fcntl.h> 51100024Stjr#include <inttypes.h> 52257884Seadler#include <libutil.h> 53100024Stjr#include <limits.h> 54131978Stjr#include <locale.h> 55250432Seadler#include <stdbool.h> 56100024Stjr#include <stdint.h> 571590Srgrimes#include <stdio.h> 581590Srgrimes#include <stdlib.h> 591590Srgrimes#include <string.h> 601590Srgrimes#include <unistd.h> 6143513Sarchie#include <regex.h> 6243513Sarchie#include <sysexits.h> 631590Srgrimes 641590Srgrimes#define DEFLINE 1000 /* Default num lines per file. */ 651590Srgrimes 66227183Sedstatic off_t bytecnt; /* Byte count to split on. */ 67227183Sedstatic off_t chunks = 0; /* Chunks count to split into. */ 68227183Sedstatic long numlines; /* Line count to split on. */ 69227183Sedstatic int file_open; /* If a file open. */ 70227183Sedstatic int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 71227183Sedstatic char bfr[MAXBSIZE]; /* I/O buffer. */ 72227183Sedstatic char fname[MAXPATHLEN]; /* File name prefix. */ 73227183Sedstatic regex_t rgx; 74227183Sedstatic int pflag; 75250432Seadlerstatic bool dflag; 76227183Sedstatic long sufflen = 2; /* File name suffix length. */ 771590Srgrimes 78177256Sdasstatic void newfile(void); 79177256Sdasstatic void split1(void); 80177256Sdasstatic void split2(void); 81177256Sdasstatic void split3(void); 8292922Simpstatic void usage(void); 831590Srgrimes 841590Srgrimesint 8598253Sjmallettmain(int argc, char **argv) 861590Srgrimes{ 871590Srgrimes int ch; 88257884Seadler int error; 891590Srgrimes char *ep, *p; 901590Srgrimes 91131978Stjr setlocale(LC_ALL, ""); 92131978Stjr 93250432Seadler dflag = false; 94250432Seadler while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1) 951590Srgrimes switch (ch) { 961590Srgrimes case '0': case '1': case '2': case '3': case '4': 971590Srgrimes case '5': case '6': case '7': case '8': case '9': 981590Srgrimes /* 991590Srgrimes * Undocumented kludge: split was originally designed 1001590Srgrimes * to take a number after a dash. 1011590Srgrimes */ 1021590Srgrimes if (numlines == 0) { 1031590Srgrimes p = argv[optind - 1]; 1041590Srgrimes if (p[0] == '-' && p[1] == ch && !p[2]) 1051590Srgrimes numlines = strtol(++p, &ep, 10); 1061590Srgrimes else 1071590Srgrimes numlines = 1081590Srgrimes strtol(argv[optind] + 1, &ep, 10); 1091590Srgrimes if (numlines <= 0 || *ep) 11043513Sarchie errx(EX_USAGE, 11143513Sarchie "%s: illegal line count", optarg); 1121590Srgrimes } 1131590Srgrimes break; 11490048Smike case 'a': /* Suffix length */ 11590048Smike if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 11690048Smike errx(EX_USAGE, 11790048Smike "%s: illegal suffix length", optarg); 11890048Smike break; 1191590Srgrimes case 'b': /* Byte count. */ 120100024Stjr errno = 0; 121257884Seadler error = expand_number(optarg, &bytecnt); 122257884Seadler if (error == -1) 123100024Stjr errx(EX_USAGE, "%s: offset too large", optarg); 1241590Srgrimes break; 125250432Seadler case 'd': /* Decimal suffix */ 126250432Seadler dflag = true; 127250432Seadler break; 1281590Srgrimes case 'l': /* Line count. */ 1291590Srgrimes if (numlines != 0) 1301590Srgrimes usage(); 1319427Srgrimes if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 13243513Sarchie errx(EX_USAGE, 13343513Sarchie "%s: illegal line count", optarg); 1341590Srgrimes break; 135177256Sdas case 'n': /* Chunks. */ 136177256Sdas if (!isdigit((unsigned char)optarg[0]) || 137177256Sdas (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 138177256Sdas *ep != '\0') { 139177256Sdas errx(EX_USAGE, "%s: illegal number of chunks", 140177256Sdas optarg); 141177256Sdas } 142177256Sdas break; 143177256Sdas 144161106Skeramida case 'p': /* pattern matching. */ 145161106Skeramida if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 146161106Skeramida errx(EX_USAGE, "%s: illegal regexp", optarg); 147161106Skeramida pflag = 1; 148161106Skeramida break; 1491590Srgrimes default: 1501590Srgrimes usage(); 1511590Srgrimes } 1521590Srgrimes argv += optind; 1531590Srgrimes argc -= optind; 1541590Srgrimes 155149616Stjr if (*argv != NULL) { /* Input file. */ 156149616Stjr if (strcmp(*argv, "-") == 0) 157149616Stjr ifd = STDIN_FILENO; 158149616Stjr else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 159149616Stjr err(EX_NOINPUT, "%s", *argv); 160149616Stjr ++argv; 161149616Stjr } 1621590Srgrimes if (*argv != NULL) /* File name prefix. */ 16397332Stjr if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 16497332Stjr errx(EX_USAGE, "file name prefix is too long"); 1651590Srgrimes if (*argv != NULL) 1661590Srgrimes usage(); 1671590Srgrimes 16890048Smike if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 16990048Smike errx(EX_USAGE, "suffix is too long"); 170177256Sdas if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) 17143513Sarchie usage(); 17243513Sarchie 1731590Srgrimes if (numlines == 0) 1741590Srgrimes numlines = DEFLINE; 175177256Sdas else if (bytecnt != 0 || chunks != 0) 1761590Srgrimes usage(); 1771590Srgrimes 178177256Sdas if (bytecnt && chunks) 179177256Sdas usage(); 180177256Sdas 1811590Srgrimes if (ifd == -1) /* Stdin by default. */ 1821590Srgrimes ifd = 0; 1831590Srgrimes 1841590Srgrimes if (bytecnt) { 1851590Srgrimes split1(); 1861590Srgrimes exit (0); 187177256Sdas } else if (chunks) { 188177256Sdas split3(); 189177256Sdas exit (0); 1901590Srgrimes } 1911590Srgrimes split2(); 19243513Sarchie if (pflag) 19343513Sarchie regfree(&rgx); 1941590Srgrimes exit(0); 1951590Srgrimes} 1961590Srgrimes 1971590Srgrimes/* 1981590Srgrimes * split1 -- 1991590Srgrimes * Split the input by bytes. 2001590Srgrimes */ 201177256Sdasstatic void 20298253Sjmallettsplit1(void) 2031590Srgrimes{ 204100024Stjr off_t bcnt; 2051590Srgrimes char *C; 206100024Stjr ssize_t dist, len; 207177256Sdas int nfiles; 2081590Srgrimes 209177256Sdas nfiles = 0; 210177256Sdas 2111590Srgrimes for (bcnt = 0;;) 21243513Sarchie switch ((len = read(ifd, bfr, MAXBSIZE))) { 2131590Srgrimes case 0: 2141590Srgrimes exit(0); 2151590Srgrimes case -1: 21643513Sarchie err(EX_IOERR, "read"); 2171590Srgrimes /* NOTREACHED */ 2181590Srgrimes default: 219177256Sdas if (!file_open) { 220177256Sdas if (!chunks || (nfiles < chunks)) { 221177256Sdas newfile(); 222177256Sdas nfiles++; 223177256Sdas } 224177256Sdas } 225100024Stjr if (bcnt + len >= bytecnt) { 2261590Srgrimes dist = bytecnt - bcnt; 2271590Srgrimes if (write(ofd, bfr, dist) != dist) 22843513Sarchie err(EX_IOERR, "write"); 2291590Srgrimes len -= dist; 2301590Srgrimes for (C = bfr + dist; len >= bytecnt; 2311590Srgrimes len -= bytecnt, C += bytecnt) { 232177256Sdas if (!chunks || (nfiles < chunks)) { 2331590Srgrimes newfile(); 234177256Sdas nfiles++; 235177256Sdas } 2361590Srgrimes if (write(ofd, 23787765Smarkm C, bytecnt) != bytecnt) 23843513Sarchie err(EX_IOERR, "write"); 2391590Srgrimes } 24043513Sarchie if (len != 0) { 241177256Sdas if (!chunks || (nfiles < chunks)) { 2421590Srgrimes newfile(); 243177256Sdas nfiles++; 244177256Sdas } 2451590Srgrimes if (write(ofd, C, len) != len) 24643513Sarchie err(EX_IOERR, "write"); 2471590Srgrimes } else 2481590Srgrimes file_open = 0; 2491590Srgrimes bcnt = len; 2501590Srgrimes } else { 2511590Srgrimes bcnt += len; 2521590Srgrimes if (write(ofd, bfr, len) != len) 25343513Sarchie err(EX_IOERR, "write"); 2541590Srgrimes } 2551590Srgrimes } 2561590Srgrimes} 2571590Srgrimes 2581590Srgrimes/* 2591590Srgrimes * split2 -- 2601590Srgrimes * Split the input by lines. 2611590Srgrimes */ 262177256Sdasstatic void 26398253Sjmallettsplit2(void) 2641590Srgrimes{ 26543513Sarchie long lcnt = 0; 26643513Sarchie FILE *infp; 2671590Srgrimes 26843513Sarchie /* Stick a stream on top of input file descriptor */ 26943513Sarchie if ((infp = fdopen(ifd, "r")) == NULL) 27043513Sarchie err(EX_NOINPUT, "fdopen"); 27143513Sarchie 27243513Sarchie /* Process input one line at a time */ 27343513Sarchie while (fgets(bfr, sizeof(bfr), infp) != NULL) { 27443513Sarchie const int len = strlen(bfr); 27543513Sarchie 27643513Sarchie /* If line is too long to deal with, just write it out */ 27743513Sarchie if (bfr[len - 1] != '\n') 27843513Sarchie goto writeit; 27943513Sarchie 28043513Sarchie /* Check if we need to start a new file */ 28143513Sarchie if (pflag) { 28243513Sarchie regmatch_t pmatch; 28343513Sarchie 28443513Sarchie pmatch.rm_so = 0; 28543513Sarchie pmatch.rm_eo = len - 1; 28643513Sarchie if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 2871590Srgrimes newfile(); 28843513Sarchie } else if (lcnt++ == numlines) { 28943513Sarchie newfile(); 29043513Sarchie lcnt = 1; 2911590Srgrimes } 29243513Sarchie 29343513Sarchiewriteit: 29443513Sarchie /* Open output file if needed */ 29543513Sarchie if (!file_open) 29643513Sarchie newfile(); 29743513Sarchie 29843513Sarchie /* Write out line */ 29943513Sarchie if (write(ofd, bfr, len) != len) 30043513Sarchie err(EX_IOERR, "write"); 30143513Sarchie } 30243513Sarchie 30343513Sarchie /* EOF or error? */ 30443513Sarchie if (ferror(infp)) 30543513Sarchie err(EX_IOERR, "read"); 30643513Sarchie else 30743513Sarchie exit(0); 3081590Srgrimes} 3091590Srgrimes 3101590Srgrimes/* 311177256Sdas * split3 -- 312177256Sdas * Split the input into specified number of chunks 313177256Sdas */ 314177256Sdasstatic void 315177256Sdassplit3(void) 316177256Sdas{ 317177256Sdas struct stat sb; 318177256Sdas 319177256Sdas if (fstat(ifd, &sb) == -1) { 320177256Sdas err(1, "stat"); 321177256Sdas /* NOTREACHED */ 322177256Sdas } 323177256Sdas 324177256Sdas if (chunks > sb.st_size) { 325177256Sdas errx(1, "can't split into more than %d files", 326177256Sdas (int)sb.st_size); 327177256Sdas /* NOTREACHED */ 328177256Sdas } 329177256Sdas 330177256Sdas bytecnt = sb.st_size / chunks; 331177256Sdas split1(); 332177256Sdas} 333177256Sdas 334177256Sdas 335177256Sdas/* 3361590Srgrimes * newfile -- 3371590Srgrimes * Open a new output file. 3381590Srgrimes */ 339177256Sdasstatic void 34098253Sjmallettnewfile(void) 3411590Srgrimes{ 34290048Smike long i, maxfiles, tfnum; 3431590Srgrimes static long fnum; 3441590Srgrimes static char *fpnt; 345250432Seadler char beg, end; 346250432Seadler int pattlen; 3471590Srgrimes 3481590Srgrimes if (ofd == -1) { 3491590Srgrimes if (fname[0] == '\0') { 3501590Srgrimes fname[0] = 'x'; 3511590Srgrimes fpnt = fname + 1; 3521590Srgrimes } else { 3531590Srgrimes fpnt = fname + strlen(fname); 3541590Srgrimes } 3551590Srgrimes ofd = fileno(stdout); 3561590Srgrimes } 35790048Smike 358250432Seadler if (dflag) { 359250432Seadler beg = '0'; 360250432Seadler end = '9'; 361250432Seadler } 362250432Seadler else { 363250432Seadler beg = 'a'; 364250432Seadler end = 'z'; 365250432Seadler } 366250432Seadler pattlen = end - beg + 1; 367250432Seadler 368250432Seadler /* maxfiles = pattlen^sufflen, but don't use libm. */ 36990048Smike for (maxfiles = 1, i = 0; i < sufflen; i++) 370250882Seadler if (LONG_MAX / pattlen < maxfiles) 37190048Smike errx(EX_USAGE, "suffix is too long (max %ld)", i); 372250882Seadler else 373250882Seadler maxfiles *= pattlen; 37490048Smike 375149345Stjr if (fnum == maxfiles) 376149345Stjr errx(EX_DATAERR, "too many files"); 37790048Smike 37890048Smike /* Generate suffix of sufflen letters */ 37990048Smike tfnum = fnum; 38090048Smike i = sufflen - 1; 38190048Smike do { 382250432Seadler fpnt[i] = tfnum % pattlen + beg; 383250432Seadler tfnum /= pattlen; 38490048Smike } while (i-- > 0); 38590048Smike fpnt[sufflen] = '\0'; 38690048Smike 3871590Srgrimes ++fnum; 3881590Srgrimes if (!freopen(fname, "w", stdout)) 38943513Sarchie err(EX_IOERR, "%s", fname); 39043513Sarchie file_open = 1; 3911590Srgrimes} 3921590Srgrimes 39328071Scharnierstatic void 39498253Sjmallettusage(void) 3951590Srgrimes{ 39643625Sarchie (void)fprintf(stderr, 397161106Skeramida"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" 398161172Skeramida" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" 399177256Sdas" split -n chunk_count [-a suffix_length] [file [prefix]]\n" 400161106Skeramida" split -p pattern [-a suffix_length] [file [prefix]]\n"); 40143625Sarchie exit(EX_USAGE); 4021590Srgrimes} 403