/* split.c revision 98253 */
/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD: head/usr.bin/split/split.c 98253 2002-06-15 11:03:28Z jmallett $");

#ifndef lint
static const char copyright[] =
"@(#) Copyright (c) 1987, 1993, 1994\n\
 The Regents of the University of California. All rights reserved.\n";
#endif

#ifndef lint
static const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94";
#endif

#include <sys/param.h>

#include <ctype.h>
#include <err.h>
#include <fcntl.h>
#include <limits.h>
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>

#define DEFLINE	1000			/* Default num lines per file. */

int	 bytecnt;			/* Byte count to split on. */
long	 numlines;			/* Line count to split on. */
int	 file_open;			/* If a file open. */
int	 ifd = -1, ofd = -1;		/* Input/output file descriptors. */
char	 bfr[MAXBSIZE];			/* I/O buffer. */
*/ 661590Srgrimeschar fname[MAXPATHLEN]; /* File name prefix. */ 6743513Sarchieregex_t rgx; 6843513Sarchieint pflag; 6990048Smikelong sufflen = 2; /* File name suffix length. */ 701590Srgrimes 7192922Simpvoid newfile(void); 7292922Simpvoid split1(void); 7392922Simpvoid split2(void); 7492922Simpstatic void usage(void); 751590Srgrimes 761590Srgrimesint 7798253Sjmallettmain(int argc, char **argv) 781590Srgrimes{ 791590Srgrimes int ch; 801590Srgrimes char *ep, *p; 811590Srgrimes 8290048Smike while ((ch = getopt(argc, argv, "-0123456789a:b:l:p:")) != -1) 831590Srgrimes switch (ch) { 841590Srgrimes case '0': case '1': case '2': case '3': case '4': 851590Srgrimes case '5': case '6': case '7': case '8': case '9': 861590Srgrimes /* 871590Srgrimes * Undocumented kludge: split was originally designed 881590Srgrimes * to take a number after a dash. 891590Srgrimes */ 901590Srgrimes if (numlines == 0) { 911590Srgrimes p = argv[optind - 1]; 921590Srgrimes if (p[0] == '-' && p[1] == ch && !p[2]) 931590Srgrimes numlines = strtol(++p, &ep, 10); 941590Srgrimes else 951590Srgrimes numlines = 961590Srgrimes strtol(argv[optind] + 1, &ep, 10); 971590Srgrimes if (numlines <= 0 || *ep) 9843513Sarchie errx(EX_USAGE, 9943513Sarchie "%s: illegal line count", optarg); 1001590Srgrimes } 1011590Srgrimes break; 1021590Srgrimes case '-': /* Undocumented: historic stdin flag. */ 1031590Srgrimes if (ifd != -1) 1041590Srgrimes usage(); 1051590Srgrimes ifd = 0; 1061590Srgrimes break; 10790048Smike case 'a': /* Suffix length */ 10890048Smike if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 10990048Smike errx(EX_USAGE, 11090048Smike "%s: illegal suffix length", optarg); 11190048Smike break; 1121590Srgrimes case 'b': /* Byte count. 
*/ 11368887Sjwd if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 || 11428071Scharnier (*ep != '\0' && *ep != 'k' && *ep != 'm')) 11543513Sarchie errx(EX_USAGE, 11643513Sarchie "%s: illegal byte count", optarg); 1171590Srgrimes if (*ep == 'k') 1181590Srgrimes bytecnt *= 1024; 1191590Srgrimes else if (*ep == 'm') 1201590Srgrimes bytecnt *= 1048576; 1211590Srgrimes break; 12243513Sarchie case 'p' : /* pattern matching. */ 12343513Sarchie if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 12443513Sarchie errx(EX_USAGE, "%s: illegal regexp", optarg); 12543513Sarchie pflag = 1; 12643513Sarchie break; 1271590Srgrimes case 'l': /* Line count. */ 1281590Srgrimes if (numlines != 0) 1291590Srgrimes usage(); 1309427Srgrimes if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 13143513Sarchie errx(EX_USAGE, 13243513Sarchie "%s: illegal line count", optarg); 1331590Srgrimes break; 1341590Srgrimes default: 1351590Srgrimes usage(); 1361590Srgrimes } 1371590Srgrimes argv += optind; 1381590Srgrimes argc -= optind; 1391590Srgrimes 1401590Srgrimes if (*argv != NULL) 1411590Srgrimes if (ifd == -1) { /* Input file. */ 14297334Stjr if (strcmp(*argv, "-") == 0) 14397334Stjr ifd = STDIN_FILENO; 14497334Stjr else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 14543513Sarchie err(EX_NOINPUT, "%s", *argv); 1461590Srgrimes ++argv; 1471590Srgrimes } 1481590Srgrimes if (*argv != NULL) /* File name prefix. 
*/ 14997332Stjr if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 15097332Stjr errx(EX_USAGE, "file name prefix is too long"); 1511590Srgrimes if (*argv != NULL) 1521590Srgrimes usage(); 1531590Srgrimes 15490048Smike if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 15590048Smike errx(EX_USAGE, "suffix is too long"); 15643513Sarchie if (pflag && (numlines != 0 || bytecnt != 0)) 15743513Sarchie usage(); 15843513Sarchie 1591590Srgrimes if (numlines == 0) 1601590Srgrimes numlines = DEFLINE; 16143513Sarchie else if (bytecnt != 0) 1621590Srgrimes usage(); 1631590Srgrimes 1641590Srgrimes if (ifd == -1) /* Stdin by default. */ 1651590Srgrimes ifd = 0; 1661590Srgrimes 1671590Srgrimes if (bytecnt) { 1681590Srgrimes split1(); 1691590Srgrimes exit (0); 1701590Srgrimes } 1711590Srgrimes split2(); 17243513Sarchie if (pflag) 17343513Sarchie regfree(&rgx); 1741590Srgrimes exit(0); 1751590Srgrimes} 1761590Srgrimes 1771590Srgrimes/* 1781590Srgrimes * split1 -- 1791590Srgrimes * Split the input by bytes. 
1801590Srgrimes */ 1811590Srgrimesvoid 18298253Sjmallettsplit1(void) 1831590Srgrimes{ 18487765Smarkm size_t bcnt; 1851590Srgrimes char *C; 18687765Smarkm int dist, len; 1871590Srgrimes 1881590Srgrimes for (bcnt = 0;;) 18943513Sarchie switch ((len = read(ifd, bfr, MAXBSIZE))) { 1901590Srgrimes case 0: 1911590Srgrimes exit(0); 1921590Srgrimes case -1: 19343513Sarchie err(EX_IOERR, "read"); 1941590Srgrimes /* NOTREACHED */ 1951590Srgrimes default: 19643513Sarchie if (!file_open) 1971590Srgrimes newfile(); 19887765Smarkm if (bcnt + len >= (u_int)bytecnt) { 1991590Srgrimes dist = bytecnt - bcnt; 2001590Srgrimes if (write(ofd, bfr, dist) != dist) 20143513Sarchie err(EX_IOERR, "write"); 2021590Srgrimes len -= dist; 2031590Srgrimes for (C = bfr + dist; len >= bytecnt; 2041590Srgrimes len -= bytecnt, C += bytecnt) { 2051590Srgrimes newfile(); 2061590Srgrimes if (write(ofd, 20787765Smarkm C, bytecnt) != bytecnt) 20843513Sarchie err(EX_IOERR, "write"); 2091590Srgrimes } 21043513Sarchie if (len != 0) { 2111590Srgrimes newfile(); 2121590Srgrimes if (write(ofd, C, len) != len) 21343513Sarchie err(EX_IOERR, "write"); 2141590Srgrimes } else 2151590Srgrimes file_open = 0; 2161590Srgrimes bcnt = len; 2171590Srgrimes } else { 2181590Srgrimes bcnt += len; 2191590Srgrimes if (write(ofd, bfr, len) != len) 22043513Sarchie err(EX_IOERR, "write"); 2211590Srgrimes } 2221590Srgrimes } 2231590Srgrimes} 2241590Srgrimes 2251590Srgrimes/* 2261590Srgrimes * split2 -- 2271590Srgrimes * Split the input by lines. 
2281590Srgrimes */ 2291590Srgrimesvoid 23098253Sjmallettsplit2(void) 2311590Srgrimes{ 23243513Sarchie long lcnt = 0; 23343513Sarchie FILE *infp; 2341590Srgrimes 23543513Sarchie /* Stick a stream on top of input file descriptor */ 23643513Sarchie if ((infp = fdopen(ifd, "r")) == NULL) 23743513Sarchie err(EX_NOINPUT, "fdopen"); 23843513Sarchie 23943513Sarchie /* Process input one line at a time */ 24043513Sarchie while (fgets(bfr, sizeof(bfr), infp) != NULL) { 24143513Sarchie const int len = strlen(bfr); 24243513Sarchie 24343513Sarchie /* If line is too long to deal with, just write it out */ 24443513Sarchie if (bfr[len - 1] != '\n') 24543513Sarchie goto writeit; 24643513Sarchie 24743513Sarchie /* Check if we need to start a new file */ 24843513Sarchie if (pflag) { 24943513Sarchie regmatch_t pmatch; 25043513Sarchie 25143513Sarchie pmatch.rm_so = 0; 25243513Sarchie pmatch.rm_eo = len - 1; 25343513Sarchie if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 2541590Srgrimes newfile(); 25543513Sarchie } else if (lcnt++ == numlines) { 25643513Sarchie newfile(); 25743513Sarchie lcnt = 1; 2581590Srgrimes } 25943513Sarchie 26043513Sarchiewriteit: 26143513Sarchie /* Open output file if needed */ 26243513Sarchie if (!file_open) 26343513Sarchie newfile(); 26443513Sarchie 26543513Sarchie /* Write out line */ 26643513Sarchie if (write(ofd, bfr, len) != len) 26743513Sarchie err(EX_IOERR, "write"); 26843513Sarchie } 26943513Sarchie 27043513Sarchie /* EOF or error? */ 27143513Sarchie if (ferror(infp)) 27243513Sarchie err(EX_IOERR, "read"); 27343513Sarchie else 27443513Sarchie exit(0); 2751590Srgrimes} 2761590Srgrimes 2771590Srgrimes/* 2781590Srgrimes * newfile -- 2791590Srgrimes * Open a new output file. 
2801590Srgrimes */ 2811590Srgrimesvoid 28298253Sjmallettnewfile(void) 2831590Srgrimes{ 28490048Smike long i, maxfiles, tfnum; 2851590Srgrimes static long fnum; 2861590Srgrimes static int defname; 2871590Srgrimes static char *fpnt; 2881590Srgrimes 2891590Srgrimes if (ofd == -1) { 2901590Srgrimes if (fname[0] == '\0') { 2911590Srgrimes fname[0] = 'x'; 2921590Srgrimes fpnt = fname + 1; 2931590Srgrimes defname = 1; 2941590Srgrimes } else { 2951590Srgrimes fpnt = fname + strlen(fname); 2961590Srgrimes defname = 0; 2971590Srgrimes } 2981590Srgrimes ofd = fileno(stdout); 2991590Srgrimes } 30090048Smike 30190048Smike /* maxfiles = 26^sufflen, but don't use libm. */ 30290048Smike for (maxfiles = 1, i = 0; i < sufflen; i++) 30390048Smike if ((maxfiles *= 26) <= 0) 30490048Smike errx(EX_USAGE, "suffix is too long (max %ld)", i); 30590048Smike 3061590Srgrimes /* 3071590Srgrimes * Hack to increase max files; original code wandered through 30890048Smike * magic characters. 3091590Srgrimes */ 31090048Smike if (fnum == maxfiles) { 3111590Srgrimes if (!defname || fname[0] == 'z') 31243513Sarchie errx(EX_DATAERR, "too many files"); 3131590Srgrimes ++fname[0]; 3141590Srgrimes fnum = 0; 3151590Srgrimes } 31690048Smike 31790048Smike /* Generate suffix of sufflen letters */ 31890048Smike tfnum = fnum; 31990048Smike i = sufflen - 1; 32090048Smike do { 32190048Smike fpnt[i] = tfnum % 26 + 'a'; 32290048Smike tfnum /= 26; 32390048Smike } while (i-- > 0); 32490048Smike fpnt[sufflen] = '\0'; 32590048Smike 3261590Srgrimes ++fnum; 3271590Srgrimes if (!freopen(fname, "w", stdout)) 32843513Sarchie err(EX_IOERR, "%s", fname); 32943513Sarchie file_open = 1; 3301590Srgrimes} 3311590Srgrimes 33228071Scharnierstatic void 33398253Sjmallettusage(void) 3341590Srgrimes{ 33543625Sarchie (void)fprintf(stderr, 33690048Smike"usage: split [-a sufflen] [-b byte_count] [-l line_count] [-p pattern]\n"); 33790048Smike (void)fprintf(stderr, 33890048Smike" [file [prefix]]\n"); 33943625Sarchie exit(EX_USAGE); 
3401590Srgrimes} 341