split.c revision 68887
11590Srgrimes/* 21590Srgrimes * Copyright (c) 1987, 1993, 1994 31590Srgrimes * The Regents of the University of California. All rights reserved. 41590Srgrimes * 51590Srgrimes * Redistribution and use in source and binary forms, with or without 61590Srgrimes * modification, are permitted provided that the following conditions 71590Srgrimes * are met: 81590Srgrimes * 1. Redistributions of source code must retain the above copyright 91590Srgrimes * notice, this list of conditions and the following disclaimer. 101590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 111590Srgrimes * notice, this list of conditions and the following disclaimer in the 121590Srgrimes * documentation and/or other materials provided with the distribution. 131590Srgrimes * 3. All advertising materials mentioning features or use of this software 141590Srgrimes * must display the following acknowledgement: 151590Srgrimes * This product includes software developed by the University of 161590Srgrimes * California, Berkeley and its contributors. 171590Srgrimes * 4. Neither the name of the University nor the names of its contributors 181590Srgrimes * may be used to endorse or promote products derived from this software 191590Srgrimes * without specific prior written permission. 201590Srgrimes * 211590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 221590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 231590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 241590Srgrimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 251590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 261590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 271590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 281590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 291590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 301590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 311590Srgrimes * SUCH DAMAGE. 321590Srgrimes */ 331590Srgrimes 341590Srgrimes#ifndef lint 3528071Scharnierstatic const char copyright[] = 361590Srgrimes"@(#) Copyright (c) 1987, 1993, 1994\n\ 371590Srgrimes The Regents of the University of California. All rights reserved.\n"; 381590Srgrimes#endif /* not lint */ 391590Srgrimes 401590Srgrimes#ifndef lint 4128071Scharnier#if 0 421590Srgrimesstatic char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 4368887Sjwd#else 4468887Sjwdstatic const char rcsid[] = 4568887Sjwd "$FreeBSD: head/usr.bin/split/split.c 68887 2000-11-19 01:44:20Z jwd $"; 4628071Scharnier#endif 471590Srgrimes#endif /* not lint */ 481590Srgrimes 491590Srgrimes#include <sys/param.h> 5043513Sarchie#include <sys/types.h> 511590Srgrimes 521590Srgrimes#include <ctype.h> 531590Srgrimes#include <err.h> 541590Srgrimes#include <fcntl.h> 551590Srgrimes#include <stdio.h> 561590Srgrimes#include <stdlib.h> 571590Srgrimes#include <string.h> 581590Srgrimes#include <unistd.h> 5943513Sarchie#include <regex.h> 6043513Sarchie#include <sysexits.h> 611590Srgrimes 621590Srgrimes#define DEFLINE 1000 /* Default num lines per file. */ 631590Srgrimes 6468887Sjwdsize_t bytecnt; /* Byte count to split on. */ 651590Srgrimeslong numlines; /* Line count to split on. */ 661590Srgrimesint file_open; /* If a file open. */ 671590Srgrimesint ifd = -1, ofd = -1; /* Input/output file descriptors. */ 681590Srgrimeschar bfr[MAXBSIZE]; /* I/O buffer. */ 691590Srgrimeschar fname[MAXPATHLEN]; /* File name prefix. */ 7043513Sarchieregex_t rgx; 7143513Sarchieint pflag; 721590Srgrimes 731590Srgrimesvoid newfile __P((void)); 741590Srgrimesvoid split1 __P((void)); 751590Srgrimesvoid split2 __P((void)); 7628071Scharnierstatic void usage __P((void)); 771590Srgrimes 781590Srgrimesint 791590Srgrimesmain(argc, argv) 801590Srgrimes int argc; 811590Srgrimes char *argv[]; 821590Srgrimes{ 831590Srgrimes int ch; 841590Srgrimes char *ep, *p; 851590Srgrimes 8643513Sarchie while ((ch = getopt(argc, argv, "-0123456789b:l:p:")) != -1) 871590Srgrimes switch (ch) { 881590Srgrimes case '0': case '1': case '2': case '3': case '4': 891590Srgrimes case '5': case '6': case '7': case '8': case '9': 901590Srgrimes /* 911590Srgrimes * Undocumented kludge: split was originally designed 921590Srgrimes * to take a number after a dash. 931590Srgrimes */ 941590Srgrimes if (numlines == 0) { 951590Srgrimes p = argv[optind - 1]; 961590Srgrimes if (p[0] == '-' && p[1] == ch && !p[2]) 971590Srgrimes numlines = strtol(++p, &ep, 10); 981590Srgrimes else 991590Srgrimes numlines = 1001590Srgrimes strtol(argv[optind] + 1, &ep, 10); 1011590Srgrimes if (numlines <= 0 || *ep) 10243513Sarchie errx(EX_USAGE, 10343513Sarchie "%s: illegal line count", optarg); 1041590Srgrimes } 1051590Srgrimes break; 1061590Srgrimes case '-': /* Undocumented: historic stdin flag. */ 1071590Srgrimes if (ifd != -1) 1081590Srgrimes usage(); 1091590Srgrimes ifd = 0; 1101590Srgrimes break; 1111590Srgrimes case 'b': /* Byte count. */ 11268887Sjwd if ((bytecnt = strtoq(optarg, &ep, 10)) <= 0 || 11328071Scharnier (*ep != '\0' && *ep != 'k' && *ep != 'm')) 11443513Sarchie errx(EX_USAGE, 11543513Sarchie "%s: illegal byte count", optarg); 1161590Srgrimes if (*ep == 'k') 1171590Srgrimes bytecnt *= 1024; 1181590Srgrimes else if (*ep == 'm') 1191590Srgrimes bytecnt *= 1048576; 1201590Srgrimes break; 12143513Sarchie case 'p' : /* pattern matching. */ 12243513Sarchie if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 12343513Sarchie errx(EX_USAGE, "%s: illegal regexp", optarg); 12443513Sarchie pflag = 1; 12543513Sarchie break; 1261590Srgrimes case 'l': /* Line count. */ 1271590Srgrimes if (numlines != 0) 1281590Srgrimes usage(); 1299427Srgrimes if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 13043513Sarchie errx(EX_USAGE, 13143513Sarchie "%s: illegal line count", optarg); 1321590Srgrimes break; 1331590Srgrimes default: 1341590Srgrimes usage(); 1351590Srgrimes } 1361590Srgrimes argv += optind; 1371590Srgrimes argc -= optind; 1381590Srgrimes 1391590Srgrimes if (*argv != NULL) 1401590Srgrimes if (ifd == -1) { /* Input file. */ 1411590Srgrimes if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 14243513Sarchie err(EX_NOINPUT, "%s", *argv); 1431590Srgrimes ++argv; 1441590Srgrimes } 1451590Srgrimes if (*argv != NULL) /* File name prefix. */ 1461590Srgrimes (void)strcpy(fname, *argv++); 1471590Srgrimes if (*argv != NULL) 1481590Srgrimes usage(); 1491590Srgrimes 15043513Sarchie if (pflag && (numlines != 0 || bytecnt != 0)) 15143513Sarchie usage(); 15243513Sarchie 1531590Srgrimes if (numlines == 0) 1541590Srgrimes numlines = DEFLINE; 15543513Sarchie else if (bytecnt != 0) 1561590Srgrimes usage(); 1571590Srgrimes 1581590Srgrimes if (ifd == -1) /* Stdin by default. */ 1591590Srgrimes ifd = 0; 1601590Srgrimes 1611590Srgrimes if (bytecnt) { 1621590Srgrimes split1(); 1631590Srgrimes exit (0); 1641590Srgrimes } 1651590Srgrimes split2(); 16643513Sarchie if (pflag) 16743513Sarchie regfree(&rgx); 1681590Srgrimes exit(0); 1691590Srgrimes} 1701590Srgrimes 1711590Srgrimes/* 1721590Srgrimes * split1 -- 1731590Srgrimes * Split the input by bytes. 1741590Srgrimes */ 1751590Srgrimesvoid 1761590Srgrimessplit1() 1771590Srgrimes{ 17868887Sjwd size_t bcnt, dist, len; 1791590Srgrimes char *C; 1801590Srgrimes 1811590Srgrimes for (bcnt = 0;;) 18243513Sarchie switch ((len = read(ifd, bfr, MAXBSIZE))) { 1831590Srgrimes case 0: 1841590Srgrimes exit(0); 1851590Srgrimes case -1: 18643513Sarchie err(EX_IOERR, "read"); 1871590Srgrimes /* NOTREACHED */ 1881590Srgrimes default: 18943513Sarchie if (!file_open) 1901590Srgrimes newfile(); 1911590Srgrimes if (bcnt + len >= bytecnt) { 1921590Srgrimes dist = bytecnt - bcnt; 1931590Srgrimes if (write(ofd, bfr, dist) != dist) 19443513Sarchie err(EX_IOERR, "write"); 1951590Srgrimes len -= dist; 1961590Srgrimes for (C = bfr + dist; len >= bytecnt; 1971590Srgrimes len -= bytecnt, C += bytecnt) { 1981590Srgrimes newfile(); 1991590Srgrimes if (write(ofd, 2001590Srgrimes C, (int)bytecnt) != bytecnt) 20143513Sarchie err(EX_IOERR, "write"); 2021590Srgrimes } 20343513Sarchie if (len != 0) { 2041590Srgrimes newfile(); 2051590Srgrimes if (write(ofd, C, len) != len) 20643513Sarchie err(EX_IOERR, "write"); 2071590Srgrimes } else 2081590Srgrimes file_open = 0; 2091590Srgrimes bcnt = len; 2101590Srgrimes } else { 2111590Srgrimes bcnt += len; 2121590Srgrimes if (write(ofd, bfr, len) != len) 21343513Sarchie err(EX_IOERR, "write"); 2141590Srgrimes } 2151590Srgrimes } 2161590Srgrimes} 2171590Srgrimes 2181590Srgrimes/* 2191590Srgrimes * split2 -- 2201590Srgrimes * Split the input by lines. 2211590Srgrimes */ 2221590Srgrimesvoid 2231590Srgrimessplit2() 2241590Srgrimes{ 22543513Sarchie long lcnt = 0; 22643513Sarchie FILE *infp; 2271590Srgrimes 22843513Sarchie /* Stick a stream on top of input file descriptor */ 22943513Sarchie if ((infp = fdopen(ifd, "r")) == NULL) 23043513Sarchie err(EX_NOINPUT, "fdopen"); 23143513Sarchie 23243513Sarchie /* Process input one line at a time */ 23343513Sarchie while (fgets(bfr, sizeof(bfr), infp) != NULL) { 23443513Sarchie const int len = strlen(bfr); 23543513Sarchie 23643513Sarchie /* If line is too long to deal with, just write it out */ 23743513Sarchie if (bfr[len - 1] != '\n') 23843513Sarchie goto writeit; 23943513Sarchie 24043513Sarchie /* Check if we need to start a new file */ 24143513Sarchie if (pflag) { 24243513Sarchie regmatch_t pmatch; 24343513Sarchie 24443513Sarchie pmatch.rm_so = 0; 24543513Sarchie pmatch.rm_eo = len - 1; 24643513Sarchie if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 2471590Srgrimes newfile(); 24843513Sarchie } else if (lcnt++ == numlines) { 24943513Sarchie newfile(); 25043513Sarchie lcnt = 1; 2511590Srgrimes } 25243513Sarchie 25343513Sarchiewriteit: 25443513Sarchie /* Open output file if needed */ 25543513Sarchie if (!file_open) 25643513Sarchie newfile(); 25743513Sarchie 25843513Sarchie /* Write out line */ 25943513Sarchie if (write(ofd, bfr, len) != len) 26043513Sarchie err(EX_IOERR, "write"); 26143513Sarchie } 26243513Sarchie 26343513Sarchie /* EOF or error? */ 26443513Sarchie if (ferror(infp)) 26543513Sarchie err(EX_IOERR, "read"); 26643513Sarchie else 26743513Sarchie exit(0); 2681590Srgrimes} 2691590Srgrimes 2701590Srgrimes/* 2711590Srgrimes * newfile -- 2721590Srgrimes * Open a new output file. 2731590Srgrimes */ 2741590Srgrimesvoid 2751590Srgrimesnewfile() 2761590Srgrimes{ 2771590Srgrimes static long fnum; 2781590Srgrimes static int defname; 2791590Srgrimes static char *fpnt; 2801590Srgrimes 2811590Srgrimes if (ofd == -1) { 2821590Srgrimes if (fname[0] == '\0') { 2831590Srgrimes fname[0] = 'x'; 2841590Srgrimes fpnt = fname + 1; 2851590Srgrimes defname = 1; 2861590Srgrimes } else { 2871590Srgrimes fpnt = fname + strlen(fname); 2881590Srgrimes defname = 0; 2891590Srgrimes } 2901590Srgrimes ofd = fileno(stdout); 2911590Srgrimes } 2921590Srgrimes /* 2931590Srgrimes * Hack to increase max files; original code wandered through 2941590Srgrimes * magic characters. Maximum files is 3 * 26 * 26 == 2028 2951590Srgrimes */ 2961590Srgrimes#define MAXFILES 676 2971590Srgrimes if (fnum == MAXFILES) { 2981590Srgrimes if (!defname || fname[0] == 'z') 29943513Sarchie errx(EX_DATAERR, "too many files"); 3001590Srgrimes ++fname[0]; 3011590Srgrimes fnum = 0; 3021590Srgrimes } 3031590Srgrimes fpnt[0] = fnum / 26 + 'a'; 3041590Srgrimes fpnt[1] = fnum % 26 + 'a'; 3051590Srgrimes ++fnum; 3061590Srgrimes if (!freopen(fname, "w", stdout)) 30743513Sarchie err(EX_IOERR, "%s", fname); 30843513Sarchie file_open = 1; 3091590Srgrimes} 3101590Srgrimes 31128071Scharnierstatic void 3121590Srgrimesusage() 3131590Srgrimes{ 31443625Sarchie (void)fprintf(stderr, 31543625Sarchie"usage: split [-b byte_count] [-l line_count] [-p pattern] [file [prefix]]\n"); 31643625Sarchie exit(EX_USAGE); 3171590Srgrimes} 318