/*
 * Copyright (c) 1987, 1993, 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
2835388Smjacob */ 2935388Smjacob 3035388Smjacob#include <sys/cdefs.h> 3135388Smjacob__FBSDID("$FreeBSD$"); 3235388Smjacob 3335388Smjacob#ifndef lint 3444819Smjacobstatic const char copyright[] = 3535388Smjacob"@(#) Copyright (c) 1987, 1993, 1994\n\ 3653487Smjacob The Regents of the University of California. All rights reserved.\n"; 3753487Smjacob#endif 3856004Smjacob 3935388Smjacob#ifndef lint 4053487Smjacobstatic const char sccsid[] = "@(#)split.c 8.2 (Berkeley) 4/16/94"; 4135388Smjacob#endif 4235388Smjacob 4353487Smjacob#include <sys/param.h> 4453487Smjacob#include <sys/types.h> 4556004Smjacob#include <sys/stat.h> 4635388Smjacob 4753487Smjacob#include <ctype.h> 4835388Smjacob#include <err.h> 4942131Smjacob#include <errno.h> 5053487Smjacob#include <fcntl.h> 5153487Smjacob#include <inttypes.h> 5256004Smjacob#include <libutil.h> 5335388Smjacob#include <limits.h> 5453487Smjacob#include <locale.h> 5535388Smjacob#include <stdbool.h> 5664087Smjacob#include <stdint.h> 5793837Smjacob#include <stdio.h> 5839235Sgibbs#include <stdlib.h> 5935388Smjacob#include <string.h> 6043420Smjacob#include <unistd.h> 6135388Smjacob#include <regex.h> 6235388Smjacob#include <sysexits.h> 6335388Smjacob 6482689Smjacob#define DEFLINE 1000 /* Default num lines per file. */ 6582689Smjacob 6674229Smjacobstatic off_t bytecnt; /* Byte count to split on. */ 6774229Smjacobstatic off_t chunks = 0; /* Chunks count to split into. */ 6874229Smjacobstatic long numlines; /* Line count to split on. */ 6974229Smjacobstatic int file_open; /* If a file open. */ 7082689Smjacobstatic int ifd = -1, ofd = -1; /* Input/output file descriptors. */ 7135388Smjacobstatic char bfr[MAXBSIZE]; /* I/O buffer. */ 7282689Smjacobstatic char fname[MAXPATHLEN]; /* File name prefix. */ 7374229Smjacobstatic regex_t rgx; 7474229Smjacobstatic int pflag; 7574229Smjacobstatic bool dflag; 7692893Smjacobstatic long sufflen = 2; /* File name suffix length. 
*/ 7735388Smjacob 7835388Smjacobstatic void newfile(void); 7935388Smjacobstatic void split1(void); 8035388Smjacobstatic void split2(void); 8164087Smjacobstatic void split3(void); 8264087Smjacobstatic void usage(void); 8364087Smjacob 8482689Smjacobint 8582689Smjacobmain(int argc, char **argv) 8649909Smjacob{ 8761772Smjacob int ch; 8849909Smjacob int error; 8982689Smjacob char *ep, *p; 9082689Smjacob 9182689Smjacob setlocale(LC_ALL, ""); 9282689Smjacob 9382689Smjacob dflag = false; 9482689Smjacob while ((ch = getopt(argc, argv, "0123456789a:b:dl:n:p:")) != -1) 9553487Smjacob switch (ch) { 9653487Smjacob case '0': case '1': case '2': case '3': case '4': 9764087Smjacob case '5': case '6': case '7': case '8': case '9': 9864087Smjacob /* 9953487Smjacob * Undocumented kludge: split was originally designed 10082689Smjacob * to take a number after a dash. 10182689Smjacob */ 10253487Smjacob if (numlines == 0) { 10353487Smjacob p = argv[optind - 1]; 10453487Smjacob if (p[0] == '-' && p[1] == ch && !p[2]) 10553487Smjacob numlines = strtol(++p, &ep, 10); 10653487Smjacob else 10753487Smjacob numlines = 10853487Smjacob strtol(argv[optind] + 1, &ep, 10); 10953487Smjacob if (numlines <= 0 || *ep) 11053487Smjacob errx(EX_USAGE, 11153487Smjacob "%s: illegal line count", optarg); 11253487Smjacob } 11353487Smjacob break; 11453487Smjacob case 'a': /* Suffix length */ 11553487Smjacob if ((sufflen = strtol(optarg, &ep, 10)) <= 0 || *ep) 11653487Smjacob errx(EX_USAGE, 11753487Smjacob "%s: illegal suffix length", optarg); 11853487Smjacob break; 11953487Smjacob case 'b': /* Byte count. */ 12053487Smjacob errno = 0; 12153487Smjacob error = expand_number(optarg, &bytecnt); 12253487Smjacob if (error == -1) 12364087Smjacob errx(EX_USAGE, "%s: offset too large", optarg); 12464087Smjacob break; 12553487Smjacob case 'd': /* Decimal suffix */ 12653487Smjacob dflag = true; 12753487Smjacob break; 12853487Smjacob case 'l': /* Line count. 
*/ 12953487Smjacob if (numlines != 0) 13053487Smjacob usage(); 13153487Smjacob if ((numlines = strtol(optarg, &ep, 10)) <= 0 || *ep) 13264087Smjacob errx(EX_USAGE, 13364087Smjacob "%s: illegal line count", optarg); 13464087Smjacob break; 13564087Smjacob case 'n': /* Chunks. */ 13664087Smjacob if (!isdigit((unsigned char)optarg[0]) || 13764087Smjacob (chunks = (size_t)strtoul(optarg, &ep, 10)) == 0 || 13864087Smjacob *ep != '\0') { 13964087Smjacob errx(EX_USAGE, "%s: illegal number of chunks", 14064087Smjacob optarg); 14164087Smjacob } 14264087Smjacob break; 14364087Smjacob 14464087Smjacob case 'p': /* pattern matching. */ 14564087Smjacob if (regcomp(&rgx, optarg, REG_EXTENDED|REG_NOSUB) != 0) 14664087Smjacob errx(EX_USAGE, "%s: illegal regexp", optarg); 14764087Smjacob pflag = 1; 14864087Smjacob break; 14939235Sgibbs default: 15064087Smjacob usage(); 15164087Smjacob } 15293706Smjacob argv += optind; 15393706Smjacob argc -= optind; 15493706Smjacob 15564087Smjacob if (*argv != NULL) { /* Input file. */ 15664087Smjacob if (strcmp(*argv, "-") == 0) 15793706Smjacob ifd = STDIN_FILENO; 15839235Sgibbs else if ((ifd = open(*argv, O_RDONLY, 0)) < 0) 15939235Sgibbs err(EX_NOINPUT, "%s", *argv); 16039235Sgibbs ++argv; 16165140Smjacob } 16239235Sgibbs if (*argv != NULL) /* File name prefix. 
*/ 16352347Smjacob if (strlcpy(fname, *argv++, sizeof(fname)) >= sizeof(fname)) 16465140Smjacob errx(EX_USAGE, "file name prefix is too long"); 16565140Smjacob if (*argv != NULL) 16653487Smjacob usage(); 16787635Smjacob 16887635Smjacob if (strlen(fname) + (unsigned long)sufflen >= sizeof(fname)) 16987635Smjacob errx(EX_USAGE, "suffix is too long"); 17087635Smjacob if (pflag && (numlines != 0 || bytecnt != 0 || chunks != 0)) 17153487Smjacob usage(); 17235388Smjacob 17346968Smjacob if (numlines == 0) 17435388Smjacob numlines = DEFLINE; 17535388Smjacob else if (bytecnt != 0 || chunks != 0) 17635388Smjacob usage(); 17746968Smjacob 17852347Smjacob if (bytecnt && chunks) 17952347Smjacob usage(); 18035388Smjacob 18135388Smjacob if (ifd == -1) /* Stdin by default. */ 18242461Smjacob ifd = 0; 18343420Smjacob 18435388Smjacob if (bytecnt) { 18545040Smjacob split1(); 18665140Smjacob exit (0); 18735388Smjacob } else if (chunks) { 18852347Smjacob split3(); 18952347Smjacob exit (0); 19052347Smjacob } 19135388Smjacob split2(); 19252347Smjacob if (pflag) 19352347Smjacob regfree(&rgx); 19452347Smjacob exit(0); 19535388Smjacob} 19680582Smjacob 19780582Smjacob/* 19846968Smjacob * split1 -- 19980582Smjacob * Split the input by bytes. 
20043420Smjacob */ 20143420Smjacobstatic void 20280582Smjacobsplit1(void) 20380582Smjacob{ 20480582Smjacob off_t bcnt; 20580582Smjacob char *C; 20680582Smjacob ssize_t dist, len; 20780582Smjacob int nfiles; 20880582Smjacob 20980582Smjacob nfiles = 0; 21080582Smjacob 21135388Smjacob for (bcnt = 0;;) 21246968Smjacob switch ((len = read(ifd, bfr, MAXBSIZE))) { 21335388Smjacob case 0: 21435388Smjacob exit(0); 21535388Smjacob case -1: 21635388Smjacob err(EX_IOERR, "read"); 21739235Sgibbs /* NOTREACHED */ 21839235Sgibbs default: 21939235Sgibbs if (!file_open) { 22039235Sgibbs if (!chunks || (nfiles < chunks)) { 22139235Sgibbs newfile(); 22239235Sgibbs nfiles++; 22339235Sgibbs } 22439235Sgibbs } 22565140Smjacob if (bcnt + len >= bytecnt) { 22665140Smjacob dist = bytecnt - bcnt; 22765140Smjacob if (write(ofd, bfr, dist) != dist) 22843793Smjacob err(EX_IOERR, "write"); 22939235Sgibbs len -= dist; 23035388Smjacob for (C = bfr + dist; len >= bytecnt; 23139235Sgibbs len -= bytecnt, C += bytecnt) { 23245040Smjacob if (!chunks || (nfiles < chunks)) { 23365140Smjacob newfile(); 23465140Smjacob nfiles++; 23565140Smjacob } 23665140Smjacob if (write(ofd, 23752347Smjacob C, bytecnt) != bytecnt) 23852347Smjacob err(EX_IOERR, "write"); 23952347Smjacob } 24052347Smjacob if (len != 0) { 24135388Smjacob if (!chunks || (nfiles < chunks)) { 24235388Smjacob newfile(); 24335388Smjacob nfiles++; 24435388Smjacob } 24552347Smjacob if (write(ofd, C, len) != len) 24652347Smjacob err(EX_IOERR, "write"); 24752347Smjacob } else 24852347Smjacob file_open = 0; 24993837Smjacob bcnt = len; 25093837Smjacob } else { 25193837Smjacob bcnt += len; 25293837Smjacob if (write(ofd, bfr, len) != len) 25393837Smjacob err(EX_IOERR, "write"); 25435388Smjacob } 25552347Smjacob } 25684241Smjacob} 25772355Smjacob 25848484Smjacob/* 25977776Smjacob * split2 -- 26048484Smjacob * Split the input by lines. 
26148484Smjacob */ 26259454Smjacobstatic void 26348484Smjacobsplit2(void) 26472355Smjacob{ 26539235Sgibbs long lcnt = 0; 26639235Sgibbs FILE *infp; 26772355Smjacob 26852347Smjacob /* Stick a stream on top of input file descriptor */ 26982841Smjacob if ((infp = fdopen(ifd, "r")) == NULL) 27039235Sgibbs err(EX_NOINPUT, "fdopen"); 27152347Smjacob 27252347Smjacob /* Process input one line at a time */ 27352347Smjacob while (fgets(bfr, sizeof(bfr), infp) != NULL) { 27439235Sgibbs const int len = strlen(bfr); 27539235Sgibbs 27648484Smjacob /* If line is too long to deal with, just write it out */ 27748484Smjacob if (bfr[len - 1] != '\n') 27835388Smjacob goto writeit; 27948484Smjacob 28048484Smjacob /* Check if we need to start a new file */ 28148484Smjacob if (pflag) { 28248484Smjacob regmatch_t pmatch; 28348484Smjacob 28448484Smjacob pmatch.rm_so = 0; 28548484Smjacob pmatch.rm_eo = len - 1; 28644819Smjacob if (regexec(&rgx, bfr, 0, &pmatch, REG_STARTEND) == 0) 28748484Smjacob newfile(); 28893837Smjacob } else if (lcnt++ == numlines) { 28993837Smjacob newfile(); 29093837Smjacob lcnt = 1; 29193837Smjacob } 29260221Smjacob 29393837Smjacobwriteit: 29477776Smjacob /* Open output file if needed */ 29571079Smjacob if (!file_open) 29671079Smjacob newfile(); 29760221Smjacob 29860221Smjacob /* Write out line */ 29960221Smjacob if (write(ofd, bfr, len) != len) 30052347Smjacob err(EX_IOERR, "write"); 30148484Smjacob } 30248484Smjacob 30372355Smjacob /* EOF or error? 
*/ 30444819Smjacob if (ferror(infp)) 30544819Smjacob err(EX_IOERR, "read"); 30635388Smjacob else 30735388Smjacob exit(0); 30848195Smjacob} 30982689Smjacob 31090752Smjacob/* 31190752Smjacob * split3 -- 31290752Smjacob * Split the input into specified number of chunks 31335388Smjacob */ 31435388Smjacobstatic void 31548484Smjacobsplit3(void) 31648484Smjacob{ 31748484Smjacob struct stat sb; 31848484Smjacob 31948484Smjacob if (fstat(ifd, &sb) == -1) { 32048484Smjacob err(1, "stat"); 32148484Smjacob /* NOTREACHED */ 32248484Smjacob } 32335388Smjacob 32448484Smjacob if (chunks > sb.st_size) { 32548484Smjacob errx(1, "can't split into more than %d files", 32648484Smjacob (int)sb.st_size); 32772355Smjacob /* NOTREACHED */ 32872355Smjacob } 32972355Smjacob 33077776Smjacob bytecnt = sb.st_size / chunks; 33177776Smjacob split1(); 33277776Smjacob} 33335388Smjacob 33459454Smjacob 33559454Smjacob/* 33659454Smjacob * newfile -- 33759454Smjacob * Open a new output file. 33859454Smjacob */ 33959454Smjacobstatic void 34042131Smjacobnewfile(void) 34135388Smjacob{ 34235388Smjacob long i, maxfiles, tfnum; 34365140Smjacob static long fnum; 34435388Smjacob static char *fpnt; 34535388Smjacob char beg, end; 34635388Smjacob int pattlen; 34735388Smjacob 34835388Smjacob if (ofd == -1) { 34935388Smjacob if (fname[0] == '\0') { 35053487Smjacob fname[0] = 'x'; 35135388Smjacob fpnt = fname + 1; 35235388Smjacob } else { 35335388Smjacob fpnt = fname + strlen(fname); 35435388Smjacob } 35553487Smjacob ofd = fileno(stdout); 35653487Smjacob } 35735388Smjacob 35835388Smjacob if (dflag) { 35953487Smjacob beg = '0'; 36053487Smjacob end = '9'; 36153487Smjacob } 36253487Smjacob else { 36353487Smjacob beg = 'a'; 36453487Smjacob end = 'z'; 36561772Smjacob } 36635388Smjacob pattlen = end - beg + 1; 36771079Smjacob 36890224Smjacob /* maxfiles = pattlen^sufflen, but don't use libm. 
*/ 36990224Smjacob for (maxfiles = 1, i = 0; i < sufflen; i++) 37087635Smjacob if (LONG_MAX / pattlen < maxfiles) 37187635Smjacob errx(EX_USAGE, "suffix is too long (max %ld)", i); 37253487Smjacob else 37353487Smjacob maxfiles *= pattlen; 37465140Smjacob 37587635Smjacob if (fnum == maxfiles) 37671079Smjacob errx(EX_DATAERR, "too many files"); 37771079Smjacob 37871079Smjacob /* Generate suffix of sufflen letters */ 37971079Smjacob tfnum = fnum; 38082689Smjacob i = sufflen - 1; 38182689Smjacob do { 38282689Smjacob fpnt[i] = tfnum % pattlen + beg; 38382689Smjacob tfnum /= pattlen; 38482689Smjacob } while (i-- > 0); 38569522Smjacob fpnt[sufflen] = '\0'; 38669522Smjacob 38769522Smjacob ++fnum; 38869522Smjacob if (!freopen(fname, "w", stdout)) 38969522Smjacob err(EX_IOERR, "%s", fname); 39088855Smjacob file_open = 1; 39188855Smjacob} 39288855Smjacob 39388855Smjacobstatic void 39488855Smjacobusage(void) 39588855Smjacob{ 39639235Sgibbs (void)fprintf(stderr, 39735388Smjacob"usage: split [-l line_count] [-a suffix_length] [file [prefix]]\n" 39839235Sgibbs" split -b byte_count[K|k|M|m|G|g] [-a suffix_length] [file [prefix]]\n" 39935388Smjacob" split -n chunk_count [-a suffix_length] [file [prefix]]\n" 40035388Smjacob" split -p pattern [-a suffix_length] [file [prefix]]\n"); 40162171Smjacob exit(EX_USAGE); 40287635Smjacob} 40387635Smjacob