1/* $NetBSD: sort.c,v 1.60 2010/12/18 23:09:48 christos Exp $ */ 2 3/*- 4 * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Ben Harris and Jaromir Dolecek. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/*- 33 * Copyright (c) 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * This code is derived from software contributed to Berkeley by 37 * Peter McIlroy. 38 * 39 * Redistribution and use in source and binary forms, with or without 40 * modification, are permitted provided that the following conditions 41 * are met: 42 * 1. Redistributions of source code must retain the above copyright 43 * notice, this list of conditions and the following disclaimer. 44 * 2. Redistributions in binary form must reproduce the above copyright 45 * notice, this list of conditions and the following disclaimer in the 46 * documentation and/or other materials provided with the distribution. 47 * 3. Neither the name of the University nor the names of its contributors 48 * may be used to endorse or promote products derived from this software 49 * without specific prior written permission. 50 * 51 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 52 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 53 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 54 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 55 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 56 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 57 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 58 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 59 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 60 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 61 * SUCH DAMAGE. 62 */ 63 64/* Sort sorts a file using an optional user-defined key. 65 * Sort uses radix sort for internal sorting, and allows 66 * a choice of merge sort and radix sort for external sorting. 67 */ 68 69#include <util.h> 70#include "sort.h" 71#include "fsort.h" 72#include "pathnames.h" 73 74#ifndef lint 75__COPYRIGHT("@(#) Copyright (c) 1993\ 76 The Regents of the University of California. All rights reserved."); 77#endif /* not lint */ 78 79__RCSID("$NetBSD: sort.c,v 1.60 2010/12/18 23:09:48 christos Exp $"); 80 81#include <sys/types.h> 82#include <sys/time.h> 83#include <sys/resource.h> 84 85#include <paths.h> 86#include <signal.h> 87#include <stdlib.h> 88#include <string.h> 89#include <unistd.h> 90#include <locale.h> 91 92int REC_D = '\n'; 93u_char d_mask[NBINS]; /* flags for rec_d, field_d, <blank> */ 94 95/* 96 * weight tables. Gweights is one of ascii, Rascii.. 97 * modified to weight rec_d = 0 (or 255) 98 */ 99u_char *const weight_tables[4] = { ascii, Rascii, Ftable, RFtable }; 100u_char ascii[NBINS], Rascii[NBINS], RFtable[NBINS], Ftable[NBINS]; 101 102int SINGL_FLD = 0, SEP_FLAG = 0, UNIQUE = 0; 103int REVERSE = 0; 104int posix_sort; 105 106unsigned int debug_flags = 0; 107 108static char toutpath[MAXPATHLEN]; 109 110const char *tmpdir; /* where temporary files should be put */ 111 112static void cleanup(void); 113static void onsignal(int); 114__dead static void usage(const char *); 115 116int 117main(int argc, char *argv[]) 118{ 119 int ch, i, stdinflag = 0; 120 char cflag = 0, mflag = 0; 121 char *outfile, *outpath = 0; 122 struct field *fldtab; 123 size_t fldtab_sz, fld_cnt; 124 struct filelist filelist; 125 int num_input_files; 126 FILE *outfp = NULL; 127 struct rlimit rl; 128 struct stat st; 129 130 setlocale(LC_ALL, ""); 131 132 /* bump RLIMIT_NOFILE to maximum our hard limit allows */ 133 if (getrlimit(RLIMIT_NOFILE, &rl) < 0) 134 err(2, "getrlimit"); 135 rl.rlim_cur = rl.rlim_max; 136 if (setrlimit(RLIMIT_NOFILE, &rl) < 0) 137 err(2, "setrlimit"); 138 139 d_mask[REC_D = '\n'] = REC_D_F; 140 d_mask['\t'] = d_mask[' '] = BLANK | FLD_D; 141 142 /* fldtab[0] is the global options. */ 143 fldtab_sz = 3; 144 fld_cnt = 0; 145 fldtab = emalloc(fldtab_sz * sizeof(*fldtab)); 146 memset(fldtab, 0, fldtab_sz * sizeof(*fldtab)); 147 148#define SORT_OPTS "bcdD:fHik:lmno:rR:sSt:T:ux" 149 150 /* Convert "+field" args to -f format */ 151 fixit(&argc, argv, SORT_OPTS); 152 153 if (!(tmpdir = getenv("TMPDIR"))) 154 tmpdir = _PATH_TMP; 155 156 while ((ch = getopt(argc, argv, SORT_OPTS)) != -1) { 157 switch (ch) { 158 case 'b': 159 fldtab[0].flags |= BI | BT; 160 break; 161 case 'c': 162 cflag = 1; 163 break; 164 case 'D': /* Debug flags */ 165 for (i = 0; optarg[i]; i++) 166 debug_flags |= 1 << (optarg[i] & 31); 167 break; 168 case 'd': case 'f': case 'i': case 'n': case 'l': 169 fldtab[0].flags |= optval(ch, 0); 170 break; 171 case 'H': 172 /* -H was ; use merge sort for blocks of large files' */ 173 /* That is now the default. */ 174 break; 175 case 'k': 176 fldtab = erealloc(fldtab, (fldtab_sz + 1) * sizeof(*fldtab)); 177 memset(&fldtab[fldtab_sz], 0, sizeof(fldtab[0])); 178 fldtab_sz++; 179 180 setfield(optarg, &fldtab[++fld_cnt], fldtab[0].flags); 181 break; 182 case 'm': 183 mflag = 1; 184 break; 185 case 'o': 186 outpath = optarg; 187 break; 188 case 'r': 189 REVERSE = 1; 190 break; 191 case 's': 192 /* 193 * Nominally 'stable sort', keep lines with equal keys 194 * in input file order. (Default for NetBSD) 195 * (-s for GNU sort compatibility.) 196 */ 197 posix_sort = 0; 198 break; 199 case 'S': 200 /* 201 * Reverse of -s! 202 * This needs to enforce a POSIX sort where records 203 * with equal keys are then sorted by the raw data. 204 * Currently not implemented! 205 * (using libc radixsort() v sradixsort() doesn't 206 * have the desired effect.) 207 */ 208 posix_sort = 1; 209 break; 210 case 't': 211 if (SEP_FLAG) 212 usage("multiple field delimiters"); 213 SEP_FLAG = 1; 214 d_mask[' '] &= ~FLD_D; 215 d_mask['\t'] &= ~FLD_D; 216 d_mask[(u_char)*optarg] |= FLD_D; 217 if (d_mask[(u_char)*optarg] & REC_D_F) 218 errx(2, "record/field delimiter clash"); 219 break; 220 case 'R': 221 if (REC_D != '\n') 222 usage("multiple record delimiters"); 223 REC_D = *optarg; 224 if (REC_D == '\n') 225 break; 226 if (optarg[1] != '\0') { 227 char *ep; 228 int t = 0; 229 if (optarg[0] == '\\') 230 optarg++, t = 8; 231 REC_D = (int)strtol(optarg, &ep, t); 232 if (*ep != '\0' || REC_D < 0 || 233 REC_D >= (int)__arraycount(d_mask)) 234 errx(2, "invalid record delimiter %s", 235 optarg); 236 } 237 d_mask['\n'] = d_mask[' ']; 238 d_mask[REC_D] = REC_D_F; 239 break; 240 case 'T': 241 /* -T tmpdir */ 242 tmpdir = optarg; 243 break; 244 case 'u': 245 UNIQUE = 1; 246 break; 247 case '?': 248 default: 249 usage(NULL); 250 } 251 } 252 253 if (UNIQUE) 254 /* Don't sort on raw record if keys match */ 255 posix_sort = 0; 256 257 if (cflag && argc > optind+1) 258 errx(2, "too many input files for -c option"); 259 if (argc - 2 > optind && !strcmp(argv[argc-2], "-o")) { 260 outpath = argv[argc-1]; 261 argc -= 2; 262 } 263 if (mflag && argc - optind > (MAXFCT - (16+1))*16) 264 errx(2, "too many input files for -m option"); 265 266 for (i = optind; i < argc; i++) { 267 /* allow one occurrence of /dev/stdin */ 268 if (!strcmp(argv[i], "-") || !strcmp(argv[i], _PATH_STDIN)) { 269 if (stdinflag) 270 warnx("ignoring extra \"%s\" in file list", 271 argv[i]); 272 else 273 stdinflag = 1; 274 275 /* change to /dev/stdin if '-' */ 276 if (argv[i][0] == '-') { 277 static char path_stdin[] = _PATH_STDIN; 278 argv[i] = path_stdin; 279 } 280 281 } else if ((ch = access(argv[i], R_OK))) 282 err(2, "%s", argv[i]); 283 } 284 285 if (fldtab[1].icol.num == 0) { 286 /* No sort key specified */ 287 if (fldtab[0].flags & (I|D|F|N|L)) { 288 /* Modified - generate a key that covers the line */ 289 fldtab[0].flags &= ~(BI|BT); 290 setfield("1", &fldtab[++fld_cnt], fldtab->flags); 291 fldreset(fldtab); 292 } else { 293 /* Unmodified, just compare the line */ 294 SINGL_FLD = 1; 295 fldtab[0].icol.num = 1; 296 } 297 } else { 298 fldreset(fldtab); 299 } 300 301 settables(); 302 303 if (optind == argc) { 304 static const char * const names[] = { _PATH_STDIN, NULL }; 305 filelist.names = names; 306 num_input_files = 1; 307 } else { 308 filelist.names = (const char * const *) &argv[optind]; 309 num_input_files = argc - optind; 310 } 311 312 if (cflag) { 313 order(&filelist, fldtab); 314 /* NOT REACHED */ 315 } 316 317 if (!outpath) { 318 toutpath[0] = '\0'; /* path not used in this case */ 319 outfile = outpath = toutpath; 320 outfp = stdout; 321 } else if (lstat(outpath, &st) == 0 322 && !S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) { 323 /* output file exists and isn't character or block device */ 324 struct sigaction act; 325 static const int sigtable[] = {SIGHUP, SIGINT, SIGPIPE, 326 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, 0}; 327 int outfd; 328 errno = 0; 329 if (access(outpath, W_OK)) 330 err(2, "%s", outpath); 331 (void)snprintf(toutpath, sizeof(toutpath), "%sXXXXXX", 332 outpath); 333 if ((outfd = mkstemp(toutpath)) == -1) 334 err(2, "Cannot create temporary file `%s'", toutpath); 335 (void)atexit(cleanup); 336 act.sa_handler = onsignal; 337 (void) sigemptyset(&act.sa_mask); 338 act.sa_flags = SA_RESTART | SA_RESETHAND; 339 for (i = 0; sigtable[i]; ++i) /* always unlink toutpath */ 340 sigaction(sigtable[i], &act, 0); 341 outfile = toutpath; 342 if ((outfp = fdopen(outfd, "w")) == NULL) 343 err(2, "Cannot open temporary file `%s'", toutpath); 344 } else { 345 outfile = outpath; 346 347 if ((outfp = fopen(outfile, "w")) == NULL) 348 err(2, "output file %s", outfile); 349 } 350 351 if (mflag) 352 fmerge(&filelist, num_input_files, outfp, fldtab); 353 else 354 fsort(&filelist, num_input_files, outfp, fldtab); 355 356 if (outfile != outpath) { 357 if (access(outfile, F_OK)) 358 err(2, "%s", outfile); 359 360 /* 361 * Copy file permissions bits of the original file. 362 * st is initialized above, when we create the 363 * temporary spool file. 364 */ 365 if (lchmod(outfile, st.st_mode & ALLPERMS) != 0) { 366 err(2, "cannot chmod %s: output left in %s", 367 outpath, outfile); 368 } 369 370 (void)unlink(outpath); 371 if (link(outfile, outpath)) 372 err(2, "cannot link %s: output left in %s", 373 outpath, outfile); 374 (void)unlink(outfile); 375 toutpath[0] = 0; 376 } 377 exit(0); 378} 379 380static void 381onsignal(int sig) 382{ 383 cleanup(); 384} 385 386static void 387cleanup(void) 388{ 389 if (toutpath[0]) 390 (void)unlink(toutpath); 391} 392 393static void 394usage(const char *msg) 395{ 396 if (msg != NULL) 397 (void)fprintf(stderr, "%s: %s\n", getprogname(), msg); 398 (void)fprintf(stderr, 399 "usage: %s [-bcdfHilmnrSsu] [-k field1[,field2]] [-o output]" 400 " [-R char] [-T dir]", getprogname()); 401 (void)fprintf(stderr, 402 " [-t char] [file ...]\n"); 403 exit(2); 404} 405 406RECHEADER * 407allocrec(RECHEADER *rec, size_t size) 408{ 409 410 return (erealloc(rec, size + sizeof(long) - 1)); 411} 412