1/* vi: set sw=4 ts=4: */ 2/* 3 * SuS3 compliant sort implementation for busybox 4 * 5 * Copyright (C) 2004 by Rob Landley <rob@landley.net> 6 * 7 * MAINTAINER: Rob Landley <rob@landley.net> 8 * 9 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 10 * 11 * See SuS3 sort standard at: 12 * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html 13 */ 14 15#include "libbb.h" 16 17/* This is a NOEXEC applet. Be very careful! */ 18 19 20/* 21 sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] 22 sort -c [-bdfinru][-t char][-k keydef][file] 23*/ 24 25/* These are sort types */ 26static const char OPT_STR[] ALIGN1 = "ngMucszbrdfimS:T:o:k:t:"; 27enum { 28 FLAG_n = 1, /* Numeric sort */ 29 FLAG_g = 2, /* Sort using strtod() */ 30 FLAG_M = 4, /* Sort date */ 31/* ucsz apply to root level only, not keys. b at root level implies bb */ 32 FLAG_u = 8, /* Unique */ 33 FLAG_c = 0x10, /* Check: no output, exit(!ordered) */ 34 FLAG_s = 0x20, /* Stable sort, no ascii fallback at end */ 35 FLAG_z = 0x40, /* Input is null terminated, not \n */ 36/* These can be applied to search keys, the previous four can't */ 37 FLAG_b = 0x80, /* Ignore leading blanks */ 38 FLAG_r = 0x100, /* Reverse */ 39 FLAG_d = 0x200, /* Ignore !(isalnum()|isspace()) */ 40 FLAG_f = 0x400, /* Force uppercase */ 41 FLAG_i = 0x800, /* Ignore !isprint() */ 42 FLAG_m = 0x1000, /* ignored: merge already sorted files; do not sort */ 43 FLAG_S = 0x2000, /* ignored: -S, --buffer-size=SIZE */ 44 FLAG_T = 0x4000, /* ignored: -T, --temporary-directory=DIR */ 45 FLAG_o = 0x8000, 46 FLAG_k = 0x10000, 47 FLAG_t = 0x20000, 48 FLAG_bb = 0x80000000, /* Ignore trailing blanks */ 49}; 50 51#if ENABLE_FEATURE_SORT_BIG 52static char key_separator; 53 54static struct sort_key { 55 struct sort_key *next_key; /* linked list */ 56 unsigned range[4]; /* start word, start char, end word, end char */ 57 unsigned flags; 58} *key_list; 59 60static char *get_key(char *str, struct sort_key *key, int flags) 61{ 62 int start = 0, end = 0, len, i, j; 63 64 /* Special case whole string, so we don't have to make a copy */ 65 if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3] 66 && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb)) 67 ) { 68 return str; 69 } 70 71 /* Find start of key on first pass, end on second pass */ 72 len = strlen(str); 73 for (j = 0; j < 2; j++) { 74 if (!key->range[2*j]) 75 end = len; 76 /* Loop through fields */ 77 else { 78 end = 0; 79 for (i = 1; i < key->range[2*j] + j; i++) { 80 if (key_separator) { 81 /* Skip body of key and separator */ 82 while (str[end]) { 83 if (str[end++] == key_separator) 84 break; 85 } 86 } else { 87 /* Skip leading blanks */ 88 while (isspace(str[end])) 89 end++; 90 /* Skip body of key */ 91 while (str[end]) { 92 if (isspace(str[end])) 93 break; 94 end++; 95 } 96 } 97 } 98 } 99 if (!j) start = end; 100 } 101 /* Strip leading whitespace if necessary */ 102 if (flags & FLAG_b) 103 while (isspace(str[start])) start++; 104 /* Strip trailing whitespace if necessary */ 105 if (flags & FLAG_bb) 106 while (end > start && isspace(str[end-1])) end--; 107 /* Handle offsets on start and end */ 108 if (key->range[3]) { 109 end += key->range[3] - 1; 110 if (end > len) end = len; 111 } 112 if (key->range[1]) { 113 start += key->range[1] - 1; 114 if (start > len) start = len; 115 } 116 /* Make the copy */ 117 if (end < start) end = start; 118 str = xstrndup(str+start, end-start); 119 /* Handle -d */ 120 if (flags & FLAG_d) { 121 for (start = end = 0; str[end]; end++) 122 if (isspace(str[end]) || isalnum(str[end])) 123 str[start++] = str[end]; 124 str[start] = '\0'; 125 } 126 /* Handle -i */ 127 if (flags & FLAG_i) { 128 for (start = end = 0; str[end]; end++) 129 if (isprint(str[end])) 130 str[start++] = str[end]; 131 str[start] = '\0'; 132 } 133 /* Handle -f */ 134 if (flags & FLAG_f) 135 for (i = 0; str[i]; i++) 136 str[i] = toupper(str[i]); 137 138 return str; 139} 140 141static struct sort_key *add_key(void) 142{ 143 struct sort_key **pkey = &key_list; 144 while (*pkey) 145 pkey = &((*pkey)->next_key); 146 return *pkey = xzalloc(sizeof(struct sort_key)); 147} 148 149#define GET_LINE(fp) \ 150 ((option_mask32 & FLAG_z) \ 151 ? bb_get_chunk_from_file(fp, NULL) \ 152 : xmalloc_getline(fp)) 153#else 154#define GET_LINE(fp) xmalloc_getline(fp) 155#endif 156 157/* Iterate through keys list and perform comparisons */ 158static int compare_keys(const void *xarg, const void *yarg) 159{ 160 int flags = option_mask32, retval = 0; 161 char *x, *y; 162 163#if ENABLE_FEATURE_SORT_BIG 164 struct sort_key *key; 165 166 for (key = key_list; !retval && key; key = key->next_key) { 167 flags = key->flags ? key->flags : option_mask32; 168 /* Chop out and modify key chunks, handling -dfib */ 169 x = get_key(*(char **)xarg, key, flags); 170 y = get_key(*(char **)yarg, key, flags); 171#else 172 /* This curly bracket serves no purpose but to match the nesting 173 level of the for () loop we're not using */ 174 { 175 x = *(char **)xarg; 176 y = *(char **)yarg; 177#endif 178 /* Perform actual comparison */ 179 switch (flags & 7) { 180 default: 181 bb_error_msg_and_die("unknown sort type"); 182 break; 183 /* Ascii sort */ 184 case 0: 185#if ENABLE_LOCALE_SUPPORT 186 retval = strcoll(x, y); 187#else 188 retval = strcmp(x, y); 189#endif 190 break; 191#if ENABLE_FEATURE_SORT_BIG 192 case FLAG_g: { 193 char *xx, *yy; 194 double dx = strtod(x, &xx); 195 double dy = strtod(y, &yy); 196 /* not numbers < NaN < -infinity < numbers < +infinity) */ 197 if (x == xx) 198 retval = (y == yy ? 0 : -1); 199 else if (y == yy) 200 retval = 1; 201 /* Check for isnan */ 202 else if (dx != dx) 203 retval = (dy != dy) ? 0 : -1; 204 else if (dy != dy) 205 retval = 1; 206 /* Check for infinity. Could underflow, but it avoids libm. */ 207 else if (1.0 / dx == 0.0) { 208 if (dx < 0) 209 retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; 210 else 211 retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; 212 } else if (1.0 / dy == 0.0) 213 retval = (dy < 0) ? 1 : -1; 214 else 215 retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); 216 break; 217 } 218 case FLAG_M: { 219 struct tm thyme; 220 int dx; 221 char *xx, *yy; 222 223 xx = strptime(x, "%b", &thyme); 224 dx = thyme.tm_mon; 225 yy = strptime(y, "%b", &thyme); 226 if (!xx) 227 retval = (!yy) ? 0 : -1; 228 else if (!yy) 229 retval = 1; 230 else 231 retval = (dx == thyme.tm_mon) ? 0 : dx - thyme.tm_mon; 232 break; 233 } 234 /* Full floating point version of -n */ 235 case FLAG_n: { 236 double dx = atof(x); 237 double dy = atof(y); 238 retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); 239 break; 240 } 241 } /* switch */ 242 /* Free key copies. */ 243 if (x != *(char **)xarg) free(x); 244 if (y != *(char **)yarg) free(y); 245 /* if (retval) break; - done by for () anyway */ 246#else 247 /* Integer version of -n for tiny systems */ 248 case FLAG_n: 249 retval = atoi(x) - atoi(y); 250 break; 251 } /* switch */ 252#endif 253 } /* for */ 254 255 /* Perform fallback sort if necessary */ 256 if (!retval && !(option_mask32 & FLAG_s)) 257 retval = strcmp(*(char **)xarg, *(char **)yarg); 258 259 if (flags & FLAG_r) return -retval; 260 return retval; 261} 262 263#if ENABLE_FEATURE_SORT_BIG 264static unsigned str2u(char **str) 265{ 266 unsigned long lu; 267 if (!isdigit((*str)[0])) 268 bb_error_msg_and_die("bad field specification"); 269 lu = strtoul(*str, str, 10); 270 if ((sizeof(long) > sizeof(int) && lu > INT_MAX) || !lu) 271 bb_error_msg_and_die("bad field specification"); 272 return lu; 273} 274#endif 275 276int sort_main(int argc, char **argv); 277int sort_main(int argc, char **argv) 278{ 279 FILE *fp, *outfile = stdout; 280 char *line, **lines = NULL; 281 char *str_ignored, *str_o, *str_t; 282 llist_t *lst_k = NULL; 283 int i, flag; 284 int linecount = 0; 285 286 xfunc_error_retval = 2; 287 288 /* Parse command line options */ 289 /* -o and -t can be given at most once */ 290 opt_complementary = "o--o:t--t:" /* -t, -o: maximum one of each */ 291 "k::"; /* -k takes list */ 292 getopt32(argv, OPT_STR, &str_ignored, &str_ignored, &str_o, &lst_k, &str_t); 293#if ENABLE_FEATURE_SORT_BIG 294 if (option_mask32 & FLAG_o) outfile = xfopen(str_o, "w"); 295 if (option_mask32 & FLAG_t) { 296 if (!str_t[0] || str_t[1]) 297 bb_error_msg_and_die("bad -t parameter"); 298 key_separator = str_t[0]; 299 } 300 /* parse sort key */ 301 while (lst_k) { 302 enum { 303 FLAG_allowed_for_k = 304 FLAG_n | /* Numeric sort */ 305 FLAG_g | /* Sort using strtod() */ 306 FLAG_M | /* Sort date */ 307 FLAG_b | /* Ignore leading blanks */ 308 FLAG_r | /* Reverse */ 309 FLAG_d | /* Ignore !(isalnum()|isspace()) */ 310 FLAG_f | /* Force uppercase */ 311 FLAG_i | /* Ignore !isprint() */ 312 0 313 }; 314 struct sort_key *key = add_key(); 315 char *str_k = lst_k->data; 316 const char *temp2; 317 318 i = 0; /* i==0 before comma, 1 after (-k3,6) */ 319 while (*str_k) { 320 /* Start of range */ 321 /* Cannot use bb_strtou - suffix can be a letter */ 322 key->range[2*i] = str2u(&str_k); 323 if (*str_k == '.') { 324 str_k++; 325 key->range[2*i+1] = str2u(&str_k); 326 } 327 while (*str_k) { 328 if (*str_k == ',' && !i++) { 329 str_k++; 330 break; 331 } /* no else needed: fall through to syntax error 332 because comma isn't in OPT_STR */ 333 temp2 = strchr(OPT_STR, *str_k); 334 if (!temp2) 335 bb_error_msg_and_die("unknown key option"); 336 flag = 1 << (temp2 - OPT_STR); 337 if (flag & ~FLAG_allowed_for_k) 338 bb_error_msg_and_die("unknown sort type"); 339 /* b after ',' means strip _trailing_ space */ 340 if (i && flag == FLAG_b) flag = FLAG_bb; 341 key->flags |= flag; 342 str_k++; 343 } 344 } 345 /* leaking lst_k... */ 346 lst_k = lst_k->link; 347 } 348#endif 349 /* global b strips leading and trailing spaces */ 350 if (option_mask32 & FLAG_b) option_mask32 |= FLAG_bb; 351 352 /* Open input files and read data */ 353 for (i = argv[optind] ? optind : optind-1; argv[i]; i++) { 354 fp = stdin; 355 if (i >= optind && NOT_LONE_DASH(argv[i])) 356 fp = xfopen(argv[i], "r"); 357 for (;;) { 358 line = GET_LINE(fp); 359 if (!line) break; 360 if (!(linecount & 63)) 361 lines = xrealloc(lines, sizeof(char *) * (linecount + 64)); 362 lines[linecount++] = line; 363 } 364 fclose(fp); 365 } 366#if ENABLE_FEATURE_SORT_BIG 367 /* if no key, perform alphabetic sort */ 368 if (!key_list) 369 add_key()->range[0] = 1; 370 /* handle -c */ 371 if (option_mask32 & FLAG_c) { 372 int j = (option_mask32 & FLAG_u) ? -1 : 0; 373 for (i = 1; i < linecount; i++) 374 if (compare_keys(&lines[i-1], &lines[i]) > j) { 375 fprintf(stderr, "Check line %d\n", i); 376 return 1; 377 } 378 return 0; 379 } 380#endif 381 /* Perform the actual sort */ 382 qsort(lines, linecount, sizeof(char *), compare_keys); 383 /* handle -u */ 384 if (option_mask32 & FLAG_u) { 385 flag = 0; 386 /* coreutils 6.3 drop lines for which only key is the same */ 387 /* -- disabling last-resort compare... */ 388 option_mask32 |= FLAG_s; 389 for (i = 1; i < linecount; i++) { 390 if (!compare_keys(&lines[flag], &lines[i])) 391 free(lines[i]); 392 else 393 lines[++flag] = lines[i]; 394 } 395 if (linecount) linecount = flag+1; 396 } 397 /* Print it */ 398 for (i = 0; i < linecount; i++) 399 fprintf(outfile, "%s\n", lines[i]); 400 401 fflush_stdout_and_exit(EXIT_SUCCESS); 402} 403