1/* vi: set sw=4 ts=4: */ 2/* 3 * Mini grep implementation for busybox using libc regex. 4 * 5 * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley 6 * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org> 7 * 8 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball. 9 */ 10/* BB_AUDIT SUSv3 defects - unsupported option -x. */ 11/* BB_AUDIT GNU defects - always acts as -a. */ 12/* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */ 13/* 14 * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> - 15 * correction "-e pattern1 -e pattern2" logic and more optimizations. 16 * precompiled regex 17 */ 18/* 19 * (C) 2006 Jac Goudsmit added -o option 20 */ 21 22#include "libbb.h" 23#include "xregex.h" 24 25/* options */ 26#define OPTSTR_GREP \ 27 "lnqvscFiHhe:f:Lorm:" \ 28 USE_FEATURE_GREP_CONTEXT("A:B:C:") \ 29 USE_FEATURE_GREP_EGREP_ALIAS("E") \ 30 USE_DESKTOP("w") \ 31 "aI" 32/* ignored: -a "assume all files to be text" */ 33/* ignored: -I "assume binary files have no matches" */ 34 35enum { 36 OPTBIT_l, /* list matched file names only */ 37 OPTBIT_n, /* print line# */ 38 OPTBIT_q, /* quiet - exit(0) of first match */ 39 OPTBIT_v, /* invert the match, to select non-matching lines */ 40 OPTBIT_s, /* suppress errors about file open errors */ 41 OPTBIT_c, /* count matches per file (suppresses normal output) */ 42 OPTBIT_F, /* literal match */ 43 OPTBIT_i, /* case-insensitive */ 44 OPTBIT_H, /* force filename display */ 45 OPTBIT_h, /* inhibit filename display */ 46 OPTBIT_e, /* -e PATTERN */ 47 OPTBIT_f, /* -f FILE_WITH_PATTERNS */ 48 OPTBIT_L, /* list unmatched file names only */ 49 OPTBIT_o, /* show only matching parts of lines */ 50 OPTBIT_r, /* recurse dirs */ 51 OPTBIT_m, /* -m MAX_MATCHES */ 52 USE_FEATURE_GREP_CONTEXT( OPTBIT_A ,) /* -A NUM: after-match context */ 53 USE_FEATURE_GREP_CONTEXT( OPTBIT_B ,) /* -B NUM: before-match context */ 54 USE_FEATURE_GREP_CONTEXT( OPTBIT_C ,) /* -C NUM: -A and -B combined */ 55 USE_FEATURE_GREP_EGREP_ALIAS(OPTBIT_E ,) /* extended regexp */ 56 USE_DESKTOP( OPTBIT_w ,) /* whole word match */ 57 OPT_l = 1 << OPTBIT_l, 58 OPT_n = 1 << OPTBIT_n, 59 OPT_q = 1 << OPTBIT_q, 60 OPT_v = 1 << OPTBIT_v, 61 OPT_s = 1 << OPTBIT_s, 62 OPT_c = 1 << OPTBIT_c, 63 OPT_F = 1 << OPTBIT_F, 64 OPT_i = 1 << OPTBIT_i, 65 OPT_H = 1 << OPTBIT_H, 66 OPT_h = 1 << OPTBIT_h, 67 OPT_e = 1 << OPTBIT_e, 68 OPT_f = 1 << OPTBIT_f, 69 OPT_L = 1 << OPTBIT_L, 70 OPT_o = 1 << OPTBIT_o, 71 OPT_r = 1 << OPTBIT_r, 72 OPT_m = 1 << OPTBIT_m, 73 OPT_A = USE_FEATURE_GREP_CONTEXT( (1 << OPTBIT_A)) + 0, 74 OPT_B = USE_FEATURE_GREP_CONTEXT( (1 << OPTBIT_B)) + 0, 75 OPT_C = USE_FEATURE_GREP_CONTEXT( (1 << OPTBIT_C)) + 0, 76 OPT_E = USE_FEATURE_GREP_EGREP_ALIAS((1 << OPTBIT_E)) + 0, 77 OPT_w = USE_DESKTOP( (1 << OPTBIT_w)) + 0, 78}; 79 80#define PRINT_FILES_WITH_MATCHES (option_mask32 & OPT_l) 81#define PRINT_LINE_NUM (option_mask32 & OPT_n) 82#define BE_QUIET (option_mask32 & OPT_q) 83#define SUPPRESS_ERR_MSGS (option_mask32 & OPT_s) 84#define PRINT_MATCH_COUNTS (option_mask32 & OPT_c) 85#define FGREP_FLAG (option_mask32 & OPT_F) 86#define PRINT_FILES_WITHOUT_MATCHES (option_mask32 & OPT_L) 87 88typedef unsigned char byte_t; 89 90static int max_matches; 91static int reflags; 92static byte_t invert_search; 93static byte_t print_filename; 94static byte_t open_errors; 95 96#if ENABLE_FEATURE_GREP_CONTEXT 97static byte_t did_print_line; 98static int lines_before; 99static int lines_after; 100static char **before_buf; 101static int last_line_printed; 102#endif /* ENABLE_FEATURE_GREP_CONTEXT */ 103/* globals used internally */ 104static llist_t *pattern_head; /* growable list of patterns to match */ 105static const char *cur_file; /* the current file we are reading */ 106 107typedef struct grep_list_data_t { 108 char *pattern; 109 regex_t preg; 110#define PATTERN_MEM_A 1 111#define COMPILED 2 112 int flg_mem_alocated_compiled; 113} grep_list_data_t; 114 115static void print_line(const char *line, int linenum, char decoration) 116{ 117#if ENABLE_FEATURE_GREP_CONTEXT 118 /* Happens when we go to next file, immediately hit match 119 * and try to print prev context... from prev file! Don't do it */ 120 if (linenum < 1) 121 return; 122 /* possibly print the little '--' separator */ 123 if ((lines_before || lines_after) && did_print_line && 124 last_line_printed != linenum - 1) { 125 puts("--"); 126 } 127 /* guard against printing "--" before first line of first file */ 128 did_print_line = 1; 129 last_line_printed = linenum; 130#endif 131 if (print_filename) 132 printf("%s%c", cur_file, decoration); 133 if (PRINT_LINE_NUM) 134 printf("%i%c", linenum, decoration); 135 /* Emulate weird GNU grep behavior with -ov */ 136 if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) 137 puts(line); 138} 139 140static int grep_file(FILE *file) 141{ 142 char *line; 143 byte_t ret; 144 int linenum = 0; 145 int nmatches = 0; 146 regmatch_t regmatch; 147#if ENABLE_FEATURE_GREP_CONTEXT 148 int print_n_lines_after = 0; 149 int curpos = 0; /* track where we are in the circular 'before' buffer */ 150 int idx = 0; /* used for iteration through the circular buffer */ 151#else 152 enum { print_n_lines_after = 0 }; 153#endif /* ENABLE_FEATURE_GREP_CONTEXT */ 154 155 while ((line = xmalloc_getline(file)) != NULL) { 156 llist_t *pattern_ptr = pattern_head; 157 grep_list_data_t * gl; 158 159 linenum++; 160 ret = 0; 161 while (pattern_ptr) { 162 gl = (grep_list_data_t *)pattern_ptr->data; 163 if (FGREP_FLAG) { 164 ret = strstr(line, gl->pattern) != NULL; 165 } else { 166 /* 167 * test for a postitive-assertion match (regexec returns success (0) 168 * and the user did not specify invert search), or a negative-assertion 169 * match (regexec returns failure (REG_NOMATCH) and the user specified 170 * invert search) 171 */ 172 if (!(gl->flg_mem_alocated_compiled & COMPILED)) { 173 gl->flg_mem_alocated_compiled |= COMPILED; 174 xregcomp(&(gl->preg), gl->pattern, reflags); 175 } 176 regmatch.rm_so = 0; 177 regmatch.rm_eo = 0; 178 if (regexec(&(gl->preg), line, 1, ®match, 0) == 0) { 179 if (!(option_mask32 & OPT_w)) 180 ret = 1; 181 else { 182 char c = ' '; 183 if (regmatch.rm_so) 184 c = line[regmatch.rm_so - 1]; 185 if (!isalnum(c) && c != '_') { 186 c = line[regmatch.rm_eo]; 187 if (!c || (!isalnum(c) && c != '_')) 188 ret = 1; 189 } 190 } 191 } 192 } 193 pattern_ptr = pattern_ptr->link; 194 } /* while (pattern_ptr) */ 195 196 if (ret ^ invert_search) { 197 /* keep track of matches */ 198 nmatches++; 199 200 /* quiet/print (non)matching file names only? */ 201 if (option_mask32 & (OPT_q|OPT_l|OPT_L)) { 202 free(line); /* we don't need line anymore */ 203 if (BE_QUIET) { 204 /* manpage says about -q: 205 * "exit immediately with zero status 206 * if any match is found, 207 * even if errors were detected" */ 208 exit(0); 209 } 210 /* if we're just printing filenames, we stop after the first match */ 211 if (PRINT_FILES_WITH_MATCHES) { 212 puts(cur_file); 213 /* fall thru to "return 1" */ 214 } 215 /* OPT_L aka PRINT_FILES_WITHOUT_MATCHES: return early */ 216 return 1; /* one match */ 217 } 218 219#if ENABLE_FEATURE_GREP_CONTEXT 220 /* Were we printing context and saw next (unwanted) match? */ 221 if ((option_mask32 & OPT_m) && nmatches > max_matches) 222 break; 223#endif 224 225 /* print the matched line */ 226 if (PRINT_MATCH_COUNTS == 0) { 227#if ENABLE_FEATURE_GREP_CONTEXT 228 int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1; 229 230 /* if we were told to print 'before' lines and there is at least 231 * one line in the circular buffer, print them */ 232 if (lines_before && before_buf[prevpos] != NULL) { 233 int first_buf_entry_line_num = linenum - lines_before; 234 235 /* advance to the first entry in the circular buffer, and 236 * figure out the line number is of the first line in the 237 * buffer */ 238 idx = curpos; 239 while (before_buf[idx] == NULL) { 240 idx = (idx + 1) % lines_before; 241 first_buf_entry_line_num++; 242 } 243 244 /* now print each line in the buffer, clearing them as we go */ 245 while (before_buf[idx] != NULL) { 246 print_line(before_buf[idx], first_buf_entry_line_num, '-'); 247 free(before_buf[idx]); 248 before_buf[idx] = NULL; 249 idx = (idx + 1) % lines_before; 250 first_buf_entry_line_num++; 251 } 252 } 253 254 /* make a note that we need to print 'after' lines */ 255 print_n_lines_after = lines_after; 256#endif 257 if (option_mask32 & OPT_o) { 258 line[regmatch.rm_eo] = '\0'; 259 print_line(line + regmatch.rm_so, linenum, ':'); 260 } else { 261 print_line(line, linenum, ':'); 262 } 263 } 264 } 265#if ENABLE_FEATURE_GREP_CONTEXT 266 else { /* no match */ 267 /* if we need to print some context lines after the last match, do so */ 268 if (print_n_lines_after) { 269 print_line(line, linenum, '-'); 270 print_n_lines_after--; 271 } else if (lines_before) { 272 /* Add the line to the circular 'before' buffer */ 273 free(before_buf[curpos]); 274 before_buf[curpos] = line; 275 curpos = (curpos + 1) % lines_before; 276 /* avoid free(line) - we took line */ 277 line = NULL; 278 } 279 } 280 281#endif /* ENABLE_FEATURE_GREP_CONTEXT */ 282 free(line); 283 284 /* Did we print all context after last requested match? */ 285 if ((option_mask32 & OPT_m) 286 && !print_n_lines_after && nmatches == max_matches) 287 break; 288 } 289 290 /* special-case file post-processing for options where we don't print line 291 * matches, just filenames and possibly match counts */ 292 293 /* grep -c: print [filename:]count, even if count is zero */ 294 if (PRINT_MATCH_COUNTS) { 295 if (print_filename) 296 printf("%s:", cur_file); 297 printf("%d\n", nmatches); 298 } 299 300 /* grep -L: print just the filename */ 301 if (PRINT_FILES_WITHOUT_MATCHES) { 302 /* nmatches is zero, no need to check it: 303 * we return 1 early if we detected a match 304 * and PRINT_FILES_WITHOUT_MATCHES is set */ 305 puts(cur_file); 306 } 307 308 return nmatches; 309} 310 311#if ENABLE_FEATURE_CLEAN_UP 312#define new_grep_list_data(p, m) add_grep_list_data(p, m) 313static char * add_grep_list_data(char *pattern, int flg_used_mem) 314#else 315#define new_grep_list_data(p, m) add_grep_list_data(p) 316static char * add_grep_list_data(char *pattern) 317#endif 318{ 319 grep_list_data_t *gl = xmalloc(sizeof(grep_list_data_t)); 320 gl->pattern = pattern; 321#if ENABLE_FEATURE_CLEAN_UP 322 gl->flg_mem_alocated_compiled = flg_used_mem; 323#else 324 gl->flg_mem_alocated_compiled = 0; 325#endif 326 return (char *)gl; 327} 328 329static void load_regexes_from_file(llist_t *fopt) 330{ 331 char *line; 332 FILE *f; 333 334 while (fopt) { 335 llist_t *cur = fopt; 336 char *ffile = cur->data; 337 338 fopt = cur->link; 339 free(cur); 340 f = xfopen(ffile, "r"); 341 while ((line = xmalloc_getline(f)) != NULL) { 342 llist_add_to(&pattern_head, 343 new_grep_list_data(line, PATTERN_MEM_A)); 344 } 345 } 346} 347 348static int file_action_grep(const char *filename, struct stat *statbuf, void* matched, int depth) 349{ 350 FILE *file = fopen(filename, "r"); 351 if (file == NULL) { 352 if (!SUPPRESS_ERR_MSGS) 353 bb_perror_msg("%s", cur_file); 354 open_errors = 1; 355 return 0; 356 } 357 cur_file = filename; 358 *(int*)matched += grep_file(file); 359 fclose(file); 360 return 1; 361} 362 363static int grep_dir(const char *dir) 364{ 365 int matched = 0; 366 recursive_action(dir, 367 /* recurse=yes */ ACTION_RECURSE | 368 /* followLinks=no */ 369 /* depthFirst=yes */ ACTION_DEPTHFIRST, 370 /* fileAction= */ file_action_grep, 371 /* dirAction= */ NULL, 372 /* userData= */ &matched, 373 /* depth= */ 0); 374 return matched; 375} 376 377int grep_main(int argc, char **argv); 378int grep_main(int argc, char **argv) 379{ 380 FILE *file; 381 int matched; 382 char *mopt; 383 llist_t *fopt = NULL; 384 385 /* do normal option parsing */ 386#if ENABLE_FEATURE_GREP_CONTEXT 387 char *slines_after; 388 char *slines_before; 389 char *Copt; 390 391 opt_complementary = "H-h:e::f::C-AB"; 392 getopt32(argv, 393 OPTSTR_GREP, 394 &pattern_head, &fopt, &mopt, 395 &slines_after, &slines_before, &Copt); 396 397 if (option_mask32 & OPT_C) { 398 /* -C unsets prev -A and -B, but following -A or -B 399 may override it */ 400 if (!(option_mask32 & OPT_A)) /* not overridden */ 401 slines_after = Copt; 402 if (!(option_mask32 & OPT_B)) /* not overridden */ 403 slines_before = Copt; 404 option_mask32 |= OPT_A|OPT_B; /* for parser */ 405 } 406 if (option_mask32 & OPT_A) { 407 lines_after = xatoi_u(slines_after); 408 } 409 if (option_mask32 & OPT_B) { 410 lines_before = xatoi_u(slines_before); 411 } 412 /* sanity checks */ 413 if (option_mask32 & (OPT_c|OPT_q|OPT_l|OPT_L)) { 414 option_mask32 &= ~OPT_n; 415 lines_before = 0; 416 lines_after = 0; 417 } else if (lines_before > 0) 418 before_buf = xzalloc(lines_before * sizeof(char *)); 419#else 420 /* with auto sanity checks */ 421 opt_complementary = "H-h:e::f::c-n:q-n:l-n"; 422 getopt32(argv, OPTSTR_GREP, 423 &pattern_head, &fopt, &mopt); 424#endif 425 if (option_mask32 & OPT_m) { 426 max_matches = xatoi_u(mopt); 427 } 428 invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */ 429 430 if (pattern_head != NULL) { 431 /* convert char **argv to grep_list_data_t */ 432 llist_t *cur; 433 434 for (cur = pattern_head; cur; cur = cur->link) 435 cur->data = new_grep_list_data(cur->data, 0); 436 } 437 if (option_mask32 & OPT_f) 438 load_regexes_from_file(fopt); 439 440 if (ENABLE_FEATURE_GREP_FGREP_ALIAS && applet_name[0] == 'f') 441 option_mask32 |= OPT_F; 442 443 if (!(option_mask32 & (OPT_o | OPT_w))) 444 reflags = REG_NOSUB; 445 446 if (ENABLE_FEATURE_GREP_EGREP_ALIAS && 447 (applet_name[0] == 'e' || (option_mask32 & OPT_E))) 448 reflags |= REG_EXTENDED; 449 450 if (option_mask32 & OPT_i) 451 reflags |= REG_ICASE; 452 453 argv += optind; 454 argc -= optind; 455 456 /* if we didn't get a pattern from a -e and no command file was specified, 457 * argv[optind] should be the pattern. no pattern, no worky */ 458 if (pattern_head == NULL) { 459 char *pattern; 460 if (*argv == NULL) 461 bb_show_usage(); 462 pattern = new_grep_list_data(*argv++, 0); 463 llist_add_to(&pattern_head, pattern); 464 argc--; 465 } 466 467 /* argv[(optind)..(argc-1)] should be names of file to grep through. If 468 * there is more than one file to grep, we will print the filenames. */ 469 if (argc > 1) 470 print_filename = 1; 471 /* -H / -h of course override */ 472 if (option_mask32 & OPT_H) 473 print_filename = 1; 474 if (option_mask32 & OPT_h) 475 print_filename = 0; 476 477 /* If no files were specified, or '-' was specified, take input from 478 * stdin. Otherwise, we grep through all the files specified. */ 479 if (argc == 0) 480 argc++; 481 matched = 0; 482 while (argc--) { 483 cur_file = *argv++; 484 file = stdin; 485 if (!cur_file || (*cur_file == '-' && !cur_file[1])) { 486 cur_file = "(standard input)"; 487 } else { 488 if (option_mask32 & OPT_r) { 489 struct stat st; 490 if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) { 491 if (!(option_mask32 & OPT_h)) 492 print_filename = 1; 493 matched += grep_dir(cur_file); 494 goto grep_done; 495 } 496 } 497 /* else: fopen(dir) will succeed, but reading won't */ 498 file = fopen(cur_file, "r"); 499 if (file == NULL) { 500 if (!SUPPRESS_ERR_MSGS) 501 bb_perror_msg("%s", cur_file); 502 open_errors = 1; 503 continue; 504 } 505 } 506 matched += grep_file(file); 507 fclose_if_not_stdin(file); 508 grep_done: ; 509 } 510 511 /* destroy all the elments in the pattern list */ 512 if (ENABLE_FEATURE_CLEAN_UP) { 513 while (pattern_head) { 514 llist_t *pattern_head_ptr = pattern_head; 515 grep_list_data_t *gl = 516 (grep_list_data_t *)pattern_head_ptr->data; 517 518 pattern_head = pattern_head->link; 519 if ((gl->flg_mem_alocated_compiled & PATTERN_MEM_A)) 520 free(gl->pattern); 521 if ((gl->flg_mem_alocated_compiled & COMPILED)) 522 regfree(&(gl->preg)); 523 free(gl); 524 free(pattern_head_ptr); 525 } 526 } 527 /* 0 = success, 1 = failed, 2 = error */ 528 if (open_errors) 529 return 2; 530 return !matched; /* invert return value 0 = success, 1 = failed */ 531} 532