util.c revision 322619
1/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2/* $FreeBSD: stable/11/usr.bin/grep/util.c 322619 2017-08-17 13:48:46Z kevans $ */ 3/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5/*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: stable/11/usr.bin/grep/util.c 322619 2017-08-17 13:48:46Z kevans $"); 34 35#include <sys/stat.h> 36#include <sys/types.h> 37 38#include <ctype.h> 39#include <err.h> 40#include <errno.h> 41#include <fnmatch.h> 42#include <fts.h> 43#include <libgen.h> 44#include <stdbool.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include <unistd.h> 49#include <wchar.h> 50#include <wctype.h> 51 52#ifndef WITHOUT_FASTMATCH 53#include "fastmatch.h" 54#endif 55#include "grep.h" 56 57static bool first_match = true; 58 59/* 60 * Parsing context; used to hold things like matches made and 61 * other useful bits 62 */ 63struct parsec { 64 regmatch_t matches[MAX_LINE_MATCHES]; /* Matches made */ 65 struct str ln; /* Current line */ 66 size_t lnstart; /* Position in line */ 67 size_t matchidx; /* Latest match index */ 68 int printed; /* Metadata printed? */ 69 bool binary; /* Binary file? */ 70}; 71 72 73static int procline(struct parsec *pc); 74static void printline(struct parsec *pc, int sep); 75static void printline_metadata(struct str *line, int sep); 76 77bool 78file_matching(const char *fname) 79{ 80 char *fname_base, *fname_buf; 81 bool ret; 82 83 ret = finclude ? false : true; 84 fname_buf = strdup(fname); 85 if (fname_buf == NULL) 86 err(2, "strdup"); 87 fname_base = basename(fname_buf); 88 89 for (unsigned int i = 0; i < fpatterns; ++i) { 90 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 91 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 92 if (fpattern[i].mode == EXCL_PAT) { 93 ret = false; 94 break; 95 } else 96 ret = true; 97 } 98 } 99 free(fname_buf); 100 return (ret); 101} 102 103static inline bool 104dir_matching(const char *dname) 105{ 106 bool ret; 107 108 ret = dinclude ? false : true; 109 110 for (unsigned int i = 0; i < dpatterns; ++i) { 111 if (dname != NULL && 112 fnmatch(dpattern[i].pat, dname, 0) == 0) { 113 if (dpattern[i].mode == EXCL_PAT) 114 return (false); 115 else 116 ret = true; 117 } 118 } 119 return (ret); 120} 121 122/* 123 * Processes a directory when a recursive search is performed with 124 * the -R option. Each appropriate file is passed to procfile(). 125 */ 126int 127grep_tree(char **argv) 128{ 129 FTS *fts; 130 FTSENT *p; 131 int c, fts_flags; 132 bool ok; 133 const char *wd[] = { ".", NULL }; 134 135 c = fts_flags = 0; 136 137 switch(linkbehave) { 138 case LINK_EXPLICIT: 139 fts_flags = FTS_COMFOLLOW; 140 break; 141 case LINK_SKIP: 142 fts_flags = FTS_PHYSICAL; 143 break; 144 default: 145 fts_flags = FTS_LOGICAL; 146 147 } 148 149 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 150 151 fts = fts_open((argv[0] == NULL) ? 152 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 153 if (fts == NULL) 154 err(2, "fts_open"); 155 while ((p = fts_read(fts)) != NULL) { 156 switch (p->fts_info) { 157 case FTS_DNR: 158 /* FALLTHROUGH */ 159 case FTS_ERR: 160 file_err = true; 161 if(!sflag) 162 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 163 break; 164 case FTS_D: 165 /* FALLTHROUGH */ 166 case FTS_DP: 167 if (dexclude || dinclude) 168 if (!dir_matching(p->fts_name) || 169 !dir_matching(p->fts_path)) 170 fts_set(fts, p, FTS_SKIP); 171 break; 172 case FTS_DC: 173 /* Print a warning for recursive directory loop */ 174 warnx("warning: %s: recursive directory loop", 175 p->fts_path); 176 break; 177 default: 178 /* Check for file exclusion/inclusion */ 179 ok = true; 180 if (fexclude || finclude) 181 ok &= file_matching(p->fts_path); 182 183 if (ok) 184 c += procfile(p->fts_path); 185 break; 186 } 187 } 188 189 fts_close(fts); 190 return (c); 191} 192 193/* 194 * Opens a file and processes it. Each file is processed line-by-line 195 * passing the lines to procline(). 196 */ 197int 198procfile(const char *fn) 199{ 200 struct parsec pc; 201 long long tail; 202 struct file *f; 203 struct stat sb; 204 struct str *ln; 205 mode_t s; 206 int c, last_outed, t; 207 bool doctx, printmatch, same_file; 208 209 if (strcmp(fn, "-") == 0) { 210 fn = label != NULL ? label : getstr(1); 211 f = grep_open(NULL); 212 } else { 213 if (!stat(fn, &sb)) { 214 /* Check if we need to process the file */ 215 s = sb.st_mode & S_IFMT; 216 if (s == S_IFDIR && dirbehave == DIR_SKIP) 217 return (0); 218 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 219 || s == S_IFSOCK) && devbehave == DEV_SKIP) 220 return (0); 221 } 222 f = grep_open(fn); 223 } 224 if (f == NULL) { 225 file_err = true; 226 if (!sflag) 227 warn("%s", fn); 228 return (0); 229 } 230 231 /* Convenience */ 232 ln = &pc.ln; 233 pc.ln.file = grep_malloc(strlen(fn) + 1); 234 strcpy(pc.ln.file, fn); 235 pc.ln.line_no = 0; 236 pc.ln.len = 0; 237 pc.ln.boff = 0; 238 pc.ln.off = -1; 239 pc.binary = f->binary; 240 pc.printed = 0; 241 tail = 0; 242 last_outed = 0; 243 same_file = false; 244 doctx = false; 245 printmatch = true; 246 if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 247 lflag || Lflag) 248 printmatch = false; 249 if (printmatch && (Aflag != 0 || Bflag != 0)) 250 doctx = true; 251 mcount = mlimit; 252 253 for (c = 0; c == 0 || !(lflag || qflag); ) { 254 /* Reset per-line statistics */ 255 pc.printed = 0; 256 pc.matchidx = 0; 257 pc.lnstart = 0; 258 pc.ln.boff = 0; 259 pc.ln.off += pc.ln.len + 1; 260 if ((pc.ln.dat = grep_fgetln(f, &pc.ln.len)) == NULL || 261 pc.ln.len == 0) { 262 if (pc.ln.line_no == 0 && matchall) 263 /* 264 * An empty file with an empty pattern and the 265 * -w flag does not match 266 */ 267 exit(matchall && wflag ? 1 : 0); 268 else 269 break; 270 } 271 272 if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 273 --pc.ln.len; 274 pc.ln.line_no++; 275 276 /* Return if we need to skip a binary file */ 277 if (pc.binary && binbehave == BINFILE_SKIP) { 278 grep_close(f); 279 free(pc.ln.file); 280 free(f); 281 return (0); 282 } 283 284 if ((t = procline(&pc)) == 0) 285 ++c; 286 287 /* Deal with any -B context or context separators */ 288 if (t == 0 && doctx) { 289 if (!first_match && (!same_file || last_outed > 0)) 290 printf("--\n"); 291 if (Bflag > 0) 292 printqueue(); 293 tail = Aflag; 294 } 295 /* Print the matching line, but only if not quiet/binary */ 296 if (t == 0 && printmatch) { 297 printline(&pc, ':'); 298 while (pc.matchidx >= MAX_LINE_MATCHES) { 299 /* Reset matchidx and try again */ 300 pc.matchidx = 0; 301 if (procline(&pc) == 0) 302 printline(&pc, ':'); 303 else 304 break; 305 } 306 first_match = false; 307 same_file = true; 308 last_outed = 0; 309 } 310 if (t != 0 && doctx) { 311 /* Deal with any -A context */ 312 if (tail > 0) { 313 grep_printline(&pc.ln, '-'); 314 tail--; 315 if (Bflag > 0) 316 clearqueue(); 317 } else { 318 /* 319 * Enqueue non-matching lines for -B context. 320 * If we're not actually doing -B context or if 321 * the enqueue resulted in a line being rotated 322 * out, then go ahead and increment last_outed 323 * to signify a gap between context/match. 324 */ 325 if (Bflag == 0 || (Bflag > 0 && enqueue(ln))) 326 ++last_outed; 327 } 328 } 329 330 /* Count the matches if we have a match limit */ 331 if (t == 0 && mflag) { 332 --mcount; 333 if (mflag && mcount <= 0) 334 break; 335 } 336 337 } 338 if (Bflag > 0) 339 clearqueue(); 340 grep_close(f); 341 342 if (cflag) { 343 if (!hflag) 344 printf("%s:", pc.ln.file); 345 printf("%u\n", c); 346 } 347 if (lflag && !qflag && c != 0) 348 printf("%s%c", fn, nullflag ? 0 : '\n'); 349 if (Lflag && !qflag && c == 0) 350 printf("%s%c", fn, nullflag ? 0 : '\n'); 351 if (c && !cflag && !lflag && !Lflag && 352 binbehave == BINFILE_BIN && f->binary && !qflag) 353 printf(getstr(8), fn); 354 355 free(pc.ln.file); 356 free(f); 357 return (c); 358} 359 360#define iswword(x) (iswalnum((x)) || (x) == L'_') 361 362/* 363 * Processes a line comparing it with the specified patterns. Each pattern 364 * is looped to be compared along with the full string, saving each and every 365 * match, which is necessary to colorize the output and to count the 366 * matches. The matching lines are passed to printline() to display the 367 * appropriate output. 368 */ 369static int 370procline(struct parsec *pc) 371{ 372 regmatch_t pmatch, lastmatch, chkmatch; 373 wchar_t wbegin, wend; 374 size_t st, nst; 375 unsigned int i; 376 int c = 0, r = 0, lastmatches = 0, leflags = eflags; 377 size_t startm = 0, matchidx; 378 unsigned int retry; 379 380 matchidx = pc->matchidx; 381 382 /* Special case: empty pattern with -w flag, check first character */ 383 if (matchall && wflag) { 384 if (pc->ln.len == 0) 385 return (0); 386 wend = L' '; 387 if (sscanf(&pc->ln.dat[0], "%lc", &wend) != 1 || iswword(wend)) 388 return (1); 389 else 390 return (0); 391 } else if (matchall) 392 return (0); 393 394 st = pc->lnstart; 395 nst = 0; 396 /* Initialize to avoid a false positive warning from GCC. */ 397 lastmatch.rm_so = lastmatch.rm_eo = 0; 398 399 /* Loop to process the whole line */ 400 while (st <= pc->ln.len) { 401 lastmatches = 0; 402 startm = matchidx; 403 retry = 0; 404 if (st > 0) 405 leflags |= REG_NOTBOL; 406 /* Loop to compare with all the patterns */ 407 for (i = 0; i < patterns; i++) { 408 pmatch.rm_so = st; 409 pmatch.rm_eo = pc->ln.len; 410#ifndef WITHOUT_FASTMATCH 411 if (fg_pattern[i].pattern) 412 r = fastexec(&fg_pattern[i], 413 pc->ln.dat, 1, &pmatch, leflags); 414 else 415#endif 416 r = regexec(&r_pattern[i], pc->ln.dat, 1, 417 &pmatch, leflags); 418 if (r != 0) 419 continue; 420 /* Check for full match */ 421 if (xflag && (pmatch.rm_so != 0 || 422 (size_t)pmatch.rm_eo != pc->ln.len)) 423 continue; 424 /* Check for whole word match */ 425#ifndef WITHOUT_FASTMATCH 426 if (wflag || fg_pattern[i].word) { 427#else 428 if (wflag) { 429#endif 430 wbegin = wend = L' '; 431 if (pmatch.rm_so != 0 && 432 sscanf(&pc->ln.dat[pmatch.rm_so - 1], 433 "%lc", &wbegin) != 1) 434 r = REG_NOMATCH; 435 else if ((size_t)pmatch.rm_eo != 436 pc->ln.len && 437 sscanf(&pc->ln.dat[pmatch.rm_eo], 438 "%lc", &wend) != 1) 439 r = REG_NOMATCH; 440 else if (iswword(wbegin) || 441 iswword(wend)) 442 r = REG_NOMATCH; 443 /* 444 * If we're doing whole word matching and we 445 * matched once, then we should try the pattern 446 * again after advancing just past the start of 447 * the earliest match. This allows the pattern 448 * to match later on in the line and possibly 449 * still match a whole word. 450 */ 451 if (r == REG_NOMATCH && 452 (retry == pc->lnstart || 453 (unsigned int)pmatch.rm_so + 1 < retry)) 454 retry = pmatch.rm_so + 1; 455 if (r == REG_NOMATCH) 456 continue; 457 } 458 lastmatches++; 459 lastmatch = pmatch; 460 461 if (matchidx == 0) 462 c++; 463 464 /* 465 * Replace previous match if the new one is earlier 466 * and/or longer. This will lead to some amount of 467 * extra work if -o/--color are specified, but it's 468 * worth it from a correctness point of view. 469 */ 470 if (matchidx > startm) { 471 chkmatch = pc->matches[matchidx - 1]; 472 if (pmatch.rm_so < chkmatch.rm_so || 473 (pmatch.rm_so == chkmatch.rm_so && 474 (pmatch.rm_eo - pmatch.rm_so) > 475 (chkmatch.rm_eo - chkmatch.rm_so))) { 476 pc->matches[matchidx - 1] = pmatch; 477 nst = pmatch.rm_eo; 478 } 479 } else { 480 /* Advance as normal if not */ 481 pc->matches[matchidx++] = pmatch; 482 nst = pmatch.rm_eo; 483 } 484 /* avoid excessive matching - skip further patterns */ 485 if ((color == NULL && !oflag) || qflag || lflag || 486 matchidx >= MAX_LINE_MATCHES) { 487 pc->lnstart = nst; 488 lastmatches = 0; 489 break; 490 } 491 } 492 493 /* 494 * Advance to just past the start of the earliest match, try 495 * again just in case we still have a chance to match later in 496 * the string. 497 */ 498 if (lastmatches == 0 && retry > pc->lnstart) { 499 st = retry; 500 continue; 501 } 502 503 /* One pass if we are not recording matches */ 504 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 505 break; 506 507 /* If we didn't have any matches or REG_NOSUB set */ 508 if (lastmatches == 0 || (cflags & REG_NOSUB)) 509 nst = pc->ln.len; 510 511 if (lastmatches == 0) 512 /* No matches */ 513 break; 514 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 515 /* Zero-length match -- advance one more so we don't get stuck */ 516 nst++; 517 518 /* Advance st based on previous matches */ 519 st = nst; 520 pc->lnstart = st; 521 } 522 523 /* Reflect the new matchidx in the context */ 524 pc->matchidx = matchidx; 525 if (vflag) 526 c = !c; 527 return (c ? 0 : 1); 528} 529 530/* 531 * Safe malloc() for internal use. 532 */ 533void * 534grep_malloc(size_t size) 535{ 536 void *ptr; 537 538 if ((ptr = malloc(size)) == NULL) 539 err(2, "malloc"); 540 return (ptr); 541} 542 543/* 544 * Safe calloc() for internal use. 545 */ 546void * 547grep_calloc(size_t nmemb, size_t size) 548{ 549 void *ptr; 550 551 if ((ptr = calloc(nmemb, size)) == NULL) 552 err(2, "calloc"); 553 return (ptr); 554} 555 556/* 557 * Safe realloc() for internal use. 558 */ 559void * 560grep_realloc(void *ptr, size_t size) 561{ 562 563 if ((ptr = realloc(ptr, size)) == NULL) 564 err(2, "realloc"); 565 return (ptr); 566} 567 568/* 569 * Safe strdup() for internal use. 570 */ 571char * 572grep_strdup(const char *str) 573{ 574 char *ret; 575 576 if ((ret = strdup(str)) == NULL) 577 err(2, "strdup"); 578 return (ret); 579} 580 581/* 582 * Print an entire line as-is, there are no inline matches to consider. This is 583 * used for printing context. 584 */ 585void grep_printline(struct str *line, int sep) { 586 printline_metadata(line, sep); 587 fwrite(line->dat, line->len, 1, stdout); 588 putchar(fileeol); 589} 590 591static void 592printline_metadata(struct str *line, int sep) 593{ 594 bool printsep; 595 596 printsep = false; 597 if (!hflag) { 598 if (!nullflag) { 599 fputs(line->file, stdout); 600 printsep = true; 601 } else { 602 printf("%s", line->file); 603 putchar(0); 604 } 605 } 606 if (nflag) { 607 if (printsep) 608 putchar(sep); 609 printf("%d", line->line_no); 610 printsep = true; 611 } 612 if (bflag) { 613 if (printsep) 614 putchar(sep); 615 printf("%lld", (long long)(line->off + line->boff)); 616 printsep = true; 617 } 618 if (printsep) 619 putchar(sep); 620} 621 622/* 623 * Prints a matching line according to the command line options. 624 */ 625static void 626printline(struct parsec *pc, int sep) 627{ 628 size_t a = 0; 629 size_t i, matchidx; 630 regmatch_t match; 631 632 /* If matchall, everything matches but don't actually print for -o */ 633 if (oflag && matchall) 634 return; 635 636 matchidx = pc->matchidx; 637 638 /* --color and -o */ 639 if ((oflag || color) && matchidx > 0) { 640 /* Only print metadata once per line if --color */ 641 if (!oflag && pc->printed == 0) 642 printline_metadata(&pc->ln, sep); 643 for (i = 0; i < matchidx; i++) { 644 match = pc->matches[i]; 645 /* Don't output zero length matches */ 646 if (match.rm_so == match.rm_eo) 647 continue; 648 /* 649 * Metadata is printed on a per-line basis, so every 650 * match gets file metadata with the -o flag. 651 */ 652 if (oflag) { 653 pc->ln.boff = match.rm_so; 654 printline_metadata(&pc->ln, sep); 655 } else 656 fwrite(pc->ln.dat + a, match.rm_so - a, 1, 657 stdout); 658 if (color) 659 fprintf(stdout, "\33[%sm\33[K", color); 660 fwrite(pc->ln.dat + match.rm_so, 661 match.rm_eo - match.rm_so, 1, stdout); 662 if (color) 663 fprintf(stdout, "\33[m\33[K"); 664 a = match.rm_eo; 665 if (oflag) 666 putchar('\n'); 667 } 668 if (!oflag) { 669 if (pc->ln.len - a > 0) 670 fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 671 stdout); 672 putchar('\n'); 673 } 674 } else 675 grep_printline(&pc->ln, sep); 676 pc->printed++; 677} 678