util.c revision 322577
1/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2/* $FreeBSD: stable/11/usr.bin/grep/util.c 322577 2017-08-16 13:06:26Z kevans $ */ 3/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4 5/*- 6 * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 */ 31 32#include <sys/cdefs.h> 33__FBSDID("$FreeBSD: stable/11/usr.bin/grep/util.c 322577 2017-08-16 13:06:26Z kevans $"); 34 35#include <sys/stat.h> 36#include <sys/types.h> 37 38#include <ctype.h> 39#include <err.h> 40#include <errno.h> 41#include <fnmatch.h> 42#include <fts.h> 43#include <libgen.h> 44#include <stdbool.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include <unistd.h> 49#include <wchar.h> 50#include <wctype.h> 51 52#include "fastmatch.h" 53#include "grep.h" 54 55static int linesqueued; 56static int procline(struct str *l, int); 57 58static int lasta; 59static bool ctxover; 60 61bool 62file_matching(const char *fname) 63{ 64 char *fname_base, *fname_buf; 65 bool ret; 66 67 ret = finclude ? false : true; 68 fname_buf = strdup(fname); 69 if (fname_buf == NULL) 70 err(2, "strdup"); 71 fname_base = basename(fname_buf); 72 73 for (unsigned int i = 0; i < fpatterns; ++i) { 74 if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 75 fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 76 if (fpattern[i].mode == EXCL_PAT) { 77 ret = false; 78 break; 79 } else 80 ret = true; 81 } 82 } 83 free(fname_buf); 84 return (ret); 85} 86 87static inline bool 88dir_matching(const char *dname) 89{ 90 bool ret; 91 92 ret = dinclude ? false : true; 93 94 for (unsigned int i = 0; i < dpatterns; ++i) { 95 if (dname != NULL && 96 fnmatch(dpattern[i].pat, dname, 0) == 0) { 97 if (dpattern[i].mode == EXCL_PAT) 98 return (false); 99 else 100 ret = true; 101 } 102 } 103 return (ret); 104} 105 106/* 107 * Processes a directory when a recursive search is performed with 108 * the -R option. Each appropriate file is passed to procfile(). 109 */ 110int 111grep_tree(char **argv) 112{ 113 FTS *fts; 114 FTSENT *p; 115 int c, fts_flags; 116 bool ok; 117 const char *wd[] = { ".", NULL }; 118 119 c = fts_flags = 0; 120 121 switch(linkbehave) { 122 case LINK_EXPLICIT: 123 fts_flags = FTS_COMFOLLOW; 124 break; 125 case LINK_SKIP: 126 fts_flags = FTS_PHYSICAL; 127 break; 128 default: 129 fts_flags = FTS_LOGICAL; 130 131 } 132 133 fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 134 135 fts = fts_open((argv[0] == NULL) ? 136 __DECONST(char * const *, wd) : argv, fts_flags, NULL); 137 if (fts == NULL) 138 err(2, "fts_open"); 139 while ((p = fts_read(fts)) != NULL) { 140 switch (p->fts_info) { 141 case FTS_DNR: 142 /* FALLTHROUGH */ 143 case FTS_ERR: 144 file_err = true; 145 if(!sflag) 146 warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 147 break; 148 case FTS_D: 149 /* FALLTHROUGH */ 150 case FTS_DP: 151 if (dexclude || dinclude) 152 if (!dir_matching(p->fts_name) || 153 !dir_matching(p->fts_path)) 154 fts_set(fts, p, FTS_SKIP); 155 break; 156 case FTS_DC: 157 /* Print a warning for recursive directory loop */ 158 warnx("warning: %s: recursive directory loop", 159 p->fts_path); 160 break; 161 default: 162 /* Check for file exclusion/inclusion */ 163 ok = true; 164 if (fexclude || finclude) 165 ok &= file_matching(p->fts_path); 166 167 if (ok) 168 c += procfile(p->fts_path); 169 break; 170 } 171 } 172 173 fts_close(fts); 174 return (c); 175} 176 177/* 178 * Opens a file and processes it. Each file is processed line-by-line 179 * passing the lines to procline(). 180 */ 181int 182procfile(const char *fn) 183{ 184 struct file *f; 185 struct stat sb; 186 struct str ln; 187 mode_t s; 188 int c, t; 189 190 mcount = mlimit; 191 192 if (strcmp(fn, "-") == 0) { 193 fn = label != NULL ? label : getstr(1); 194 f = grep_open(NULL); 195 } else { 196 if (!stat(fn, &sb)) { 197 /* Check if we need to process the file */ 198 s = sb.st_mode & S_IFMT; 199 if (s == S_IFDIR && dirbehave == DIR_SKIP) 200 return (0); 201 if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 202 || s == S_IFSOCK) && devbehave == DEV_SKIP) 203 return (0); 204 } 205 f = grep_open(fn); 206 } 207 if (f == NULL) { 208 file_err = true; 209 if (!sflag) 210 warn("%s", fn); 211 return (0); 212 } 213 214 ln.file = grep_malloc(strlen(fn) + 1); 215 strcpy(ln.file, fn); 216 ln.line_no = 0; 217 ln.len = 0; 218 ctxover = false; 219 linesqueued = 0; 220 tail = 0; 221 lasta = 0; 222 ln.off = -1; 223 224 for (c = 0; c == 0 || !(lflag || qflag); ) { 225 ln.off += ln.len + 1; 226 if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) { 227 if (ln.line_no == 0 && matchall) 228 exit(0); 229 else 230 break; 231 } 232 if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol) 233 --ln.len; 234 ln.line_no++; 235 236 /* Return if we need to skip a binary file */ 237 if (f->binary && binbehave == BINFILE_SKIP) { 238 grep_close(f); 239 free(ln.file); 240 free(f); 241 return (0); 242 } 243 244 /* Process the file line-by-line, enqueue non-matching lines */ 245 if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { 246 /* Except don't enqueue lines that appear in -A ctx */ 247 if (ln.line_no == 0 || lasta != ln.line_no) { 248 /* queue is maxed to Bflag number of lines */ 249 enqueue(&ln); 250 linesqueued++; 251 ctxover = false; 252 } else { 253 /* 254 * Indicate to procline() that we have ctx 255 * overlap and make sure queue is empty. 256 */ 257 if (!ctxover) 258 clearqueue(); 259 ctxover = true; 260 } 261 } 262 c += t; 263 if (mflag && mcount <= 0) 264 break; 265 } 266 if (Bflag > 0) 267 clearqueue(); 268 grep_close(f); 269 270 if (cflag) { 271 if (!hflag) 272 printf("%s:", ln.file); 273 printf("%u\n", c); 274 } 275 if (lflag && !qflag && c != 0) 276 printf("%s%c", fn, nullflag ? 0 : '\n'); 277 if (Lflag && !qflag && c == 0) 278 printf("%s%c", fn, nullflag ? 0 : '\n'); 279 if (c && !cflag && !lflag && !Lflag && 280 binbehave == BINFILE_BIN && f->binary && !qflag) 281 printf(getstr(8), fn); 282 283 free(ln.file); 284 free(f); 285 return (c); 286} 287 288#define iswword(x) (iswalnum((x)) || (x) == L'_') 289 290/* 291 * Processes a line comparing it with the specified patterns. Each pattern 292 * is looped to be compared along with the full string, saving each and every 293 * match, which is necessary to colorize the output and to count the 294 * matches. The matching lines are passed to printline() to display the 295 * appropriate output. 296 */ 297static int 298procline(struct str *l, int nottext) 299{ 300 regmatch_t matches[MAX_LINE_MATCHES]; 301 regmatch_t pmatch, lastmatch; 302 size_t st = 0, nst = 0; 303 unsigned int i; 304 int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags; 305 int startm = 0; 306 307 /* Initialize to avoid a false positive warning from GCC. */ 308 lastmatch.rm_so = lastmatch.rm_eo = 0; 309 310 /* Loop to process the whole line */ 311 while (st <= l->len) { 312 lastmatches = 0; 313 startm = m; 314 if (st > 0) 315 leflags |= REG_NOTBOL; 316 /* Loop to compare with all the patterns */ 317 for (i = 0; i < patterns; i++) { 318 pmatch.rm_so = st; 319 pmatch.rm_eo = l->len; 320 if (fg_pattern[i].pattern) 321 r = fastexec(&fg_pattern[i], 322 l->dat, 1, &pmatch, leflags); 323 else 324 r = regexec(&r_pattern[i], l->dat, 1, 325 &pmatch, leflags); 326 r = (r == 0) ? 0 : REG_NOMATCH; 327 if (r == REG_NOMATCH) 328 continue; 329 /* Check for full match */ 330 if (r == 0 && xflag) 331 if (pmatch.rm_so != 0 || 332 (size_t)pmatch.rm_eo != l->len) 333 r = REG_NOMATCH; 334 /* Check for whole word match */ 335 if (r == 0 && (wflag || fg_pattern[i].word)) { 336 wchar_t wbegin, wend; 337 338 wbegin = wend = L' '; 339 if (pmatch.rm_so != 0 && 340 sscanf(&l->dat[pmatch.rm_so - 1], 341 "%lc", &wbegin) != 1) 342 r = REG_NOMATCH; 343 else if ((size_t)pmatch.rm_eo != 344 l->len && 345 sscanf(&l->dat[pmatch.rm_eo], 346 "%lc", &wend) != 1) 347 r = REG_NOMATCH; 348 else if (iswword(wbegin) || 349 iswword(wend)) 350 r = REG_NOMATCH; 351 } 352 if (r == 0) { 353 lastmatches++; 354 lastmatch = pmatch; 355 if (m == 0) 356 c++; 357 358 if (m < MAX_LINE_MATCHES) { 359 /* Replace previous match if the new one is earlier and/or longer */ 360 if (m > startm) { 361 if (pmatch.rm_so < matches[m-1].rm_so || 362 (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) { 363 matches[m-1] = pmatch; 364 nst = pmatch.rm_eo; 365 } 366 } else { 367 /* Advance as normal if not */ 368 matches[m++] = pmatch; 369 nst = pmatch.rm_eo; 370 } 371 } 372 373 /* matches - skip further patterns */ 374 if ((color == NULL && !oflag) || 375 qflag || lflag) 376 break; 377 } 378 } 379 380 if (vflag) { 381 c = !c; 382 break; 383 } 384 385 /* One pass if we are not recording matches */ 386 if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 387 break; 388 389 /* If we didn't have any matches or REG_NOSUB set */ 390 if (lastmatches == 0 || (cflags & REG_NOSUB)) 391 nst = l->len; 392 393 if (lastmatches == 0) 394 /* No matches */ 395 break; 396 else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 397 /* Zero-length match -- advance one more so we don't get stuck */ 398 nst++; 399 400 /* Advance st based on previous matches */ 401 st = nst; 402 } 403 404 405 /* Count the matches if we have a match limit */ 406 if (mflag) 407 mcount -= c; 408 409 if (c && binbehave == BINFILE_BIN && nottext) 410 return (c); /* Binary file */ 411 412 /* Dealing with the context */ 413 if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 414 if (c) { 415 if (!first && !prev && !tail && (Bflag || Aflag) && 416 !ctxover) 417 printf("--\n"); 418 tail = Aflag; 419 if (Bflag > 0) { 420 printqueue(); 421 ctxover = false; 422 } 423 linesqueued = 0; 424 printline(l, ':', matches, m); 425 } else { 426 /* Print -A lines following matches */ 427 lasta = l->line_no; 428 printline(l, '-', matches, m); 429 tail--; 430 } 431 } 432 433 if (c) { 434 prev = true; 435 first = false; 436 } else 437 prev = false; 438 439 return (c); 440} 441 442/* 443 * Safe malloc() for internal use. 444 */ 445void * 446grep_malloc(size_t size) 447{ 448 void *ptr; 449 450 if ((ptr = malloc(size)) == NULL) 451 err(2, "malloc"); 452 return (ptr); 453} 454 455/* 456 * Safe calloc() for internal use. 457 */ 458void * 459grep_calloc(size_t nmemb, size_t size) 460{ 461 void *ptr; 462 463 if ((ptr = calloc(nmemb, size)) == NULL) 464 err(2, "calloc"); 465 return (ptr); 466} 467 468/* 469 * Safe realloc() for internal use. 470 */ 471void * 472grep_realloc(void *ptr, size_t size) 473{ 474 475 if ((ptr = realloc(ptr, size)) == NULL) 476 err(2, "realloc"); 477 return (ptr); 478} 479 480/* 481 * Safe strdup() for internal use. 482 */ 483char * 484grep_strdup(const char *str) 485{ 486 char *ret; 487 488 if ((ret = strdup(str)) == NULL) 489 err(2, "strdup"); 490 return (ret); 491} 492 493/* 494 * Prints a matching line according to the command line options. 495 */ 496void 497printline(struct str *line, int sep, regmatch_t *matches, int m) 498{ 499 size_t a = 0; 500 int i, n = 0; 501 502 /* If matchall, everything matches but don't actually print for -o */ 503 if (oflag && matchall) 504 return; 505 506 if (!hflag) { 507 if (!nullflag) { 508 fputs(line->file, stdout); 509 ++n; 510 } else { 511 printf("%s", line->file); 512 putchar(0); 513 } 514 } 515 if (nflag) { 516 if (n > 0) 517 putchar(sep); 518 printf("%d", line->line_no); 519 ++n; 520 } 521 if (bflag) { 522 if (n > 0) 523 putchar(sep); 524 printf("%lld", (long long)line->off); 525 ++n; 526 } 527 if (n) 528 putchar(sep); 529 /* --color and -o */ 530 if ((oflag || color) && m > 0) { 531 for (i = 0; i < m; i++) { 532 /* Don't output zero length matches */ 533 if (matches[i].rm_so == matches[i].rm_eo) 534 continue; 535 if (!oflag) 536 fwrite(line->dat + a, matches[i].rm_so - a, 1, 537 stdout); 538 if (color) 539 fprintf(stdout, "\33[%sm\33[K", color); 540 541 fwrite(line->dat + matches[i].rm_so, 542 matches[i].rm_eo - matches[i].rm_so, 1, 543 stdout); 544 if (color) 545 fprintf(stdout, "\33[m\33[K"); 546 a = matches[i].rm_eo; 547 if (oflag) 548 putchar('\n'); 549 } 550 if (!oflag) { 551 if (line->len - a > 0) 552 fwrite(line->dat + a, line->len - a, 1, stdout); 553 putchar('\n'); 554 } 555 } else { 556 fwrite(line->dat, line->len, 1, stdout); 557 putchar(fileeol); 558 } 559} 560