util.c revision 210622
1210389Sgabor/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 2210389Sgabor 3210389Sgabor/*- 4210389Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 6210389Sgabor * All rights reserved. 7210389Sgabor * 8210389Sgabor * Redistribution and use in source and binary forms, with or without 9210389Sgabor * modification, are permitted provided that the following conditions 10210389Sgabor * are met: 11210389Sgabor * 1. Redistributions of source code must retain the above copyright 12210389Sgabor * notice, this list of conditions and the following disclaimer. 13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14210389Sgabor * notice, this list of conditions and the following disclaimer in the 15210389Sgabor * documentation and/or other materials provided with the distribution. 16210389Sgabor * 17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27210389Sgabor * SUCH DAMAGE. 28210389Sgabor */ 29210389Sgabor 30210389Sgabor#include <sys/cdefs.h> 31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 210622 2010-07-29 18:02:57Z gabor $"); 32210389Sgabor 33210389Sgabor#include <sys/stat.h> 34210389Sgabor#include <sys/types.h> 35210389Sgabor 36210389Sgabor#include <ctype.h> 37210389Sgabor#include <err.h> 38210389Sgabor#include <errno.h> 39210389Sgabor#include <fnmatch.h> 40210389Sgabor#include <fts.h> 41210389Sgabor#include <libgen.h> 42210578Sgabor#include <stdbool.h> 43210389Sgabor#include <stdio.h> 44210389Sgabor#include <stdlib.h> 45210389Sgabor#include <string.h> 46210389Sgabor#include <unistd.h> 47210389Sgabor#include <wchar.h> 48210389Sgabor#include <wctype.h> 49210389Sgabor 50210389Sgabor#include "grep.h" 51210389Sgabor 52210389Sgaborstatic int linesqueued; 53210389Sgaborstatic int procline(struct str *l, int); 54210389Sgabor 55210578Sgaborbool 56210578Sgaborfile_matching(const char *fname) 57210578Sgabor{ 58210578Sgabor bool ret; 59210578Sgabor 60210578Sgabor ret = finclude ? false : true; 61210578Sgabor 62210578Sgabor for (unsigned int i = 0; i < fpatterns; ++i) { 63210578Sgabor if (fnmatch(fpattern[i].pat, 64210578Sgabor fname, 0) == 0 || fnmatch(fpattern[i].pat, 65210578Sgabor basename(fname), 0) == 0) { 66210578Sgabor if (fpattern[i].mode == EXCL_PAT) 67210578Sgabor return (false); 68210578Sgabor else 69210578Sgabor ret = true; 70210578Sgabor } 71210578Sgabor } 72210578Sgabor return (ret); 73210578Sgabor} 74210578Sgabor 75210578Sgaborbool 76210578Sgabordir_matching(const char *dname) 77210578Sgabor{ 78210578Sgabor bool ret; 79210578Sgabor 80210578Sgabor ret = dinclude ? false : true; 81210578Sgabor 82210578Sgabor for (unsigned int i = 0; i < dpatterns; ++i) { 83210578Sgabor if (dname != NULL && 84210578Sgabor fnmatch(dname, dpattern[i].pat, 0) == 0) { 85210578Sgabor if (dpattern[i].mode == EXCL_PAT) 86210578Sgabor return (false); 87210578Sgabor else 88210578Sgabor ret = true; 89210578Sgabor } 90210578Sgabor } 91210578Sgabor return (ret); 92210578Sgabor} 93210578Sgabor 94210389Sgabor/* 95210389Sgabor * Processes a directory when a recursive search is performed with 96210389Sgabor * the -R option. Each appropriate file is passed to procfile(). 97210389Sgabor */ 98210389Sgaborint 99210389Sgaborgrep_tree(char **argv) 100210389Sgabor{ 101210389Sgabor FTS *fts; 102210389Sgabor FTSENT *p; 103210430Sdelphij char *d, *dir = NULL; 104210389Sgabor int c, fts_flags; 105210389Sgabor bool ok; 106210389Sgabor 107210389Sgabor c = fts_flags = 0; 108210389Sgabor 109210389Sgabor switch(linkbehave) { 110210389Sgabor case LINK_EXPLICIT: 111210389Sgabor fts_flags = FTS_COMFOLLOW; 112210389Sgabor break; 113210389Sgabor case LINK_SKIP: 114210389Sgabor fts_flags = FTS_PHYSICAL; 115210389Sgabor break; 116210389Sgabor default: 117210389Sgabor fts_flags = FTS_LOGICAL; 118210389Sgabor 119210389Sgabor } 120210389Sgabor 121210389Sgabor fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 122210389Sgabor 123210389Sgabor if (!(fts = fts_open(argv, fts_flags, NULL))) 124210430Sdelphij err(2, "fts_open"); 125210389Sgabor while ((p = fts_read(fts)) != NULL) { 126210389Sgabor switch (p->fts_info) { 127210389Sgabor case FTS_DNR: 128210389Sgabor /* FALLTHROUGH */ 129210389Sgabor case FTS_ERR: 130210389Sgabor errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 131210389Sgabor break; 132210389Sgabor case FTS_D: 133210389Sgabor /* FALLTHROUGH */ 134210389Sgabor case FTS_DP: 135210389Sgabor break; 136210389Sgabor case FTS_DC: 137210389Sgabor /* Print a warning for recursive directory loop */ 138210389Sgabor warnx("warning: %s: recursive directory loop", 139210389Sgabor p->fts_path); 140210389Sgabor break; 141210389Sgabor default: 142210389Sgabor /* Check for file exclusion/inclusion */ 143210389Sgabor ok = true; 144210578Sgabor if (dexclude || dinclude) { 145210430Sdelphij if ((d = strrchr(p->fts_path, '/')) != NULL) { 146210430Sdelphij dir = grep_malloc(sizeof(char) * 147210430Sdelphij (d - p->fts_path + 2)); 148210430Sdelphij strlcpy(dir, p->fts_path, 149210430Sdelphij (d - p->fts_path + 1)); 150210430Sdelphij } 151210578Sgabor ok = dir_matching(dir); 152210430Sdelphij free(dir); 153210430Sdelphij dir = NULL; 154210389Sgabor } 155210578Sgabor if (fexclude || finclude) 156210578Sgabor ok &= file_matching(p->fts_path); 157210389Sgabor 158210389Sgabor if (ok) 159210389Sgabor c += procfile(p->fts_path); 160210389Sgabor break; 161210389Sgabor } 162210389Sgabor } 163210389Sgabor 164210430Sdelphij fts_close(fts); 165210389Sgabor return (c); 166210389Sgabor} 167210389Sgabor 168210389Sgabor/* 169210389Sgabor * Opens a file and processes it. Each file is processed line-by-line 170210389Sgabor * passing the lines to procline(). 171210389Sgabor */ 172210389Sgaborint 173210389Sgaborprocfile(const char *fn) 174210389Sgabor{ 175210389Sgabor struct file *f; 176210389Sgabor struct stat sb; 177210389Sgabor struct str ln; 178210389Sgabor mode_t s; 179210389Sgabor int c, t; 180210389Sgabor 181210389Sgabor if (mflag && (mcount <= 0)) 182210389Sgabor return (0); 183210389Sgabor 184210389Sgabor if (strcmp(fn, "-") == 0) { 185210389Sgabor fn = label != NULL ? label : getstr(1); 186210389Sgabor f = grep_stdin_open(); 187210389Sgabor } else { 188210389Sgabor if (!stat(fn, &sb)) { 189210389Sgabor /* Check if we need to process the file */ 190210389Sgabor s = sb.st_mode & S_IFMT; 191210389Sgabor if (s == S_IFDIR && dirbehave == DIR_SKIP) 192210389Sgabor return (0); 193210389Sgabor if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 194210389Sgabor || s == S_IFSOCK) && devbehave == DEV_SKIP) 195210389Sgabor return (0); 196210389Sgabor } 197210389Sgabor f = grep_open(fn); 198210389Sgabor } 199210389Sgabor if (f == NULL) { 200210389Sgabor if (!sflag) 201210389Sgabor warn("%s", fn); 202210389Sgabor if (errno == ENOENT) 203210389Sgabor notfound = true; 204210389Sgabor return (0); 205210389Sgabor } 206210389Sgabor 207210389Sgabor ln.file = grep_malloc(strlen(fn) + 1); 208210389Sgabor strcpy(ln.file, fn); 209210389Sgabor ln.line_no = 0; 210210389Sgabor ln.len = 0; 211210389Sgabor linesqueued = 0; 212210389Sgabor tail = 0; 213210389Sgabor ln.off = -1; 214210389Sgabor 215210389Sgabor for (c = 0; c == 0 || !(lflag || qflag); ) { 216210389Sgabor ln.off += ln.len + 1; 217210389Sgabor if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) { 218210389Sgabor if (ln.line_no == 0 && matchall) 219210389Sgabor exit(0); 220210389Sgabor else 221210389Sgabor break; 222210389Sgabor } 223210389Sgabor if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 224210389Sgabor --ln.len; 225210389Sgabor ln.line_no++; 226210389Sgabor 227210389Sgabor /* Return if we need to skip a binary file */ 228210389Sgabor if (f->binary && binbehave == BINFILE_SKIP) { 229210389Sgabor grep_close(f); 230210430Sdelphij free(ln.file); 231210389Sgabor free(f); 232210389Sgabor return (0); 233210389Sgabor } 234210389Sgabor /* Process the file line-by-line */ 235210389Sgabor if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { 236210389Sgabor enqueue(&ln); 237210389Sgabor linesqueued++; 238210389Sgabor } 239210389Sgabor c += t; 240210389Sgabor 241210389Sgabor /* Count the matches if we have a match limit */ 242210389Sgabor if (mflag) { 243210389Sgabor mcount -= t; 244210389Sgabor if (mcount <= 0) 245210389Sgabor break; 246210389Sgabor } 247210389Sgabor } 248210389Sgabor if (Bflag > 0) 249210389Sgabor clearqueue(); 250210389Sgabor grep_close(f); 251210389Sgabor 252210389Sgabor if (cflag) { 253210389Sgabor if (!hflag) 254210389Sgabor printf("%s:", ln.file); 255210389Sgabor printf("%u\n", c); 256210389Sgabor } 257210461Sgabor if (lflag && !qflag && c != 0) 258210389Sgabor printf("%s\n", fn); 259210461Sgabor if (Lflag && !qflag && c == 0) 260210389Sgabor printf("%s\n", fn); 261210389Sgabor if (c && !cflag && !lflag && !Lflag && 262210389Sgabor binbehave == BINFILE_BIN && f->binary && !qflag) 263210622Sgabor printf(getstr(8), fn); 264210389Sgabor 265210430Sdelphij free(ln.file); 266210389Sgabor free(f); 267210389Sgabor return (c); 268210389Sgabor} 269210389Sgabor 270210389Sgabor#define iswword(x) (iswalnum((x)) || (x) == L'_') 271210389Sgabor 272210389Sgabor/* 273210389Sgabor * Processes a line comparing it with the specified patterns. Each pattern 274210389Sgabor * is looped to be compared along with the full string, saving each and every 275210389Sgabor * match, which is necessary to colorize the output and to count the 276210389Sgabor * matches. The matching lines are passed to printline() to display the 277210389Sgabor * appropriate output. 278210389Sgabor */ 279210389Sgaborstatic int 280210389Sgaborprocline(struct str *l, int nottext) 281210389Sgabor{ 282210389Sgabor regmatch_t matches[MAX_LINE_MATCHES]; 283210389Sgabor regmatch_t pmatch; 284210389Sgabor size_t st = 0; 285210389Sgabor unsigned int i; 286210389Sgabor int c = 0, m = 0, r = 0; 287210389Sgabor 288210389Sgabor if (!matchall) { 289210389Sgabor /* Loop to process the whole line */ 290210389Sgabor while (st <= l->len) { 291210389Sgabor pmatch.rm_so = st; 292210389Sgabor pmatch.rm_eo = l->len; 293210389Sgabor 294210389Sgabor /* Loop to compare with all the patterns */ 295210389Sgabor for (i = 0; i < patterns; i++) { 296210389Sgabor/* 297210389Sgabor * XXX: grep_search() is a workaround for speed up and should be 298210389Sgabor * removed in the future. See fastgrep.c. 299210389Sgabor */ 300210389Sgabor if (fg_pattern[i].pattern) { 301210389Sgabor r = grep_search(&fg_pattern[i], 302210389Sgabor (unsigned char *)l->dat, 303210389Sgabor l->len, &pmatch); 304210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 305210389Sgabor st = pmatch.rm_eo; 306210389Sgabor } else { 307210389Sgabor r = regexec(&r_pattern[i], l->dat, 1, 308210389Sgabor &pmatch, eflags); 309210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 310210389Sgabor st = pmatch.rm_eo; 311210389Sgabor } 312210389Sgabor if (r == REG_NOMATCH) 313210389Sgabor continue; 314210389Sgabor /* Check for full match */ 315210389Sgabor if (r == 0 && xflag) 316210389Sgabor if (pmatch.rm_so != 0 || 317210389Sgabor (size_t)pmatch.rm_eo != l->len) 318210389Sgabor r = REG_NOMATCH; 319210389Sgabor /* Check for whole word match */ 320210389Sgabor if (r == 0 && wflag && pmatch.rm_so != 0 && 321210389Sgabor (size_t)pmatch.rm_eo != l->len) { 322210389Sgabor wchar_t *wbegin; 323210389Sgabor wint_t wend; 324210389Sgabor size_t size; 325210389Sgabor 326210389Sgabor size = mbstowcs(NULL, l->dat, 327210389Sgabor pmatch.rm_so); 328210389Sgabor 329210389Sgabor if (size == ((size_t) - 1)) 330210389Sgabor r = REG_NOMATCH; 331210389Sgabor else { 332210389Sgabor wbegin = grep_malloc(size); 333210389Sgabor if (mbstowcs(wbegin, l->dat, 334210389Sgabor pmatch.rm_so) == ((size_t) - 1)) 335210389Sgabor r = REG_NOMATCH; 336210389Sgabor else if (sscanf(&l->dat[pmatch.rm_eo], 337210389Sgabor "%lc", &wend) != 1) 338210389Sgabor r = REG_NOMATCH; 339210389Sgabor else if (iswword(wbegin[wcslen(wbegin)]) || 340210389Sgabor iswword(wend)) 341210389Sgabor r = REG_NOMATCH; 342210389Sgabor free(wbegin); 343210389Sgabor } 344210389Sgabor } 345210389Sgabor if (r == 0) { 346210389Sgabor if (m == 0) 347210389Sgabor c++; 348210389Sgabor if (m < MAX_LINE_MATCHES) 349210389Sgabor matches[m++] = pmatch; 350210389Sgabor /* matches - skip further patterns */ 351210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 352210461Sgabor break; 353210389Sgabor } 354210389Sgabor } 355210389Sgabor 356210389Sgabor if (vflag) { 357210389Sgabor c = !c; 358210389Sgabor break; 359210389Sgabor } 360210389Sgabor /* One pass if we are not recording matches */ 361210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 362210389Sgabor break; 363210389Sgabor 364210389Sgabor if (st == (size_t)pmatch.rm_so) 365210389Sgabor break; /* No matches */ 366210389Sgabor } 367210389Sgabor } else 368210389Sgabor c = !vflag; 369210389Sgabor 370210389Sgabor if (c && binbehave == BINFILE_BIN && nottext) 371210389Sgabor return (c); /* Binary file */ 372210389Sgabor 373210389Sgabor /* Dealing with the context */ 374210479Sgabor if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 375210389Sgabor if (c) { 376210389Sgabor if (!first && !prev && !tail && Aflag) 377210389Sgabor printf("--\n"); 378210389Sgabor tail = Aflag; 379210389Sgabor if (Bflag > 0) { 380210389Sgabor if (!first && !prev) 381210389Sgabor printf("--\n"); 382210389Sgabor printqueue(); 383210389Sgabor } 384210389Sgabor linesqueued = 0; 385210389Sgabor printline(l, ':', matches, m); 386210389Sgabor } else { 387210389Sgabor printline(l, '-', matches, m); 388210389Sgabor tail--; 389210389Sgabor } 390210389Sgabor } 391210389Sgabor 392210389Sgabor if (c) { 393210389Sgabor prev = true; 394210389Sgabor first = false; 395210389Sgabor } else 396210389Sgabor prev = false; 397210389Sgabor 398210389Sgabor return (c); 399210389Sgabor} 400210389Sgabor 401210389Sgabor/* 402210389Sgabor * Safe malloc() for internal use. 403210389Sgabor */ 404210389Sgaborvoid * 405210389Sgaborgrep_malloc(size_t size) 406210389Sgabor{ 407210389Sgabor void *ptr; 408210389Sgabor 409210389Sgabor if ((ptr = malloc(size)) == NULL) 410210389Sgabor err(2, "malloc"); 411210389Sgabor return (ptr); 412210389Sgabor} 413210389Sgabor 414210389Sgabor/* 415210389Sgabor * Safe calloc() for internal use. 416210389Sgabor */ 417210389Sgaborvoid * 418210389Sgaborgrep_calloc(size_t nmemb, size_t size) 419210389Sgabor{ 420210389Sgabor void *ptr; 421210389Sgabor 422210389Sgabor if ((ptr = calloc(nmemb, size)) == NULL) 423210389Sgabor err(2, "calloc"); 424210389Sgabor return (ptr); 425210389Sgabor} 426210389Sgabor 427210389Sgabor/* 428210389Sgabor * Safe realloc() for internal use. 429210389Sgabor */ 430210389Sgaborvoid * 431210389Sgaborgrep_realloc(void *ptr, size_t size) 432210389Sgabor{ 433210389Sgabor 434210389Sgabor if ((ptr = realloc(ptr, size)) == NULL) 435210389Sgabor err(2, "realloc"); 436210389Sgabor return (ptr); 437210389Sgabor} 438210389Sgabor 439210389Sgabor/* 440210578Sgabor * Safe strdup() for internal use. 441210578Sgabor */ 442210578Sgaborchar * 443210578Sgaborgrep_strdup(const char *str) 444210578Sgabor{ 445210578Sgabor char *ret; 446210578Sgabor 447210578Sgabor if ((ret = strdup(str)) == NULL) 448210578Sgabor err(2, "strdup"); 449210578Sgabor return (ret); 450210578Sgabor} 451210578Sgabor 452210578Sgabor/* 453210389Sgabor * Prints a matching line according to the command line options. 454210389Sgabor */ 455210389Sgaborvoid 456210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m) 457210389Sgabor{ 458210389Sgabor size_t a = 0; 459210389Sgabor int i, n = 0; 460210389Sgabor 461210389Sgabor if (!hflag) { 462210389Sgabor if (nullflag == 0) 463210389Sgabor fputs(line->file, stdout); 464210389Sgabor else { 465210389Sgabor printf("%s", line->file); 466210389Sgabor putchar(0); 467210389Sgabor } 468210389Sgabor ++n; 469210389Sgabor } 470210389Sgabor if (nflag) { 471210389Sgabor if (n > 0) 472210389Sgabor putchar(sep); 473210389Sgabor printf("%d", line->line_no); 474210389Sgabor ++n; 475210389Sgabor } 476210389Sgabor if (bflag) { 477210389Sgabor if (n > 0) 478210389Sgabor putchar(sep); 479210389Sgabor printf("%lld", (long long)line->off); 480210389Sgabor ++n; 481210389Sgabor } 482210389Sgabor if (n) 483210389Sgabor putchar(sep); 484210389Sgabor /* --color and -o */ 485210389Sgabor if ((oflag || color) && m > 0) { 486210389Sgabor for (i = 0; i < m; i++) { 487210389Sgabor if (!oflag) 488210389Sgabor fwrite(line->dat + a, matches[i].rm_so - a, 1, 489210389Sgabor stdout); 490210389Sgabor if (color) 491210389Sgabor fprintf(stdout, "\33[%sm\33[K", color); 492210389Sgabor 493210389Sgabor fwrite(line->dat + matches[i].rm_so, 494210389Sgabor matches[i].rm_eo - matches[i].rm_so, 1, 495210389Sgabor stdout); 496210389Sgabor if (color) 497210389Sgabor fprintf(stdout, "\33[m\33[K"); 498210389Sgabor a = matches[i].rm_eo; 499210389Sgabor if (oflag) 500210389Sgabor putchar('\n'); 501210389Sgabor } 502210389Sgabor if (!oflag) { 503210389Sgabor if (line->len - a > 0) 504210389Sgabor fwrite(line->dat + a, line->len - a, 1, stdout); 505210389Sgabor putchar('\n'); 506210389Sgabor } 507210389Sgabor } else { 508210389Sgabor fwrite(line->dat, line->len, 1, stdout); 509210389Sgabor putchar('\n'); 510210389Sgabor } 511210389Sgabor} 512