util.c revision 220422
1220422Sgabor/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2220422Sgabor/* $FreeBSD: head/usr.bin/grep/util.c 220422 2011-04-07 13:03:35Z gabor $ */ 3210389Sgabor/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4210389Sgabor 5210389Sgabor/*- 6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8210389Sgabor * All rights reserved. 9210389Sgabor * 10210389Sgabor * Redistribution and use in source and binary forms, with or without 11210389Sgabor * modification, are permitted provided that the following conditions 12210389Sgabor * are met: 13210389Sgabor * 1. Redistributions of source code must retain the above copyright 14210389Sgabor * notice, this list of conditions and the following disclaimer. 15210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 16210389Sgabor * notice, this list of conditions and the following disclaimer in the 17210389Sgabor * documentation and/or other materials provided with the distribution. 18210389Sgabor * 19210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29210389Sgabor * SUCH DAMAGE. 30210389Sgabor */ 31210389Sgabor 32210389Sgabor#include <sys/cdefs.h> 33210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 220422 2011-04-07 13:03:35Z gabor $"); 34210389Sgabor 35210389Sgabor#include <sys/stat.h> 36210389Sgabor#include <sys/types.h> 37210389Sgabor 38210389Sgabor#include <ctype.h> 39210389Sgabor#include <err.h> 40210389Sgabor#include <errno.h> 41210389Sgabor#include <fnmatch.h> 42210389Sgabor#include <fts.h> 43210389Sgabor#include <libgen.h> 44210578Sgabor#include <stdbool.h> 45210389Sgabor#include <stdio.h> 46210389Sgabor#include <stdlib.h> 47210389Sgabor#include <string.h> 48210389Sgabor#include <unistd.h> 49210389Sgabor#include <wchar.h> 50210389Sgabor#include <wctype.h> 51210389Sgabor 52210389Sgabor#include "grep.h" 53210389Sgabor 54210389Sgaborstatic int linesqueued; 55210389Sgaborstatic int procline(struct str *l, int); 56210389Sgabor 57210578Sgaborbool 58210578Sgaborfile_matching(const char *fname) 59210578Sgabor{ 60220421Sgabor char *fname_base; 61210578Sgabor bool ret; 62210578Sgabor 63210578Sgabor ret = finclude ? false : true; 64220421Sgabor fname_base = basename(fname); 65210578Sgabor 66210578Sgabor for (unsigned int i = 0; i < fpatterns; ++i) { 67220421Sgabor if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 68220421Sgabor fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 69210578Sgabor if (fpattern[i].mode == EXCL_PAT) 70210578Sgabor return (false); 71210578Sgabor else 72210578Sgabor ret = true; 73210578Sgabor } 74210578Sgabor } 75210578Sgabor return (ret); 76210578Sgabor} 77210578Sgabor 78211364Sgaborstatic inline bool 79210578Sgabordir_matching(const char *dname) 80210578Sgabor{ 81210578Sgabor bool ret; 82210578Sgabor 83210578Sgabor ret = dinclude ? false : true; 84210578Sgabor 85210578Sgabor for (unsigned int i = 0; i < dpatterns; ++i) { 86210578Sgabor if (dname != NULL && 87210578Sgabor fnmatch(dname, dpattern[i].pat, 0) == 0) { 88210578Sgabor if (dpattern[i].mode == EXCL_PAT) 89210578Sgabor return (false); 90210578Sgabor else 91210578Sgabor ret = true; 92210578Sgabor } 93210578Sgabor } 94210578Sgabor return (ret); 95210578Sgabor} 96210578Sgabor 97210389Sgabor/* 98210389Sgabor * Processes a directory when a recursive search is performed with 99210389Sgabor * the -R option. Each appropriate file is passed to procfile(). 100210389Sgabor */ 101210389Sgaborint 102210389Sgaborgrep_tree(char **argv) 103210389Sgabor{ 104210389Sgabor FTS *fts; 105210389Sgabor FTSENT *p; 106210430Sdelphij char *d, *dir = NULL; 107210389Sgabor int c, fts_flags; 108210389Sgabor bool ok; 109210389Sgabor 110210389Sgabor c = fts_flags = 0; 111210389Sgabor 112210389Sgabor switch(linkbehave) { 113210389Sgabor case LINK_EXPLICIT: 114210389Sgabor fts_flags = FTS_COMFOLLOW; 115210389Sgabor break; 116210389Sgabor case LINK_SKIP: 117210389Sgabor fts_flags = FTS_PHYSICAL; 118210389Sgabor break; 119210389Sgabor default: 120210389Sgabor fts_flags = FTS_LOGICAL; 121210389Sgabor 122210389Sgabor } 123210389Sgabor 124210389Sgabor fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 125210389Sgabor 126210389Sgabor if (!(fts = fts_open(argv, fts_flags, NULL))) 127210430Sdelphij err(2, "fts_open"); 128210389Sgabor while ((p = fts_read(fts)) != NULL) { 129210389Sgabor switch (p->fts_info) { 130210389Sgabor case FTS_DNR: 131210389Sgabor /* FALLTHROUGH */ 132210389Sgabor case FTS_ERR: 133210389Sgabor errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 134210389Sgabor break; 135210389Sgabor case FTS_D: 136210389Sgabor /* FALLTHROUGH */ 137210389Sgabor case FTS_DP: 138210389Sgabor break; 139210389Sgabor case FTS_DC: 140210389Sgabor /* Print a warning for recursive directory loop */ 141210389Sgabor warnx("warning: %s: recursive directory loop", 142210389Sgabor p->fts_path); 143210389Sgabor break; 144210389Sgabor default: 145210389Sgabor /* Check for file exclusion/inclusion */ 146210389Sgabor ok = true; 147210578Sgabor if (dexclude || dinclude) { 148210430Sdelphij if ((d = strrchr(p->fts_path, '/')) != NULL) { 149210430Sdelphij dir = grep_malloc(sizeof(char) * 150210430Sdelphij (d - p->fts_path + 1)); 151211364Sgabor memcpy(dir, p->fts_path, 152211364Sgabor d - p->fts_path); 153211364Sgabor dir[d - p->fts_path] = '\0'; 154210430Sdelphij } 155210578Sgabor ok = dir_matching(dir); 156210430Sdelphij free(dir); 157210430Sdelphij dir = NULL; 158210389Sgabor } 159210578Sgabor if (fexclude || finclude) 160210578Sgabor ok &= file_matching(p->fts_path); 161210389Sgabor 162210389Sgabor if (ok) 163210389Sgabor c += procfile(p->fts_path); 164210389Sgabor break; 165210389Sgabor } 166210389Sgabor } 167210389Sgabor 168210430Sdelphij fts_close(fts); 169210389Sgabor return (c); 170210389Sgabor} 171210389Sgabor 172210389Sgabor/* 173210389Sgabor * Opens a file and processes it. Each file is processed line-by-line 174210389Sgabor * passing the lines to procline(). 175210389Sgabor */ 176210389Sgaborint 177210389Sgaborprocfile(const char *fn) 178210389Sgabor{ 179210389Sgabor struct file *f; 180210389Sgabor struct stat sb; 181210389Sgabor struct str ln; 182210389Sgabor mode_t s; 183210389Sgabor int c, t; 184210389Sgabor 185210389Sgabor if (mflag && (mcount <= 0)) 186210389Sgabor return (0); 187210389Sgabor 188210389Sgabor if (strcmp(fn, "-") == 0) { 189210389Sgabor fn = label != NULL ? label : getstr(1); 190211463Sgabor f = grep_open(NULL); 191210389Sgabor } else { 192210389Sgabor if (!stat(fn, &sb)) { 193210389Sgabor /* Check if we need to process the file */ 194210389Sgabor s = sb.st_mode & S_IFMT; 195210389Sgabor if (s == S_IFDIR && dirbehave == DIR_SKIP) 196210389Sgabor return (0); 197210389Sgabor if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 198210389Sgabor || s == S_IFSOCK) && devbehave == DEV_SKIP) 199210389Sgabor return (0); 200210389Sgabor } 201210389Sgabor f = grep_open(fn); 202210389Sgabor } 203210389Sgabor if (f == NULL) { 204210389Sgabor if (!sflag) 205210389Sgabor warn("%s", fn); 206210389Sgabor if (errno == ENOENT) 207210389Sgabor notfound = true; 208210389Sgabor return (0); 209210389Sgabor } 210210389Sgabor 211210389Sgabor ln.file = grep_malloc(strlen(fn) + 1); 212210389Sgabor strcpy(ln.file, fn); 213210389Sgabor ln.line_no = 0; 214210389Sgabor ln.len = 0; 215210389Sgabor linesqueued = 0; 216210389Sgabor tail = 0; 217210389Sgabor ln.off = -1; 218210389Sgabor 219210389Sgabor for (c = 0; c == 0 || !(lflag || qflag); ) { 220210389Sgabor ln.off += ln.len + 1; 221211463Sgabor if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) { 222210389Sgabor if (ln.line_no == 0 && matchall) 223210389Sgabor exit(0); 224210389Sgabor else 225210389Sgabor break; 226210389Sgabor } 227210389Sgabor if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 228210389Sgabor --ln.len; 229210389Sgabor ln.line_no++; 230210389Sgabor 231210389Sgabor /* Return if we need to skip a binary file */ 232210389Sgabor if (f->binary && binbehave == BINFILE_SKIP) { 233210389Sgabor grep_close(f); 234210430Sdelphij free(ln.file); 235210389Sgabor free(f); 236210389Sgabor return (0); 237210389Sgabor } 238210389Sgabor /* Process the file line-by-line */ 239210389Sgabor if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { 240210389Sgabor enqueue(&ln); 241210389Sgabor linesqueued++; 242210389Sgabor } 243210389Sgabor c += t; 244210389Sgabor 245210389Sgabor /* Count the matches if we have a match limit */ 246210389Sgabor if (mflag) { 247210389Sgabor mcount -= t; 248210389Sgabor if (mcount <= 0) 249210389Sgabor break; 250210389Sgabor } 251210389Sgabor } 252210389Sgabor if (Bflag > 0) 253210389Sgabor clearqueue(); 254210389Sgabor grep_close(f); 255210389Sgabor 256210389Sgabor if (cflag) { 257210389Sgabor if (!hflag) 258210389Sgabor printf("%s:", ln.file); 259210389Sgabor printf("%u\n", c); 260210389Sgabor } 261210461Sgabor if (lflag && !qflag && c != 0) 262210389Sgabor printf("%s\n", fn); 263210461Sgabor if (Lflag && !qflag && c == 0) 264210389Sgabor printf("%s\n", fn); 265210389Sgabor if (c && !cflag && !lflag && !Lflag && 266210389Sgabor binbehave == BINFILE_BIN && f->binary && !qflag) 267210622Sgabor printf(getstr(8), fn); 268210389Sgabor 269210430Sdelphij free(ln.file); 270210389Sgabor free(f); 271210389Sgabor return (c); 272210389Sgabor} 273210389Sgabor 274210389Sgabor#define iswword(x) (iswalnum((x)) || (x) == L'_') 275210389Sgabor 276210389Sgabor/* 277210389Sgabor * Processes a line comparing it with the specified patterns. Each pattern 278210389Sgabor * is looped to be compared along with the full string, saving each and every 279210389Sgabor * match, which is necessary to colorize the output and to count the 280210389Sgabor * matches. The matching lines are passed to printline() to display the 281210389Sgabor * appropriate output. 282210389Sgabor */ 283220421Sgaborstatic int 284210389Sgaborprocline(struct str *l, int nottext) 285210389Sgabor{ 286210389Sgabor regmatch_t matches[MAX_LINE_MATCHES]; 287210389Sgabor regmatch_t pmatch; 288210389Sgabor size_t st = 0; 289210389Sgabor unsigned int i; 290210389Sgabor int c = 0, m = 0, r = 0; 291210389Sgabor 292210389Sgabor if (!matchall) { 293210389Sgabor /* Loop to process the whole line */ 294210389Sgabor while (st <= l->len) { 295210389Sgabor pmatch.rm_so = st; 296210389Sgabor pmatch.rm_eo = l->len; 297210389Sgabor 298210389Sgabor /* Loop to compare with all the patterns */ 299210389Sgabor for (i = 0; i < patterns; i++) { 300210389Sgabor/* 301210389Sgabor * XXX: grep_search() is a workaround for speed up and should be 302210389Sgabor * removed in the future. See fastgrep.c. 303210389Sgabor */ 304210389Sgabor if (fg_pattern[i].pattern) { 305210389Sgabor r = grep_search(&fg_pattern[i], 306210389Sgabor (unsigned char *)l->dat, 307210389Sgabor l->len, &pmatch); 308210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 309210389Sgabor st = pmatch.rm_eo; 310210389Sgabor } else { 311210389Sgabor r = regexec(&r_pattern[i], l->dat, 1, 312210389Sgabor &pmatch, eflags); 313210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 314210389Sgabor st = pmatch.rm_eo; 315210389Sgabor } 316210389Sgabor if (r == REG_NOMATCH) 317210389Sgabor continue; 318210389Sgabor /* Check for full match */ 319210389Sgabor if (r == 0 && xflag) 320210389Sgabor if (pmatch.rm_so != 0 || 321210389Sgabor (size_t)pmatch.rm_eo != l->len) 322210389Sgabor r = REG_NOMATCH; 323210389Sgabor /* Check for whole word match */ 324220421Sgabor if (r == 0 && fg_pattern[i].word && 325220421Sgabor pmatch.rm_so != 0) { 326211364Sgabor wint_t wbegin, wend; 327210389Sgabor 328211364Sgabor wbegin = wend = L' '; 329211364Sgabor if (pmatch.rm_so != 0 && 330211364Sgabor sscanf(&l->dat[pmatch.rm_so - 1], 331211364Sgabor "%lc", &wbegin) != 1) 332210389Sgabor r = REG_NOMATCH; 333211364Sgabor else if ((size_t)pmatch.rm_eo != l->len && 334211364Sgabor sscanf(&l->dat[pmatch.rm_eo], 335211364Sgabor "%lc", &wend) != 1) 336211364Sgabor r = REG_NOMATCH; 337211364Sgabor else if (iswword(wbegin) || iswword(wend)) 338211364Sgabor r = REG_NOMATCH; 339210389Sgabor } 340210389Sgabor if (r == 0) { 341210389Sgabor if (m == 0) 342210389Sgabor c++; 343210389Sgabor if (m < MAX_LINE_MATCHES) 344210389Sgabor matches[m++] = pmatch; 345210389Sgabor /* matches - skip further patterns */ 346210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 347210461Sgabor break; 348210389Sgabor } 349210389Sgabor } 350210389Sgabor 351210389Sgabor if (vflag) { 352210389Sgabor c = !c; 353210389Sgabor break; 354210389Sgabor } 355210389Sgabor /* One pass if we are not recording matches */ 356210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 357210389Sgabor break; 358210389Sgabor 359210389Sgabor if (st == (size_t)pmatch.rm_so) 360210389Sgabor break; /* No matches */ 361210389Sgabor } 362210389Sgabor } else 363210389Sgabor c = !vflag; 364210389Sgabor 365210389Sgabor if (c && binbehave == BINFILE_BIN && nottext) 366210389Sgabor return (c); /* Binary file */ 367210389Sgabor 368210389Sgabor /* Dealing with the context */ 369210479Sgabor if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 370210389Sgabor if (c) { 371210389Sgabor if (!first && !prev && !tail && Aflag) 372210389Sgabor printf("--\n"); 373210389Sgabor tail = Aflag; 374210389Sgabor if (Bflag > 0) { 375210389Sgabor if (!first && !prev) 376210389Sgabor printf("--\n"); 377210389Sgabor printqueue(); 378210389Sgabor } 379210389Sgabor linesqueued = 0; 380210389Sgabor printline(l, ':', matches, m); 381210389Sgabor } else { 382210389Sgabor printline(l, '-', matches, m); 383210389Sgabor tail--; 384210389Sgabor } 385210389Sgabor } 386210389Sgabor 387210389Sgabor if (c) { 388210389Sgabor prev = true; 389210389Sgabor first = false; 390210389Sgabor } else 391210389Sgabor prev = false; 392210389Sgabor 393210389Sgabor return (c); 394210389Sgabor} 395210389Sgabor 396210389Sgabor/* 397210389Sgabor * Safe malloc() for internal use. 398210389Sgabor */ 399210389Sgaborvoid * 400210389Sgaborgrep_malloc(size_t size) 401210389Sgabor{ 402210389Sgabor void *ptr; 403210389Sgabor 404210389Sgabor if ((ptr = malloc(size)) == NULL) 405210389Sgabor err(2, "malloc"); 406210389Sgabor return (ptr); 407210389Sgabor} 408210389Sgabor 409210389Sgabor/* 410210389Sgabor * Safe calloc() for internal use. 411210389Sgabor */ 412210389Sgaborvoid * 413210389Sgaborgrep_calloc(size_t nmemb, size_t size) 414210389Sgabor{ 415210389Sgabor void *ptr; 416210389Sgabor 417210389Sgabor if ((ptr = calloc(nmemb, size)) == NULL) 418210389Sgabor err(2, "calloc"); 419210389Sgabor return (ptr); 420210389Sgabor} 421210389Sgabor 422210389Sgabor/* 423210389Sgabor * Safe realloc() for internal use. 424210389Sgabor */ 425210389Sgaborvoid * 426210389Sgaborgrep_realloc(void *ptr, size_t size) 427210389Sgabor{ 428210389Sgabor 429210389Sgabor if ((ptr = realloc(ptr, size)) == NULL) 430210389Sgabor err(2, "realloc"); 431210389Sgabor return (ptr); 432210389Sgabor} 433210389Sgabor 434210389Sgabor/* 435210578Sgabor * Safe strdup() for internal use. 436210578Sgabor */ 437210578Sgaborchar * 438210578Sgaborgrep_strdup(const char *str) 439210578Sgabor{ 440210578Sgabor char *ret; 441210578Sgabor 442210578Sgabor if ((ret = strdup(str)) == NULL) 443210578Sgabor err(2, "strdup"); 444210578Sgabor return (ret); 445210578Sgabor} 446210578Sgabor 447210578Sgabor/* 448210389Sgabor * Prints a matching line according to the command line options. 449210389Sgabor */ 450210389Sgaborvoid 451210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m) 452210389Sgabor{ 453210389Sgabor size_t a = 0; 454210389Sgabor int i, n = 0; 455210389Sgabor 456210389Sgabor if (!hflag) { 457210389Sgabor if (nullflag == 0) 458210389Sgabor fputs(line->file, stdout); 459210389Sgabor else { 460210389Sgabor printf("%s", line->file); 461210389Sgabor putchar(0); 462210389Sgabor } 463210389Sgabor ++n; 464210389Sgabor } 465210389Sgabor if (nflag) { 466210389Sgabor if (n > 0) 467210389Sgabor putchar(sep); 468210389Sgabor printf("%d", line->line_no); 469210389Sgabor ++n; 470210389Sgabor } 471210389Sgabor if (bflag) { 472210389Sgabor if (n > 0) 473210389Sgabor putchar(sep); 474210389Sgabor printf("%lld", (long long)line->off); 475210389Sgabor ++n; 476210389Sgabor } 477210389Sgabor if (n) 478210389Sgabor putchar(sep); 479210389Sgabor /* --color and -o */ 480210389Sgabor if ((oflag || color) && m > 0) { 481210389Sgabor for (i = 0; i < m; i++) { 482210389Sgabor if (!oflag) 483210389Sgabor fwrite(line->dat + a, matches[i].rm_so - a, 1, 484210389Sgabor stdout); 485210389Sgabor if (color) 486210389Sgabor fprintf(stdout, "\33[%sm\33[K", color); 487210389Sgabor 488210389Sgabor fwrite(line->dat + matches[i].rm_so, 489210389Sgabor matches[i].rm_eo - matches[i].rm_so, 1, 490210389Sgabor stdout); 491210389Sgabor if (color) 492210389Sgabor fprintf(stdout, "\33[m\33[K"); 493210389Sgabor a = matches[i].rm_eo; 494210389Sgabor if (oflag) 495210389Sgabor putchar('\n'); 496210389Sgabor } 497210389Sgabor if (!oflag) { 498210389Sgabor if (line->len - a > 0) 499210389Sgabor fwrite(line->dat + a, line->len - a, 1, stdout); 500210389Sgabor putchar('\n'); 501210389Sgabor } 502210389Sgabor } else { 503210389Sgabor fwrite(line->dat, line->len, 1, stdout); 504210389Sgabor putchar('\n'); 505210389Sgabor } 506210389Sgabor} 507