util.c revision 220421
1210389Sgabor/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 2210389Sgabor 3210389Sgabor/*- 4211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 5210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 6210389Sgabor * All rights reserved. 7210389Sgabor * 8210389Sgabor * Redistribution and use in source and binary forms, with or without 9210389Sgabor * modification, are permitted provided that the following conditions 10210389Sgabor * are met: 11210389Sgabor * 1. Redistributions of source code must retain the above copyright 12210389Sgabor * notice, this list of conditions and the following disclaimer. 13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14210389Sgabor * notice, this list of conditions and the following disclaimer in the 15210389Sgabor * documentation and/or other materials provided with the distribution. 16210389Sgabor * 17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27210389Sgabor * SUCH DAMAGE. 28210389Sgabor */ 29210389Sgabor 30210389Sgabor#include <sys/cdefs.h> 31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 220421 2011-04-07 13:01:03Z gabor $"); 32210389Sgabor 33210389Sgabor#include <sys/stat.h> 34210389Sgabor#include <sys/types.h> 35210389Sgabor 36210389Sgabor#include <ctype.h> 37210389Sgabor#include <err.h> 38210389Sgabor#include <errno.h> 39210389Sgabor#include <fnmatch.h> 40210389Sgabor#include <fts.h> 41210389Sgabor#include <libgen.h> 42210578Sgabor#include <stdbool.h> 43210389Sgabor#include <stdio.h> 44210389Sgabor#include <stdlib.h> 45210389Sgabor#include <string.h> 46210389Sgabor#include <unistd.h> 47210389Sgabor#include <wchar.h> 48210389Sgabor#include <wctype.h> 49210389Sgabor 50210389Sgabor#include "grep.h" 51210389Sgabor 52210389Sgaborstatic int linesqueued; 53210389Sgaborstatic int procline(struct str *l, int); 54210389Sgabor 55210578Sgaborbool 56210578Sgaborfile_matching(const char *fname) 57210578Sgabor{ 58220421Sgabor char *fname_base; 59210578Sgabor bool ret; 60210578Sgabor 61210578Sgabor ret = finclude ? false : true; 62220421Sgabor fname_base = basename(fname); 63210578Sgabor 64210578Sgabor for (unsigned int i = 0; i < fpatterns; ++i) { 65220421Sgabor if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 66220421Sgabor fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 67210578Sgabor if (fpattern[i].mode == EXCL_PAT) 68210578Sgabor return (false); 69210578Sgabor else 70210578Sgabor ret = true; 71210578Sgabor } 72210578Sgabor } 73210578Sgabor return (ret); 74210578Sgabor} 75210578Sgabor 76211364Sgaborstatic inline bool 77210578Sgabordir_matching(const char *dname) 78210578Sgabor{ 79210578Sgabor bool ret; 80210578Sgabor 81210578Sgabor ret = dinclude ? false : true; 82210578Sgabor 83210578Sgabor for (unsigned int i = 0; i < dpatterns; ++i) { 84210578Sgabor if (dname != NULL && 85210578Sgabor fnmatch(dname, dpattern[i].pat, 0) == 0) { 86210578Sgabor if (dpattern[i].mode == EXCL_PAT) 87210578Sgabor return (false); 88210578Sgabor else 89210578Sgabor ret = true; 90210578Sgabor } 91210578Sgabor } 92210578Sgabor return (ret); 93210578Sgabor} 94210578Sgabor 95210389Sgabor/* 96210389Sgabor * Processes a directory when a recursive search is performed with 97210389Sgabor * the -R option. Each appropriate file is passed to procfile(). 98210389Sgabor */ 99210389Sgaborint 100210389Sgaborgrep_tree(char **argv) 101210389Sgabor{ 102210389Sgabor FTS *fts; 103210389Sgabor FTSENT *p; 104210430Sdelphij char *d, *dir = NULL; 105210389Sgabor int c, fts_flags; 106210389Sgabor bool ok; 107210389Sgabor 108210389Sgabor c = fts_flags = 0; 109210389Sgabor 110210389Sgabor switch(linkbehave) { 111210389Sgabor case LINK_EXPLICIT: 112210389Sgabor fts_flags = FTS_COMFOLLOW; 113210389Sgabor break; 114210389Sgabor case LINK_SKIP: 115210389Sgabor fts_flags = FTS_PHYSICAL; 116210389Sgabor break; 117210389Sgabor default: 118210389Sgabor fts_flags = FTS_LOGICAL; 119210389Sgabor 120210389Sgabor } 121210389Sgabor 122210389Sgabor fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 123210389Sgabor 124210389Sgabor if (!(fts = fts_open(argv, fts_flags, NULL))) 125210430Sdelphij err(2, "fts_open"); 126210389Sgabor while ((p = fts_read(fts)) != NULL) { 127210389Sgabor switch (p->fts_info) { 128210389Sgabor case FTS_DNR: 129210389Sgabor /* FALLTHROUGH */ 130210389Sgabor case FTS_ERR: 131210389Sgabor errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno)); 132210389Sgabor break; 133210389Sgabor case FTS_D: 134210389Sgabor /* FALLTHROUGH */ 135210389Sgabor case FTS_DP: 136210389Sgabor break; 137210389Sgabor case FTS_DC: 138210389Sgabor /* Print a warning for recursive directory loop */ 139210389Sgabor warnx("warning: %s: recursive directory loop", 140210389Sgabor p->fts_path); 141210389Sgabor break; 142210389Sgabor default: 143210389Sgabor /* Check for file exclusion/inclusion */ 144210389Sgabor ok = true; 145210578Sgabor if (dexclude || dinclude) { 146210430Sdelphij if ((d = strrchr(p->fts_path, '/')) != NULL) { 147210430Sdelphij dir = grep_malloc(sizeof(char) * 148210430Sdelphij (d - p->fts_path + 1)); 149211364Sgabor memcpy(dir, p->fts_path, 150211364Sgabor d - p->fts_path); 151211364Sgabor dir[d - p->fts_path] = '\0'; 152210430Sdelphij } 153210578Sgabor ok = dir_matching(dir); 154210430Sdelphij free(dir); 155210430Sdelphij dir = NULL; 156210389Sgabor } 157210578Sgabor if (fexclude || finclude) 158210578Sgabor ok &= file_matching(p->fts_path); 159210389Sgabor 160210389Sgabor if (ok) 161210389Sgabor c += procfile(p->fts_path); 162210389Sgabor break; 163210389Sgabor } 164210389Sgabor } 165210389Sgabor 166210430Sdelphij fts_close(fts); 167210389Sgabor return (c); 168210389Sgabor} 169210389Sgabor 170210389Sgabor/* 171210389Sgabor * Opens a file and processes it. Each file is processed line-by-line 172210389Sgabor * passing the lines to procline(). 173210389Sgabor */ 174210389Sgaborint 175210389Sgaborprocfile(const char *fn) 176210389Sgabor{ 177210389Sgabor struct file *f; 178210389Sgabor struct stat sb; 179210389Sgabor struct str ln; 180210389Sgabor mode_t s; 181210389Sgabor int c, t; 182210389Sgabor 183210389Sgabor if (mflag && (mcount <= 0)) 184210389Sgabor return (0); 185210389Sgabor 186210389Sgabor if (strcmp(fn, "-") == 0) { 187210389Sgabor fn = label != NULL ? label : getstr(1); 188211463Sgabor f = grep_open(NULL); 189210389Sgabor } else { 190210389Sgabor if (!stat(fn, &sb)) { 191210389Sgabor /* Check if we need to process the file */ 192210389Sgabor s = sb.st_mode & S_IFMT; 193210389Sgabor if (s == S_IFDIR && dirbehave == DIR_SKIP) 194210389Sgabor return (0); 195210389Sgabor if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 196210389Sgabor || s == S_IFSOCK) && devbehave == DEV_SKIP) 197210389Sgabor return (0); 198210389Sgabor } 199210389Sgabor f = grep_open(fn); 200210389Sgabor } 201210389Sgabor if (f == NULL) { 202210389Sgabor if (!sflag) 203210389Sgabor warn("%s", fn); 204210389Sgabor if (errno == ENOENT) 205210389Sgabor notfound = true; 206210389Sgabor return (0); 207210389Sgabor } 208210389Sgabor 209210389Sgabor ln.file = grep_malloc(strlen(fn) + 1); 210210389Sgabor strcpy(ln.file, fn); 211210389Sgabor ln.line_no = 0; 212210389Sgabor ln.len = 0; 213210389Sgabor linesqueued = 0; 214210389Sgabor tail = 0; 215210389Sgabor ln.off = -1; 216210389Sgabor 217210389Sgabor for (c = 0; c == 0 || !(lflag || qflag); ) { 218210389Sgabor ln.off += ln.len + 1; 219211463Sgabor if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) { 220210389Sgabor if (ln.line_no == 0 && matchall) 221210389Sgabor exit(0); 222210389Sgabor else 223210389Sgabor break; 224210389Sgabor } 225210389Sgabor if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 226210389Sgabor --ln.len; 227210389Sgabor ln.line_no++; 228210389Sgabor 229210389Sgabor /* Return if we need to skip a binary file */ 230210389Sgabor if (f->binary && binbehave == BINFILE_SKIP) { 231210389Sgabor grep_close(f); 232210430Sdelphij free(ln.file); 233210389Sgabor free(f); 234210389Sgabor return (0); 235210389Sgabor } 236210389Sgabor /* Process the file line-by-line */ 237210389Sgabor if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { 238210389Sgabor enqueue(&ln); 239210389Sgabor linesqueued++; 240210389Sgabor } 241210389Sgabor c += t; 242210389Sgabor 243210389Sgabor /* Count the matches if we have a match limit */ 244210389Sgabor if (mflag) { 245210389Sgabor mcount -= t; 246210389Sgabor if (mcount <= 0) 247210389Sgabor break; 248210389Sgabor } 249210389Sgabor } 250210389Sgabor if (Bflag > 0) 251210389Sgabor clearqueue(); 252210389Sgabor grep_close(f); 253210389Sgabor 254210389Sgabor if (cflag) { 255210389Sgabor if (!hflag) 256210389Sgabor printf("%s:", ln.file); 257210389Sgabor printf("%u\n", c); 258210389Sgabor } 259210461Sgabor if (lflag && !qflag && c != 0) 260210389Sgabor printf("%s\n", fn); 261210461Sgabor if (Lflag && !qflag && c == 0) 262210389Sgabor printf("%s\n", fn); 263210389Sgabor if (c && !cflag && !lflag && !Lflag && 264210389Sgabor binbehave == BINFILE_BIN && f->binary && !qflag) 265210622Sgabor printf(getstr(8), fn); 266210389Sgabor 267210430Sdelphij free(ln.file); 268210389Sgabor free(f); 269210389Sgabor return (c); 270210389Sgabor} 271210389Sgabor 272210389Sgabor#define iswword(x) (iswalnum((x)) || (x) == L'_') 273210389Sgabor 274210389Sgabor/* 275210389Sgabor * Processes a line comparing it with the specified patterns. Each pattern 276210389Sgabor * is looped to be compared along with the full string, saving each and every 277210389Sgabor * match, which is necessary to colorize the output and to count the 278210389Sgabor * matches. The matching lines are passed to printline() to display the 279210389Sgabor * appropriate output. 280210389Sgabor */ 281220421Sgaborstatic int 282210389Sgaborprocline(struct str *l, int nottext) 283210389Sgabor{ 284210389Sgabor regmatch_t matches[MAX_LINE_MATCHES]; 285210389Sgabor regmatch_t pmatch; 286210389Sgabor size_t st = 0; 287210389Sgabor unsigned int i; 288210389Sgabor int c = 0, m = 0, r = 0; 289210389Sgabor 290210389Sgabor if (!matchall) { 291210389Sgabor /* Loop to process the whole line */ 292210389Sgabor while (st <= l->len) { 293210389Sgabor pmatch.rm_so = st; 294210389Sgabor pmatch.rm_eo = l->len; 295210389Sgabor 296210389Sgabor /* Loop to compare with all the patterns */ 297210389Sgabor for (i = 0; i < patterns; i++) { 298210389Sgabor/* 299210389Sgabor * XXX: grep_search() is a workaround for speed up and should be 300210389Sgabor * removed in the future. See fastgrep.c. 301210389Sgabor */ 302210389Sgabor if (fg_pattern[i].pattern) { 303210389Sgabor r = grep_search(&fg_pattern[i], 304210389Sgabor (unsigned char *)l->dat, 305210389Sgabor l->len, &pmatch); 306210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 307210389Sgabor st = pmatch.rm_eo; 308210389Sgabor } else { 309210389Sgabor r = regexec(&r_pattern[i], l->dat, 1, 310210389Sgabor &pmatch, eflags); 311210389Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 312210389Sgabor st = pmatch.rm_eo; 313210389Sgabor } 314210389Sgabor if (r == REG_NOMATCH) 315210389Sgabor continue; 316210389Sgabor /* Check for full match */ 317210389Sgabor if (r == 0 && xflag) 318210389Sgabor if (pmatch.rm_so != 0 || 319210389Sgabor (size_t)pmatch.rm_eo != l->len) 320210389Sgabor r = REG_NOMATCH; 321210389Sgabor /* Check for whole word match */ 322220421Sgabor if (r == 0 && fg_pattern[i].word && 323220421Sgabor pmatch.rm_so != 0) { 324211364Sgabor wint_t wbegin, wend; 325210389Sgabor 326211364Sgabor wbegin = wend = L' '; 327211364Sgabor if (pmatch.rm_so != 0 && 328211364Sgabor sscanf(&l->dat[pmatch.rm_so - 1], 329211364Sgabor "%lc", &wbegin) != 1) 330210389Sgabor r = REG_NOMATCH; 331211364Sgabor else if ((size_t)pmatch.rm_eo != l->len && 332211364Sgabor sscanf(&l->dat[pmatch.rm_eo], 333211364Sgabor "%lc", &wend) != 1) 334211364Sgabor r = REG_NOMATCH; 335211364Sgabor else if (iswword(wbegin) || iswword(wend)) 336211364Sgabor r = REG_NOMATCH; 337210389Sgabor } 338210389Sgabor if (r == 0) { 339210389Sgabor if (m == 0) 340210389Sgabor c++; 341210389Sgabor if (m < MAX_LINE_MATCHES) 342210389Sgabor matches[m++] = pmatch; 343210389Sgabor /* matches - skip further patterns */ 344210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 345210461Sgabor break; 346210389Sgabor } 347210389Sgabor } 348210389Sgabor 349210389Sgabor if (vflag) { 350210389Sgabor c = !c; 351210389Sgabor break; 352210389Sgabor } 353210389Sgabor /* One pass if we are not recording matches */ 354210461Sgabor if ((color != NULL && !oflag) || qflag || lflag) 355210389Sgabor break; 356210389Sgabor 357210389Sgabor if (st == (size_t)pmatch.rm_so) 358210389Sgabor break; /* No matches */ 359210389Sgabor } 360210389Sgabor } else 361210389Sgabor c = !vflag; 362210389Sgabor 363210389Sgabor if (c && binbehave == BINFILE_BIN && nottext) 364210389Sgabor return (c); /* Binary file */ 365210389Sgabor 366210389Sgabor /* Dealing with the context */ 367210479Sgabor if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 368210389Sgabor if (c) { 369210389Sgabor if (!first && !prev && !tail && Aflag) 370210389Sgabor printf("--\n"); 371210389Sgabor tail = Aflag; 372210389Sgabor if (Bflag > 0) { 373210389Sgabor if (!first && !prev) 374210389Sgabor printf("--\n"); 375210389Sgabor printqueue(); 376210389Sgabor } 377210389Sgabor linesqueued = 0; 378210389Sgabor printline(l, ':', matches, m); 379210389Sgabor } else { 380210389Sgabor printline(l, '-', matches, m); 381210389Sgabor tail--; 382210389Sgabor } 383210389Sgabor } 384210389Sgabor 385210389Sgabor if (c) { 386210389Sgabor prev = true; 387210389Sgabor first = false; 388210389Sgabor } else 389210389Sgabor prev = false; 390210389Sgabor 391210389Sgabor return (c); 392210389Sgabor} 393210389Sgabor 394210389Sgabor/* 395210389Sgabor * Safe malloc() for internal use. 396210389Sgabor */ 397210389Sgaborvoid * 398210389Sgaborgrep_malloc(size_t size) 399210389Sgabor{ 400210389Sgabor void *ptr; 401210389Sgabor 402210389Sgabor if ((ptr = malloc(size)) == NULL) 403210389Sgabor err(2, "malloc"); 404210389Sgabor return (ptr); 405210389Sgabor} 406210389Sgabor 407210389Sgabor/* 408210389Sgabor * Safe calloc() for internal use. 409210389Sgabor */ 410210389Sgaborvoid * 411210389Sgaborgrep_calloc(size_t nmemb, size_t size) 412210389Sgabor{ 413210389Sgabor void *ptr; 414210389Sgabor 415210389Sgabor if ((ptr = calloc(nmemb, size)) == NULL) 416210389Sgabor err(2, "calloc"); 417210389Sgabor return (ptr); 418210389Sgabor} 419210389Sgabor 420210389Sgabor/* 421210389Sgabor * Safe realloc() for internal use. 422210389Sgabor */ 423210389Sgaborvoid * 424210389Sgaborgrep_realloc(void *ptr, size_t size) 425210389Sgabor{ 426210389Sgabor 427210389Sgabor if ((ptr = realloc(ptr, size)) == NULL) 428210389Sgabor err(2, "realloc"); 429210389Sgabor return (ptr); 430210389Sgabor} 431210389Sgabor 432210389Sgabor/* 433210578Sgabor * Safe strdup() for internal use. 434210578Sgabor */ 435210578Sgaborchar * 436210578Sgaborgrep_strdup(const char *str) 437210578Sgabor{ 438210578Sgabor char *ret; 439210578Sgabor 440210578Sgabor if ((ret = strdup(str)) == NULL) 441210578Sgabor err(2, "strdup"); 442210578Sgabor return (ret); 443210578Sgabor} 444210578Sgabor 445210578Sgabor/* 446210389Sgabor * Prints a matching line according to the command line options. 447210389Sgabor */ 448210389Sgaborvoid 449210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m) 450210389Sgabor{ 451210389Sgabor size_t a = 0; 452210389Sgabor int i, n = 0; 453210389Sgabor 454210389Sgabor if (!hflag) { 455210389Sgabor if (nullflag == 0) 456210389Sgabor fputs(line->file, stdout); 457210389Sgabor else { 458210389Sgabor printf("%s", line->file); 459210389Sgabor putchar(0); 460210389Sgabor } 461210389Sgabor ++n; 462210389Sgabor } 463210389Sgabor if (nflag) { 464210389Sgabor if (n > 0) 465210389Sgabor putchar(sep); 466210389Sgabor printf("%d", line->line_no); 467210389Sgabor ++n; 468210389Sgabor } 469210389Sgabor if (bflag) { 470210389Sgabor if (n > 0) 471210389Sgabor putchar(sep); 472210389Sgabor printf("%lld", (long long)line->off); 473210389Sgabor ++n; 474210389Sgabor } 475210389Sgabor if (n) 476210389Sgabor putchar(sep); 477210389Sgabor /* --color and -o */ 478210389Sgabor if ((oflag || color) && m > 0) { 479210389Sgabor for (i = 0; i < m; i++) { 480210389Sgabor if (!oflag) 481210389Sgabor fwrite(line->dat + a, matches[i].rm_so - a, 1, 482210389Sgabor stdout); 483210389Sgabor if (color) 484210389Sgabor fprintf(stdout, "\33[%sm\33[K", color); 485210389Sgabor 486210389Sgabor fwrite(line->dat + matches[i].rm_so, 487210389Sgabor matches[i].rm_eo - matches[i].rm_so, 1, 488210389Sgabor stdout); 489210389Sgabor if (color) 490210389Sgabor fprintf(stdout, "\33[m\33[K"); 491210389Sgabor a = matches[i].rm_eo; 492210389Sgabor if (oflag) 493210389Sgabor putchar('\n'); 494210389Sgabor } 495210389Sgabor if (!oflag) { 496210389Sgabor if (line->len - a > 0) 497210389Sgabor fwrite(line->dat + a, line->len - a, 1, stdout); 498210389Sgabor putchar('\n'); 499210389Sgabor } 500210389Sgabor } else { 501210389Sgabor fwrite(line->dat, line->len, 1, stdout); 502210389Sgabor putchar('\n'); 503210389Sgabor } 504210389Sgabor} 505