util.c revision 270132
1220422Sgabor/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2220422Sgabor/* $FreeBSD: head/usr.bin/grep/util.c 270132 2014-08-18 12:29:28Z gabor $ */ 3210389Sgabor/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4210389Sgabor 5210389Sgabor/*- 6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8210389Sgabor * All rights reserved. 9210389Sgabor * 10210389Sgabor * Redistribution and use in source and binary forms, with or without 11210389Sgabor * modification, are permitted provided that the following conditions 12210389Sgabor * are met: 13210389Sgabor * 1. Redistributions of source code must retain the above copyright 14210389Sgabor * notice, this list of conditions and the following disclaimer. 15210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 16210389Sgabor * notice, this list of conditions and the following disclaimer in the 17210389Sgabor * documentation and/or other materials provided with the distribution. 18210389Sgabor * 19210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 20210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 23210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29210389Sgabor * SUCH DAMAGE. 30210389Sgabor */ 31210389Sgabor 32210389Sgabor#include <sys/cdefs.h> 33210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 270132 2014-08-18 12:29:28Z gabor $"); 34210389Sgabor 35210389Sgabor#include <sys/stat.h> 36210389Sgabor#include <sys/types.h> 37210389Sgabor 38210389Sgabor#include <ctype.h> 39210389Sgabor#include <err.h> 40210389Sgabor#include <errno.h> 41210389Sgabor#include <fnmatch.h> 42210389Sgabor#include <fts.h> 43210389Sgabor#include <libgen.h> 44210578Sgabor#include <stdbool.h> 45210389Sgabor#include <stdio.h> 46210389Sgabor#include <stdlib.h> 47210389Sgabor#include <string.h> 48210389Sgabor#include <unistd.h> 49210389Sgabor#include <wchar.h> 50210389Sgabor#include <wctype.h> 51210389Sgabor 52226035Sgabor#include "fastmatch.h" 53210389Sgabor#include "grep.h" 54210389Sgabor 55210389Sgaborstatic int linesqueued; 56210389Sgaborstatic int procline(struct str *l, int); 57210389Sgabor 58210578Sgaborbool 59210578Sgaborfile_matching(const char *fname) 60210578Sgabor{ 61220421Sgabor char *fname_base; 62210578Sgabor bool ret; 63210578Sgabor 64210578Sgabor ret = finclude ? false : true; 65220421Sgabor fname_base = basename(fname); 66210578Sgabor 67210578Sgabor for (unsigned int i = 0; i < fpatterns; ++i) { 68220421Sgabor if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 69220421Sgabor fnmatch(fpattern[i].pat, fname_base, 0) == 0) { 70210578Sgabor if (fpattern[i].mode == EXCL_PAT) 71210578Sgabor return (false); 72210578Sgabor else 73210578Sgabor ret = true; 74210578Sgabor } 75210578Sgabor } 76210578Sgabor return (ret); 77210578Sgabor} 78210578Sgabor 79211364Sgaborstatic inline bool 80210578Sgabordir_matching(const char *dname) 81210578Sgabor{ 82210578Sgabor bool ret; 83210578Sgabor 84210578Sgabor ret = dinclude ? false : true; 85210578Sgabor 86210578Sgabor for (unsigned int i = 0; i < dpatterns; ++i) { 87210578Sgabor if (dname != NULL && 88224938Sgabor fnmatch(dpattern[i].pat, dname, 0) == 0) { 89210578Sgabor if (dpattern[i].mode == EXCL_PAT) 90210578Sgabor return (false); 91210578Sgabor else 92210578Sgabor ret = true; 93210578Sgabor } 94210578Sgabor } 95210578Sgabor return (ret); 96210578Sgabor} 97210578Sgabor 98210389Sgabor/* 99210389Sgabor * Processes a directory when a recursive search is performed with 100210389Sgabor * the -R option. Each appropriate file is passed to procfile(). 101210389Sgabor */ 102210389Sgaborint 103210389Sgaborgrep_tree(char **argv) 104210389Sgabor{ 105210389Sgabor FTS *fts; 106210389Sgabor FTSENT *p; 107210389Sgabor int c, fts_flags; 108210389Sgabor bool ok; 109210389Sgabor 110210389Sgabor c = fts_flags = 0; 111210389Sgabor 112210389Sgabor switch(linkbehave) { 113210389Sgabor case LINK_EXPLICIT: 114210389Sgabor fts_flags = FTS_COMFOLLOW; 115210389Sgabor break; 116210389Sgabor case LINK_SKIP: 117210389Sgabor fts_flags = FTS_PHYSICAL; 118210389Sgabor break; 119210389Sgabor default: 120210389Sgabor fts_flags = FTS_LOGICAL; 121210389Sgabor 122210389Sgabor } 123210389Sgabor 124210389Sgabor fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 125210389Sgabor 126210389Sgabor if (!(fts = fts_open(argv, fts_flags, NULL))) 127210430Sdelphij err(2, "fts_open"); 128210389Sgabor while ((p = fts_read(fts)) != NULL) { 129210389Sgabor switch (p->fts_info) { 130210389Sgabor case FTS_DNR: 131210389Sgabor /* FALLTHROUGH */ 132210389Sgabor case FTS_ERR: 133228319Sgabor file_err = true; 134228097Sgabor if(!sflag) 135228097Sgabor warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 136210389Sgabor break; 137210389Sgabor case FTS_D: 138210389Sgabor /* FALLTHROUGH */ 139210389Sgabor case FTS_DP: 140224938Sgabor if (dexclude || dinclude) 141224938Sgabor if (!dir_matching(p->fts_name) || 142224938Sgabor !dir_matching(p->fts_path)) 143224938Sgabor fts_set(fts, p, FTS_SKIP); 144210389Sgabor break; 145210389Sgabor case FTS_DC: 146210389Sgabor /* Print a warning for recursive directory loop */ 147210389Sgabor warnx("warning: %s: recursive directory loop", 148210389Sgabor p->fts_path); 149210389Sgabor break; 150210389Sgabor default: 151210389Sgabor /* Check for file exclusion/inclusion */ 152210389Sgabor ok = true; 153210578Sgabor if (fexclude || finclude) 154210578Sgabor ok &= file_matching(p->fts_path); 155210389Sgabor 156210389Sgabor if (ok) 157210389Sgabor c += procfile(p->fts_path); 158210389Sgabor break; 159210389Sgabor } 160210389Sgabor } 161210389Sgabor 162210430Sdelphij fts_close(fts); 163210389Sgabor return (c); 164210389Sgabor} 165210389Sgabor 166210389Sgabor/* 167210389Sgabor * Opens a file and processes it. Each file is processed line-by-line 168210389Sgabor * passing the lines to procline(). 169210389Sgabor */ 170210389Sgaborint 171210389Sgaborprocfile(const char *fn) 172210389Sgabor{ 173210389Sgabor struct file *f; 174210389Sgabor struct stat sb; 175210389Sgabor struct str ln; 176210389Sgabor mode_t s; 177210389Sgabor int c, t; 178210389Sgabor 179244493Seadler mcount = mlimit; 180210389Sgabor 181210389Sgabor if (strcmp(fn, "-") == 0) { 182210389Sgabor fn = label != NULL ? label : getstr(1); 183211463Sgabor f = grep_open(NULL); 184210389Sgabor } else { 185210389Sgabor if (!stat(fn, &sb)) { 186210389Sgabor /* Check if we need to process the file */ 187210389Sgabor s = sb.st_mode & S_IFMT; 188210389Sgabor if (s == S_IFDIR && dirbehave == DIR_SKIP) 189210389Sgabor return (0); 190210389Sgabor if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK 191210389Sgabor || s == S_IFSOCK) && devbehave == DEV_SKIP) 192210389Sgabor return (0); 193210389Sgabor } 194210389Sgabor f = grep_open(fn); 195210389Sgabor } 196210389Sgabor if (f == NULL) { 197228319Sgabor file_err = true; 198210389Sgabor if (!sflag) 199210389Sgabor warn("%s", fn); 200210389Sgabor return (0); 201210389Sgabor } 202210389Sgabor 203210389Sgabor ln.file = grep_malloc(strlen(fn) + 1); 204210389Sgabor strcpy(ln.file, fn); 205210389Sgabor ln.line_no = 0; 206210389Sgabor ln.len = 0; 207210389Sgabor linesqueued = 0; 208210389Sgabor tail = 0; 209210389Sgabor ln.off = -1; 210210389Sgabor 211210389Sgabor for (c = 0; c == 0 || !(lflag || qflag); ) { 212210389Sgabor ln.off += ln.len + 1; 213211463Sgabor if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) { 214210389Sgabor if (ln.line_no == 0 && matchall) 215210389Sgabor exit(0); 216210389Sgabor else 217210389Sgabor break; 218210389Sgabor } 219210389Sgabor if (ln.len > 0 && ln.dat[ln.len - 1] == '\n') 220210389Sgabor --ln.len; 221210389Sgabor ln.line_no++; 222210389Sgabor 223210389Sgabor /* Return if we need to skip a binary file */ 224210389Sgabor if (f->binary && binbehave == BINFILE_SKIP) { 225210389Sgabor grep_close(f); 226210430Sdelphij free(ln.file); 227210389Sgabor free(f); 228210389Sgabor return (0); 229210389Sgabor } 230210389Sgabor /* Process the file line-by-line */ 231210389Sgabor if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) { 232210389Sgabor enqueue(&ln); 233210389Sgabor linesqueued++; 234210389Sgabor } 235210389Sgabor c += t; 236226273Sgabor if (mflag && mcount <= 0) 237226035Sgabor break; 238210389Sgabor } 239210389Sgabor if (Bflag > 0) 240210389Sgabor clearqueue(); 241210389Sgabor grep_close(f); 242210389Sgabor 243210389Sgabor if (cflag) { 244210389Sgabor if (!hflag) 245210389Sgabor printf("%s:", ln.file); 246210389Sgabor printf("%u\n", c); 247210389Sgabor } 248210461Sgabor if (lflag && !qflag && c != 0) 249228093Sgabor printf("%s%c", fn, nullflag ? 0 : '\n'); 250210461Sgabor if (Lflag && !qflag && c == 0) 251228093Sgabor printf("%s%c", fn, nullflag ? 0 : '\n'); 252210389Sgabor if (c && !cflag && !lflag && !Lflag && 253210389Sgabor binbehave == BINFILE_BIN && f->binary && !qflag) 254210622Sgabor printf(getstr(8), fn); 255210389Sgabor 256210430Sdelphij free(ln.file); 257210389Sgabor free(f); 258210389Sgabor return (c); 259210389Sgabor} 260210389Sgabor 261210389Sgabor#define iswword(x) (iswalnum((x)) || (x) == L'_') 262210389Sgabor 263210389Sgabor/* 264210389Sgabor * Processes a line comparing it with the specified patterns. Each pattern 265210389Sgabor * is looped to be compared along with the full string, saving each and every 266210389Sgabor * match, which is necessary to colorize the output and to count the 267210389Sgabor * matches. The matching lines are passed to printline() to display the 268210389Sgabor * appropriate output. 269210389Sgabor */ 270220421Sgaborstatic int 271210389Sgaborprocline(struct str *l, int nottext) 272210389Sgabor{ 273210389Sgabor regmatch_t matches[MAX_LINE_MATCHES]; 274210389Sgabor regmatch_t pmatch; 275210389Sgabor size_t st = 0; 276210389Sgabor unsigned int i; 277210389Sgabor int c = 0, m = 0, r = 0; 278210389Sgabor 279226035Sgabor /* Loop to process the whole line */ 280226035Sgabor while (st <= l->len) { 281226035Sgabor pmatch.rm_so = st; 282226035Sgabor pmatch.rm_eo = l->len; 283210389Sgabor 284226035Sgabor /* Loop to compare with all the patterns */ 285226035Sgabor for (i = 0; i < patterns; i++) { 286226035Sgabor if (fg_pattern[i].pattern) 287226035Sgabor r = fastexec(&fg_pattern[i], 288226035Sgabor l->dat, 1, &pmatch, eflags); 289226035Sgabor else 290226035Sgabor r = regexec(&r_pattern[i], l->dat, 1, 291226035Sgabor &pmatch, eflags); 292226035Sgabor r = (r == 0) ? 0 : REG_NOMATCH; 293226035Sgabor st = (cflags & REG_NOSUB) 294226035Sgabor ? (size_t)l->len 295226035Sgabor : (size_t)pmatch.rm_eo; 296226035Sgabor if (r == REG_NOMATCH) 297226035Sgabor continue; 298226035Sgabor /* Check for full match */ 299226035Sgabor if (r == 0 && xflag) 300226035Sgabor if (pmatch.rm_so != 0 || 301226035Sgabor (size_t)pmatch.rm_eo != l->len) 302226035Sgabor r = REG_NOMATCH; 303226035Sgabor /* Check for whole word match */ 304226035Sgabor if (r == 0 && (wflag || fg_pattern[i].word)) { 305268798Spfg wchar_t wbegin, wend; 306210389Sgabor 307226035Sgabor wbegin = wend = L' '; 308226035Sgabor if (pmatch.rm_so != 0 && 309226035Sgabor sscanf(&l->dat[pmatch.rm_so - 1], 310226035Sgabor "%lc", &wbegin) != 1) 311226035Sgabor r = REG_NOMATCH; 312226035Sgabor else if ((size_t)pmatch.rm_eo != 313226035Sgabor l->len && 314226035Sgabor sscanf(&l->dat[pmatch.rm_eo], 315226035Sgabor "%lc", &wend) != 1) 316226035Sgabor r = REG_NOMATCH; 317226035Sgabor else if (iswword(wbegin) || 318226035Sgabor iswword(wend)) 319226035Sgabor r = REG_NOMATCH; 320210389Sgabor } 321226035Sgabor if (r == 0) { 322226035Sgabor if (m == 0) 323226035Sgabor c++; 324226035Sgabor if (m < MAX_LINE_MATCHES) 325226035Sgabor matches[m++] = pmatch; 326226035Sgabor /* matches - skip further patterns */ 327226035Sgabor if ((color == NULL && !oflag) || 328226035Sgabor qflag || lflag) 329226035Sgabor break; 330210389Sgabor } 331226035Sgabor } 332210389Sgabor 333226035Sgabor if (vflag) { 334226035Sgabor c = !c; 335226035Sgabor break; 336210389Sgabor } 337210389Sgabor 338226035Sgabor /* One pass if we are not recording matches */ 339270132Sgabor if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 340226035Sgabor break; 341226035Sgabor 342226035Sgabor if (st == (size_t)pmatch.rm_so) 343226035Sgabor break; /* No matches */ 344226035Sgabor } 345226035Sgabor 346226035Sgabor 347226035Sgabor /* Count the matches if we have a match limit */ 348226035Sgabor if (mflag) 349226035Sgabor mcount -= c; 350226035Sgabor 351210389Sgabor if (c && binbehave == BINFILE_BIN && nottext) 352210389Sgabor return (c); /* Binary file */ 353210389Sgabor 354210389Sgabor /* Dealing with the context */ 355210479Sgabor if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) { 356210389Sgabor if (c) { 357210389Sgabor if (!first && !prev && !tail && Aflag) 358210389Sgabor printf("--\n"); 359210389Sgabor tail = Aflag; 360210389Sgabor if (Bflag > 0) { 361210389Sgabor if (!first && !prev) 362210389Sgabor printf("--\n"); 363210389Sgabor printqueue(); 364210389Sgabor } 365210389Sgabor linesqueued = 0; 366210389Sgabor printline(l, ':', matches, m); 367210389Sgabor } else { 368210389Sgabor printline(l, '-', matches, m); 369210389Sgabor tail--; 370210389Sgabor } 371210389Sgabor } 372210389Sgabor 373210389Sgabor if (c) { 374210389Sgabor prev = true; 375210389Sgabor first = false; 376210389Sgabor } else 377210389Sgabor prev = false; 378210389Sgabor 379210389Sgabor return (c); 380210389Sgabor} 381210389Sgabor 382210389Sgabor/* 383210389Sgabor * Safe malloc() for internal use. 384210389Sgabor */ 385210389Sgaborvoid * 386210389Sgaborgrep_malloc(size_t size) 387210389Sgabor{ 388210389Sgabor void *ptr; 389210389Sgabor 390210389Sgabor if ((ptr = malloc(size)) == NULL) 391210389Sgabor err(2, "malloc"); 392210389Sgabor return (ptr); 393210389Sgabor} 394210389Sgabor 395210389Sgabor/* 396210389Sgabor * Safe calloc() for internal use. 397210389Sgabor */ 398210389Sgaborvoid * 399210389Sgaborgrep_calloc(size_t nmemb, size_t size) 400210389Sgabor{ 401210389Sgabor void *ptr; 402210389Sgabor 403210389Sgabor if ((ptr = calloc(nmemb, size)) == NULL) 404210389Sgabor err(2, "calloc"); 405210389Sgabor return (ptr); 406210389Sgabor} 407210389Sgabor 408210389Sgabor/* 409210389Sgabor * Safe realloc() for internal use. 410210389Sgabor */ 411210389Sgaborvoid * 412210389Sgaborgrep_realloc(void *ptr, size_t size) 413210389Sgabor{ 414210389Sgabor 415210389Sgabor if ((ptr = realloc(ptr, size)) == NULL) 416210389Sgabor err(2, "realloc"); 417210389Sgabor return (ptr); 418210389Sgabor} 419210389Sgabor 420210389Sgabor/* 421210578Sgabor * Safe strdup() for internal use. 422210578Sgabor */ 423210578Sgaborchar * 424210578Sgaborgrep_strdup(const char *str) 425210578Sgabor{ 426210578Sgabor char *ret; 427210578Sgabor 428210578Sgabor if ((ret = strdup(str)) == NULL) 429210578Sgabor err(2, "strdup"); 430210578Sgabor return (ret); 431210578Sgabor} 432210578Sgabor 433210578Sgabor/* 434210389Sgabor * Prints a matching line according to the command line options. 435210389Sgabor */ 436210389Sgaborvoid 437210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m) 438210389Sgabor{ 439210389Sgabor size_t a = 0; 440210389Sgabor int i, n = 0; 441210389Sgabor 442210389Sgabor if (!hflag) { 443228093Sgabor if (!nullflag) { 444210389Sgabor fputs(line->file, stdout); 445228093Sgabor ++n; 446228093Sgabor } else { 447210389Sgabor printf("%s", line->file); 448210389Sgabor putchar(0); 449210389Sgabor } 450210389Sgabor } 451210389Sgabor if (nflag) { 452210389Sgabor if (n > 0) 453210389Sgabor putchar(sep); 454210389Sgabor printf("%d", line->line_no); 455210389Sgabor ++n; 456210389Sgabor } 457210389Sgabor if (bflag) { 458210389Sgabor if (n > 0) 459210389Sgabor putchar(sep); 460210389Sgabor printf("%lld", (long long)line->off); 461210389Sgabor ++n; 462210389Sgabor } 463210389Sgabor if (n) 464210389Sgabor putchar(sep); 465210389Sgabor /* --color and -o */ 466210389Sgabor if ((oflag || color) && m > 0) { 467210389Sgabor for (i = 0; i < m; i++) { 468210389Sgabor if (!oflag) 469210389Sgabor fwrite(line->dat + a, matches[i].rm_so - a, 1, 470210389Sgabor stdout); 471210389Sgabor if (color) 472210389Sgabor fprintf(stdout, "\33[%sm\33[K", color); 473210389Sgabor 474210389Sgabor fwrite(line->dat + matches[i].rm_so, 475210389Sgabor matches[i].rm_eo - matches[i].rm_so, 1, 476210389Sgabor stdout); 477210389Sgabor if (color) 478210389Sgabor fprintf(stdout, "\33[m\33[K"); 479210389Sgabor a = matches[i].rm_eo; 480210389Sgabor if (oflag) 481210389Sgabor putchar('\n'); 482210389Sgabor } 483210389Sgabor if (!oflag) { 484210389Sgabor if (line->len - a > 0) 485210389Sgabor fwrite(line->dat + a, line->len - a, 1, stdout); 486210389Sgabor putchar('\n'); 487210389Sgabor } 488210389Sgabor } else { 489210389Sgabor fwrite(line->dat, line->len, 1, stdout); 490210389Sgabor putchar('\n'); 491210389Sgabor } 492210389Sgabor} 493