1220422Sgabor/* $NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $ */ 2220422Sgabor/* $FreeBSD: stable/11/usr.bin/grep/util.c 354628 2019-11-11 19:54:08Z kevans $ */ 3210389Sgabor/* $OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $ */ 4210389Sgabor 5210389Sgabor/*- 6330449Seadler * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 7330449Seadler * 8211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 9210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 10322625Skevans * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org> 11210389Sgabor * All rights reserved. 12210389Sgabor * 13210389Sgabor * Redistribution and use in source and binary forms, with or without 14210389Sgabor * modification, are permitted provided that the following conditions 15210389Sgabor * are met: 16210389Sgabor * 1. Redistributions of source code must retain the above copyright 17210389Sgabor * notice, this list of conditions and the following disclaimer. 18210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 19210389Sgabor * notice, this list of conditions and the following disclaimer in the 20210389Sgabor * documentation and/or other materials provided with the distribution. 21210389Sgabor * 22210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32210389Sgabor * SUCH DAMAGE. 33210389Sgabor */ 34210389Sgabor 35210389Sgabor#include <sys/cdefs.h> 36210389Sgabor__FBSDID("$FreeBSD: stable/11/usr.bin/grep/util.c 354628 2019-11-11 19:54:08Z kevans $"); 37210389Sgabor 38210389Sgabor#include <sys/stat.h> 39210389Sgabor#include <sys/types.h> 40210389Sgabor 41210389Sgabor#include <ctype.h> 42210389Sgabor#include <err.h> 43210389Sgabor#include <errno.h> 44210389Sgabor#include <fnmatch.h> 45210389Sgabor#include <fts.h> 46210389Sgabor#include <libgen.h> 47210578Sgabor#include <stdbool.h> 48210389Sgabor#include <stdio.h> 49210389Sgabor#include <stdlib.h> 50210389Sgabor#include <string.h> 51210389Sgabor#include <unistd.h> 52210389Sgabor#include <wchar.h> 53210389Sgabor#include <wctype.h> 54210389Sgabor 55322582Skevans#ifndef WITHOUT_FASTMATCH 56226035Sgabor#include "fastmatch.h" 57322582Skevans#endif 58210389Sgabor#include "grep.h" 59210389Sgabor 60322587Skevansstatic bool first_match = true; 61210389Sgabor 62322587Skevans/* 63354628Skevans * Match printing context 64322587Skevans */ 65354628Skevansstruct mprintc { 66354628Skevans long long tail; /* Number of trailing lines to record */ 67354628Skevans int last_outed; /* Number of lines since last output */ 68354628Skevans bool doctx; /* Printing context? */ 69354628Skevans bool printmatch; /* Printing matches? */ 70354628Skevans bool same_file; /* Same file as previously printed? */ 71322587Skevans}; 72322562Skevans 73354628Skevansstatic void procmatch_match(struct mprintc *mc, struct parsec *pc); 74354628Skevansstatic void procmatch_nomatch(struct mprintc *mc, struct parsec *pc); 75354628Skevansstatic bool procmatches(struct mprintc *mc, struct parsec *pc, bool matched); 76323443Skevans#ifdef WITH_INTERNAL_NOSPEC 77323443Skevansstatic int litexec(const struct pat *pat, const char *string, 78323443Skevans size_t nmatch, regmatch_t pmatch[]); 79323443Skevans#endif 80354628Skevansstatic bool procline(struct parsec *pc); 81322587Skevansstatic void printline(struct parsec *pc, int sep); 82322587Skevansstatic void printline_metadata(struct str *line, int sep); 83322587Skevans 84210578Sgaborbool 85210578Sgaborfile_matching(const char *fname) 86210578Sgabor{ 87322577Skevans char *fname_base, *fname_buf; 88210578Sgabor bool ret; 89210578Sgabor 90210578Sgabor ret = finclude ? false : true; 91322577Skevans fname_buf = strdup(fname); 92322577Skevans if (fname_buf == NULL) 93322577Skevans err(2, "strdup"); 94322577Skevans fname_base = basename(fname_buf); 95210578Sgabor 96210578Sgabor for (unsigned int i = 0; i < fpatterns; ++i) { 97220421Sgabor if (fnmatch(fpattern[i].pat, fname, 0) == 0 || 98354628Skevans fnmatch(fpattern[i].pat, fname_base, 0) == 0) 99354628Skevans /* 100354628Skevans * The last pattern matched wins exclusion/inclusion 101354628Skevans * rights, so we can't reasonably bail out early here. 102354628Skevans */ 103354628Skevans ret = (fpattern[i].mode != EXCL_PAT); 104210578Sgabor } 105322577Skevans free(fname_buf); 106210578Sgabor return (ret); 107210578Sgabor} 108210578Sgabor 109211364Sgaborstatic inline bool 110210578Sgabordir_matching(const char *dname) 111210578Sgabor{ 112210578Sgabor bool ret; 113210578Sgabor 114210578Sgabor ret = dinclude ? false : true; 115210578Sgabor 116210578Sgabor for (unsigned int i = 0; i < dpatterns; ++i) { 117354628Skevans if (dname != NULL && fnmatch(dpattern[i].pat, dname, 0) == 0) 118354628Skevans /* 119354628Skevans * The last pattern matched wins exclusion/inclusion 120354628Skevans * rights, so we can't reasonably bail out early here. 121354628Skevans */ 122354628Skevans ret = (dpattern[i].mode != EXCL_PAT); 123210578Sgabor } 124210578Sgabor return (ret); 125210578Sgabor} 126210578Sgabor 127210389Sgabor/* 128210389Sgabor * Processes a directory when a recursive search is performed with 129210389Sgabor * the -R option. Each appropriate file is passed to procfile(). 130210389Sgabor */ 131354628Skevansbool 132210389Sgaborgrep_tree(char **argv) 133210389Sgabor{ 134210389Sgabor FTS *fts; 135210389Sgabor FTSENT *p; 136354628Skevans int fts_flags; 137354628Skevans bool matched, ok; 138322564Skevans const char *wd[] = { ".", NULL }; 139210389Sgabor 140354628Skevans matched = false; 141210389Sgabor 142354628Skevans /* This switch effectively initializes 'fts_flags' */ 143210389Sgabor switch(linkbehave) { 144210389Sgabor case LINK_EXPLICIT: 145210389Sgabor fts_flags = FTS_COMFOLLOW; 146210389Sgabor break; 147210389Sgabor case LINK_SKIP: 148210389Sgabor fts_flags = FTS_PHYSICAL; 149210389Sgabor break; 150210389Sgabor default: 151210389Sgabor fts_flags = FTS_LOGICAL; 152210389Sgabor } 153210389Sgabor 154210389Sgabor fts_flags |= FTS_NOSTAT | FTS_NOCHDIR; 155210389Sgabor 156322564Skevans fts = fts_open((argv[0] == NULL) ? 157322564Skevans __DECONST(char * const *, wd) : argv, fts_flags, NULL); 158322564Skevans if (fts == NULL) 159210430Sdelphij err(2, "fts_open"); 160210389Sgabor while ((p = fts_read(fts)) != NULL) { 161210389Sgabor switch (p->fts_info) { 162210389Sgabor case FTS_DNR: 163210389Sgabor /* FALLTHROUGH */ 164210389Sgabor case FTS_ERR: 165228319Sgabor file_err = true; 166228097Sgabor if(!sflag) 167228097Sgabor warnx("%s: %s", p->fts_path, strerror(p->fts_errno)); 168210389Sgabor break; 169210389Sgabor case FTS_D: 170210389Sgabor /* FALLTHROUGH */ 171210389Sgabor case FTS_DP: 172224938Sgabor if (dexclude || dinclude) 173224938Sgabor if (!dir_matching(p->fts_name) || 174224938Sgabor !dir_matching(p->fts_path)) 175224938Sgabor fts_set(fts, p, FTS_SKIP); 176210389Sgabor break; 177210389Sgabor case FTS_DC: 178210389Sgabor /* Print a warning for recursive directory loop */ 179210389Sgabor warnx("warning: %s: recursive directory loop", 180354628Skevans p->fts_path); 181210389Sgabor break; 182210389Sgabor default: 183210389Sgabor /* Check for file exclusion/inclusion */ 184210389Sgabor ok = true; 185210578Sgabor if (fexclude || finclude) 186210578Sgabor ok &= file_matching(p->fts_path); 187210389Sgabor 188354628Skevans if (ok && procfile(p->fts_path)) 189354628Skevans matched = true; 190210389Sgabor break; 191210389Sgabor } 192210389Sgabor } 193210389Sgabor 194210430Sdelphij fts_close(fts); 195354628Skevans return (matched); 196210389Sgabor} 197210389Sgabor 198354628Skevansstatic void 199354628Skevansprocmatch_match(struct mprintc *mc, struct parsec *pc) 200354628Skevans{ 201354628Skevans 202354628Skevans if (mc->doctx) { 203354628Skevans if (!first_match && (!mc->same_file || mc->last_outed > 0)) 204354628Skevans printf("--\n"); 205354628Skevans if (Bflag > 0) 206354628Skevans printqueue(); 207354628Skevans mc->tail = Aflag; 208354628Skevans } 209354628Skevans 210354628Skevans /* Print the matching line, but only if not quiet/binary */ 211354628Skevans if (mc->printmatch) { 212354628Skevans printline(pc, ':'); 213354628Skevans while (pc->matchidx >= MAX_MATCHES) { 214354628Skevans /* Reset matchidx and try again */ 215354628Skevans pc->matchidx = 0; 216354628Skevans if (procline(pc) == !vflag) 217354628Skevans printline(pc, ':'); 218354628Skevans else 219354628Skevans break; 220354628Skevans } 221354628Skevans first_match = false; 222354628Skevans mc->same_file = true; 223354628Skevans mc->last_outed = 0; 224354628Skevans } 225354628Skevans} 226354628Skevans 227354628Skevansstatic void 228354628Skevansprocmatch_nomatch(struct mprintc *mc, struct parsec *pc) 229354628Skevans{ 230354628Skevans 231354628Skevans /* Deal with any -A context as needed */ 232354628Skevans if (mc->tail > 0) { 233354628Skevans grep_printline(&pc->ln, '-'); 234354628Skevans mc->tail--; 235354628Skevans if (Bflag > 0) 236354628Skevans clearqueue(); 237354628Skevans } else if (Bflag == 0 || (Bflag > 0 && enqueue(&pc->ln))) 238354628Skevans /* 239354628Skevans * Enqueue non-matching lines for -B context. If we're not 240354628Skevans * actually doing -B context or if the enqueue resulted in a 241354628Skevans * line being rotated out, then go ahead and increment 242354628Skevans * last_outed to signify a gap between context/match. 243354628Skevans */ 244354628Skevans ++mc->last_outed; 245354628Skevans} 246354628Skevans 247210389Sgabor/* 248354628Skevans * Process any matches in the current parsing context, return a boolean 249354628Skevans * indicating whether we should halt any further processing or not. 'true' to 250354628Skevans * continue processing, 'false' to halt. 251354628Skevans */ 252354628Skevansstatic bool 253354628Skevansprocmatches(struct mprintc *mc, struct parsec *pc, bool matched) 254354628Skevans{ 255354628Skevans 256354628Skevans /* 257354628Skevans * XXX TODO: This should loop over pc->matches and handle things on a 258354628Skevans * line-by-line basis, setting up a `struct str` as needed. 259354628Skevans */ 260354628Skevans /* Deal with any -B context or context separators */ 261354628Skevans if (matched) { 262354628Skevans procmatch_match(mc, pc); 263354628Skevans 264354628Skevans /* Count the matches if we have a match limit */ 265354628Skevans if (mflag) { 266354628Skevans /* XXX TODO: Decrement by number of matched lines */ 267354628Skevans mcount -= 1; 268354628Skevans if (mcount <= 0) 269354628Skevans return (false); 270354628Skevans } 271354628Skevans } else if (mc->doctx) 272354628Skevans procmatch_nomatch(mc, pc); 273354628Skevans 274354628Skevans return (true); 275354628Skevans} 276354628Skevans 277354628Skevans/* 278210389Sgabor * Opens a file and processes it. Each file is processed line-by-line 279210389Sgabor * passing the lines to procline(). 280210389Sgabor */ 281354628Skevansbool 282210389Sgaborprocfile(const char *fn) 283210389Sgabor{ 284322587Skevans struct parsec pc; 285354628Skevans struct mprintc mc; 286210389Sgabor struct file *f; 287210389Sgabor struct stat sb; 288210389Sgabor mode_t s; 289354628Skevans int lines; 290354628Skevans bool line_matched; 291210389Sgabor 292210389Sgabor if (strcmp(fn, "-") == 0) { 293210389Sgabor fn = label != NULL ? label : getstr(1); 294211463Sgabor f = grep_open(NULL); 295210389Sgabor } else { 296354628Skevans if (stat(fn, &sb) == 0) { 297210389Sgabor /* Check if we need to process the file */ 298210389Sgabor s = sb.st_mode & S_IFMT; 299354628Skevans if (dirbehave == DIR_SKIP && s == S_IFDIR) 300354628Skevans return (false); 301354628Skevans if (devbehave == DEV_SKIP && (s == S_IFIFO || 302354628Skevans s == S_IFCHR || s == S_IFBLK || s == S_IFSOCK)) 303354628Skevans return (false); 304210389Sgabor } 305210389Sgabor f = grep_open(fn); 306210389Sgabor } 307210389Sgabor if (f == NULL) { 308228319Sgabor file_err = true; 309210389Sgabor if (!sflag) 310210389Sgabor warn("%s", fn); 311354628Skevans return (false); 312210389Sgabor } 313210389Sgabor 314354628Skevans pc.ln.file = grep_strdup(fn); 315322587Skevans pc.ln.line_no = 0; 316322587Skevans pc.ln.len = 0; 317322610Skevans pc.ln.boff = 0; 318322587Skevans pc.ln.off = -1; 319322587Skevans pc.binary = f->binary; 320354628Skevans pc.cntlines = false; 321354628Skevans memset(&mc, 0, sizeof(mc)); 322354628Skevans mc.printmatch = true; 323322587Skevans if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag || 324322587Skevans lflag || Lflag) 325354628Skevans mc.printmatch = false; 326354628Skevans if (mc.printmatch && (Aflag != 0 || Bflag != 0)) 327354628Skevans mc.doctx = true; 328354628Skevans if (mc.printmatch && (Aflag != 0 || Bflag != 0 || mflag || nflag)) 329354628Skevans pc.cntlines = true; 330322587Skevans mcount = mlimit; 331210389Sgabor 332354628Skevans for (lines = 0; lines == 0 || !(lflag || qflag); ) { 333354628Skevans /* 334354628Skevans * XXX TODO: We need to revisit this in a chunking world. We're 335354628Skevans * not going to be doing per-line statistics because of the 336354628Skevans * overhead involved. procmatches can figure that stuff out as 337354628Skevans * needed. */ 338322610Skevans /* Reset per-line statistics */ 339322610Skevans pc.printed = 0; 340322587Skevans pc.matchidx = 0; 341322609Skevans pc.lnstart = 0; 342322610Skevans pc.ln.boff = 0; 343322587Skevans pc.ln.off += pc.ln.len + 1; 344354628Skevans /* XXX TODO: Grab a chunk */ 345354628Skevans if ((pc.ln.dat = grep_fgetln(f, &pc)) == NULL || 346322777Skevans pc.ln.len == 0) 347322777Skevans break; 348210389Sgabor 349322587Skevans if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol) 350322587Skevans --pc.ln.len; 351322587Skevans pc.ln.line_no++; 352322587Skevans 353210389Sgabor /* Return if we need to skip a binary file */ 354322587Skevans if (pc.binary && binbehave == BINFILE_SKIP) { 355210389Sgabor grep_close(f); 356322587Skevans free(pc.ln.file); 357210389Sgabor free(f); 358210389Sgabor return (0); 359210389Sgabor } 360322562Skevans 361354628Skevans line_matched = procline(&pc) == !vflag; 362354628Skevans if (line_matched) 363354628Skevans ++lines; 364322587Skevans 365354628Skevans /* Halt processing if we hit our match limit */ 366354628Skevans if (!procmatches(&mc, &pc, line_matched)) 367354628Skevans break; 368210389Sgabor } 369210389Sgabor if (Bflag > 0) 370210389Sgabor clearqueue(); 371210389Sgabor grep_close(f); 372210389Sgabor 373210389Sgabor if (cflag) { 374210389Sgabor if (!hflag) 375322587Skevans printf("%s:", pc.ln.file); 376354628Skevans printf("%u\n", lines); 377210389Sgabor } 378354628Skevans if (lflag && !qflag && lines != 0) 379228093Sgabor printf("%s%c", fn, nullflag ? 0 : '\n'); 380354628Skevans if (Lflag && !qflag && lines == 0) 381228093Sgabor printf("%s%c", fn, nullflag ? 0 : '\n'); 382354628Skevans if (lines != 0 && !cflag && !lflag && !Lflag && 383210389Sgabor binbehave == BINFILE_BIN && f->binary && !qflag) 384210622Sgabor printf(getstr(8), fn); 385210389Sgabor 386322587Skevans free(pc.ln.file); 387210389Sgabor free(f); 388354628Skevans return (lines != 0); 389210389Sgabor} 390210389Sgabor 391323443Skevans#ifdef WITH_INTERNAL_NOSPEC 392323443Skevans/* 393323443Skevans * Internal implementation of literal string search within a string, modeled 394323443Skevans * after regexec(3), for use when the regex(3) implementation doesn't offer 395323443Skevans * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD 396323443Skevans * config, but in other scenarios such as building against libgnuregex or on 397323443Skevans * some non-FreeBSD OSes. 398323443Skevans */ 399323443Skevansstatic int 400323443Skevanslitexec(const struct pat *pat, const char *string, size_t nmatch, 401323443Skevans regmatch_t pmatch[]) 402323443Skevans{ 403323443Skevans char *(*strstr_fn)(const char *, const char *); 404323443Skevans char *sub, *subject; 405323443Skevans const char *search; 406323443Skevans size_t idx, n, ofs, stringlen; 407323443Skevans 408323443Skevans if (cflags & REG_ICASE) 409323443Skevans strstr_fn = strcasestr; 410323443Skevans else 411323443Skevans strstr_fn = strstr; 412323443Skevans idx = 0; 413323443Skevans ofs = pmatch[0].rm_so; 414323443Skevans stringlen = pmatch[0].rm_eo; 415323443Skevans if (ofs >= stringlen) 416323443Skevans return (REG_NOMATCH); 417323443Skevans subject = strndup(string, stringlen); 418323443Skevans if (subject == NULL) 419323443Skevans return (REG_ESPACE); 420323443Skevans for (n = 0; ofs < stringlen;) { 421323443Skevans search = (subject + ofs); 422323443Skevans if ((unsigned long)pat->len > strlen(search)) 423323443Skevans break; 424323443Skevans sub = strstr_fn(search, pat->pat); 425323443Skevans /* 426323443Skevans * Ignoring the empty string possibility due to context: grep optimizes 427323443Skevans * for empty patterns and will never reach this point. 428323443Skevans */ 429323443Skevans if (sub == NULL) 430323443Skevans break; 431323443Skevans ++n; 432323443Skevans /* Fill in pmatch if necessary */ 433323443Skevans if (nmatch > 0) { 434323443Skevans pmatch[idx].rm_so = ofs + (sub - search); 435323443Skevans pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len; 436323443Skevans if (++idx == nmatch) 437323443Skevans break; 438323443Skevans ofs = pmatch[idx].rm_so + 1; 439323443Skevans } else 440323443Skevans /* We only needed to know if we match or not */ 441323443Skevans break; 442323443Skevans } 443323443Skevans free(subject); 444323443Skevans if (n > 0 && nmatch > 0) 445323443Skevans for (n = idx; n < nmatch; ++n) 446323443Skevans pmatch[n].rm_so = pmatch[n].rm_eo = -1; 447323443Skevans 448323443Skevans return (n > 0 ? 0 : REG_NOMATCH); 449323443Skevans} 450323443Skevans#endif /* WITH_INTERNAL_NOSPEC */ 451323443Skevans 452210389Sgabor#define iswword(x) (iswalnum((x)) || (x) == L'_') 453210389Sgabor 454210389Sgabor/* 455210389Sgabor * Processes a line comparing it with the specified patterns. Each pattern 456210389Sgabor * is looped to be compared along with the full string, saving each and every 457210389Sgabor * match, which is necessary to colorize the output and to count the 458210389Sgabor * matches. The matching lines are passed to printline() to display the 459210389Sgabor * appropriate output. 460210389Sgabor */ 461354628Skevansstatic bool 462322587Skevansprocline(struct parsec *pc) 463210389Sgabor{ 464322587Skevans regmatch_t pmatch, lastmatch, chkmatch; 465322587Skevans wchar_t wbegin, wend; 466322609Skevans size_t st, nst; 467210389Sgabor unsigned int i; 468354628Skevans int r = 0, leflags = eflags; 469322587Skevans size_t startm = 0, matchidx; 470322609Skevans unsigned int retry; 471354628Skevans bool lastmatched, matched; 472210389Sgabor 473322587Skevans matchidx = pc->matchidx; 474322587Skevans 475354628Skevans /* 476354628Skevans * With matchall (empty pattern), we can try to take some shortcuts. 477354628Skevans * Emtpy patterns trivially match every line except in the -w and -x 478354628Skevans * cases. For -w (whole-word) cases, we only match if the first 479354628Skevans * character isn't a word-character. For -x (whole-line) cases, we only 480354628Skevans * match if the line is empty. 481354628Skevans */ 482354628Skevans if (matchall) { 483322587Skevans if (pc->ln.len == 0) 484354628Skevans return (true); 485354628Skevans if (wflag) { 486354628Skevans wend = L' '; 487354628Skevans if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 && 488354628Skevans !iswword(wend)) 489354628Skevans return (true); 490354628Skevans } else if (!xflag) 491354628Skevans return (true); 492322587Skevans 493354628Skevans /* 494354628Skevans * If we don't have any other patterns, we really don't match. 495354628Skevans * If we do have other patterns, we must fall through and check 496354628Skevans * them. 497354628Skevans */ 498354628Skevans if (patterns == 0) 499354628Skevans return (false); 500354628Skevans } 501354628Skevans 502354628Skevans matched = false; 503322609Skevans st = pc->lnstart; 504322609Skevans nst = 0; 505322555Skevans /* Initialize to avoid a false positive warning from GCC. */ 506322555Skevans lastmatch.rm_so = lastmatch.rm_eo = 0; 507322555Skevans 508226035Sgabor /* Loop to process the whole line */ 509322587Skevans while (st <= pc->ln.len) { 510354628Skevans lastmatched = false; 511322587Skevans startm = matchidx; 512322583Skevans retry = 0; 513322622Skevans if (st > 0 && pc->ln.dat[st - 1] != fileeol) 514322555Skevans leflags |= REG_NOTBOL; 515226035Sgabor /* Loop to compare with all the patterns */ 516226035Sgabor for (i = 0; i < patterns; i++) { 517322555Skevans pmatch.rm_so = st; 518322587Skevans pmatch.rm_eo = pc->ln.len; 519323443Skevans#ifdef WITH_INTERNAL_NOSPEC 520323443Skevans if (grepbehave == GREP_FIXED) 521323443Skevans r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch); 522323443Skevans else 523323443Skevans#endif 524322582Skevans#ifndef WITHOUT_FASTMATCH 525226035Sgabor if (fg_pattern[i].pattern) 526226035Sgabor r = fastexec(&fg_pattern[i], 527322587Skevans pc->ln.dat, 1, &pmatch, leflags); 528226035Sgabor else 529322582Skevans#endif 530322587Skevans r = regexec(&r_pattern[i], pc->ln.dat, 1, 531322555Skevans &pmatch, leflags); 532322587Skevans if (r != 0) 533226035Sgabor continue; 534226035Sgabor /* Check for full match */ 535322587Skevans if (xflag && (pmatch.rm_so != 0 || 536322587Skevans (size_t)pmatch.rm_eo != pc->ln.len)) 537322587Skevans continue; 538226035Sgabor /* Check for whole word match */ 539322582Skevans#ifndef WITHOUT_FASTMATCH 540322587Skevans if (wflag || fg_pattern[i].word) { 541322582Skevans#else 542322587Skevans if (wflag) { 543322582Skevans#endif 544226035Sgabor wbegin = wend = L' '; 545226035Sgabor if (pmatch.rm_so != 0 && 546322587Skevans sscanf(&pc->ln.dat[pmatch.rm_so - 1], 547226035Sgabor "%lc", &wbegin) != 1) 548226035Sgabor r = REG_NOMATCH; 549226035Sgabor else if ((size_t)pmatch.rm_eo != 550322587Skevans pc->ln.len && 551322587Skevans sscanf(&pc->ln.dat[pmatch.rm_eo], 552226035Sgabor "%lc", &wend) != 1) 553226035Sgabor r = REG_NOMATCH; 554226035Sgabor else if (iswword(wbegin) || 555226035Sgabor iswword(wend)) 556226035Sgabor r = REG_NOMATCH; 557322583Skevans /* 558322583Skevans * If we're doing whole word matching and we 559322583Skevans * matched once, then we should try the pattern 560322587Skevans * again after advancing just past the start of 561322583Skevans * the earliest match. This allows the pattern 562322583Skevans * to match later on in the line and possibly 563322583Skevans * still match a whole word. 564322583Skevans */ 565322583Skevans if (r == REG_NOMATCH && 566322609Skevans (retry == pc->lnstart || 567322619Skevans (unsigned int)pmatch.rm_so + 1 < retry)) 568322583Skevans retry = pmatch.rm_so + 1; 569322587Skevans if (r == REG_NOMATCH) 570322587Skevans continue; 571210389Sgabor } 572354628Skevans lastmatched = true; 573322587Skevans lastmatch = pmatch; 574322587Skevans 575322587Skevans if (matchidx == 0) 576354628Skevans matched = true; 577322587Skevans 578322587Skevans /* 579322587Skevans * Replace previous match if the new one is earlier 580322587Skevans * and/or longer. This will lead to some amount of 581322587Skevans * extra work if -o/--color are specified, but it's 582322587Skevans * worth it from a correctness point of view. 583322587Skevans */ 584322587Skevans if (matchidx > startm) { 585322587Skevans chkmatch = pc->matches[matchidx - 1]; 586322587Skevans if (pmatch.rm_so < chkmatch.rm_so || 587322587Skevans (pmatch.rm_so == chkmatch.rm_so && 588322587Skevans (pmatch.rm_eo - pmatch.rm_so) > 589322587Skevans (chkmatch.rm_eo - chkmatch.rm_so))) { 590322587Skevans pc->matches[matchidx - 1] = pmatch; 591322587Skevans nst = pmatch.rm_eo; 592322555Skevans } 593322587Skevans } else { 594322587Skevans /* Advance as normal if not */ 595322587Skevans pc->matches[matchidx++] = pmatch; 596322587Skevans nst = pmatch.rm_eo; 597210389Sgabor } 598322587Skevans /* avoid excessive matching - skip further patterns */ 599322587Skevans if ((color == NULL && !oflag) || qflag || lflag || 600322622Skevans matchidx >= MAX_MATCHES) { 601322609Skevans pc->lnstart = nst; 602354628Skevans lastmatched = false; 603322587Skevans break; 604322609Skevans } 605226035Sgabor } 606210389Sgabor 607322583Skevans /* 608322583Skevans * Advance to just past the start of the earliest match, try 609322583Skevans * again just in case we still have a chance to match later in 610322583Skevans * the string. 611322583Skevans */ 612354628Skevans if (!lastmatched && retry > pc->lnstart) { 613322583Skevans st = retry; 614322583Skevans continue; 615210389Sgabor } 616210389Sgabor 617354628Skevans /* XXX TODO: We will need to keep going, since we're chunky */ 618226035Sgabor /* One pass if we are not recording matches */ 619270132Sgabor if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag)) 620226035Sgabor break; 621226035Sgabor 622322555Skevans /* If we didn't have any matches or REG_NOSUB set */ 623354628Skevans if (!lastmatched || (cflags & REG_NOSUB)) 624322587Skevans nst = pc->ln.len; 625322555Skevans 626354628Skevans if (!lastmatched) 627322555Skevans /* No matches */ 628322555Skevans break; 629322555Skevans else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo) 630322555Skevans /* Zero-length match -- advance one more so we don't get stuck */ 631322555Skevans nst++; 632322555Skevans 633322555Skevans /* Advance st based on previous matches */ 634322555Skevans st = nst; 635322609Skevans pc->lnstart = st; 636226035Sgabor } 637226035Sgabor 638322587Skevans /* Reflect the new matchidx in the context */ 639322587Skevans pc->matchidx = matchidx; 640354628Skevans return matched; 641210389Sgabor} 642210389Sgabor 643210389Sgabor/* 644210389Sgabor * Safe malloc() for internal use. 645210389Sgabor */ 646210389Sgaborvoid * 647210389Sgaborgrep_malloc(size_t size) 648210389Sgabor{ 649210389Sgabor void *ptr; 650210389Sgabor 651210389Sgabor if ((ptr = malloc(size)) == NULL) 652210389Sgabor err(2, "malloc"); 653210389Sgabor return (ptr); 654210389Sgabor} 655210389Sgabor 656210389Sgabor/* 657210389Sgabor * Safe calloc() for internal use. 658210389Sgabor */ 659210389Sgaborvoid * 660210389Sgaborgrep_calloc(size_t nmemb, size_t size) 661210389Sgabor{ 662210389Sgabor void *ptr; 663210389Sgabor 664210389Sgabor if ((ptr = calloc(nmemb, size)) == NULL) 665210389Sgabor err(2, "calloc"); 666210389Sgabor return (ptr); 667210389Sgabor} 668210389Sgabor 669210389Sgabor/* 670210389Sgabor * Safe realloc() for internal use. 671210389Sgabor */ 672210389Sgaborvoid * 673210389Sgaborgrep_realloc(void *ptr, size_t size) 674210389Sgabor{ 675210389Sgabor 676210389Sgabor if ((ptr = realloc(ptr, size)) == NULL) 677210389Sgabor err(2, "realloc"); 678210389Sgabor return (ptr); 679210389Sgabor} 680210389Sgabor 681210389Sgabor/* 682210578Sgabor * Safe strdup() for internal use. 683210578Sgabor */ 684210578Sgaborchar * 685210578Sgaborgrep_strdup(const char *str) 686210578Sgabor{ 687210578Sgabor char *ret; 688210578Sgabor 689210578Sgabor if ((ret = strdup(str)) == NULL) 690210578Sgabor err(2, "strdup"); 691210578Sgabor return (ret); 692210578Sgabor} 693210578Sgabor 694210578Sgabor/* 695322587Skevans * Print an entire line as-is, there are no inline matches to consider. This is 696322587Skevans * used for printing context. 697210389Sgabor */ 698322587Skevansvoid grep_printline(struct str *line, int sep) { 699322587Skevans printline_metadata(line, sep); 700322587Skevans fwrite(line->dat, line->len, 1, stdout); 701322587Skevans putchar(fileeol); 702322587Skevans} 703322587Skevans 704322587Skevansstatic void 705322587Skevansprintline_metadata(struct str *line, int sep) 706210389Sgabor{ 707322587Skevans bool printsep; 708210389Sgabor 709322587Skevans printsep = false; 710210389Sgabor if (!hflag) { 711228093Sgabor if (!nullflag) { 712210389Sgabor fputs(line->file, stdout); 713322587Skevans printsep = true; 714228093Sgabor } else { 715210389Sgabor printf("%s", line->file); 716210389Sgabor putchar(0); 717210389Sgabor } 718210389Sgabor } 719210389Sgabor if (nflag) { 720322587Skevans if (printsep) 721210389Sgabor putchar(sep); 722210389Sgabor printf("%d", line->line_no); 723322587Skevans printsep = true; 724210389Sgabor } 725210389Sgabor if (bflag) { 726322587Skevans if (printsep) 727210389Sgabor putchar(sep); 728322610Skevans printf("%lld", (long long)(line->off + line->boff)); 729322587Skevans printsep = true; 730210389Sgabor } 731322587Skevans if (printsep) 732210389Sgabor putchar(sep); 733322587Skevans} 734322587Skevans 735322587Skevans/* 736322587Skevans * Prints a matching line according to the command line options. 737322587Skevans */ 738322587Skevansstatic void 739322587Skevansprintline(struct parsec *pc, int sep) 740322587Skevans{ 741322587Skevans size_t a = 0; 742322587Skevans size_t i, matchidx; 743322587Skevans regmatch_t match; 744322587Skevans 745322587Skevans /* If matchall, everything matches but don't actually print for -o */ 746322587Skevans if (oflag && matchall) 747322587Skevans return; 748322587Skevans 749322587Skevans matchidx = pc->matchidx; 750322587Skevans 751210389Sgabor /* --color and -o */ 752322587Skevans if ((oflag || color) && matchidx > 0) { 753322610Skevans /* Only print metadata once per line if --color */ 754322610Skevans if (!oflag && pc->printed == 0) 755322610Skevans printline_metadata(&pc->ln, sep); 756322587Skevans for (i = 0; i < matchidx; i++) { 757322587Skevans match = pc->matches[i]; 758322555Skevans /* Don't output zero length matches */ 759322587Skevans if (match.rm_so == match.rm_eo) 760322555Skevans continue; 761322610Skevans /* 762322610Skevans * Metadata is printed on a per-line basis, so every 763322610Skevans * match gets file metadata with the -o flag. 764322610Skevans */ 765322610Skevans if (oflag) { 766322610Skevans pc->ln.boff = match.rm_so; 767322610Skevans printline_metadata(&pc->ln, sep); 768322610Skevans } else 769322587Skevans fwrite(pc->ln.dat + a, match.rm_so - a, 1, 770210389Sgabor stdout); 771322587Skevans if (color) 772210389Sgabor fprintf(stdout, "\33[%sm\33[K", color); 773322587Skevans fwrite(pc->ln.dat + match.rm_so, 774322587Skevans match.rm_eo - match.rm_so, 1, stdout); 775322587Skevans if (color) 776210389Sgabor fprintf(stdout, "\33[m\33[K"); 777322587Skevans a = match.rm_eo; 778210389Sgabor if (oflag) 779210389Sgabor putchar('\n'); 780210389Sgabor } 781210389Sgabor if (!oflag) { 782322587Skevans if (pc->ln.len - a > 0) 783322587Skevans fwrite(pc->ln.dat + a, pc->ln.len - a, 1, 784322587Skevans stdout); 785210389Sgabor putchar('\n'); 786210389Sgabor } 787322587Skevans } else 788322587Skevans grep_printline(&pc->ln, sep); 789322610Skevans pc->printed++; 790210389Sgabor} 791