1220422Sgabor/*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2220422Sgabor/*	$FreeBSD: stable/11/usr.bin/grep/util.c 354628 2019-11-11 19:54:08Z kevans $	*/
3210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4210389Sgabor
5210389Sgabor/*-
6330449Seadler * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
7330449Seadler *
8211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
9210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
10322625Skevans * Copyright (C) 2017 Kyle Evans <kevans@FreeBSD.org>
11210389Sgabor * All rights reserved.
12210389Sgabor *
13210389Sgabor * Redistribution and use in source and binary forms, with or without
14210389Sgabor * modification, are permitted provided that the following conditions
15210389Sgabor * are met:
16210389Sgabor * 1. Redistributions of source code must retain the above copyright
17210389Sgabor *    notice, this list of conditions and the following disclaimer.
18210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
19210389Sgabor *    notice, this list of conditions and the following disclaimer in the
20210389Sgabor *    documentation and/or other materials provided with the distribution.
21210389Sgabor *
22210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32210389Sgabor * SUCH DAMAGE.
33210389Sgabor */
34210389Sgabor
35210389Sgabor#include <sys/cdefs.h>
36210389Sgabor__FBSDID("$FreeBSD: stable/11/usr.bin/grep/util.c 354628 2019-11-11 19:54:08Z kevans $");
37210389Sgabor
38210389Sgabor#include <sys/stat.h>
39210389Sgabor#include <sys/types.h>
40210389Sgabor
41210389Sgabor#include <ctype.h>
42210389Sgabor#include <err.h>
43210389Sgabor#include <errno.h>
44210389Sgabor#include <fnmatch.h>
45210389Sgabor#include <fts.h>
46210389Sgabor#include <libgen.h>
47210578Sgabor#include <stdbool.h>
48210389Sgabor#include <stdio.h>
49210389Sgabor#include <stdlib.h>
50210389Sgabor#include <string.h>
51210389Sgabor#include <unistd.h>
52210389Sgabor#include <wchar.h>
53210389Sgabor#include <wctype.h>
54210389Sgabor
55322582Skevans#ifndef WITHOUT_FASTMATCH
56226035Sgabor#include "fastmatch.h"
57322582Skevans#endif
58210389Sgabor#include "grep.h"
59210389Sgabor
/*
 * True until the first matching line has been printed.  procmatch_match()
 * tests it before emitting the "--" context separator and clears it once a
 * match has been printed.
 */
60322587Skevansstatic bool	 first_match = true;
61210389Sgabor
62322587Skevans/*
 * Match printing context.  procfile() initializes one of these per file and
 * the procmatch*() helpers update it as lines are processed.
 */
65354628Skevansstruct mprintc {
66354628Skevans	long long	tail;		/* -A context lines still to print */
67354628Skevans	int		last_outed;	/* Lines passed over since last output */
68354628Skevans	bool		doctx;		/* Printing -A/-B context? */
69354628Skevans	bool		printmatch;	/* Printing matching lines? */
70354628Skevans	bool		same_file;	/* Match already printed from this file? */
71322587Skevans};
72322562Skevans
/* Forward declarations for the file-local helpers defined further down. */
73354628Skevansstatic void procmatch_match(struct mprintc *mc, struct parsec *pc);
74354628Skevansstatic void procmatch_nomatch(struct mprintc *mc, struct parsec *pc);
75354628Skevansstatic bool procmatches(struct mprintc *mc, struct parsec *pc, bool matched);
76323443Skevans#ifdef WITH_INTERNAL_NOSPEC
77323443Skevansstatic int litexec(const struct pat *pat, const char *string,
78323443Skevans    size_t nmatch, regmatch_t pmatch[]);
79323443Skevans#endif
80354628Skevansstatic bool procline(struct parsec *pc);
81322587Skevansstatic void printline(struct parsec *pc, int sep);
82322587Skevansstatic void printline_metadata(struct str *line, int sep);
83322587Skevans
84210578Sgaborbool
85210578Sgaborfile_matching(const char *fname)
86210578Sgabor{
87322577Skevans	char *fname_base, *fname_buf;
88210578Sgabor	bool ret;
89210578Sgabor
90210578Sgabor	ret = finclude ? false : true;
91322577Skevans	fname_buf = strdup(fname);
92322577Skevans	if (fname_buf == NULL)
93322577Skevans		err(2, "strdup");
94322577Skevans	fname_base = basename(fname_buf);
95210578Sgabor
96210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
97220421Sgabor		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
98354628Skevans		    fnmatch(fpattern[i].pat, fname_base, 0) == 0)
99354628Skevans			/*
100354628Skevans			 * The last pattern matched wins exclusion/inclusion
101354628Skevans			 * rights, so we can't reasonably bail out early here.
102354628Skevans			 */
103354628Skevans			ret = (fpattern[i].mode != EXCL_PAT);
104210578Sgabor	}
105322577Skevans	free(fname_buf);
106210578Sgabor	return (ret);
107210578Sgabor}
108210578Sgabor
109211364Sgaborstatic inline bool
110210578Sgabordir_matching(const char *dname)
111210578Sgabor{
112210578Sgabor	bool ret;
113210578Sgabor
114210578Sgabor	ret = dinclude ? false : true;
115210578Sgabor
116210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
117354628Skevans		if (dname != NULL && fnmatch(dpattern[i].pat, dname, 0) == 0)
118354628Skevans			/*
119354628Skevans			 * The last pattern matched wins exclusion/inclusion
120354628Skevans			 * rights, so we can't reasonably bail out early here.
121354628Skevans			 */
122354628Skevans			ret = (dpattern[i].mode != EXCL_PAT);
123210578Sgabor	}
124210578Sgabor	return (ret);
125210578Sgabor}
126210578Sgabor
127210389Sgabor/*
128210389Sgabor * Processes a directory when a recursive search is performed with
129210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
130210389Sgabor */
131354628Skevansbool
132210389Sgaborgrep_tree(char **argv)
133210389Sgabor{
134210389Sgabor	FTS *fts;
135210389Sgabor	FTSENT *p;
136354628Skevans	int fts_flags;
137354628Skevans	bool matched, ok;
138322564Skevans	const char *wd[] = { ".", NULL };
139210389Sgabor
140354628Skevans	matched = false;
141210389Sgabor
142354628Skevans	/* This switch effectively initializes 'fts_flags' */
143210389Sgabor	switch(linkbehave) {
144210389Sgabor	case LINK_EXPLICIT:
145210389Sgabor		fts_flags = FTS_COMFOLLOW;
146210389Sgabor		break;
147210389Sgabor	case LINK_SKIP:
148210389Sgabor		fts_flags = FTS_PHYSICAL;
149210389Sgabor		break;
150210389Sgabor	default:
151210389Sgabor		fts_flags = FTS_LOGICAL;
152210389Sgabor	}
153210389Sgabor
154210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
155210389Sgabor
156322564Skevans	fts = fts_open((argv[0] == NULL) ?
157322564Skevans	    __DECONST(char * const *, wd) : argv, fts_flags, NULL);
158322564Skevans	if (fts == NULL)
159210430Sdelphij		err(2, "fts_open");
160210389Sgabor	while ((p = fts_read(fts)) != NULL) {
161210389Sgabor		switch (p->fts_info) {
162210389Sgabor		case FTS_DNR:
163210389Sgabor			/* FALLTHROUGH */
164210389Sgabor		case FTS_ERR:
165228319Sgabor			file_err = true;
166228097Sgabor			if(!sflag)
167228097Sgabor				warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
168210389Sgabor			break;
169210389Sgabor		case FTS_D:
170210389Sgabor			/* FALLTHROUGH */
171210389Sgabor		case FTS_DP:
172224938Sgabor			if (dexclude || dinclude)
173224938Sgabor				if (!dir_matching(p->fts_name) ||
174224938Sgabor				    !dir_matching(p->fts_path))
175224938Sgabor					fts_set(fts, p, FTS_SKIP);
176210389Sgabor			break;
177210389Sgabor		case FTS_DC:
178210389Sgabor			/* Print a warning for recursive directory loop */
179210389Sgabor			warnx("warning: %s: recursive directory loop",
180354628Skevans			    p->fts_path);
181210389Sgabor			break;
182210389Sgabor		default:
183210389Sgabor			/* Check for file exclusion/inclusion */
184210389Sgabor			ok = true;
185210578Sgabor			if (fexclude || finclude)
186210578Sgabor				ok &= file_matching(p->fts_path);
187210389Sgabor
188354628Skevans			if (ok && procfile(p->fts_path))
189354628Skevans				matched = true;
190210389Sgabor			break;
191210389Sgabor		}
192210389Sgabor	}
193210389Sgabor
194210430Sdelphij	fts_close(fts);
195354628Skevans	return (matched);
196210389Sgabor}
197210389Sgabor
198354628Skevansstatic void
199354628Skevansprocmatch_match(struct mprintc *mc, struct parsec *pc)
200354628Skevans{
201354628Skevans
202354628Skevans	if (mc->doctx) {
203354628Skevans		if (!first_match && (!mc->same_file || mc->last_outed > 0))
204354628Skevans			printf("--\n");
205354628Skevans		if (Bflag > 0)
206354628Skevans			printqueue();
207354628Skevans		mc->tail = Aflag;
208354628Skevans	}
209354628Skevans
210354628Skevans	/* Print the matching line, but only if not quiet/binary */
211354628Skevans	if (mc->printmatch) {
212354628Skevans		printline(pc, ':');
213354628Skevans		while (pc->matchidx >= MAX_MATCHES) {
214354628Skevans			/* Reset matchidx and try again */
215354628Skevans			pc->matchidx = 0;
216354628Skevans			if (procline(pc) == !vflag)
217354628Skevans				printline(pc, ':');
218354628Skevans			else
219354628Skevans				break;
220354628Skevans		}
221354628Skevans		first_match = false;
222354628Skevans		mc->same_file = true;
223354628Skevans		mc->last_outed = 0;
224354628Skevans	}
225354628Skevans}
226354628Skevans
227354628Skevansstatic void
228354628Skevansprocmatch_nomatch(struct mprintc *mc, struct parsec *pc)
229354628Skevans{
230354628Skevans
231354628Skevans	/* Deal with any -A context as needed */
232354628Skevans	if (mc->tail > 0) {
233354628Skevans		grep_printline(&pc->ln, '-');
234354628Skevans		mc->tail--;
235354628Skevans		if (Bflag > 0)
236354628Skevans			clearqueue();
237354628Skevans	} else if (Bflag == 0 || (Bflag > 0 && enqueue(&pc->ln)))
238354628Skevans		/*
239354628Skevans		 * Enqueue non-matching lines for -B context. If we're not
240354628Skevans		 * actually doing -B context or if the enqueue resulted in a
241354628Skevans		 * line being rotated out, then go ahead and increment
242354628Skevans		 * last_outed to signify a gap between context/match.
243354628Skevans		 */
244354628Skevans		++mc->last_outed;
245354628Skevans}
246354628Skevans
247210389Sgabor/*
248354628Skevans * Process any matches in the current parsing context, return a boolean
249354628Skevans * indicating whether we should halt any further processing or not. 'true' to
250354628Skevans * continue processing, 'false' to halt.
251354628Skevans */
252354628Skevansstatic bool
253354628Skevansprocmatches(struct mprintc *mc, struct parsec *pc, bool matched)
254354628Skevans{
255354628Skevans
256354628Skevans	/*
257354628Skevans	 * XXX TODO: This should loop over pc->matches and handle things on a
258354628Skevans	 * line-by-line basis, setting up a `struct str` as needed.
259354628Skevans	 */
260354628Skevans	/* Deal with any -B context or context separators */
261354628Skevans	if (matched) {
262354628Skevans		procmatch_match(mc, pc);
263354628Skevans
264354628Skevans		/* Count the matches if we have a match limit */
265354628Skevans		if (mflag) {
266354628Skevans			/* XXX TODO: Decrement by number of matched lines */
267354628Skevans			mcount -= 1;
268354628Skevans			if (mcount <= 0)
269354628Skevans				return (false);
270354628Skevans		}
271354628Skevans	} else if (mc->doctx)
272354628Skevans		procmatch_nomatch(mc, pc);
273354628Skevans
274354628Skevans	return (true);
275354628Skevans}
276354628Skevans
277354628Skevans/*
278210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
279210389Sgabor * passing the lines to procline().
280210389Sgabor */
281354628Skevansbool
282210389Sgaborprocfile(const char *fn)
283210389Sgabor{
284322587Skevans	struct parsec pc;
285354628Skevans	struct mprintc mc;
286210389Sgabor	struct file *f;
287210389Sgabor	struct stat sb;
288210389Sgabor	mode_t s;
289354628Skevans	int lines;
290354628Skevans	bool line_matched;
291210389Sgabor
292210389Sgabor	if (strcmp(fn, "-") == 0) {
293210389Sgabor		fn = label != NULL ? label : getstr(1);
294211463Sgabor		f = grep_open(NULL);
295210389Sgabor	} else {
296354628Skevans		if (stat(fn, &sb) == 0) {
297210389Sgabor			/* Check if we need to process the file */
298210389Sgabor			s = sb.st_mode & S_IFMT;
299354628Skevans			if (dirbehave == DIR_SKIP && s == S_IFDIR)
300354628Skevans				return (false);
301354628Skevans			if (devbehave == DEV_SKIP && (s == S_IFIFO ||
302354628Skevans			    s == S_IFCHR || s == S_IFBLK || s == S_IFSOCK))
303354628Skevans				return (false);
304210389Sgabor		}
305210389Sgabor		f = grep_open(fn);
306210389Sgabor	}
307210389Sgabor	if (f == NULL) {
308228319Sgabor		file_err = true;
309210389Sgabor		if (!sflag)
310210389Sgabor			warn("%s", fn);
311354628Skevans		return (false);
312210389Sgabor	}
313210389Sgabor
314354628Skevans	pc.ln.file = grep_strdup(fn);
315322587Skevans	pc.ln.line_no = 0;
316322587Skevans	pc.ln.len = 0;
317322610Skevans	pc.ln.boff = 0;
318322587Skevans	pc.ln.off = -1;
319322587Skevans	pc.binary = f->binary;
320354628Skevans	pc.cntlines = false;
321354628Skevans	memset(&mc, 0, sizeof(mc));
322354628Skevans	mc.printmatch = true;
323322587Skevans	if ((pc.binary && binbehave == BINFILE_BIN) || cflag || qflag ||
324322587Skevans	    lflag || Lflag)
325354628Skevans		mc.printmatch = false;
326354628Skevans	if (mc.printmatch && (Aflag != 0 || Bflag != 0))
327354628Skevans		mc.doctx = true;
328354628Skevans	if (mc.printmatch && (Aflag != 0 || Bflag != 0 || mflag || nflag))
329354628Skevans		pc.cntlines = true;
330322587Skevans	mcount = mlimit;
331210389Sgabor
332354628Skevans	for (lines = 0; lines == 0 || !(lflag || qflag); ) {
333354628Skevans		/*
334354628Skevans		 * XXX TODO: We need to revisit this in a chunking world. We're
335354628Skevans		 * not going to be doing per-line statistics because of the
336354628Skevans		 * overhead involved. procmatches can figure that stuff out as
337354628Skevans		 * needed. */
338322610Skevans		/* Reset per-line statistics */
339322610Skevans		pc.printed = 0;
340322587Skevans		pc.matchidx = 0;
341322609Skevans		pc.lnstart = 0;
342322610Skevans		pc.ln.boff = 0;
343322587Skevans		pc.ln.off += pc.ln.len + 1;
344354628Skevans		/* XXX TODO: Grab a chunk */
345354628Skevans		if ((pc.ln.dat = grep_fgetln(f, &pc)) == NULL ||
346322777Skevans		    pc.ln.len == 0)
347322777Skevans			break;
348210389Sgabor
349322587Skevans		if (pc.ln.len > 0 && pc.ln.dat[pc.ln.len - 1] == fileeol)
350322587Skevans			--pc.ln.len;
351322587Skevans		pc.ln.line_no++;
352322587Skevans
353210389Sgabor		/* Return if we need to skip a binary file */
354322587Skevans		if (pc.binary && binbehave == BINFILE_SKIP) {
355210389Sgabor			grep_close(f);
356322587Skevans			free(pc.ln.file);
357210389Sgabor			free(f);
358210389Sgabor			return (0);
359210389Sgabor		}
360322562Skevans
361354628Skevans		line_matched = procline(&pc) == !vflag;
362354628Skevans		if (line_matched)
363354628Skevans			++lines;
364322587Skevans
365354628Skevans		/* Halt processing if we hit our match limit */
366354628Skevans		if (!procmatches(&mc, &pc, line_matched))
367354628Skevans			break;
368210389Sgabor	}
369210389Sgabor	if (Bflag > 0)
370210389Sgabor		clearqueue();
371210389Sgabor	grep_close(f);
372210389Sgabor
373210389Sgabor	if (cflag) {
374210389Sgabor		if (!hflag)
375322587Skevans			printf("%s:", pc.ln.file);
376354628Skevans		printf("%u\n", lines);
377210389Sgabor	}
378354628Skevans	if (lflag && !qflag && lines != 0)
379228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
380354628Skevans	if (Lflag && !qflag && lines == 0)
381228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
382354628Skevans	if (lines != 0 && !cflag && !lflag && !Lflag &&
383210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
384210622Sgabor		printf(getstr(8), fn);
385210389Sgabor
386322587Skevans	free(pc.ln.file);
387210389Sgabor	free(f);
388354628Skevans	return (lines != 0);
389210389Sgabor}
390210389Sgabor
#ifdef WITH_INTERNAL_NOSPEC
/*
 * Internal implementation of literal string search within a string, modeled
 * after regexec(3), for use when the regex(3) implementation doesn't offer
 * either REG_NOSPEC or REG_LITERAL. This does not apply in the default FreeBSD
 * config, but in other scenarios such as building against libgnuregex or on
 * some non-FreeBSD OSes.
 *
 * On entry pmatch[0].rm_so and pmatch[0].rm_eo delimit the part of 'string'
 * to search.  The comparison ignores case when cflags has REG_ICASE set.
 *
 * On success up to nmatch occurrences (overlapping ones included) are stored
 * in pmatch[], any unused slots are set to -1, and 0 is returned.  Returns
 * REG_NOMATCH when nothing is found and REG_ESPACE when the working copy of
 * the string cannot be allocated.
 */
static int
litexec(const struct pat *pat, const char *string, size_t nmatch,
    regmatch_t pmatch[])
{
	char *(*strstr_fn)(const char *, const char *);
	char *sub, *subject;
	const char *search;
	size_t fill, idx, ofs, stringlen;
	bool found;

	if (cflags & REG_ICASE)
		strstr_fn = strcasestr;
	else
		strstr_fn = strstr;
	idx = 0;
	ofs = pmatch[0].rm_so;
	stringlen = pmatch[0].rm_eo;
	if (ofs >= stringlen)
		return (REG_NOMATCH);
	/* Work on a copy that is NUL-terminated at the end of the range. */
	subject = strndup(string, stringlen);
	if (subject == NULL)
		return (REG_ESPACE);
	found = false;
	while (ofs < stringlen) {
		search = (subject + ofs);
		/* The remainder is too short to hold the pattern. */
		if ((unsigned long)pat->len > strlen(search))
			break;
		sub = strstr_fn(search, pat->pat);
		/*
		 * Ignoring the empty string possibility due to context: grep
		 * optimizes for empty patterns and will never reach this point.
		 */
		if (sub == NULL)
			break;
		found = true;
		/* We only needed to know if we match or not */
		if (nmatch == 0)
			break;
		/* Fill in pmatch */
		pmatch[idx].rm_so = ofs + (sub - search);
		pmatch[idx].rm_eo = pmatch[idx].rm_so + pat->len;
		if (++idx == nmatch)
			break;
		/*
		 * Resume just past the start of the match that was just
		 * recorded.  That is slot idx - 1; slot idx has not been
		 * written yet.
		 */
		ofs = pmatch[idx - 1].rm_so + 1;
	}
	free(subject);
	if (!found)
		return (REG_NOMATCH);

	/* Mark the slots that were not used as empty. */
	for (fill = idx; fill < nmatch; ++fill)
		pmatch[fill].rm_so = pmatch[fill].rm_eo = -1;

	return (0);
}
#endif /* WITH_INTERNAL_NOSPEC */
451323443Skevans
/*
 * True if the wide character x counts as part of a word: alphanumeric or an
 * underscore.  Note that x is evaluated twice.
 */
452210389Sgabor#define iswword(x)	(iswalnum((x)) || (x) == L'_')
453210389Sgabor
454210389Sgabor/*
 * Processes a line, comparing it with the specified patterns.  Every pattern
 * is tried against the rest of the line, starting at pc->lnstart, and the
 * matches found are saved in pc->matches; that is what colorized and -o
 * output need.  Printing is left to the caller.
 *
 * On return pc->matchidx holds the number of recorded matches, which never
 * exceeds MAX_MATCHES for one call, and pc->lnstart the offset at which a
 * later call would resume.
 *
 * Returns true if the line matched; callers compare the result against
 * !vflag.  The result is only set while the first match is being recorded,
 * so callers enter with pc->matchidx == 0.
 */
461354628Skevansstatic bool
462322587Skevansprocline(struct parsec *pc)
463210389Sgabor{
464322587Skevans	regmatch_t pmatch, lastmatch, chkmatch;
465322587Skevans	wchar_t wbegin, wend;
466322609Skevans	size_t st, nst;
467210389Sgabor	unsigned int i;
468354628Skevans	int r = 0, leflags = eflags;
469322587Skevans	size_t startm = 0, matchidx;
470322609Skevans	unsigned int retry;
471354628Skevans	bool lastmatched, matched;
472210389Sgabor
	/* New matches are appended after those already recorded in pc. */
473322587Skevans	matchidx = pc->matchidx;
474322587Skevans
475354628Skevans	/*
476354628Skevans	 * With matchall (empty pattern), we can try to take some shortcuts.
477354628Skevans	 * Empty patterns trivially match every line except in the -w and -x
478354628Skevans	 * cases.  For -w (whole-word) cases, we only match if the first
479354628Skevans	 * character isn't a word-character.  For -x (whole-line) cases, we only
480354628Skevans	 * match if the line is empty.
481354628Skevans	 */
482354628Skevans	if (matchall) {
483322587Skevans		if (pc->ln.len == 0)
484354628Skevans			return (true);
485354628Skevans		if (wflag) {
486354628Skevans			wend = L' ';
487354628Skevans			if (sscanf(&pc->ln.dat[0], "%lc", &wend) == 1 &&
488354628Skevans			    !iswword(wend))
489354628Skevans				return (true);
490354628Skevans		} else if (!xflag)
491354628Skevans			return (true);
492322587Skevans
493354628Skevans		/*
494354628Skevans		 * If we don't have any other patterns, we really don't match.
495354628Skevans		 * If we do have other patterns, we must fall through and check
496354628Skevans		 * them.
497354628Skevans		 */
498354628Skevans		if (patterns == 0)
499354628Skevans			return (false);
500354628Skevans	}
501354628Skevans
502354628Skevans	matched = false;
503322609Skevans	st = pc->lnstart;
504322609Skevans	nst = 0;
505322555Skevans	/* Initialize to avoid a false positive warning from GCC. */
506322555Skevans	lastmatch.rm_so = lastmatch.rm_eo = 0;
507322555Skevans
508226035Sgabor	/* Loop to process the whole line */
509322587Skevans	while (st <= pc->ln.len) {
510354628Skevans		lastmatched = false;
511322587Skevans		startm = matchidx;
512322583Skevans		retry = 0;
		/* Past the start of a line: tell the matcher st is not BOL. */
513322622Skevans		if (st > 0 && pc->ln.dat[st - 1] != fileeol)
514322555Skevans			leflags |= REG_NOTBOL;
515226035Sgabor		/* Loop to compare with all the patterns */
516226035Sgabor		for (i = 0; i < patterns; i++) {
			/*
			 * Search window for this attempt.  litexec() reads it
			 * directly; presumably eflags carries REG_STARTEND so
			 * that regexec() honors it too -- TODO confirm.
			 */
517322555Skevans			pmatch.rm_so = st;
518322587Skevans			pmatch.rm_eo = pc->ln.len;
519323443Skevans#ifdef WITH_INTERNAL_NOSPEC
520323443Skevans			if (grepbehave == GREP_FIXED)
521323443Skevans				r = litexec(&pattern[i], pc->ln.dat, 1, &pmatch);
522323443Skevans			else
523323443Skevans#endif
524322582Skevans#ifndef WITHOUT_FASTMATCH
525226035Sgabor			if (fg_pattern[i].pattern)
526226035Sgabor				r = fastexec(&fg_pattern[i],
527322587Skevans				    pc->ln.dat, 1, &pmatch, leflags);
528226035Sgabor			else
529322582Skevans#endif
530322587Skevans				r = regexec(&r_pattern[i], pc->ln.dat, 1,
531322555Skevans				    &pmatch, leflags);
532322587Skevans			if (r != 0)
533226035Sgabor				continue;
534226035Sgabor			/* Check for full match */
535322587Skevans			if (xflag && (pmatch.rm_so != 0 ||
536322587Skevans			    (size_t)pmatch.rm_eo != pc->ln.len))
537322587Skevans				continue;
538226035Sgabor			/* Check for whole word match */
539322582Skevans#ifndef WITHOUT_FASTMATCH
540322587Skevans			if (wflag || fg_pattern[i].word) {
541322582Skevans#else
542322587Skevans			if (wflag) {
543322582Skevans#endif
544226035Sgabor				wbegin = wend = L' ';
545226035Sgabor				if (pmatch.rm_so != 0 &&
546322587Skevans				    sscanf(&pc->ln.dat[pmatch.rm_so - 1],
547226035Sgabor				    "%lc", &wbegin) != 1)
548226035Sgabor					r = REG_NOMATCH;
549226035Sgabor				else if ((size_t)pmatch.rm_eo !=
550322587Skevans				    pc->ln.len &&
551322587Skevans				    sscanf(&pc->ln.dat[pmatch.rm_eo],
552226035Sgabor				    "%lc", &wend) != 1)
553226035Sgabor					r = REG_NOMATCH;
554226035Sgabor				else if (iswword(wbegin) ||
555226035Sgabor				    iswword(wend))
556226035Sgabor					r = REG_NOMATCH;
557322583Skevans				/*
558322583Skevans				 * If we're doing whole word matching and we
559322583Skevans				 * matched once, then we should try the pattern
560322587Skevans				 * again after advancing just past the start of
561322583Skevans				 * the earliest match. This allows the pattern
562322583Skevans				 * to  match later on in the line and possibly
563322583Skevans				 * still match a whole word.
564322583Skevans				 */
565322583Skevans				if (r == REG_NOMATCH &&
566322609Skevans				    (retry == pc->lnstart ||
567322619Skevans				    (unsigned int)pmatch.rm_so + 1 < retry))
568322583Skevans					retry = pmatch.rm_so + 1;
569322587Skevans				if (r == REG_NOMATCH)
570322587Skevans					continue;
571210389Sgabor			}
572354628Skevans			lastmatched = true;
573322587Skevans			lastmatch = pmatch;
574322587Skevans
575322587Skevans			if (matchidx == 0)
576354628Skevans				matched = true;
577322587Skevans
578322587Skevans			/*
579322587Skevans			 * Replace previous match if the new one is earlier
580322587Skevans			 * and/or longer. This will lead to some amount of
581322587Skevans			 * extra work if -o/--color are specified, but it's
582322587Skevans			 * worth it from a correctness point of view.
583322587Skevans			 */
584322587Skevans			if (matchidx > startm) {
585322587Skevans				chkmatch = pc->matches[matchidx - 1];
586322587Skevans				if (pmatch.rm_so < chkmatch.rm_so ||
587322587Skevans				    (pmatch.rm_so == chkmatch.rm_so &&
588322587Skevans				    (pmatch.rm_eo - pmatch.rm_so) >
589322587Skevans				    (chkmatch.rm_eo - chkmatch.rm_so))) {
590322587Skevans					pc->matches[matchidx - 1] = pmatch;
591322587Skevans					nst = pmatch.rm_eo;
592322555Skevans				}
593322587Skevans			} else {
594322587Skevans				/* Advance as normal if not */
595322587Skevans				pc->matches[matchidx++] = pmatch;
596322587Skevans				nst = pmatch.rm_eo;
597210389Sgabor			}
598322587Skevans			/* avoid excessive matching - skip further patterns */
599322587Skevans			if ((color == NULL && !oflag) || qflag || lflag ||
600322622Skevans			    matchidx >= MAX_MATCHES) {
601322609Skevans				pc->lnstart = nst;
602354628Skevans				lastmatched = false;
603322587Skevans				break;
604322609Skevans			}
605226035Sgabor		}
606210389Sgabor
607322583Skevans		/*
608322583Skevans		 * Advance to just past the start of the earliest match, try
609322583Skevans		 * again just in case we still have a chance to match later in
610322583Skevans		 * the string.
611322583Skevans		 */
612354628Skevans		if (!lastmatched && retry > pc->lnstart) {
613322583Skevans			st = retry;
614322583Skevans			continue;
615210389Sgabor		}
616210389Sgabor
617354628Skevans		/* XXX TODO: We will need to keep going, since we're chunky */
618226035Sgabor		/* One pass if we are not recording matches */
619270132Sgabor		if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
620226035Sgabor			break;
621226035Sgabor
622322555Skevans		/* If we didn't have any matches or REG_NOSUB set */
623354628Skevans		if (!lastmatched || (cflags & REG_NOSUB))
624322587Skevans			nst = pc->ln.len;
625322555Skevans
626354628Skevans		if (!lastmatched)
627322555Skevans			/* No matches */
628322555Skevans			break;
629322555Skevans		else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
630322555Skevans			/* Zero-length match -- advance one more so we don't get stuck */
631322555Skevans			nst++;
632322555Skevans
633322555Skevans		/* Advance st based on previous matches */
634322555Skevans		st = nst;
635322609Skevans		pc->lnstart = st;
636226035Sgabor	}
637226035Sgabor
638322587Skevans	/* Reflect the new matchidx in the context */
639322587Skevans	pc->matchidx = matchidx;
640354628Skevans	return matched;
641210389Sgabor}
642210389Sgabor
/*
 * malloc(3) wrapper for internal use: a failed allocation ends the program
 * through err(2, ...), so the result never needs to be checked.
 */
void *
grep_malloc(size_t size)
{
	void *mem = malloc(size);

	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
655210389Sgabor
/*
 * calloc(3) wrapper for internal use: a failed allocation ends the program
 * through err(2, ...), so the result never needs to be checked.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem = calloc(nmemb, size);

	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
668210389Sgabor
/*
 * realloc(3) wrapper for internal use: a failed reallocation ends the
 * program through err(2, ...), so the result never needs to be checked.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized = realloc(ptr, size);

	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
680210389Sgabor
/*
 * strdup(3) wrapper for internal use: a failed allocation ends the program
 * through err(2, ...), so the result never needs to be checked.
 */
char *
grep_strdup(const char *str)
{
	char *copy = strdup(str);

	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
693210578Sgabor
694210578Sgabor/*
695322587Skevans * Print an entire line as-is, there are no inline matches to consider. This is
696322587Skevans * used for printing context.
697210389Sgabor */
698322587Skevansvoid grep_printline(struct str *line, int sep) {
699322587Skevans	printline_metadata(line, sep);
700322587Skevans	fwrite(line->dat, line->len, 1, stdout);
701322587Skevans	putchar(fileeol);
702322587Skevans}
703322587Skevans
704322587Skevansstatic void
705322587Skevansprintline_metadata(struct str *line, int sep)
706210389Sgabor{
707322587Skevans	bool printsep;
708210389Sgabor
709322587Skevans	printsep = false;
710210389Sgabor	if (!hflag) {
711228093Sgabor		if (!nullflag) {
712210389Sgabor			fputs(line->file, stdout);
713322587Skevans			printsep = true;
714228093Sgabor		} else {
715210389Sgabor			printf("%s", line->file);
716210389Sgabor			putchar(0);
717210389Sgabor		}
718210389Sgabor	}
719210389Sgabor	if (nflag) {
720322587Skevans		if (printsep)
721210389Sgabor			putchar(sep);
722210389Sgabor		printf("%d", line->line_no);
723322587Skevans		printsep = true;
724210389Sgabor	}
725210389Sgabor	if (bflag) {
726322587Skevans		if (printsep)
727210389Sgabor			putchar(sep);
728322610Skevans		printf("%lld", (long long)(line->off + line->boff));
729322587Skevans		printsep = true;
730210389Sgabor	}
731322587Skevans	if (printsep)
732210389Sgabor		putchar(sep);
733322587Skevans}
734322587Skevans
735322587Skevans/*
736322587Skevans * Prints a matching line according to the command line options.
737322587Skevans */
738322587Skevansstatic void
739322587Skevansprintline(struct parsec *pc, int sep)
740322587Skevans{
741322587Skevans	size_t a = 0;
742322587Skevans	size_t i, matchidx;
743322587Skevans	regmatch_t match;
744322587Skevans
745322587Skevans	/* If matchall, everything matches but don't actually print for -o */
746322587Skevans	if (oflag && matchall)
747322587Skevans		return;
748322587Skevans
749322587Skevans	matchidx = pc->matchidx;
750322587Skevans
751210389Sgabor	/* --color and -o */
752322587Skevans	if ((oflag || color) && matchidx > 0) {
753322610Skevans		/* Only print metadata once per line if --color */
754322610Skevans		if (!oflag && pc->printed == 0)
755322610Skevans			printline_metadata(&pc->ln, sep);
756322587Skevans		for (i = 0; i < matchidx; i++) {
757322587Skevans			match = pc->matches[i];
758322555Skevans			/* Don't output zero length matches */
759322587Skevans			if (match.rm_so == match.rm_eo)
760322555Skevans				continue;
761322610Skevans			/*
762322610Skevans			 * Metadata is printed on a per-line basis, so every
763322610Skevans			 * match gets file metadata with the -o flag.
764322610Skevans			 */
765322610Skevans			if (oflag) {
766322610Skevans				pc->ln.boff = match.rm_so;
767322610Skevans				printline_metadata(&pc->ln, sep);
768322610Skevans			} else
769322587Skevans				fwrite(pc->ln.dat + a, match.rm_so - a, 1,
770210389Sgabor				    stdout);
771322587Skevans			if (color)
772210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
773322587Skevans			fwrite(pc->ln.dat + match.rm_so,
774322587Skevans			    match.rm_eo - match.rm_so, 1, stdout);
775322587Skevans			if (color)
776210389Sgabor				fprintf(stdout, "\33[m\33[K");
777322587Skevans			a = match.rm_eo;
778210389Sgabor			if (oflag)
779210389Sgabor				putchar('\n');
780210389Sgabor		}
781210389Sgabor		if (!oflag) {
782322587Skevans			if (pc->ln.len - a > 0)
783322587Skevans				fwrite(pc->ln.dat + a, pc->ln.len - a, 1,
784322587Skevans				    stdout);
785210389Sgabor			putchar('\n');
786210389Sgabor		}
787322587Skevans	} else
788322587Skevans		grep_printline(&pc->ln, sep);
789322610Skevans	pc->printed++;
790210389Sgabor}
791