util.c revision 322577
1/*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2/*	$FreeBSD: stable/11/usr.bin/grep/util.c 322577 2017-08-16 13:06:26Z kevans $	*/
3/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 *    notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 *    notice, this list of conditions and the following disclaimer in the
17 *    documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD: stable/11/usr.bin/grep/util.c 322577 2017-08-16 13:06:26Z kevans $");
34
35#include <sys/stat.h>
36#include <sys/types.h>
37
38#include <ctype.h>
39#include <err.h>
40#include <errno.h>
41#include <fnmatch.h>
42#include <fts.h>
43#include <libgen.h>
44#include <stdbool.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49#include <wchar.h>
50#include <wctype.h>
51
52#include "fastmatch.h"
53#include "grep.h"
54
/* procline() is defined below its only caller, procfile(). */
static int	 procline(struct str *l, int);

/* Lines passed to enqueue() since the counter was last reset. */
static int	 linesqueued;
/* Line number of the latest line printed as trailing (-A) context. */
static int	 lasta;
/*
 * True while the current non-matching lines were already printed as -A
 * context; procline() then suppresses the "--" group separator.
 */
static bool	 ctxover;
60
61bool
62file_matching(const char *fname)
63{
64	char *fname_base, *fname_buf;
65	bool ret;
66
67	ret = finclude ? false : true;
68	fname_buf = strdup(fname);
69	if (fname_buf == NULL)
70		err(2, "strdup");
71	fname_base = basename(fname_buf);
72
73	for (unsigned int i = 0; i < fpatterns; ++i) {
74		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
75		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
76			if (fpattern[i].mode == EXCL_PAT) {
77				ret = false;
78				break;
79			} else
80				ret = true;
81		}
82	}
83	free(fname_buf);
84	return (ret);
85}
86
87static inline bool
88dir_matching(const char *dname)
89{
90	bool ret;
91
92	ret = dinclude ? false : true;
93
94	for (unsigned int i = 0; i < dpatterns; ++i) {
95		if (dname != NULL &&
96		    fnmatch(dpattern[i].pat, dname, 0) == 0) {
97			if (dpattern[i].mode == EXCL_PAT)
98				return (false);
99			else
100				ret = true;
101		}
102	}
103	return (ret);
104}
105
106/*
107 * Processes a directory when a recursive search is performed with
108 * the -R option.  Each appropriate file is passed to procfile().
109 */
110int
111grep_tree(char **argv)
112{
113	FTS *fts;
114	FTSENT *p;
115	int c, fts_flags;
116	bool ok;
117	const char *wd[] = { ".", NULL };
118
119	c = fts_flags = 0;
120
121	switch(linkbehave) {
122	case LINK_EXPLICIT:
123		fts_flags = FTS_COMFOLLOW;
124		break;
125	case LINK_SKIP:
126		fts_flags = FTS_PHYSICAL;
127		break;
128	default:
129		fts_flags = FTS_LOGICAL;
130
131	}
132
133	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
134
135	fts = fts_open((argv[0] == NULL) ?
136	    __DECONST(char * const *, wd) : argv, fts_flags, NULL);
137	if (fts == NULL)
138		err(2, "fts_open");
139	while ((p = fts_read(fts)) != NULL) {
140		switch (p->fts_info) {
141		case FTS_DNR:
142			/* FALLTHROUGH */
143		case FTS_ERR:
144			file_err = true;
145			if(!sflag)
146				warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
147			break;
148		case FTS_D:
149			/* FALLTHROUGH */
150		case FTS_DP:
151			if (dexclude || dinclude)
152				if (!dir_matching(p->fts_name) ||
153				    !dir_matching(p->fts_path))
154					fts_set(fts, p, FTS_SKIP);
155			break;
156		case FTS_DC:
157			/* Print a warning for recursive directory loop */
158			warnx("warning: %s: recursive directory loop",
159				p->fts_path);
160			break;
161		default:
162			/* Check for file exclusion/inclusion */
163			ok = true;
164			if (fexclude || finclude)
165				ok &= file_matching(p->fts_path);
166
167			if (ok)
168				c += procfile(p->fts_path);
169			break;
170		}
171	}
172
173	fts_close(fts);
174	return (c);
175}
176
177/*
178 * Opens a file and processes it.  Each file is processed line-by-line
179 * passing the lines to procline().
180 */
181int
182procfile(const char *fn)
183{
184	struct file *f;
185	struct stat sb;
186	struct str ln;
187	mode_t s;
188	int c, t;
189
190	mcount = mlimit;
191
192	if (strcmp(fn, "-") == 0) {
193		fn = label != NULL ? label : getstr(1);
194		f = grep_open(NULL);
195	} else {
196		if (!stat(fn, &sb)) {
197			/* Check if we need to process the file */
198			s = sb.st_mode & S_IFMT;
199			if (s == S_IFDIR && dirbehave == DIR_SKIP)
200				return (0);
201			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
202				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
203					return (0);
204		}
205		f = grep_open(fn);
206	}
207	if (f == NULL) {
208		file_err = true;
209		if (!sflag)
210			warn("%s", fn);
211		return (0);
212	}
213
214	ln.file = grep_malloc(strlen(fn) + 1);
215	strcpy(ln.file, fn);
216	ln.line_no = 0;
217	ln.len = 0;
218	ctxover = false;
219	linesqueued = 0;
220	tail = 0;
221	lasta = 0;
222	ln.off = -1;
223
224	for (c = 0;  c == 0 || !(lflag || qflag); ) {
225		ln.off += ln.len + 1;
226		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
227			if (ln.line_no == 0 && matchall)
228				exit(0);
229			else
230				break;
231		}
232		if (ln.len > 0 && ln.dat[ln.len - 1] == fileeol)
233			--ln.len;
234		ln.line_no++;
235
236		/* Return if we need to skip a binary file */
237		if (f->binary && binbehave == BINFILE_SKIP) {
238			grep_close(f);
239			free(ln.file);
240			free(f);
241			return (0);
242		}
243
244		/* Process the file line-by-line, enqueue non-matching lines */
245		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
246			/* Except don't enqueue lines that appear in -A ctx */
247			if (ln.line_no == 0 || lasta != ln.line_no) {
248				/* queue is maxed to Bflag number of lines */
249				enqueue(&ln);
250				linesqueued++;
251				ctxover = false;
252			} else {
253				/*
254				 * Indicate to procline() that we have ctx
255				 * overlap and make sure queue is empty.
256				 */
257				if (!ctxover)
258					clearqueue();
259				ctxover = true;
260			}
261		}
262		c += t;
263		if (mflag && mcount <= 0)
264			break;
265	}
266	if (Bflag > 0)
267		clearqueue();
268	grep_close(f);
269
270	if (cflag) {
271		if (!hflag)
272			printf("%s:", ln.file);
273		printf("%u\n", c);
274	}
275	if (lflag && !qflag && c != 0)
276		printf("%s%c", fn, nullflag ? 0 : '\n');
277	if (Lflag && !qflag && c == 0)
278		printf("%s%c", fn, nullflag ? 0 : '\n');
279	if (c && !cflag && !lflag && !Lflag &&
280	    binbehave == BINFILE_BIN && f->binary && !qflag)
281		printf(getstr(8), fn);
282
283	free(ln.file);
284	free(f);
285	return (c);
286}
287
/*
 * Returns true when wc counts as a word character: an alphanumeric wide
 * character or an underscore.  A function rather than a macro so the
 * argument is evaluated exactly once.
 */
static inline bool
iswword(wint_t wc)
{

	return (iswalnum(wc) || wc == L'_');
}
289
290/*
291 * Processes a line comparing it with the specified patterns.  Each pattern
292 * is looped to be compared along with the full string, saving each and every
293 * match, which is necessary to colorize the output and to count the
294 * matches.  The matching lines are passed to printline() to display the
295 * appropriate output.
296 */
297static int
298procline(struct str *l, int nottext)
299{
300	regmatch_t matches[MAX_LINE_MATCHES];
301	regmatch_t pmatch, lastmatch;
302	size_t st = 0, nst = 0;
303	unsigned int i;
304	int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
305	int startm = 0;
306
307	/* Initialize to avoid a false positive warning from GCC. */
308	lastmatch.rm_so = lastmatch.rm_eo = 0;
309
310	/* Loop to process the whole line */
311	while (st <= l->len) {
312		lastmatches = 0;
313		startm = m;
314		if (st > 0)
315			leflags |= REG_NOTBOL;
316		/* Loop to compare with all the patterns */
317		for (i = 0; i < patterns; i++) {
318			pmatch.rm_so = st;
319			pmatch.rm_eo = l->len;
320			if (fg_pattern[i].pattern)
321				r = fastexec(&fg_pattern[i],
322				    l->dat, 1, &pmatch, leflags);
323			else
324				r = regexec(&r_pattern[i], l->dat, 1,
325				    &pmatch, leflags);
326			r = (r == 0) ? 0 : REG_NOMATCH;
327			if (r == REG_NOMATCH)
328				continue;
329			/* Check for full match */
330			if (r == 0 && xflag)
331				if (pmatch.rm_so != 0 ||
332				    (size_t)pmatch.rm_eo != l->len)
333					r = REG_NOMATCH;
334			/* Check for whole word match */
335			if (r == 0 && (wflag || fg_pattern[i].word)) {
336				wchar_t wbegin, wend;
337
338				wbegin = wend = L' ';
339				if (pmatch.rm_so != 0 &&
340				    sscanf(&l->dat[pmatch.rm_so - 1],
341				    "%lc", &wbegin) != 1)
342					r = REG_NOMATCH;
343				else if ((size_t)pmatch.rm_eo !=
344				    l->len &&
345				    sscanf(&l->dat[pmatch.rm_eo],
346				    "%lc", &wend) != 1)
347					r = REG_NOMATCH;
348				else if (iswword(wbegin) ||
349				    iswword(wend))
350					r = REG_NOMATCH;
351			}
352			if (r == 0) {
353				lastmatches++;
354				lastmatch = pmatch;
355				if (m == 0)
356					c++;
357
358				if (m < MAX_LINE_MATCHES) {
359					/* Replace previous match if the new one is earlier and/or longer */
360					if (m > startm) {
361						if (pmatch.rm_so < matches[m-1].rm_so ||
362						    (pmatch.rm_so == matches[m-1].rm_so && (pmatch.rm_eo - pmatch.rm_so) > (matches[m-1].rm_eo - matches[m-1].rm_so))) {
363							matches[m-1] = pmatch;
364							nst = pmatch.rm_eo;
365						}
366					} else {
367						/* Advance as normal if not */
368						matches[m++] = pmatch;
369						nst = pmatch.rm_eo;
370					}
371				}
372
373				/* matches - skip further patterns */
374				if ((color == NULL && !oflag) ||
375				    qflag || lflag)
376					break;
377			}
378		}
379
380		if (vflag) {
381			c = !c;
382			break;
383		}
384
385		/* One pass if we are not recording matches */
386		if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
387			break;
388
389		/* If we didn't have any matches or REG_NOSUB set */
390		if (lastmatches == 0 || (cflags & REG_NOSUB))
391			nst = l->len;
392
393		if (lastmatches == 0)
394			/* No matches */
395			break;
396		else if (st == nst && lastmatch.rm_so == lastmatch.rm_eo)
397			/* Zero-length match -- advance one more so we don't get stuck */
398			nst++;
399
400		/* Advance st based on previous matches */
401		st = nst;
402	}
403
404
405	/* Count the matches if we have a match limit */
406	if (mflag)
407		mcount -= c;
408
409	if (c && binbehave == BINFILE_BIN && nottext)
410		return (c); /* Binary file */
411
412	/* Dealing with the context */
413	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
414		if (c) {
415			if (!first && !prev && !tail && (Bflag || Aflag) &&
416			    !ctxover)
417				printf("--\n");
418			tail = Aflag;
419			if (Bflag > 0) {
420				printqueue();
421				ctxover = false;
422			}
423			linesqueued = 0;
424			printline(l, ':', matches, m);
425		} else {
426			/* Print -A lines following matches */
427			lasta = l->line_no;
428			printline(l, '-', matches, m);
429			tail--;
430		}
431	}
432
433	if (c) {
434		prev = true;
435		first = false;
436	} else
437		prev = false;
438
439	return (c);
440}
441
/*
 * malloc() wrapper that never returns NULL: on allocation failure the
 * program terminates through err() with exit status 2.
 */
void *
grep_malloc(size_t size)
{
	void *mem;

	mem = malloc(size);
	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
454
/*
 * calloc() wrapper that never returns NULL: on allocation failure the
 * program terminates through err() with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem;

	mem = calloc(nmemb, size);
	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
467
/*
 * realloc() wrapper that never returns NULL: when realloc() yields NULL
 * the program terminates through err() with exit status 2.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized;

	resized = realloc(ptr, size);
	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
479
/*
 * strdup() wrapper that never returns NULL: on allocation failure the
 * program terminates through err() with exit status 2.  The caller owns
 * the returned copy and releases it with free().
 */
char *
grep_strdup(const char *str)
{
	char *copy;

	copy = strdup(str);
	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
492
493/*
494 * Prints a matching line according to the command line options.
495 */
496void
497printline(struct str *line, int sep, regmatch_t *matches, int m)
498{
499	size_t a = 0;
500	int i, n = 0;
501
502	/* If matchall, everything matches but don't actually print for -o */
503	if (oflag && matchall)
504		return;
505
506	if (!hflag) {
507		if (!nullflag) {
508			fputs(line->file, stdout);
509			++n;
510		} else {
511			printf("%s", line->file);
512			putchar(0);
513		}
514	}
515	if (nflag) {
516		if (n > 0)
517			putchar(sep);
518		printf("%d", line->line_no);
519		++n;
520	}
521	if (bflag) {
522		if (n > 0)
523			putchar(sep);
524		printf("%lld", (long long)line->off);
525		++n;
526	}
527	if (n)
528		putchar(sep);
529	/* --color and -o */
530	if ((oflag || color) && m > 0) {
531		for (i = 0; i < m; i++) {
532			/* Don't output zero length matches */
533			if (matches[i].rm_so == matches[i].rm_eo)
534				continue;
535			if (!oflag)
536				fwrite(line->dat + a, matches[i].rm_so - a, 1,
537				    stdout);
538			if (color)
539				fprintf(stdout, "\33[%sm\33[K", color);
540
541				fwrite(line->dat + matches[i].rm_so,
542				    matches[i].rm_eo - matches[i].rm_so, 1,
543				    stdout);
544			if (color)
545				fprintf(stdout, "\33[m\33[K");
546			a = matches[i].rm_eo;
547			if (oflag)
548				putchar('\n');
549		}
550		if (!oflag) {
551			if (line->len - a > 0)
552				fwrite(line->dat + a, line->len - a, 1, stdout);
553			putchar('\n');
554		}
555	} else {
556		fwrite(line->dat, line->len, 1, stdout);
557		putchar(fileeol);
558	}
559}
560