util.c revision 270132
1220422Sgabor/*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2220422Sgabor/*	$FreeBSD: head/usr.bin/grep/util.c 270132 2014-08-18 12:29:28Z gabor $	*/
3210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4210389Sgabor
5210389Sgabor/*-
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8210389Sgabor * All rights reserved.
9210389Sgabor *
10210389Sgabor * Redistribution and use in source and binary forms, with or without
11210389Sgabor * modification, are permitted provided that the following conditions
12210389Sgabor * are met:
13210389Sgabor * 1. Redistributions of source code must retain the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer.
15210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
16210389Sgabor *    notice, this list of conditions and the following disclaimer in the
17210389Sgabor *    documentation and/or other materials provided with the distribution.
18210389Sgabor *
19210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29210389Sgabor * SUCH DAMAGE.
30210389Sgabor */
31210389Sgabor
32210389Sgabor#include <sys/cdefs.h>
33210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 270132 2014-08-18 12:29:28Z gabor $");
34210389Sgabor
35210389Sgabor#include <sys/stat.h>
36210389Sgabor#include <sys/types.h>
37210389Sgabor
38210389Sgabor#include <ctype.h>
39210389Sgabor#include <err.h>
40210389Sgabor#include <errno.h>
41210389Sgabor#include <fnmatch.h>
42210389Sgabor#include <fts.h>
43210389Sgabor#include <libgen.h>
44210578Sgabor#include <stdbool.h>
45210389Sgabor#include <stdio.h>
46210389Sgabor#include <stdlib.h>
47210389Sgabor#include <string.h>
48210389Sgabor#include <unistd.h>
49210389Sgabor#include <wchar.h>
50210389Sgabor#include <wctype.h>
51210389Sgabor
52226035Sgabor#include "fastmatch.h"
53210389Sgabor#include "grep.h"
54210389Sgabor
/*
 * Count of lines handed to enqueue() since the start of the current file
 * or since the last matching line was printed.
 */
static int	 linesqueued;
/* Tests one line against every pattern; defined further down. */
static int	 procline(struct str *l, int nottext);
57210389Sgabor
58210578Sgaborbool
59210578Sgaborfile_matching(const char *fname)
60210578Sgabor{
61220421Sgabor	char *fname_base;
62210578Sgabor	bool ret;
63210578Sgabor
64210578Sgabor	ret = finclude ? false : true;
65220421Sgabor	fname_base = basename(fname);
66210578Sgabor
67210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
68220421Sgabor		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
69220421Sgabor		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
70210578Sgabor			if (fpattern[i].mode == EXCL_PAT)
71210578Sgabor				return (false);
72210578Sgabor			else
73210578Sgabor				ret = true;
74210578Sgabor		}
75210578Sgabor	}
76210578Sgabor	return (ret);
77210578Sgabor}
78210578Sgabor
79211364Sgaborstatic inline bool
80210578Sgabordir_matching(const char *dname)
81210578Sgabor{
82210578Sgabor	bool ret;
83210578Sgabor
84210578Sgabor	ret = dinclude ? false : true;
85210578Sgabor
86210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
87210578Sgabor		if (dname != NULL &&
88224938Sgabor		    fnmatch(dpattern[i].pat, dname, 0) == 0) {
89210578Sgabor			if (dpattern[i].mode == EXCL_PAT)
90210578Sgabor				return (false);
91210578Sgabor			else
92210578Sgabor				ret = true;
93210578Sgabor		}
94210578Sgabor	}
95210578Sgabor	return (ret);
96210578Sgabor}
97210578Sgabor
98210389Sgabor/*
99210389Sgabor * Processes a directory when a recursive search is performed with
100210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
101210389Sgabor */
102210389Sgaborint
103210389Sgaborgrep_tree(char **argv)
104210389Sgabor{
105210389Sgabor	FTS *fts;
106210389Sgabor	FTSENT *p;
107210389Sgabor	int c, fts_flags;
108210389Sgabor	bool ok;
109210389Sgabor
110210389Sgabor	c = fts_flags = 0;
111210389Sgabor
112210389Sgabor	switch(linkbehave) {
113210389Sgabor	case LINK_EXPLICIT:
114210389Sgabor		fts_flags = FTS_COMFOLLOW;
115210389Sgabor		break;
116210389Sgabor	case LINK_SKIP:
117210389Sgabor		fts_flags = FTS_PHYSICAL;
118210389Sgabor		break;
119210389Sgabor	default:
120210389Sgabor		fts_flags = FTS_LOGICAL;
121210389Sgabor
122210389Sgabor	}
123210389Sgabor
124210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
125210389Sgabor
126210389Sgabor	if (!(fts = fts_open(argv, fts_flags, NULL)))
127210430Sdelphij		err(2, "fts_open");
128210389Sgabor	while ((p = fts_read(fts)) != NULL) {
129210389Sgabor		switch (p->fts_info) {
130210389Sgabor		case FTS_DNR:
131210389Sgabor			/* FALLTHROUGH */
132210389Sgabor		case FTS_ERR:
133228319Sgabor			file_err = true;
134228097Sgabor			if(!sflag)
135228097Sgabor				warnx("%s: %s", p->fts_path, strerror(p->fts_errno));
136210389Sgabor			break;
137210389Sgabor		case FTS_D:
138210389Sgabor			/* FALLTHROUGH */
139210389Sgabor		case FTS_DP:
140224938Sgabor			if (dexclude || dinclude)
141224938Sgabor				if (!dir_matching(p->fts_name) ||
142224938Sgabor				    !dir_matching(p->fts_path))
143224938Sgabor					fts_set(fts, p, FTS_SKIP);
144210389Sgabor			break;
145210389Sgabor		case FTS_DC:
146210389Sgabor			/* Print a warning for recursive directory loop */
147210389Sgabor			warnx("warning: %s: recursive directory loop",
148210389Sgabor				p->fts_path);
149210389Sgabor			break;
150210389Sgabor		default:
151210389Sgabor			/* Check for file exclusion/inclusion */
152210389Sgabor			ok = true;
153210578Sgabor			if (fexclude || finclude)
154210578Sgabor				ok &= file_matching(p->fts_path);
155210389Sgabor
156210389Sgabor			if (ok)
157210389Sgabor				c += procfile(p->fts_path);
158210389Sgabor			break;
159210389Sgabor		}
160210389Sgabor	}
161210389Sgabor
162210430Sdelphij	fts_close(fts);
163210389Sgabor	return (c);
164210389Sgabor}
165210389Sgabor
166210389Sgabor/*
167210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
168210389Sgabor * passing the lines to procline().
169210389Sgabor */
170210389Sgaborint
171210389Sgaborprocfile(const char *fn)
172210389Sgabor{
173210389Sgabor	struct file *f;
174210389Sgabor	struct stat sb;
175210389Sgabor	struct str ln;
176210389Sgabor	mode_t s;
177210389Sgabor	int c, t;
178210389Sgabor
179244493Seadler	mcount = mlimit;
180210389Sgabor
181210389Sgabor	if (strcmp(fn, "-") == 0) {
182210389Sgabor		fn = label != NULL ? label : getstr(1);
183211463Sgabor		f = grep_open(NULL);
184210389Sgabor	} else {
185210389Sgabor		if (!stat(fn, &sb)) {
186210389Sgabor			/* Check if we need to process the file */
187210389Sgabor			s = sb.st_mode & S_IFMT;
188210389Sgabor			if (s == S_IFDIR && dirbehave == DIR_SKIP)
189210389Sgabor				return (0);
190210389Sgabor			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
191210389Sgabor				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
192210389Sgabor					return (0);
193210389Sgabor		}
194210389Sgabor		f = grep_open(fn);
195210389Sgabor	}
196210389Sgabor	if (f == NULL) {
197228319Sgabor		file_err = true;
198210389Sgabor		if (!sflag)
199210389Sgabor			warn("%s", fn);
200210389Sgabor		return (0);
201210389Sgabor	}
202210389Sgabor
203210389Sgabor	ln.file = grep_malloc(strlen(fn) + 1);
204210389Sgabor	strcpy(ln.file, fn);
205210389Sgabor	ln.line_no = 0;
206210389Sgabor	ln.len = 0;
207210389Sgabor	linesqueued = 0;
208210389Sgabor	tail = 0;
209210389Sgabor	ln.off = -1;
210210389Sgabor
211210389Sgabor	for (c = 0;  c == 0 || !(lflag || qflag); ) {
212210389Sgabor		ln.off += ln.len + 1;
213211463Sgabor		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
214210389Sgabor			if (ln.line_no == 0 && matchall)
215210389Sgabor				exit(0);
216210389Sgabor			else
217210389Sgabor				break;
218210389Sgabor		}
219210389Sgabor		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
220210389Sgabor			--ln.len;
221210389Sgabor		ln.line_no++;
222210389Sgabor
223210389Sgabor		/* Return if we need to skip a binary file */
224210389Sgabor		if (f->binary && binbehave == BINFILE_SKIP) {
225210389Sgabor			grep_close(f);
226210430Sdelphij			free(ln.file);
227210389Sgabor			free(f);
228210389Sgabor			return (0);
229210389Sgabor		}
230210389Sgabor		/* Process the file line-by-line */
231210389Sgabor		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
232210389Sgabor			enqueue(&ln);
233210389Sgabor			linesqueued++;
234210389Sgabor		}
235210389Sgabor		c += t;
236226273Sgabor		if (mflag && mcount <= 0)
237226035Sgabor			break;
238210389Sgabor	}
239210389Sgabor	if (Bflag > 0)
240210389Sgabor		clearqueue();
241210389Sgabor	grep_close(f);
242210389Sgabor
243210389Sgabor	if (cflag) {
244210389Sgabor		if (!hflag)
245210389Sgabor			printf("%s:", ln.file);
246210389Sgabor		printf("%u\n", c);
247210389Sgabor	}
248210461Sgabor	if (lflag && !qflag && c != 0)
249228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
250210461Sgabor	if (Lflag && !qflag && c == 0)
251228093Sgabor		printf("%s%c", fn, nullflag ? 0 : '\n');
252210389Sgabor	if (c && !cflag && !lflag && !Lflag &&
253210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
254210622Sgabor		printf(getstr(8), fn);
255210389Sgabor
256210430Sdelphij	free(ln.file);
257210389Sgabor	free(f);
258210389Sgabor	return (c);
259210389Sgabor}
260210389Sgabor
/*
 * Nonzero when wc is a "word" character: alphanumeric according to
 * iswalnum(3), or an underscore.  Written as a function rather than a
 * macro so that the argument is evaluated exactly once.
 */
static inline int
iswword(wint_t wc)
{

	return (iswalnum(wc) || wc == (wint_t)L'_');
}
262210389Sgabor
263210389Sgabor/*
264210389Sgabor * Processes a line comparing it with the specified patterns.  Each pattern
265210389Sgabor * is looped to be compared along with the full string, saving each and every
266210389Sgabor * match, which is necessary to colorize the output and to count the
267210389Sgabor * matches.  The matching lines are passed to printline() to display the
268210389Sgabor * appropriate output.
269210389Sgabor */
270220421Sgaborstatic int
271210389Sgaborprocline(struct str *l, int nottext)
272210389Sgabor{
273210389Sgabor	regmatch_t matches[MAX_LINE_MATCHES];
274210389Sgabor	regmatch_t pmatch;
275210389Sgabor	size_t st = 0;
276210389Sgabor	unsigned int i;
277210389Sgabor	int c = 0, m = 0, r = 0;
278210389Sgabor
279226035Sgabor	/* Loop to process the whole line */
280226035Sgabor	while (st <= l->len) {
281226035Sgabor		pmatch.rm_so = st;
282226035Sgabor		pmatch.rm_eo = l->len;
283210389Sgabor
284226035Sgabor		/* Loop to compare with all the patterns */
285226035Sgabor		for (i = 0; i < patterns; i++) {
286226035Sgabor			if (fg_pattern[i].pattern)
287226035Sgabor				r = fastexec(&fg_pattern[i],
288226035Sgabor				    l->dat, 1, &pmatch, eflags);
289226035Sgabor			else
290226035Sgabor				r = regexec(&r_pattern[i], l->dat, 1,
291226035Sgabor				    &pmatch, eflags);
292226035Sgabor			r = (r == 0) ? 0 : REG_NOMATCH;
293226035Sgabor			st = (cflags & REG_NOSUB)
294226035Sgabor				? (size_t)l->len
295226035Sgabor				: (size_t)pmatch.rm_eo;
296226035Sgabor			if (r == REG_NOMATCH)
297226035Sgabor				continue;
298226035Sgabor			/* Check for full match */
299226035Sgabor			if (r == 0 && xflag)
300226035Sgabor				if (pmatch.rm_so != 0 ||
301226035Sgabor				    (size_t)pmatch.rm_eo != l->len)
302226035Sgabor					r = REG_NOMATCH;
303226035Sgabor			/* Check for whole word match */
304226035Sgabor			if (r == 0 && (wflag || fg_pattern[i].word)) {
305268798Spfg				wchar_t wbegin, wend;
306210389Sgabor
307226035Sgabor				wbegin = wend = L' ';
308226035Sgabor				if (pmatch.rm_so != 0 &&
309226035Sgabor				    sscanf(&l->dat[pmatch.rm_so - 1],
310226035Sgabor				    "%lc", &wbegin) != 1)
311226035Sgabor					r = REG_NOMATCH;
312226035Sgabor				else if ((size_t)pmatch.rm_eo !=
313226035Sgabor				    l->len &&
314226035Sgabor				    sscanf(&l->dat[pmatch.rm_eo],
315226035Sgabor				    "%lc", &wend) != 1)
316226035Sgabor					r = REG_NOMATCH;
317226035Sgabor				else if (iswword(wbegin) ||
318226035Sgabor				    iswword(wend))
319226035Sgabor					r = REG_NOMATCH;
320210389Sgabor			}
321226035Sgabor			if (r == 0) {
322226035Sgabor				if (m == 0)
323226035Sgabor					c++;
324226035Sgabor				if (m < MAX_LINE_MATCHES)
325226035Sgabor					matches[m++] = pmatch;
326226035Sgabor				/* matches - skip further patterns */
327226035Sgabor				if ((color == NULL && !oflag) ||
328226035Sgabor				    qflag || lflag)
329226035Sgabor					break;
330210389Sgabor			}
331226035Sgabor		}
332210389Sgabor
333226035Sgabor		if (vflag) {
334226035Sgabor			c = !c;
335226035Sgabor			break;
336210389Sgabor		}
337210389Sgabor
338226035Sgabor		/* One pass if we are not recording matches */
339270132Sgabor		if (!wflag && ((color == NULL && !oflag) || qflag || lflag || Lflag))
340226035Sgabor			break;
341226035Sgabor
342226035Sgabor		if (st == (size_t)pmatch.rm_so)
343226035Sgabor			break; 	/* No matches */
344226035Sgabor	}
345226035Sgabor
346226035Sgabor
347226035Sgabor	/* Count the matches if we have a match limit */
348226035Sgabor	if (mflag)
349226035Sgabor		mcount -= c;
350226035Sgabor
351210389Sgabor	if (c && binbehave == BINFILE_BIN && nottext)
352210389Sgabor		return (c); /* Binary file */
353210389Sgabor
354210389Sgabor	/* Dealing with the context */
355210479Sgabor	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
356210389Sgabor		if (c) {
357210389Sgabor			if (!first && !prev && !tail && Aflag)
358210389Sgabor				printf("--\n");
359210389Sgabor			tail = Aflag;
360210389Sgabor			if (Bflag > 0) {
361210389Sgabor				if (!first && !prev)
362210389Sgabor					printf("--\n");
363210389Sgabor				printqueue();
364210389Sgabor			}
365210389Sgabor			linesqueued = 0;
366210389Sgabor			printline(l, ':', matches, m);
367210389Sgabor		} else {
368210389Sgabor			printline(l, '-', matches, m);
369210389Sgabor			tail--;
370210389Sgabor		}
371210389Sgabor	}
372210389Sgabor
373210389Sgabor	if (c) {
374210389Sgabor		prev = true;
375210389Sgabor		first = false;
376210389Sgabor	} else
377210389Sgabor		prev = false;
378210389Sgabor
379210389Sgabor	return (c);
380210389Sgabor}
381210389Sgabor
/*
 * Safe malloc() for internal use.
 *
 * Returns a block of `size' bytes obtained from malloc(3).  On failure
 * the program is terminated through err(3) with exit status 2, so callers
 * never see NULL on that path.
 */
void *
grep_malloc(size_t size)
{
	void *ptr;

	ptr = malloc(size);
	if (ptr == NULL)
		err(2, "malloc");
	return (ptr);
}
394210389Sgabor
/*
 * Safe calloc() for internal use.
 *
 * Returns zero-filled storage for `nmemb' objects of `size' bytes each,
 * obtained from calloc(3).  On failure the program is terminated through
 * err(3) with exit status 2.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *ptr;

	ptr = calloc(nmemb, size);
	if (ptr == NULL)
		err(2, "calloc");
	return (ptr);
}
407210389Sgabor
/*
 * Safe realloc() for internal use.
 *
 * Resizes `ptr' to `size' bytes with realloc(3) and returns the (possibly
 * moved) block.  On failure the program is terminated through err(3) with
 * exit status 2, so losing the old pointer on that path is harmless.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized;

	resized = realloc(ptr, size);
	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
419210389Sgabor
/*
 * Safe strdup() for internal use.
 *
 * Returns a freshly allocated copy of `str' made with strdup(3); the
 * caller releases it with free(3).  On failure the program is terminated
 * through err(3) with exit status 2.
 */
char *
grep_strdup(const char *str)
{
	char *ret;

	ret = strdup(str);
	if (ret == NULL)
		err(2, "strdup");
	return (ret);
}
432210578Sgabor
433210578Sgabor/*
434210389Sgabor * Prints a matching line according to the command line options.
435210389Sgabor */
436210389Sgaborvoid
437210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m)
438210389Sgabor{
439210389Sgabor	size_t a = 0;
440210389Sgabor	int i, n = 0;
441210389Sgabor
442210389Sgabor	if (!hflag) {
443228093Sgabor		if (!nullflag) {
444210389Sgabor			fputs(line->file, stdout);
445228093Sgabor			++n;
446228093Sgabor		} else {
447210389Sgabor			printf("%s", line->file);
448210389Sgabor			putchar(0);
449210389Sgabor		}
450210389Sgabor	}
451210389Sgabor	if (nflag) {
452210389Sgabor		if (n > 0)
453210389Sgabor			putchar(sep);
454210389Sgabor		printf("%d", line->line_no);
455210389Sgabor		++n;
456210389Sgabor	}
457210389Sgabor	if (bflag) {
458210389Sgabor		if (n > 0)
459210389Sgabor			putchar(sep);
460210389Sgabor		printf("%lld", (long long)line->off);
461210389Sgabor		++n;
462210389Sgabor	}
463210389Sgabor	if (n)
464210389Sgabor		putchar(sep);
465210389Sgabor	/* --color and -o */
466210389Sgabor	if ((oflag || color) && m > 0) {
467210389Sgabor		for (i = 0; i < m; i++) {
468210389Sgabor			if (!oflag)
469210389Sgabor				fwrite(line->dat + a, matches[i].rm_so - a, 1,
470210389Sgabor				    stdout);
471210389Sgabor			if (color)
472210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
473210389Sgabor
474210389Sgabor				fwrite(line->dat + matches[i].rm_so,
475210389Sgabor				    matches[i].rm_eo - matches[i].rm_so, 1,
476210389Sgabor				    stdout);
477210389Sgabor			if (color)
478210389Sgabor				fprintf(stdout, "\33[m\33[K");
479210389Sgabor			a = matches[i].rm_eo;
480210389Sgabor			if (oflag)
481210389Sgabor				putchar('\n');
482210389Sgabor		}
483210389Sgabor		if (!oflag) {
484210389Sgabor			if (line->len - a > 0)
485210389Sgabor				fwrite(line->dat + a, line->len - a, 1, stdout);
486210389Sgabor			putchar('\n');
487210389Sgabor		}
488210389Sgabor	} else {
489210389Sgabor		fwrite(line->dat, line->len, 1, stdout);
490210389Sgabor		putchar('\n');
491210389Sgabor	}
492210389Sgabor}
493