util.c revision 220422
1220422Sgabor/*	$NetBSD: util.c,v 1.9 2011/02/27 17:33:37 joerg Exp $	*/
2220422Sgabor/*	$FreeBSD: head/usr.bin/grep/util.c 220422 2011-04-07 13:03:35Z gabor $	*/
3210389Sgabor/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
4210389Sgabor
5210389Sgabor/*-
6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7210389Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8210389Sgabor * All rights reserved.
9210389Sgabor *
10210389Sgabor * Redistribution and use in source and binary forms, with or without
11210389Sgabor * modification, are permitted provided that the following conditions
12210389Sgabor * are met:
13210389Sgabor * 1. Redistributions of source code must retain the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer.
15210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
16210389Sgabor *    notice, this list of conditions and the following disclaimer in the
17210389Sgabor *    documentation and/or other materials provided with the distribution.
18210389Sgabor *
19210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29210389Sgabor * SUCH DAMAGE.
30210389Sgabor */
31210389Sgabor
32210389Sgabor#include <sys/cdefs.h>
33210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 220422 2011-04-07 13:03:35Z gabor $");
34210389Sgabor
35210389Sgabor#include <sys/stat.h>
36210389Sgabor#include <sys/types.h>
37210389Sgabor
38210389Sgabor#include <ctype.h>
39210389Sgabor#include <err.h>
40210389Sgabor#include <errno.h>
41210389Sgabor#include <fnmatch.h>
42210389Sgabor#include <fts.h>
43210389Sgabor#include <libgen.h>
44210578Sgabor#include <stdbool.h>
45210389Sgabor#include <stdio.h>
46210389Sgabor#include <stdlib.h>
47210389Sgabor#include <string.h>
48210389Sgabor#include <unistd.h>
49210389Sgabor#include <wchar.h>
50210389Sgabor#include <wctype.h>
51210389Sgabor
52210389Sgabor#include "grep.h"
53210389Sgabor
/*
 * Counter bumped by procfile() each time it enqueues a non-matching line
 * while Bflag is positive; reset to 0 at the start of every file and by
 * procline() whenever a selected line is printed.
 */
static int	 linesqueued;

/* Forward declaration; the definition follows procfile(). */
static int	 procline(struct str *l, int nottext);
56210389Sgabor
57210578Sgaborbool
58210578Sgaborfile_matching(const char *fname)
59210578Sgabor{
60220421Sgabor	char *fname_base;
61210578Sgabor	bool ret;
62210578Sgabor
63210578Sgabor	ret = finclude ? false : true;
64220421Sgabor	fname_base = basename(fname);
65210578Sgabor
66210578Sgabor	for (unsigned int i = 0; i < fpatterns; ++i) {
67220421Sgabor		if (fnmatch(fpattern[i].pat, fname, 0) == 0 ||
68220421Sgabor		    fnmatch(fpattern[i].pat, fname_base, 0) == 0) {
69210578Sgabor			if (fpattern[i].mode == EXCL_PAT)
70210578Sgabor				return (false);
71210578Sgabor			else
72210578Sgabor				ret = true;
73210578Sgabor		}
74210578Sgabor	}
75210578Sgabor	return (ret);
76210578Sgabor}
77210578Sgabor
78211364Sgaborstatic inline bool
79210578Sgabordir_matching(const char *dname)
80210578Sgabor{
81210578Sgabor	bool ret;
82210578Sgabor
83210578Sgabor	ret = dinclude ? false : true;
84210578Sgabor
85210578Sgabor	for (unsigned int i = 0; i < dpatterns; ++i) {
86210578Sgabor		if (dname != NULL &&
87210578Sgabor		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
88210578Sgabor			if (dpattern[i].mode == EXCL_PAT)
89210578Sgabor				return (false);
90210578Sgabor			else
91210578Sgabor				ret = true;
92210578Sgabor		}
93210578Sgabor	}
94210578Sgabor	return (ret);
95210578Sgabor}
96210578Sgabor
97210389Sgabor/*
98210389Sgabor * Processes a directory when a recursive search is performed with
99210389Sgabor * the -R option.  Each appropriate file is passed to procfile().
100210389Sgabor */
101210389Sgaborint
102210389Sgaborgrep_tree(char **argv)
103210389Sgabor{
104210389Sgabor	FTS *fts;
105210389Sgabor	FTSENT *p;
106210430Sdelphij	char *d, *dir = NULL;
107210389Sgabor	int c, fts_flags;
108210389Sgabor	bool ok;
109210389Sgabor
110210389Sgabor	c = fts_flags = 0;
111210389Sgabor
112210389Sgabor	switch(linkbehave) {
113210389Sgabor	case LINK_EXPLICIT:
114210389Sgabor		fts_flags = FTS_COMFOLLOW;
115210389Sgabor		break;
116210389Sgabor	case LINK_SKIP:
117210389Sgabor		fts_flags = FTS_PHYSICAL;
118210389Sgabor		break;
119210389Sgabor	default:
120210389Sgabor		fts_flags = FTS_LOGICAL;
121210389Sgabor
122210389Sgabor	}
123210389Sgabor
124210389Sgabor	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
125210389Sgabor
126210389Sgabor	if (!(fts = fts_open(argv, fts_flags, NULL)))
127210430Sdelphij		err(2, "fts_open");
128210389Sgabor	while ((p = fts_read(fts)) != NULL) {
129210389Sgabor		switch (p->fts_info) {
130210389Sgabor		case FTS_DNR:
131210389Sgabor			/* FALLTHROUGH */
132210389Sgabor		case FTS_ERR:
133210389Sgabor			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
134210389Sgabor			break;
135210389Sgabor		case FTS_D:
136210389Sgabor			/* FALLTHROUGH */
137210389Sgabor		case FTS_DP:
138210389Sgabor			break;
139210389Sgabor		case FTS_DC:
140210389Sgabor			/* Print a warning for recursive directory loop */
141210389Sgabor			warnx("warning: %s: recursive directory loop",
142210389Sgabor				p->fts_path);
143210389Sgabor			break;
144210389Sgabor		default:
145210389Sgabor			/* Check for file exclusion/inclusion */
146210389Sgabor			ok = true;
147210578Sgabor			if (dexclude || dinclude) {
148210430Sdelphij				if ((d = strrchr(p->fts_path, '/')) != NULL) {
149210430Sdelphij					dir = grep_malloc(sizeof(char) *
150210430Sdelphij					    (d - p->fts_path + 1));
151211364Sgabor					memcpy(dir, p->fts_path,
152211364Sgabor					    d - p->fts_path);
153211364Sgabor					dir[d - p->fts_path] = '\0';
154210430Sdelphij				}
155210578Sgabor				ok = dir_matching(dir);
156210430Sdelphij				free(dir);
157210430Sdelphij				dir = NULL;
158210389Sgabor			}
159210578Sgabor			if (fexclude || finclude)
160210578Sgabor				ok &= file_matching(p->fts_path);
161210389Sgabor
162210389Sgabor			if (ok)
163210389Sgabor				c += procfile(p->fts_path);
164210389Sgabor			break;
165210389Sgabor		}
166210389Sgabor	}
167210389Sgabor
168210430Sdelphij	fts_close(fts);
169210389Sgabor	return (c);
170210389Sgabor}
171210389Sgabor
172210389Sgabor/*
173210389Sgabor * Opens a file and processes it.  Each file is processed line-by-line
174210389Sgabor * passing the lines to procline().
175210389Sgabor */
176210389Sgaborint
177210389Sgaborprocfile(const char *fn)
178210389Sgabor{
179210389Sgabor	struct file *f;
180210389Sgabor	struct stat sb;
181210389Sgabor	struct str ln;
182210389Sgabor	mode_t s;
183210389Sgabor	int c, t;
184210389Sgabor
185210389Sgabor	if (mflag && (mcount <= 0))
186210389Sgabor		return (0);
187210389Sgabor
188210389Sgabor	if (strcmp(fn, "-") == 0) {
189210389Sgabor		fn = label != NULL ? label : getstr(1);
190211463Sgabor		f = grep_open(NULL);
191210389Sgabor	} else {
192210389Sgabor		if (!stat(fn, &sb)) {
193210389Sgabor			/* Check if we need to process the file */
194210389Sgabor			s = sb.st_mode & S_IFMT;
195210389Sgabor			if (s == S_IFDIR && dirbehave == DIR_SKIP)
196210389Sgabor				return (0);
197210389Sgabor			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
198210389Sgabor				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
199210389Sgabor					return (0);
200210389Sgabor		}
201210389Sgabor		f = grep_open(fn);
202210389Sgabor	}
203210389Sgabor	if (f == NULL) {
204210389Sgabor		if (!sflag)
205210389Sgabor			warn("%s", fn);
206210389Sgabor		if (errno == ENOENT)
207210389Sgabor			notfound = true;
208210389Sgabor		return (0);
209210389Sgabor	}
210210389Sgabor
211210389Sgabor	ln.file = grep_malloc(strlen(fn) + 1);
212210389Sgabor	strcpy(ln.file, fn);
213210389Sgabor	ln.line_no = 0;
214210389Sgabor	ln.len = 0;
215210389Sgabor	linesqueued = 0;
216210389Sgabor	tail = 0;
217210389Sgabor	ln.off = -1;
218210389Sgabor
219210389Sgabor	for (c = 0;  c == 0 || !(lflag || qflag); ) {
220210389Sgabor		ln.off += ln.len + 1;
221211463Sgabor		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL || ln.len == 0) {
222210389Sgabor			if (ln.line_no == 0 && matchall)
223210389Sgabor				exit(0);
224210389Sgabor			else
225210389Sgabor				break;
226210389Sgabor		}
227210389Sgabor		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
228210389Sgabor			--ln.len;
229210389Sgabor		ln.line_no++;
230210389Sgabor
231210389Sgabor		/* Return if we need to skip a binary file */
232210389Sgabor		if (f->binary && binbehave == BINFILE_SKIP) {
233210389Sgabor			grep_close(f);
234210430Sdelphij			free(ln.file);
235210389Sgabor			free(f);
236210389Sgabor			return (0);
237210389Sgabor		}
238210389Sgabor		/* Process the file line-by-line */
239210389Sgabor		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
240210389Sgabor			enqueue(&ln);
241210389Sgabor			linesqueued++;
242210389Sgabor		}
243210389Sgabor		c += t;
244210389Sgabor
245210389Sgabor		/* Count the matches if we have a match limit */
246210389Sgabor		if (mflag) {
247210389Sgabor			mcount -= t;
248210389Sgabor			if (mcount <= 0)
249210389Sgabor				break;
250210389Sgabor		}
251210389Sgabor	}
252210389Sgabor	if (Bflag > 0)
253210389Sgabor		clearqueue();
254210389Sgabor	grep_close(f);
255210389Sgabor
256210389Sgabor	if (cflag) {
257210389Sgabor		if (!hflag)
258210389Sgabor			printf("%s:", ln.file);
259210389Sgabor		printf("%u\n", c);
260210389Sgabor	}
261210461Sgabor	if (lflag && !qflag && c != 0)
262210389Sgabor		printf("%s\n", fn);
263210461Sgabor	if (Lflag && !qflag && c == 0)
264210389Sgabor		printf("%s\n", fn);
265210389Sgabor	if (c && !cflag && !lflag && !Lflag &&
266210389Sgabor	    binbehave == BINFILE_BIN && f->binary && !qflag)
267210622Sgabor		printf(getstr(8), fn);
268210389Sgabor
269210430Sdelphij	free(ln.file);
270210389Sgabor	free(f);
271210389Sgabor	return (c);
272210389Sgabor}
273210389Sgabor
/*
 * Tells whether a wide character counts as part of a word: anything
 * iswalnum(3) accepts, or an underscore.  Implemented as a function
 * rather than a macro so the argument is evaluated exactly once.
 */
static inline bool
iswword(wint_t x)
{

	return (iswalnum(x) || x == L'_');
}
275210389Sgabor
276210389Sgabor/*
277210389Sgabor * Processes a line comparing it with the specified patterns.  Each pattern
278210389Sgabor * is looped to be compared along with the full string, saving each and every
279210389Sgabor * match, which is necessary to colorize the output and to count the
280210389Sgabor * matches.  The matching lines are passed to printline() to display the
281210389Sgabor * appropriate output.
282210389Sgabor */
283220421Sgaborstatic int
284210389Sgaborprocline(struct str *l, int nottext)
285210389Sgabor{
286210389Sgabor	regmatch_t matches[MAX_LINE_MATCHES];
287210389Sgabor	regmatch_t pmatch;
288210389Sgabor	size_t st = 0;
289210389Sgabor	unsigned int i;
290210389Sgabor	int c = 0, m = 0, r = 0;
291210389Sgabor
292210389Sgabor	if (!matchall) {
293210389Sgabor		/* Loop to process the whole line */
294210389Sgabor		while (st <= l->len) {
295210389Sgabor			pmatch.rm_so = st;
296210389Sgabor			pmatch.rm_eo = l->len;
297210389Sgabor
298210389Sgabor			/* Loop to compare with all the patterns */
299210389Sgabor			for (i = 0; i < patterns; i++) {
300210389Sgabor/*
301210389Sgabor * XXX: grep_search() is a workaround for speed up and should be
302210389Sgabor * removed in the future.  See fastgrep.c.
303210389Sgabor */
304210389Sgabor				if (fg_pattern[i].pattern) {
305210389Sgabor					r = grep_search(&fg_pattern[i],
306210389Sgabor					    (unsigned char *)l->dat,
307210389Sgabor					    l->len, &pmatch);
308210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
309210389Sgabor					st = pmatch.rm_eo;
310210389Sgabor				} else {
311210389Sgabor					r = regexec(&r_pattern[i], l->dat, 1,
312210389Sgabor					    &pmatch, eflags);
313210389Sgabor					r = (r == 0) ? 0 : REG_NOMATCH;
314210389Sgabor					st = pmatch.rm_eo;
315210389Sgabor				}
316210389Sgabor				if (r == REG_NOMATCH)
317210389Sgabor					continue;
318210389Sgabor				/* Check for full match */
319210389Sgabor				if (r == 0 && xflag)
320210389Sgabor					if (pmatch.rm_so != 0 ||
321210389Sgabor					    (size_t)pmatch.rm_eo != l->len)
322210389Sgabor						r = REG_NOMATCH;
323210389Sgabor				/* Check for whole word match */
324220421Sgabor				if (r == 0 && fg_pattern[i].word &&
325220421Sgabor				    pmatch.rm_so != 0) {
326211364Sgabor					wint_t wbegin, wend;
327210389Sgabor
328211364Sgabor					wbegin = wend = L' ';
329211364Sgabor					if (pmatch.rm_so != 0 &&
330211364Sgabor					    sscanf(&l->dat[pmatch.rm_so - 1],
331211364Sgabor					    "%lc", &wbegin) != 1)
332210389Sgabor						r = REG_NOMATCH;
333211364Sgabor					else if ((size_t)pmatch.rm_eo != l->len &&
334211364Sgabor					    sscanf(&l->dat[pmatch.rm_eo],
335211364Sgabor					    "%lc", &wend) != 1)
336211364Sgabor						r = REG_NOMATCH;
337211364Sgabor					else if (iswword(wbegin) || iswword(wend))
338211364Sgabor						r = REG_NOMATCH;
339210389Sgabor				}
340210389Sgabor				if (r == 0) {
341210389Sgabor					if (m == 0)
342210389Sgabor						c++;
343210389Sgabor					if (m < MAX_LINE_MATCHES)
344210389Sgabor						matches[m++] = pmatch;
345210389Sgabor					/* matches - skip further patterns */
346210461Sgabor					if ((color != NULL && !oflag) || qflag || lflag)
347210461Sgabor						break;
348210389Sgabor				}
349210389Sgabor			}
350210389Sgabor
351210389Sgabor			if (vflag) {
352210389Sgabor				c = !c;
353210389Sgabor				break;
354210389Sgabor			}
355210389Sgabor			/* One pass if we are not recording matches */
356210461Sgabor			if ((color != NULL && !oflag) || qflag || lflag)
357210389Sgabor				break;
358210389Sgabor
359210389Sgabor			if (st == (size_t)pmatch.rm_so)
360210389Sgabor				break; 	/* No matches */
361210389Sgabor		}
362210389Sgabor	} else
363210389Sgabor		c = !vflag;
364210389Sgabor
365210389Sgabor	if (c && binbehave == BINFILE_BIN && nottext)
366210389Sgabor		return (c); /* Binary file */
367210389Sgabor
368210389Sgabor	/* Dealing with the context */
369210479Sgabor	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
370210389Sgabor		if (c) {
371210389Sgabor			if (!first && !prev && !tail && Aflag)
372210389Sgabor				printf("--\n");
373210389Sgabor			tail = Aflag;
374210389Sgabor			if (Bflag > 0) {
375210389Sgabor				if (!first && !prev)
376210389Sgabor					printf("--\n");
377210389Sgabor				printqueue();
378210389Sgabor			}
379210389Sgabor			linesqueued = 0;
380210389Sgabor			printline(l, ':', matches, m);
381210389Sgabor		} else {
382210389Sgabor			printline(l, '-', matches, m);
383210389Sgabor			tail--;
384210389Sgabor		}
385210389Sgabor	}
386210389Sgabor
387210389Sgabor	if (c) {
388210389Sgabor		prev = true;
389210389Sgabor		first = false;
390210389Sgabor	} else
391210389Sgabor		prev = false;
392210389Sgabor
393210389Sgabor	return (c);
394210389Sgabor}
395210389Sgabor
/*
 * malloc() wrapper for internal use: a NULL result from malloc() ends the
 * program through err(2, "malloc"), so callers never see NULL.
 */
void *
grep_malloc(size_t size)
{
	void *mem;

	mem = malloc(size);
	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
408210389Sgabor
/*
 * calloc() wrapper for internal use: a NULL result from calloc() ends the
 * program through err(2, "calloc"), so callers never see NULL.
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem;

	mem = calloc(nmemb, size);
	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
421210389Sgabor
/*
 * realloc() wrapper for internal use: a NULL result from realloc() ends
 * the program through err(2, "realloc"); otherwise the (possibly moved)
 * block is returned.
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized;

	resized = realloc(ptr, size);
	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
433210389Sgabor
/*
 * strdup() wrapper for internal use: a NULL result from strdup() ends the
 * program through err(2, "strdup"); otherwise the copy is returned and
 * the caller is responsible for free()ing it.
 */
char *
grep_strdup(const char *str)
{
	char *copy;

	copy = strdup(str);
	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
446210578Sgabor
447210578Sgabor/*
448210389Sgabor * Prints a matching line according to the command line options.
449210389Sgabor */
450210389Sgaborvoid
451210389Sgaborprintline(struct str *line, int sep, regmatch_t *matches, int m)
452210389Sgabor{
453210389Sgabor	size_t a = 0;
454210389Sgabor	int i, n = 0;
455210389Sgabor
456210389Sgabor	if (!hflag) {
457210389Sgabor		if (nullflag == 0)
458210389Sgabor			fputs(line->file, stdout);
459210389Sgabor		else {
460210389Sgabor			printf("%s", line->file);
461210389Sgabor			putchar(0);
462210389Sgabor		}
463210389Sgabor		++n;
464210389Sgabor	}
465210389Sgabor	if (nflag) {
466210389Sgabor		if (n > 0)
467210389Sgabor			putchar(sep);
468210389Sgabor		printf("%d", line->line_no);
469210389Sgabor		++n;
470210389Sgabor	}
471210389Sgabor	if (bflag) {
472210389Sgabor		if (n > 0)
473210389Sgabor			putchar(sep);
474210389Sgabor		printf("%lld", (long long)line->off);
475210389Sgabor		++n;
476210389Sgabor	}
477210389Sgabor	if (n)
478210389Sgabor		putchar(sep);
479210389Sgabor	/* --color and -o */
480210389Sgabor	if ((oflag || color) && m > 0) {
481210389Sgabor		for (i = 0; i < m; i++) {
482210389Sgabor			if (!oflag)
483210389Sgabor				fwrite(line->dat + a, matches[i].rm_so - a, 1,
484210389Sgabor				    stdout);
485210389Sgabor			if (color)
486210389Sgabor				fprintf(stdout, "\33[%sm\33[K", color);
487210389Sgabor
488210389Sgabor				fwrite(line->dat + matches[i].rm_so,
489210389Sgabor				    matches[i].rm_eo - matches[i].rm_so, 1,
490210389Sgabor				    stdout);
491210389Sgabor			if (color)
492210389Sgabor				fprintf(stdout, "\33[m\33[K");
493210389Sgabor			a = matches[i].rm_eo;
494210389Sgabor			if (oflag)
495210389Sgabor				putchar('\n');
496210389Sgabor		}
497210389Sgabor		if (!oflag) {
498210389Sgabor			if (line->len - a > 0)
499210389Sgabor				fwrite(line->dat + a, line->len - a, 1, stdout);
500210389Sgabor			putchar('\n');
501210389Sgabor		}
502210389Sgabor	} else {
503210389Sgabor		fwrite(line->dat, line->len, 1, stdout);
504210389Sgabor		putchar('\n');
505210389Sgabor	}
506210389Sgabor}
507