util.c revision 210622
1/*	$OpenBSD: util.c,v 1.39 2010/07/02 22:18:03 tedu Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/util.c 210622 2010-07-29 18:02:57Z gabor $");
32
33#include <sys/stat.h>
34#include <sys/types.h>
35
36#include <ctype.h>
37#include <err.h>
38#include <errno.h>
39#include <fnmatch.h>
40#include <fts.h>
41#include <libgen.h>
42#include <stdbool.h>
43#include <stdio.h>
44#include <stdlib.h>
45#include <string.h>
46#include <unistd.h>
47#include <wchar.h>
48#include <wctype.h>
49
50#include "grep.h"
51
/*
 * Counter of lines handed to enqueue() as leading context; procfile()
 * zeroes it when a file is opened and procline() zeroes it after a match.
 */
52static int	 linesqueued;
/* Forward declaration; procline() is defined after its caller procfile(). */
53static int	 procline(struct str *l, int);
54
55bool
56file_matching(const char *fname)
57{
58	bool ret;
59
60	ret = finclude ? false : true;
61
62	for (unsigned int i = 0; i < fpatterns; ++i) {
63		if (fnmatch(fpattern[i].pat,
64		    fname, 0) == 0 || fnmatch(fpattern[i].pat,
65		    basename(fname), 0) == 0) {
66			if (fpattern[i].mode == EXCL_PAT)
67				return (false);
68			else
69				ret = true;
70		}
71	}
72	return (ret);
73}
74
75bool
76dir_matching(const char *dname)
77{
78	bool ret;
79
80	ret = dinclude ? false : true;
81
82	for (unsigned int i = 0; i < dpatterns; ++i) {
83		if (dname != NULL &&
84		    fnmatch(dname, dpattern[i].pat, 0) == 0) {
85			if (dpattern[i].mode == EXCL_PAT)
86				return (false);
87			else
88				ret = true;
89		}
90	}
91	return (ret);
92}
93
94/*
95 * Processes a directory when a recursive search is performed with
96 * the -R option.  Each appropriate file is passed to procfile().
97 */
98int
99grep_tree(char **argv)
100{
101	FTS *fts;
102	FTSENT *p;
103	char *d, *dir = NULL;
104	int c, fts_flags;
105	bool ok;
106
107	c = fts_flags = 0;
108
109	switch(linkbehave) {
110	case LINK_EXPLICIT:
111		fts_flags = FTS_COMFOLLOW;
112		break;
113	case LINK_SKIP:
114		fts_flags = FTS_PHYSICAL;
115		break;
116	default:
117		fts_flags = FTS_LOGICAL;
118
119	}
120
121	fts_flags |= FTS_NOSTAT | FTS_NOCHDIR;
122
123	if (!(fts = fts_open(argv, fts_flags, NULL)))
124		err(2, "fts_open");
125	while ((p = fts_read(fts)) != NULL) {
126		switch (p->fts_info) {
127		case FTS_DNR:
128			/* FALLTHROUGH */
129		case FTS_ERR:
130			errx(2, "%s: %s", p->fts_path, strerror(p->fts_errno));
131			break;
132		case FTS_D:
133			/* FALLTHROUGH */
134		case FTS_DP:
135			break;
136		case FTS_DC:
137			/* Print a warning for recursive directory loop */
138			warnx("warning: %s: recursive directory loop",
139				p->fts_path);
140			break;
141		default:
142			/* Check for file exclusion/inclusion */
143			ok = true;
144			if (dexclude || dinclude) {
145				if ((d = strrchr(p->fts_path, '/')) != NULL) {
146					dir = grep_malloc(sizeof(char) *
147					    (d - p->fts_path + 2));
148					strlcpy(dir, p->fts_path,
149					    (d - p->fts_path + 1));
150				}
151				ok = dir_matching(dir);
152				free(dir);
153				dir = NULL;
154			}
155			if (fexclude || finclude)
156				ok &= file_matching(p->fts_path);
157
158			if (ok)
159				c += procfile(p->fts_path);
160			break;
161		}
162	}
163
164	fts_close(fts);
165	return (c);
166}
167
168/*
169 * Opens a file and processes it.  Each file is processed line-by-line
170 * passing the lines to procline().
171 */
172int
173procfile(const char *fn)
174{
175	struct file *f;
176	struct stat sb;
177	struct str ln;
178	mode_t s;
179	int c, t;
180
181	if (mflag && (mcount <= 0))
182		return (0);
183
184	if (strcmp(fn, "-") == 0) {
185		fn = label != NULL ? label : getstr(1);
186		f = grep_stdin_open();
187	} else {
188		if (!stat(fn, &sb)) {
189			/* Check if we need to process the file */
190			s = sb.st_mode & S_IFMT;
191			if (s == S_IFDIR && dirbehave == DIR_SKIP)
192				return (0);
193			if ((s == S_IFIFO || s == S_IFCHR || s == S_IFBLK
194				|| s == S_IFSOCK) && devbehave == DEV_SKIP)
195					return (0);
196		}
197		f = grep_open(fn);
198	}
199	if (f == NULL) {
200		if (!sflag)
201			warn("%s", fn);
202		if (errno == ENOENT)
203			notfound = true;
204		return (0);
205	}
206
207	ln.file = grep_malloc(strlen(fn) + 1);
208	strcpy(ln.file, fn);
209	ln.line_no = 0;
210	ln.len = 0;
211	linesqueued = 0;
212	tail = 0;
213	ln.off = -1;
214
215	for (c = 0;  c == 0 || !(lflag || qflag); ) {
216		ln.off += ln.len + 1;
217		if ((ln.dat = grep_fgetln(f, &ln.len)) == NULL) {
218			if (ln.line_no == 0 && matchall)
219				exit(0);
220			else
221				break;
222		}
223		if (ln.len > 0 && ln.dat[ln.len - 1] == '\n')
224			--ln.len;
225		ln.line_no++;
226
227		/* Return if we need to skip a binary file */
228		if (f->binary && binbehave == BINFILE_SKIP) {
229			grep_close(f);
230			free(ln.file);
231			free(f);
232			return (0);
233		}
234		/* Process the file line-by-line */
235		if ((t = procline(&ln, f->binary)) == 0 && Bflag > 0) {
236			enqueue(&ln);
237			linesqueued++;
238		}
239		c += t;
240
241		/* Count the matches if we have a match limit */
242		if (mflag) {
243			mcount -= t;
244			if (mcount <= 0)
245				break;
246		}
247	}
248	if (Bflag > 0)
249		clearqueue();
250	grep_close(f);
251
252	if (cflag) {
253		if (!hflag)
254			printf("%s:", ln.file);
255		printf("%u\n", c);
256	}
257	if (lflag && !qflag && c != 0)
258		printf("%s\n", fn);
259	if (Lflag && !qflag && c == 0)
260		printf("%s\n", fn);
261	if (c && !cflag && !lflag && !Lflag &&
262	    binbehave == BINFILE_BIN && f->binary && !qflag)
263		printf(getstr(8), fn);
264
265	free(ln.file);
266	free(f);
267	return (c);
268}
269
270#define iswword(x)	(iswalnum((x)) || (x) == L'_')
271
272/*
273 * Processes a line comparing it with the specified patterns.  Each pattern
274 * is looped to be compared along with the full string, saving each and every
275 * match, which is necessary to colorize the output and to count the
276 * matches.  The matching lines are passed to printline() to display the
277 * appropriate output.
278 */
279static int
280procline(struct str *l, int nottext)
281{
282	regmatch_t matches[MAX_LINE_MATCHES];
283	regmatch_t pmatch;
284	size_t st = 0;
285	unsigned int i;
286	int c = 0, m = 0, r = 0;
287
288	if (!matchall) {
289		/* Loop to process the whole line */
290		while (st <= l->len) {
291			pmatch.rm_so = st;
292			pmatch.rm_eo = l->len;
293
294			/* Loop to compare with all the patterns */
295			for (i = 0; i < patterns; i++) {
296/*
297 * XXX: grep_search() is a workaround for speed up and should be
298 * removed in the future.  See fastgrep.c.
299 */
300				if (fg_pattern[i].pattern) {
301					r = grep_search(&fg_pattern[i],
302					    (unsigned char *)l->dat,
303					    l->len, &pmatch);
304					r = (r == 0) ? 0 : REG_NOMATCH;
305					st = pmatch.rm_eo;
306				} else {
307					r = regexec(&r_pattern[i], l->dat, 1,
308					    &pmatch, eflags);
309					r = (r == 0) ? 0 : REG_NOMATCH;
310					st = pmatch.rm_eo;
311				}
312				if (r == REG_NOMATCH)
313					continue;
314				/* Check for full match */
315				if (r == 0 && xflag)
316					if (pmatch.rm_so != 0 ||
317					    (size_t)pmatch.rm_eo != l->len)
318						r = REG_NOMATCH;
319				/* Check for whole word match */
320				if (r == 0 && wflag && pmatch.rm_so != 0 &&
321				    (size_t)pmatch.rm_eo != l->len) {
322					wchar_t *wbegin;
323					wint_t wend;
324					size_t size;
325
326					size = mbstowcs(NULL, l->dat,
327					    pmatch.rm_so);
328
329					if (size == ((size_t) - 1))
330						r = REG_NOMATCH;
331					else {
332						wbegin = grep_malloc(size);
333						if (mbstowcs(wbegin, l->dat,
334						    pmatch.rm_so) == ((size_t) - 1))
335							r = REG_NOMATCH;
336						else if (sscanf(&l->dat[pmatch.rm_eo],
337						    "%lc", &wend) != 1)
338							r = REG_NOMATCH;
339						else if (iswword(wbegin[wcslen(wbegin)]) ||
340						    iswword(wend))
341							r = REG_NOMATCH;
342						free(wbegin);
343					}
344				}
345				if (r == 0) {
346					if (m == 0)
347						c++;
348					if (m < MAX_LINE_MATCHES)
349						matches[m++] = pmatch;
350					/* matches - skip further patterns */
351					if ((color != NULL && !oflag) || qflag || lflag)
352						break;
353				}
354			}
355
356			if (vflag) {
357				c = !c;
358				break;
359			}
360			/* One pass if we are not recording matches */
361			if ((color != NULL && !oflag) || qflag || lflag)
362				break;
363
364			if (st == (size_t)pmatch.rm_so)
365				break; 	/* No matches */
366		}
367	} else
368		c = !vflag;
369
370	if (c && binbehave == BINFILE_BIN && nottext)
371		return (c); /* Binary file */
372
373	/* Dealing with the context */
374	if ((tail || c) && !cflag && !qflag && !lflag && !Lflag) {
375		if (c) {
376			if (!first && !prev && !tail && Aflag)
377				printf("--\n");
378			tail = Aflag;
379			if (Bflag > 0) {
380				if (!first && !prev)
381					printf("--\n");
382				printqueue();
383			}
384			linesqueued = 0;
385			printline(l, ':', matches, m);
386		} else {
387			printline(l, '-', matches, m);
388			tail--;
389		}
390	}
391
392	if (c) {
393		prev = true;
394		first = false;
395	} else
396		prev = false;
397
398	return (c);
399}
400
/*
 * malloc() wrapper that never returns NULL: if the allocation fails the
 * program is terminated through err(2, ...).
 */
void *
grep_malloc(size_t size)
{
	void *mem;

	mem = malloc(size);
	if (mem == NULL)
		err(2, "malloc");
	return (mem);
}
413
/*
 * calloc() wrapper that never returns NULL: if the allocation fails the
 * program is terminated through err(2, ...).
 */
void *
grep_calloc(size_t nmemb, size_t size)
{
	void *mem;

	mem = calloc(nmemb, size);
	if (mem == NULL)
		err(2, "calloc");
	return (mem);
}
426
/*
 * realloc() wrapper that never returns NULL: if the reallocation fails
 * the program is terminated through err(2, ...).
 */
void *
grep_realloc(void *ptr, size_t size)
{
	void *resized;

	resized = realloc(ptr, size);
	if (resized == NULL)
		err(2, "realloc");
	return (resized);
}
438
/*
 * strdup() wrapper that never returns NULL: if the copy cannot be
 * allocated the program is terminated through err(2, ...).
 */
char *
grep_strdup(const char *str)
{
	char *copy;

	copy = strdup(str);
	if (copy == NULL)
		err(2, "strdup");
	return (copy);
}
451
452/*
453 * Prints a matching line according to the command line options.
454 */
455void
456printline(struct str *line, int sep, regmatch_t *matches, int m)
457{
458	size_t a = 0;
459	int i, n = 0;
460
461	if (!hflag) {
462		if (nullflag == 0)
463			fputs(line->file, stdout);
464		else {
465			printf("%s", line->file);
466			putchar(0);
467		}
468		++n;
469	}
470	if (nflag) {
471		if (n > 0)
472			putchar(sep);
473		printf("%d", line->line_no);
474		++n;
475	}
476	if (bflag) {
477		if (n > 0)
478			putchar(sep);
479		printf("%lld", (long long)line->off);
480		++n;
481	}
482	if (n)
483		putchar(sep);
484	/* --color and -o */
485	if ((oflag || color) && m > 0) {
486		for (i = 0; i < m; i++) {
487			if (!oflag)
488				fwrite(line->dat + a, matches[i].rm_so - a, 1,
489				    stdout);
490			if (color)
491				fprintf(stdout, "\33[%sm\33[K", color);
492
493				fwrite(line->dat + matches[i].rm_so,
494				    matches[i].rm_eo - matches[i].rm_so, 1,
495				    stdout);
496			if (color)
497				fprintf(stdout, "\33[m\33[K");
498			a = matches[i].rm_eo;
499			if (oflag)
500				putchar('\n');
501		}
502		if (!oflag) {
503			if (line->len - a > 0)
504				fwrite(line->dat + a, line->len - a, 1, stdout);
505			putchar('\n');
506		}
507	} else {
508		fwrite(line->dat, line->len, 1, stdout);
509		putchar('\n');
510	}
511}
512