file.c revision 211364
1/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211364 2010-08-15 22:15:04Z gabor $");
32
33#include <sys/param.h>
34#include <sys/types.h>
35#include <sys/stat.h>
36
37#include <bzlib.h>
38#include <err.h>
39#include <errno.h>
40#include <stdio.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <wchar.h>
45#include <wctype.h>
46#include <zlib.h>
47
48#include "grep.h"
49
static char	 fname[MAXPATHLEN];	/* name of the file being read */

#define		 MAXBUFSIZ	(16 * 1024)	/* largest pre-read size */
/* Fraction of the file size weighed against MAXBUFSIZ for the pre-read. */
#define		 PREREAD_M	0.2

/* Some global variables for the buffering and reading. */
static char	*lnbuf;			/* line handed out by grep_fgetln() */
static size_t	 lnbuflen;		/* allocated size of lnbuf */
static unsigned char *binbuf;		/* pre-read buffer for the binary check */
static int	 binbufsiz;		/* number of bytes stored in binbuf */
unsigned char	*binbufptr;		/* next unread byte of binbuf, or NULL */
static int	 bzerr;			/* status of the last BZ2_bzRead() */

/*
 * True for a wide character that is a control character but is neither
 * whitespace, backspace nor NUL.  Every use of the argument is
 * parenthesized so that an expression argument (for instance a conditional
 * expression) is compared as a whole instead of being split by operator
 * precedence.
 */
#define iswbinary(ch)	(!iswspace((ch)) && iswcntrl((ch)) && \
			    ((ch) != L'\b') && ((ch) != L'\0'))
65
66/*
67 * Returns a single character according to the file type.
68 * Returns -1 on failure.
69 */
70static inline int
71grep_fgetc(struct file *f)
72{
73	unsigned char c;
74
75	switch (filebehave) {
76	case FILE_STDIO:
77		return (getc_unlocked(f->f));
78	case FILE_GZIP:
79		return (gzgetc(f->gzf));
80	case FILE_BZIP:
81		BZ2_bzRead(&bzerr, f->bzf, &c, 1);
82		if (bzerr == BZ_STREAM_END)
83			return (-1);
84		else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK)
85			errx(2, "%s", getstr(2));
86		return (c);
87	}
88	return (-1);
89}
90
91/*
92 * Returns true if the file position is a EOF, returns false
93 * otherwise.
94 */
95static inline int
96grep_feof(struct file *f)
97{
98
99	switch (filebehave) {
100	case FILE_STDIO:
101		return (feof_unlocked(f->f));
102	case FILE_GZIP:
103		return (gzeof(f->gzf));
104	case FILE_BZIP:
105		return (bzerr == BZ_STREAM_END);
106	}
107	return (1);
108}
109
110/*
111 * At the first call, fills in an internal buffer and checks if the given
112 * file is a binary file and sets the binary flag accordingly.  Then returns
113 * a single line and sets len to the length of the returned line.
114 * At any other call returns a single line either from the internal buffer
115 * or from the file if the buffer is exhausted and sets len to the length
116 * of the line.
117 */
118char *
119grep_fgetln(struct file *f, size_t *len)
120{
121	struct stat st;
122	size_t bufsiz, i = 0;
123	int ch = 0;
124
125	/* Fill in the buffer if it is empty. */
126	if (binbufptr == NULL) {
127
128		/* Only pre-read to the buffer if we need the binary check. */
129		if (binbehave != BINFILE_TEXT) {
130			if (f->stdin)
131				st.st_size = MAXBUFSIZ;
132			else if (stat(fname, &st) != 0)
133				err(2, NULL);
134			/* no need to allocate buffer. */
135			if (st.st_size == 0)
136				return (NULL);
137
138			bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
139			    (st.st_size / 2) : MAXBUFSIZ;
140
141			binbuf = grep_malloc(sizeof(char) * bufsiz);
142
143			while (i < bufsiz) {
144				ch = grep_fgetc(f);
145				if (ch == EOF)
146					break;
147				binbuf[i++] = ch;
148				if ((ch == '\n') && lbflag)
149					break;
150			}
151
152			f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
153			    '\0' : '\200', i - 1) != NULL;
154		}
155		binbufsiz = i;
156		binbufptr = binbuf;
157	}
158
159	/* Read a line whether from the buffer or from the file itself. */
160	for (i = 0; !(grep_feof(f) &&
161	    (binbufptr == &binbuf[binbufsiz])); i++) {
162		if (binbufptr == &binbuf[binbufsiz]) {
163			ch = grep_fgetc(f);
164		} else {
165			ch = binbufptr[0];
166			binbufptr++;
167		}
168		if (i >= lnbuflen) {
169			lnbuflen *= 2;
170			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
171		}
172		if ((ch == '\n') || (ch == EOF)) {
173			lnbuf[i] = '\0';
174			break;
175		} else
176			lnbuf[i] = ch;
177	}
178	if (grep_feof(f) && (i == 0) && (ch != '\n'))
179		return (NULL);
180	*len = i;
181	return (lnbuf);
182}
183
184/*
185 * Opens the standard input for processing.
186 */
187struct file *
188grep_stdin_open(void)
189{
190	struct file *f;
191
192	/* Processing stdin implies --line-buffered for tail -f to work. */
193	lbflag = true;
194
195	snprintf(fname, sizeof fname, "%s", getstr(1));
196
197	f = grep_malloc(sizeof *f);
198
199	binbuf = NULL;
200	if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
201		flockfile(f->f);
202		f->stdin = true;
203		return (f);
204	}
205
206	free(f);
207	return (NULL);
208}
209
210/*
211 * Opens a normal, a gzipped or a bzip2 compressed file for processing.
212 */
213struct file *
214grep_open(const char *path)
215{
216	struct file *f;
217
218	snprintf(fname, sizeof fname, "%s", path);
219
220	f = grep_malloc(sizeof *f);
221
222	binbuf = NULL;
223	f->stdin = false;
224	switch (filebehave) {
225	case FILE_STDIO:
226		if ((f->f = fopen(path, "r")) != NULL) {
227			flockfile(f->f);
228			return (f);
229		}
230		break;
231	case FILE_GZIP:
232		if ((f->gzf = gzopen(fname, "r")) != NULL)
233			return (f);
234		break;
235	case FILE_BZIP:
236		if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL)
237			return (f);
238		break;
239	}
240
241	free(f);
242	return (NULL);
243}
244
245/*
246 * Closes a normal, a gzipped or a bzip2 compressed file.
247 */
248void
249grep_close(struct file *f)
250{
251
252	switch (filebehave) {
253	case FILE_STDIO:
254		funlockfile(f->f);
255		fclose(f->f);
256		break;
257	case FILE_GZIP:
258		gzclose(f->gzf);
259		break;
260	case FILE_BZIP:
261		BZ2_bzclose(f->bzf);
262		break;
263	}
264
265	/* Reset read buffer for the file we are closing */
266	binbufptr = NULL;
267	free(binbuf);
268}
269