file.c revision 211496
1/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 *    notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $");
33
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37
38#include <bzlib.h>
39#include <err.h>
40#include <errno.h>
41#include <fcntl.h>
42#include <stddef.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <wchar.h>
47#include <wctype.h>
48#include <zlib.h>
49
50#include "grep.h"
51
/* Size in bytes of the static read buffer filled by grep_refill(). */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra slack requested whenever grep_fgetln() grows the line buffer. */
#define	LNBUFBUMP	80

/*
 * Decompressor handles for the file currently being read.  They are set in
 * grep_file_init() when filebehave selects gzip or bzip2 input.
 */
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;

/*
 * Read buffer shared by all files: bufpos points at the next unconsumed
 * byte inside buffer and bufrem counts the unconsumed bytes that follow it.
 */
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Heap-allocated line buffer, used by grep_fgetln() for lines that do not
 * end inside the current contents of the read buffer.  lnbuflen is its
 * allocated size; grep_close() frees it.
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
64
65static inline int
66grep_refill(struct file *f)
67{
68	ssize_t nr;
69	int bzerr;
70
71	bufpos = buffer;
72	bufrem = 0;
73
74	if (filebehave == FILE_GZIP)
75		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
76	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
77		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
78		switch (bzerr) {
79		case BZ_OK:
80		case BZ_STREAM_END:
81			/* No problem, nr will be okay */
82			break;
83		case BZ_DATA_ERROR_MAGIC:
84			/*
85			 * As opposed to gzread(), which simply returns the
86			 * plain file data, if it is not in the correct
87			 * compressed format, BZ2_bzRead() instead aborts.
88			 *
89			 * So, just restart at the beginning of the file again,
90			 * and use plain reads from now on.
91			 */
92			BZ2_bzReadClose(&bzerr, bzbufdesc);
93			bzbufdesc = NULL;
94			if (lseek(f->fd, 0, SEEK_SET) == -1)
95				return (-1);
96			nr = read(f->fd, buffer, MAXBUFSIZ);
97			break;
98		default:
99			/* Make sure we exit with an error */
100			nr = -1;
101		}
102	} else
103		nr = read(f->fd, buffer, MAXBUFSIZ);
104
105	if (nr < 0)
106		return (-1);
107
108	bufrem = nr;
109	return (0);
110}
111
112static inline int
113grep_lnbufgrow(size_t newlen)
114{
115
116	if (lnbuflen < newlen) {
117		lnbuf = grep_realloc(lnbuf, newlen);
118		lnbuflen = newlen;
119	}
120
121	return (0);
122}
123
124char *
125grep_fgetln(struct file *f, size_t *lenp)
126{
127	unsigned char *p;
128	char *ret;
129	size_t len;
130	size_t off;
131	ptrdiff_t diff;
132
133	/* Fill the buffer, if necessary */
134	if (bufrem == 0 && grep_refill(f) != 0)
135		goto error;
136
137	if (bufrem == 0) {
138		/* Return zero length to indicate EOF */
139		*lenp = 0;
140		return (bufpos);
141	}
142
143	/* Look for a newline in the remaining part of the buffer */
144	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
145		++p; /* advance over newline */
146		ret = bufpos;
147		len = p - bufpos;
148		bufrem -= len;
149		bufpos = p;
150		*lenp = len;
151		return (ret);
152	}
153
154	/* We have to copy the current buffered data to the line buffer */
155	for (len = bufrem, off = 0; ; len += bufrem) {
156		/* Make sure there is room for more data */
157		if (grep_lnbufgrow(len + LNBUFBUMP))
158			goto error;
159		memcpy(lnbuf + off, bufpos, len - off);
160		off = len;
161		if (grep_refill(f) != 0)
162			goto error;
163		if (bufrem == 0)
164			/* EOF: return partial line */
165			break;
166		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
167			continue;
168		/* got it: finish up the line (like code above) */
169		++p;
170		diff = p - bufpos;
171		len += diff;
172		if (grep_lnbufgrow(len))
173		    goto error;
174		memcpy(lnbuf + off, bufpos, diff);
175		bufrem -= diff;
176		bufpos = p;
177		break;
178	}
179	*lenp = len;
180	return (lnbuf);
181
182error:
183	*lenp = 0;
184	return (NULL);
185}
186
187static inline struct file *
188grep_file_init(struct file *f)
189{
190
191	if (filebehave == FILE_GZIP &&
192	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
193		goto error;
194
195	if (filebehave == FILE_BZIP &&
196	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
197		goto error;
198
199	/* Fill read buffer, also catches errors early */
200	if (grep_refill(f) != 0)
201		goto error;
202
203	/* Check for binary stuff, if necessary */
204	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
205		f->binary = true;
206
207	return (f);
208error:
209	close(f->fd);
210	free(f);
211	return (NULL);
212}
213
214/*
215 * Opens a file for processing.
216 */
217struct file *
218grep_open(const char *path)
219{
220	struct file *f;
221
222	f = grep_malloc(sizeof *f);
223	memset(f, 0, sizeof *f);
224	if (path == NULL) {
225		/* Processing stdin implies --line-buffered. */
226		lbflag = true;
227		f->fd = STDIN_FILENO;
228	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
229		free(f);
230		return (NULL);
231	}
232
233	return (grep_file_init(f));
234}
235
236/*
237 * Closes a file.
238 */
239void
240grep_close(struct file *f)
241{
242
243	close(f->fd);
244
245	/* Reset read buffer and line buffer */
246	bufpos = buffer;
247	bufrem = 0;
248
249	free(lnbuf);
250	lnbuf = NULL;
251	lnbuflen = 0;
252}
253