file.c revision 225736
1/*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
2/*	$FreeBSD: stable/9/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $	*/
3/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
34__FBSDID("$FreeBSD: stable/9/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $");
35
36#include <sys/param.h>
37#include <sys/types.h>
38#include <sys/stat.h>
39
40#include <bzlib.h>
41#include <err.h>
42#include <errno.h>
43#include <fcntl.h>
44#include <stddef.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <wchar.h>
49#include <wctype.h>
50#include <zlib.h>
51
52#include "grep.h"
53
/* Size, in bytes, of the static read buffer filled by grep_refill(). */
#define	MAXBUFSIZ	(32 * 1024)
/*
 * Extra bytes requested beyond the current line length each time
 * grep_fgetln() grows the line buffer.
 */
#define	LNBUFBUMP	80

/*
 * Decompression handles.  They are assigned in grep_file_init() when
 * filebehave selects gzip or bzip2 input, and read from in grep_refill().
 */
static gzFile gzbufdesc;
static BZFILE* bzbufdesc;

/*
 * Read buffer shared by every file: bufpos points at the next unconsumed
 * byte in buffer and bufrem counts the unconsumed bytes that follow it.
 */
static unsigned char buffer[MAXBUFSIZ];
static unsigned char *bufpos;
static size_t bufrem;

/*
 * Heap-allocated buffer used by grep_fgetln() to assemble a line that does
 * not fit in what is left of the read buffer; lnbuflen is its allocated
 * size.  Released in grep_close().
 */
static unsigned char *lnbuf;
static size_t lnbuflen;
66
67static inline int
68grep_refill(struct file *f)
69{
70	ssize_t nr;
71	int bzerr;
72
73	bufpos = buffer;
74	bufrem = 0;
75
76	if (filebehave == FILE_GZIP)
77		nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
78	else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
79		nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
80		switch (bzerr) {
81		case BZ_OK:
82		case BZ_STREAM_END:
83			/* No problem, nr will be okay */
84			break;
85		case BZ_DATA_ERROR_MAGIC:
86			/*
87			 * As opposed to gzread(), which simply returns the
88			 * plain file data, if it is not in the correct
89			 * compressed format, BZ2_bzRead() instead aborts.
90			 *
91			 * So, just restart at the beginning of the file again,
92			 * and use plain reads from now on.
93			 */
94			BZ2_bzReadClose(&bzerr, bzbufdesc);
95			bzbufdesc = NULL;
96			if (lseek(f->fd, 0, SEEK_SET) == -1)
97				return (-1);
98			nr = read(f->fd, buffer, MAXBUFSIZ);
99			break;
100		default:
101			/* Make sure we exit with an error */
102			nr = -1;
103		}
104	} else
105		nr = read(f->fd, buffer, MAXBUFSIZ);
106
107	if (nr < 0)
108		return (-1);
109
110	bufrem = nr;
111	return (0);
112}
113
114static inline int
115grep_lnbufgrow(size_t newlen)
116{
117
118	if (lnbuflen < newlen) {
119		lnbuf = grep_realloc(lnbuf, newlen);
120		lnbuflen = newlen;
121	}
122
123	return (0);
124}
125
126char *
127grep_fgetln(struct file *f, size_t *lenp)
128{
129	unsigned char *p;
130	char *ret;
131	size_t len;
132	size_t off;
133	ptrdiff_t diff;
134
135	/* Fill the buffer, if necessary */
136	if (bufrem == 0 && grep_refill(f) != 0)
137		goto error;
138
139	if (bufrem == 0) {
140		/* Return zero length to indicate EOF */
141		*lenp = 0;
142		return (bufpos);
143	}
144
145	/* Look for a newline in the remaining part of the buffer */
146	if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
147		++p; /* advance over newline */
148		ret = bufpos;
149		len = p - bufpos;
150		bufrem -= len;
151		bufpos = p;
152		*lenp = len;
153		return (ret);
154	}
155
156	/* We have to copy the current buffered data to the line buffer */
157	for (len = bufrem, off = 0; ; len += bufrem) {
158		/* Make sure there is room for more data */
159		if (grep_lnbufgrow(len + LNBUFBUMP))
160			goto error;
161		memcpy(lnbuf + off, bufpos, len - off);
162		off = len;
163		if (grep_refill(f) != 0)
164			goto error;
165		if (bufrem == 0)
166			/* EOF: return partial line */
167			break;
168		if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
169			continue;
170		/* got it: finish up the line (like code above) */
171		++p;
172		diff = p - bufpos;
173		len += diff;
174		if (grep_lnbufgrow(len))
175		    goto error;
176		memcpy(lnbuf + off, bufpos, diff);
177		bufrem -= diff;
178		bufpos = p;
179		break;
180	}
181	*lenp = len;
182	return (lnbuf);
183
184error:
185	*lenp = 0;
186	return (NULL);
187}
188
189static inline struct file *
190grep_file_init(struct file *f)
191{
192
193	if (filebehave == FILE_GZIP &&
194	    (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
195		goto error;
196
197	if (filebehave == FILE_BZIP &&
198	    (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
199		goto error;
200
201	/* Fill read buffer, also catches errors early */
202	if (grep_refill(f) != 0)
203		goto error;
204
205	/* Check for binary stuff, if necessary */
206	if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
207		f->binary = true;
208
209	return (f);
210error:
211	close(f->fd);
212	free(f);
213	return (NULL);
214}
215
216/*
217 * Opens a file for processing.
218 */
219struct file *
220grep_open(const char *path)
221{
222	struct file *f;
223
224	f = grep_malloc(sizeof *f);
225	memset(f, 0, sizeof *f);
226	if (path == NULL) {
227		/* Processing stdin implies --line-buffered. */
228		lbflag = true;
229		f->fd = STDIN_FILENO;
230	} else if ((f->fd = open(path, O_RDONLY)) == -1) {
231		free(f);
232		return (NULL);
233	}
234
235	return (grep_file_init(f));
236}
237
238/*
239 * Closes a file.
240 */
241void
242grep_close(struct file *f)
243{
244
245	close(f->fd);
246
247	/* Reset read buffer and line buffer */
248	bufpos = buffer;
249	bufrem = 0;
250
251	free(lnbuf);
252	lnbuf = NULL;
253	lnbuflen = 0;
254}
255