file.c revision 210389
1210389Sgabor/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/
2210389Sgabor
3210389Sgabor/*-
4210389Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5210389Sgabor * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
6210389Sgabor * All rights reserved.
7210389Sgabor *
8210389Sgabor * Redistribution and use in source and binary forms, with or without
9210389Sgabor * modification, are permitted provided that the following conditions
10210389Sgabor * are met:
11210389Sgabor * 1. Redistributions of source code must retain the above copyright
12210389Sgabor *    notice, this list of conditions and the following disclaimer.
13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright
14210389Sgabor *    notice, this list of conditions and the following disclaimer in the
15210389Sgabor *    documentation and/or other materials provided with the distribution.
16210389Sgabor *
17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20210389Sgabor * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27210389Sgabor * SUCH DAMAGE.
28210389Sgabor */
29210389Sgabor
30210389Sgabor#include <sys/cdefs.h>
31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 210389 2010-07-22 19:11:57Z gabor $");
32210389Sgabor
33210389Sgabor#include <sys/param.h>
34210389Sgabor#include <sys/types.h>
35210389Sgabor#include <sys/stat.h>
36210389Sgabor
37210389Sgabor#include <bzlib.h>
38210389Sgabor#include <err.h>
39210389Sgabor#include <errno.h>
40210389Sgabor#include <stdio.h>
41210389Sgabor#include <stdlib.h>
42210389Sgabor#include <string.h>
43210389Sgabor#include <unistd.h>
44210389Sgabor#include <wchar.h>
45210389Sgabor#include <wctype.h>
46210389Sgabor#include <zlib.h>
47210389Sgabor
48210389Sgabor#include "grep.h"
49210389Sgabor
static char	 fname[MAXPATHLEN];	/* name of the file being read */

#define		 MAXBUFSIZ	(16 * 1024)	/* default pre-read size */
#define		 PREREAD_M	0.2	/* file-size factor compared to MAXBUFSIZ */

/* Some global variables for the buffering and reading. */
static char	*lnbuf;			/* line handed out by grep_fgetln() */
static size_t	 lnbuflen;		/* allocated size of lnbuf */
static unsigned char *binbuf;		/* pre-read data for the binary check */
static int	 binbufsiz;		/* number of bytes stored in binbuf */
unsigned char	*binbufptr;		/* next unread byte of binbuf, or NULL */
static int	 bzerr;			/* status of the last bzip2 read */

/*
 * True for a wide character that is a control character but is neither
 * whitespace, backspace nor NUL.  The argument is evaluated more than
 * once, so it must not have side effects.
 */
#define iswbinary(ch)	(!iswspace((ch)) && iswcntrl((ch)) && \
			    ((ch) != L'\b') && ((ch) != L'\0'))
65210389Sgabor
66210389Sgabor/*
67210389Sgabor * Returns a single character according to the file type.
68210389Sgabor * Returns -1 on failure.
69210389Sgabor */
70210389Sgaborint
71210389Sgaborgrep_fgetc(struct file *f)
72210389Sgabor{
73210389Sgabor	unsigned char c;
74210389Sgabor
75210389Sgabor	switch (filebehave) {
76210389Sgabor	case FILE_STDIO:
77210389Sgabor		return (fgetc(f->f));
78210389Sgabor	case FILE_GZIP:
79210389Sgabor		return (gzgetc(f->gzf));
80210389Sgabor	case FILE_BZIP:
81210389Sgabor		BZ2_bzRead(&bzerr, f->bzf, &c, 1);
82210389Sgabor		if (bzerr == BZ_STREAM_END)
83210389Sgabor			return (-1);
84210389Sgabor		else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK)
85210389Sgabor			errx(2, "%s", getstr(2));
86210389Sgabor		return (c);
87210389Sgabor	}
88210389Sgabor	return (-1);
89210389Sgabor}
90210389Sgabor
91210389Sgabor/*
92210389Sgabor * Returns true if the file position is a EOF, returns false
93210389Sgabor * otherwise.
94210389Sgabor */
95210389Sgaborint
96210389Sgaborgrep_feof(struct file *f)
97210389Sgabor{
98210389Sgabor
99210389Sgabor	switch (filebehave) {
100210389Sgabor	case FILE_STDIO:
101210389Sgabor		return (feof(f->f));
102210389Sgabor	case FILE_GZIP:
103210389Sgabor		return (gzeof(f->gzf));
104210389Sgabor	case FILE_BZIP:
105210389Sgabor		return (bzerr == BZ_STREAM_END);
106210389Sgabor	}
107210389Sgabor	return (1);
108210389Sgabor}
109210389Sgabor
110210389Sgabor/*
111210389Sgabor * At the first call, fills in an internal buffer and checks if the given
112210389Sgabor * file is a binary file and sets the binary flag accordingly.  Then returns
113210389Sgabor * a single line and sets len to the length of the returned line.
114210389Sgabor * At any other call returns a single line either from the internal buffer
115210389Sgabor * or from the file if the buffer is exhausted and sets len to the length
116210389Sgabor * of the line.
117210389Sgabor */
118210389Sgaborchar *
119210389Sgaborgrep_fgetln(struct file *f, size_t *len)
120210389Sgabor{
121210389Sgabor	struct stat st;
122210389Sgabor	size_t bufsiz, i = 0;
123210389Sgabor	int ch = 0;
124210389Sgabor
125210389Sgabor	/* Fill in the buffer if it is empty. */
126210389Sgabor	if (binbufptr == NULL) {
127210389Sgabor
128210389Sgabor		/* Only pre-read to the buffer if we need the binary check. */
129210389Sgabor		if (binbehave != BINFILE_TEXT) {
130210389Sgabor			if (f->stdin)
131210389Sgabor				st.st_size = MAXBUFSIZ;
132210389Sgabor			else if (stat(fname, &st) != 0)
133210389Sgabor				err(2, NULL);
134210389Sgabor
135210389Sgabor			bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
136210389Sgabor			    (st.st_size / 2) : MAXBUFSIZ;
137210389Sgabor
138210389Sgabor			binbuf = grep_malloc(sizeof(char) * bufsiz);
139210389Sgabor
140210389Sgabor			while (i < bufsiz) {
141210389Sgabor				ch = grep_fgetc(f);
142210389Sgabor				if (ch == EOF)
143210389Sgabor					break;
144210389Sgabor				binbuf[i++] = ch;
145210389Sgabor			}
146210389Sgabor
147210389Sgabor			f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
148210389Sgabor			    '\0' : '\200', i - 1) != NULL;
149210389Sgabor		}
150210389Sgabor		binbufsiz = i;
151210389Sgabor		binbufptr = binbuf;
152210389Sgabor	}
153210389Sgabor
154210389Sgabor	/* Read a line whether from the buffer or from the file itself. */
155210389Sgabor	for (i = 0; !(grep_feof(f) &&
156210389Sgabor	    (binbufptr == &binbuf[binbufsiz])); i++) {
157210389Sgabor		if (binbufptr == &binbuf[binbufsiz]) {
158210389Sgabor			ch = grep_fgetc(f);
159210389Sgabor		} else {
160210389Sgabor			ch = binbufptr[0];
161210389Sgabor			binbufptr++;
162210389Sgabor		}
163210389Sgabor		if (i >= lnbuflen) {
164210389Sgabor			lnbuflen *= 2;
165210389Sgabor			lnbuf = grep_realloc(lnbuf, ++lnbuflen);
166210389Sgabor		}
167210389Sgabor		if ((ch == '\n') || (ch == EOF)) {
168210389Sgabor			lnbuf[i] = '\0';
169210389Sgabor			break;
170210389Sgabor		} else
171210389Sgabor			lnbuf[i] = ch;
172210389Sgabor	}
173210389Sgabor	if (grep_feof(f) && (i == 0) && (ch != '\n'))
174210389Sgabor		return (NULL);
175210389Sgabor	*len = i;
176210389Sgabor	return (lnbuf);
177210389Sgabor}
178210389Sgabor
179210389Sgabor/*
180210389Sgabor * Opens the standard input for processing.
181210389Sgabor */
182210389Sgaborstruct file *
183210389Sgaborgrep_stdin_open(void)
184210389Sgabor{
185210389Sgabor	struct file *f;
186210389Sgabor
187210389Sgabor	snprintf(fname, sizeof fname, "%s", getstr(1));
188210389Sgabor
189210389Sgabor	f = grep_malloc(sizeof *f);
190210389Sgabor
191210389Sgabor	if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
192210389Sgabor		f->stdin = true;
193210389Sgabor		return (f);
194210389Sgabor	}
195210389Sgabor
196210389Sgabor	free(f);
197210389Sgabor	return (NULL);
198210389Sgabor}
199210389Sgabor
200210389Sgabor/*
201210389Sgabor * Opens a normal, a gzipped or a bzip2 compressed file for processing.
202210389Sgabor */
203210389Sgaborstruct file *
204210389Sgaborgrep_open(const char *path)
205210389Sgabor{
206210389Sgabor	struct file *f;
207210389Sgabor
208210389Sgabor	snprintf(fname, sizeof fname, "%s", path);
209210389Sgabor
210210389Sgabor	f = grep_malloc(sizeof *f);
211210389Sgabor
212210389Sgabor	f->stdin = false;
213210389Sgabor	switch (filebehave) {
214210389Sgabor	case FILE_STDIO:
215210389Sgabor		if ((f->f = fopen(path, "r")) != NULL)
216210389Sgabor			return (f);
217210389Sgabor		break;
218210389Sgabor	case FILE_GZIP:
219210389Sgabor		if ((f->gzf = gzopen(fname, "r")) != NULL)
220210389Sgabor			return (f);
221210389Sgabor		break;
222210389Sgabor	case FILE_BZIP:
223210389Sgabor		if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL)
224210389Sgabor			return (f);
225210389Sgabor		break;
226210389Sgabor	}
227210389Sgabor
228210389Sgabor	free(f);
229210389Sgabor	return (NULL);
230210389Sgabor}
231210389Sgabor
232210389Sgabor/*
233210389Sgabor * Closes a normal, a gzipped or a bzip2 compressed file.
234210389Sgabor */
235210389Sgaborvoid
236210389Sgaborgrep_close(struct file *f)
237210389Sgabor{
238210389Sgabor
239210389Sgabor	switch (filebehave) {
240210389Sgabor	case FILE_STDIO:
241210389Sgabor		fclose(f->f);
242210389Sgabor		break;
243210389Sgabor	case FILE_GZIP:
244210389Sgabor		gzclose(f->gzf);
245210389Sgabor		break;
246210389Sgabor	case FILE_BZIP:
247210389Sgabor		BZ2_bzclose(f->bzf);
248210389Sgabor		break;
249210389Sgabor	}
250210389Sgabor
251210389Sgabor	/* Reset read buffer for the file we are closing */
252210389Sgabor	binbufptr = NULL;
253210389Sgabor	free(binbuf);
254210389Sgabor
255210389Sgabor}
256