Deleted Added
full compact
file.c (211364) file.c (211463)
1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden (view full) ---

23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/cdefs.h>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright

--- 9 unchanged lines hidden (view full) ---

24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211364 2010-08-15 22:15:04Z gabor $");
32__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211463 2010-08-18 17:40:10Z gabor $");
32
33#include <sys/param.h>
34#include <sys/types.h>
35#include <sys/stat.h>
36
37#include <bzlib.h>
38#include <err.h>
39#include <errno.h>
33
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37
38#include <bzlib.h>
39#include <err.h>
40#include <errno.h>
40#include <stdio.h>
41#include <fcntl.h>
42#include <stddef.h>
41#include <stdlib.h>
42#include <string.h>
43#include <unistd.h>
44#include <wchar.h>
45#include <wctype.h>
46#include <zlib.h>
47
48#include "grep.h"
49
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <wchar.h>
47#include <wctype.h>
48#include <zlib.h>
49
50#include "grep.h"
51
50static char fname[MAXPATHLEN]; /* file name */
52#define MAXBUFSIZ (32 * 1024)
53#define LNBUFBUMP 80
51
54
52#define MAXBUFSIZ (16 * 1024)
53#define PREREAD_M 0.2
55static gzFile gzbufdesc;
56static BZFILE* bzbufdesc;
54
57
55/* Some global variables for the buffering and reading. */
56static char *lnbuf;
57static size_t lnbuflen;
58static unsigned char *binbuf;
59static int binbufsiz;
60unsigned char *binbufptr;
61static int bzerr;
58static unsigned char buffer[MAXBUFSIZ];
59static unsigned char *bufpos;
60static size_t bufrem;
62
61
63#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \
64 (ch != L'\b') && (ch != L'\0'))
62static unsigned char *lnbuf;
63static size_t lnbuflen;
65
64
66/*
67 * Returns a single character according to the file type.
68 * Returns -1 on failure.
69 */
70static inline int
65static inline int
71grep_fgetc(struct file *f)
66grep_refill(struct file *f)
72{
67{
73 unsigned char c;
68 ssize_t nr;
69 int bzerr;
74
70
75 switch (filebehave) {
76 case FILE_STDIO:
77 return (getc_unlocked(f->f));
78 case FILE_GZIP:
79 return (gzgetc(f->gzf));
80 case FILE_BZIP:
81 BZ2_bzRead(&bzerr, f->bzf, &c, 1);
82 if (bzerr == BZ_STREAM_END)
83 return (-1);
84 else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK)
85 errx(2, "%s", getstr(2));
86 return (c);
87 }
88 return (-1);
71 bufpos = buffer;
72 bufrem = 0;
73
74 if (filebehave == FILE_GZIP)
75 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
76 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
77 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
78 switch (bzerr) {
79 case BZ_OK:
80 case BZ_STREAM_END:
81 /* No problem, nr will be okay */
82 break;
83 case BZ_DATA_ERROR_MAGIC:
84 /*
85 * As opposed to gzread(), which simply returns the
86 * plain file data, if it is not in the correct
87 * compressed format, BZ2_bzRead() instead aborts.
88 *
89 * So, just restart at the beginning of the file again,
90 * and use plain reads from now on.
91 */
92 BZ2_bzReadClose(&bzerr, bzbufdesc);
93 bzbufdesc = NULL;
94 if (lseek(f->fd, 0, SEEK_SET) == -1)
95 return (-1);
96 nr = read(f->fd, buffer, MAXBUFSIZ);
97 break;
98 default:
99 /* Make sure we exit with an error */
100 nr = -1;
101 }
102 } else
103 nr = read(f->fd, buffer, MAXBUFSIZ);
104
105 if (nr < 0)
106 return (-1);
107
108 bufrem = nr;
109 return (0);
89}
90
110}
111
91/*
92 * Returns true if the file position is a EOF, returns false
93 * otherwise.
94 */
95static inline int
112static inline int
96grep_feof(struct file *f)
113grep_lnbufgrow(size_t newlen)
97{
98
114{
115
99 switch (filebehave) {
100 case FILE_STDIO:
101 return (feof_unlocked(f->f));
102 case FILE_GZIP:
103 return (gzeof(f->gzf));
104 case FILE_BZIP:
105 return (bzerr == BZ_STREAM_END);
116 if (lnbuflen < newlen) {
117 lnbuf = grep_realloc(lnbuf, newlen);
118 lnbuflen = newlen;
106 }
119 }
107 return (1);
120
121 return (0);
108}
109
122}
123
110/*
111 * At the first call, fills in an internal buffer and checks if the given
112 * file is a binary file and sets the binary flag accordingly. Then returns
113 * a single line and sets len to the length of the returned line.
114 * At any other call returns a single line either from the internal buffer
115 * or from the file if the buffer is exhausted and sets len to the length
116 * of the line.
117 */
118char *
124char *
119grep_fgetln(struct file *f, size_t *len)
125grep_fgetln(struct file *f, size_t *lenp)
120{
126{
121 struct stat st;
122 size_t bufsiz, i = 0;
123 int ch = 0;
127 unsigned char *p;
128 char *ret;
129 size_t len;
130 size_t off;
131 ptrdiff_t diff;
124
132
125 /* Fill in the buffer if it is empty. */
126 if (binbufptr == NULL) {
133 /* Fill the buffer, if necessary */
134 if (bufrem == 0 && grep_refill(f) != 0)
135 goto error;
127
136
128 /* Only pre-read to the buffer if we need the binary check. */
129 if (binbehave != BINFILE_TEXT) {
130 if (f->stdin)
131 st.st_size = MAXBUFSIZ;
132 else if (stat(fname, &st) != 0)
133 err(2, NULL);
134 /* no need to allocate buffer. */
135 if (st.st_size == 0)
136 return (NULL);
137 if (bufrem == 0) {
138 /* Return zero length to indicate EOF */
139 *lenp = 0;
140 return (bufpos);
141 }
137
142
138 bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
139 (st.st_size / 2) : MAXBUFSIZ;
140
141 binbuf = grep_malloc(sizeof(char) * bufsiz);
142
143 while (i < bufsiz) {
144 ch = grep_fgetc(f);
145 if (ch == EOF)
146 break;
147 binbuf[i++] = ch;
148 if ((ch == '\n') && lbflag)
149 break;
150 }
151
152 f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
153 '\0' : '\200', i - 1) != NULL;
154 }
155 binbufsiz = i;
156 binbufptr = binbuf;
143 /* Look for a newline in the remaining part of the buffer */
144 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
145 ++p; /* advance over newline */
146 ret = bufpos;
147 len = p - bufpos;
148 bufrem -= len;
149 bufpos = p;
150 *lenp = len;
151 return (ret);
157 }
158
152 }
153
159 /* Read a line whether from the buffer or from the file itself. */
160 for (i = 0; !(grep_feof(f) &&
161 (binbufptr == &binbuf[binbufsiz])); i++) {
162 if (binbufptr == &binbuf[binbufsiz]) {
163 ch = grep_fgetc(f);
164 } else {
165 ch = binbufptr[0];
166 binbufptr++;
167 }
168 if (i >= lnbuflen) {
169 lnbuflen *= 2;
170 lnbuf = grep_realloc(lnbuf, ++lnbuflen);
171 }
172 if ((ch == '\n') || (ch == EOF)) {
173 lnbuf[i] = '\0';
154 /* We have to copy the current buffered data to the line buffer */
155 for (len = bufrem, off = 0; ; len += bufrem) {
156 /* Make sure there is room for more data */
157 if (grep_lnbufgrow(len + LNBUFBUMP))
158 goto error;
159 memcpy(lnbuf + off, bufpos, len - off);
160 off = len;
161 if (grep_refill(f) != 0)
162 goto error;
163 if (bufrem == 0)
164 /* EOF: return partial line */
174 break;
165 break;
175 } else
176 lnbuf[i] = ch;
166 if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
167 continue;
168 /* got it: finish up the line (like code above) */
169 ++p;
170 diff = p - bufpos;
171 len += diff;
172 if (grep_lnbufgrow(len))
173 goto error;
174 memcpy(lnbuf + off, bufpos, diff);
175 bufrem -= diff;
176 bufpos = p;
177 break;
177 }
178 }
178 if (grep_feof(f) && (i == 0) && (ch != '\n'))
179 return (NULL);
180 *len = i;
179 *lenp = len;
181 return (lnbuf);
180 return (lnbuf);
181
182error:
183 *lenp = 0;
184 return (NULL);
182}
183
185}
186
184/*
185 * Opens the standard input for processing.
186 */
187struct file *
188grep_stdin_open(void)
187static inline struct file *
188grep_file_init(struct file *f)
189{
189{
190 struct file *f;
191
190
192 /* Processing stdin implies --line-buffered for tail -f to work. */
193 lbflag = true;
191 if (filebehave == FILE_GZIP &&
192 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
193 goto error;
194
194
195 snprintf(fname, sizeof fname, "%s", getstr(1));
195 if (filebehave == FILE_BZIP &&
196 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
197 goto error;
196
198
197 f = grep_malloc(sizeof *f);
199 /* Fill read buffer, also catches errors early */
200 if (grep_refill(f) != 0)
201 goto error;
198
202
199 binbuf = NULL;
200 if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
201 flockfile(f->f);
202 f->stdin = true;
203 return (f);
204 }
203 /* Check for binary stuff, if necessary */
204 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
205 f->binary = true;
205
206
207 return (f);
208error:
209 close(f->fd);
206 free(f);
207 return (NULL);
208}
209
210/*
210 free(f);
211 return (NULL);
212}
213
214/*
211 * Opens a normal, a gzipped or a bzip2 compressed file for processing.
215 * Opens a file for processing.
212 */
213struct file *
214grep_open(const char *path)
215{
216 struct file *f;
217
216 */
217struct file *
218grep_open(const char *path)
219{
220 struct file *f;
221
218 snprintf(fname, sizeof fname, "%s", path);
219
220 f = grep_malloc(sizeof *f);
222 f = grep_malloc(sizeof *f);
221
222 binbuf = NULL;
223 f->stdin = false;
224 switch (filebehave) {
225 case FILE_STDIO:
226 if ((f->f = fopen(path, "r")) != NULL) {
227 flockfile(f->f);
228 return (f);
229 }
230 break;
231 case FILE_GZIP:
232 if ((f->gzf = gzopen(fname, "r")) != NULL)
233 return (f);
234 break;
235 case FILE_BZIP:
236 if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL)
237 return (f);
238 break;
223 memset(f, 0, sizeof *f);
224 if (path == NULL) {
225 /* Processing stdin implies --line-buffered. */
226 lbflag = true;
227 f->fd = STDIN_FILENO;
228 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
229 free(f);
230 return (NULL);
239 }
240
231 }
232
241 free(f);
242 return (NULL);
233 return (grep_file_init(f));
243}
244
245/*
234}
235
236/*
246 * Closes a normal, a gzipped or a bzip2 compressed file.
237 * Closes a file.
247 */
248void
249grep_close(struct file *f)
250{
251
238 */
239void
240grep_close(struct file *f)
241{
242
252 switch (filebehave) {
253 case FILE_STDIO:
254 funlockfile(f->f);
255 fclose(f->f);
256 break;
257 case FILE_GZIP:
258 gzclose(f->gzf);
259 break;
260 case FILE_BZIP:
261 BZ2_bzclose(f->bzf);
262 break;
263 }
243 close(f->fd);
264
244
265 /* Reset read buffer for the file we are closing */
266 binbufptr = NULL;
267 free(binbuf);
245 /* Reset read buffer and line buffer */
246 bufpos = buffer;
247 bufrem = 0;
248
249 free(lnbuf);
250 lnbuf = NULL;
251 lnbuflen = 0;
268}
252}