Deleted Added
full compact
file.c (211496) file.c (220422)
1/* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */
2/* $FreeBSD: head/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $ */
1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
2
3/*-
4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
6 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include <sys/cdefs.h>
3/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */
4
5/*-
6 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
7 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
8 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 */
32
33#include <sys/cdefs.h>
32__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211496 2010-08-19 09:28:59Z des $");
34__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $");
33
34#include <sys/param.h>
35#include <sys/types.h>
36#include <sys/stat.h>
37
38#include <bzlib.h>
39#include <err.h>
40#include <errno.h>
41#include <fcntl.h>
42#include <stddef.h>
43#include <stdlib.h>
44#include <string.h>
45#include <unistd.h>
46#include <wchar.h>
47#include <wctype.h>
48#include <zlib.h>
49
50#include "grep.h"
51
52#define MAXBUFSIZ (32 * 1024)
53#define LNBUFBUMP 80
54
55static gzFile gzbufdesc;
56static BZFILE* bzbufdesc;
57
58static unsigned char buffer[MAXBUFSIZ];
59static unsigned char *bufpos;
60static size_t bufrem;
61
62static unsigned char *lnbuf;
63static size_t lnbuflen;
64
65static inline int
66grep_refill(struct file *f)
67{
68 ssize_t nr;
69 int bzerr;
70
71 bufpos = buffer;
72 bufrem = 0;
73
74 if (filebehave == FILE_GZIP)
75 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
76 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
77 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
78 switch (bzerr) {
79 case BZ_OK:
80 case BZ_STREAM_END:
81 /* No problem, nr will be okay */
82 break;
83 case BZ_DATA_ERROR_MAGIC:
84 /*
85 * As opposed to gzread(), which simply returns the
86 * plain file data, if it is not in the correct
87 * compressed format, BZ2_bzRead() instead aborts.
88 *
89 * So, just restart at the beginning of the file again,
90 * and use plain reads from now on.
91 */
92 BZ2_bzReadClose(&bzerr, bzbufdesc);
93 bzbufdesc = NULL;
94 if (lseek(f->fd, 0, SEEK_SET) == -1)
95 return (-1);
96 nr = read(f->fd, buffer, MAXBUFSIZ);
97 break;
98 default:
99 /* Make sure we exit with an error */
100 nr = -1;
101 }
102 } else
103 nr = read(f->fd, buffer, MAXBUFSIZ);
104
105 if (nr < 0)
106 return (-1);
107
108 bufrem = nr;
109 return (0);
110}
111
112static inline int
113grep_lnbufgrow(size_t newlen)
114{
115
116 if (lnbuflen < newlen) {
117 lnbuf = grep_realloc(lnbuf, newlen);
118 lnbuflen = newlen;
119 }
120
121 return (0);
122}
123
124char *
125grep_fgetln(struct file *f, size_t *lenp)
126{
127 unsigned char *p;
128 char *ret;
129 size_t len;
130 size_t off;
131 ptrdiff_t diff;
132
133 /* Fill the buffer, if necessary */
134 if (bufrem == 0 && grep_refill(f) != 0)
135 goto error;
136
137 if (bufrem == 0) {
138 /* Return zero length to indicate EOF */
139 *lenp = 0;
140 return (bufpos);
141 }
142
143 /* Look for a newline in the remaining part of the buffer */
144 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
145 ++p; /* advance over newline */
146 ret = bufpos;
147 len = p - bufpos;
148 bufrem -= len;
149 bufpos = p;
150 *lenp = len;
151 return (ret);
152 }
153
154 /* We have to copy the current buffered data to the line buffer */
155 for (len = bufrem, off = 0; ; len += bufrem) {
156 /* Make sure there is room for more data */
157 if (grep_lnbufgrow(len + LNBUFBUMP))
158 goto error;
159 memcpy(lnbuf + off, bufpos, len - off);
160 off = len;
161 if (grep_refill(f) != 0)
162 goto error;
163 if (bufrem == 0)
164 /* EOF: return partial line */
165 break;
166 if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
167 continue;
168 /* got it: finish up the line (like code above) */
169 ++p;
170 diff = p - bufpos;
171 len += diff;
172 if (grep_lnbufgrow(len))
173 goto error;
174 memcpy(lnbuf + off, bufpos, diff);
175 bufrem -= diff;
176 bufpos = p;
177 break;
178 }
179 *lenp = len;
180 return (lnbuf);
181
182error:
183 *lenp = 0;
184 return (NULL);
185}
186
187static inline struct file *
188grep_file_init(struct file *f)
189{
190
191 if (filebehave == FILE_GZIP &&
192 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
193 goto error;
194
195 if (filebehave == FILE_BZIP &&
196 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
197 goto error;
198
199 /* Fill read buffer, also catches errors early */
200 if (grep_refill(f) != 0)
201 goto error;
202
203 /* Check for binary stuff, if necessary */
204 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
205 f->binary = true;
206
207 return (f);
208error:
209 close(f->fd);
210 free(f);
211 return (NULL);
212}
213
214/*
215 * Opens a file for processing.
216 */
217struct file *
218grep_open(const char *path)
219{
220 struct file *f;
221
222 f = grep_malloc(sizeof *f);
223 memset(f, 0, sizeof *f);
224 if (path == NULL) {
225 /* Processing stdin implies --line-buffered. */
226 lbflag = true;
227 f->fd = STDIN_FILENO;
228 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
229 free(f);
230 return (NULL);
231 }
232
233 return (grep_file_init(f));
234}
235
236/*
237 * Closes a file.
238 */
239void
240grep_close(struct file *f)
241{
242
243 close(f->fd);
244
245 /* Reset read buffer and line buffer */
246 bufpos = buffer;
247 bufrem = 0;
248
249 free(lnbuf);
250 lnbuf = NULL;
251 lnbuflen = 0;
252}
35
36#include <sys/param.h>
37#include <sys/types.h>
38#include <sys/stat.h>
39
40#include <bzlib.h>
41#include <err.h>
42#include <errno.h>
43#include <fcntl.h>
44#include <stddef.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <wchar.h>
49#include <wctype.h>
50#include <zlib.h>
51
52#include "grep.h"
53
54#define MAXBUFSIZ (32 * 1024)
55#define LNBUFBUMP 80
56
57static gzFile gzbufdesc;
58static BZFILE* bzbufdesc;
59
60static unsigned char buffer[MAXBUFSIZ];
61static unsigned char *bufpos;
62static size_t bufrem;
63
64static unsigned char *lnbuf;
65static size_t lnbuflen;
66
67static inline int
68grep_refill(struct file *f)
69{
70 ssize_t nr;
71 int bzerr;
72
73 bufpos = buffer;
74 bufrem = 0;
75
76 if (filebehave == FILE_GZIP)
77 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
78 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
79 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
80 switch (bzerr) {
81 case BZ_OK:
82 case BZ_STREAM_END:
83 /* No problem, nr will be okay */
84 break;
85 case BZ_DATA_ERROR_MAGIC:
86 /*
87 * As opposed to gzread(), which simply returns the
88 * plain file data, if it is not in the correct
89 * compressed format, BZ2_bzRead() instead aborts.
90 *
91 * So, just restart at the beginning of the file again,
92 * and use plain reads from now on.
93 */
94 BZ2_bzReadClose(&bzerr, bzbufdesc);
95 bzbufdesc = NULL;
96 if (lseek(f->fd, 0, SEEK_SET) == -1)
97 return (-1);
98 nr = read(f->fd, buffer, MAXBUFSIZ);
99 break;
100 default:
101 /* Make sure we exit with an error */
102 nr = -1;
103 }
104 } else
105 nr = read(f->fd, buffer, MAXBUFSIZ);
106
107 if (nr < 0)
108 return (-1);
109
110 bufrem = nr;
111 return (0);
112}
113
114static inline int
115grep_lnbufgrow(size_t newlen)
116{
117
118 if (lnbuflen < newlen) {
119 lnbuf = grep_realloc(lnbuf, newlen);
120 lnbuflen = newlen;
121 }
122
123 return (0);
124}
125
126char *
127grep_fgetln(struct file *f, size_t *lenp)
128{
129 unsigned char *p;
130 char *ret;
131 size_t len;
132 size_t off;
133 ptrdiff_t diff;
134
135 /* Fill the buffer, if necessary */
136 if (bufrem == 0 && grep_refill(f) != 0)
137 goto error;
138
139 if (bufrem == 0) {
140 /* Return zero length to indicate EOF */
141 *lenp = 0;
142 return (bufpos);
143 }
144
145 /* Look for a newline in the remaining part of the buffer */
146 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
147 ++p; /* advance over newline */
148 ret = bufpos;
149 len = p - bufpos;
150 bufrem -= len;
151 bufpos = p;
152 *lenp = len;
153 return (ret);
154 }
155
156 /* We have to copy the current buffered data to the line buffer */
157 for (len = bufrem, off = 0; ; len += bufrem) {
158 /* Make sure there is room for more data */
159 if (grep_lnbufgrow(len + LNBUFBUMP))
160 goto error;
161 memcpy(lnbuf + off, bufpos, len - off);
162 off = len;
163 if (grep_refill(f) != 0)
164 goto error;
165 if (bufrem == 0)
166 /* EOF: return partial line */
167 break;
168 if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
169 continue;
170 /* got it: finish up the line (like code above) */
171 ++p;
172 diff = p - bufpos;
173 len += diff;
174 if (grep_lnbufgrow(len))
175 goto error;
176 memcpy(lnbuf + off, bufpos, diff);
177 bufrem -= diff;
178 bufpos = p;
179 break;
180 }
181 *lenp = len;
182 return (lnbuf);
183
184error:
185 *lenp = 0;
186 return (NULL);
187}
188
189static inline struct file *
190grep_file_init(struct file *f)
191{
192
193 if (filebehave == FILE_GZIP &&
194 (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
195 goto error;
196
197 if (filebehave == FILE_BZIP &&
198 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
199 goto error;
200
201 /* Fill read buffer, also catches errors early */
202 if (grep_refill(f) != 0)
203 goto error;
204
205 /* Check for binary stuff, if necessary */
206 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
207 f->binary = true;
208
209 return (f);
210error:
211 close(f->fd);
212 free(f);
213 return (NULL);
214}
215
216/*
217 * Opens a file for processing.
218 */
219struct file *
220grep_open(const char *path)
221{
222 struct file *f;
223
224 f = grep_malloc(sizeof *f);
225 memset(f, 0, sizeof *f);
226 if (path == NULL) {
227 /* Processing stdin implies --line-buffered. */
228 lbflag = true;
229 f->fd = STDIN_FILENO;
230 } else if ((f->fd = open(path, O_RDONLY)) == -1) {
231 free(f);
232 return (NULL);
233 }
234
235 return (grep_file_init(f));
236}
237
238/*
239 * Closes a file.
240 */
241void
242grep_close(struct file *f)
243{
244
245 close(f->fd);
246
247 /* Reset read buffer and line buffer */
248 bufpos = buffer;
249 bufrem = 0;
250
251 free(lnbuf);
252 lnbuf = NULL;
253 lnbuflen = 0;
254}