1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
| 1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
|
5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
| 5 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 6 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
|
6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h>
| 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31#include <sys/cdefs.h>
|
31__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211364 2010-08-15 22:15:04Z gabor $");
| 32__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211463 2010-08-18 17:40:10Z gabor $");
|
32 33#include <sys/param.h> 34#include <sys/types.h> 35#include <sys/stat.h> 36 37#include <bzlib.h> 38#include <err.h> 39#include <errno.h>
| 33 34#include <sys/param.h> 35#include <sys/types.h> 36#include <sys/stat.h> 37 38#include <bzlib.h> 39#include <err.h> 40#include <errno.h>
|
40#include <stdio.h>
| 41#include <fcntl.h> 42#include <stddef.h>
|
41#include <stdlib.h> 42#include <string.h> 43#include <unistd.h> 44#include <wchar.h> 45#include <wctype.h> 46#include <zlib.h> 47 48#include "grep.h" 49
| 43#include <stdlib.h> 44#include <string.h> 45#include <unistd.h> 46#include <wchar.h> 47#include <wctype.h> 48#include <zlib.h> 49 50#include "grep.h" 51
|
50static char fname[MAXPATHLEN]; /* file name */
| 52#define MAXBUFSIZ (32 * 1024) 53#define LNBUFBUMP 80
|
51
| 54
|
52#define MAXBUFSIZ (16 * 1024) 53#define PREREAD_M 0.2
| 55static gzFile gzbufdesc; 56static BZFILE* bzbufdesc;
|
54
| 57
|
55/* Some global variables for the buffering and reading. */ 56static char *lnbuf; 57static size_t lnbuflen; 58static unsigned char *binbuf; 59static int binbufsiz; 60unsigned char *binbufptr; 61static int bzerr;
| 58static unsigned char buffer[MAXBUFSIZ]; 59static unsigned char *bufpos; 60static size_t bufrem;
|
62
| 61
|
63#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \ 64 (ch != L'\b') && (ch != L'\0'))
| 62static unsigned char *lnbuf; 63static size_t lnbuflen;
|
65
| 64
|
66/* 67 * Returns a single character according to the file type. 68 * Returns -1 on failure. 69 */
| |
70static inline int
| 65static inline int
|
71grep_fgetc(struct file *f)
| 66grep_refill(struct file *f)
|
72{
| 67{
|
73 unsigned char c;
| 68 ssize_t nr; 69 int bzerr;
|
74
| 70
|
75 switch (filebehave) { 76 case FILE_STDIO: 77 return (getc_unlocked(f->f)); 78 case FILE_GZIP: 79 return (gzgetc(f->gzf)); 80 case FILE_BZIP: 81 BZ2_bzRead(&bzerr, f->bzf, &c, 1); 82 if (bzerr == BZ_STREAM_END) 83 return (-1); 84 else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK) 85 errx(2, "%s", getstr(2)); 86 return (c); 87 } 88 return (-1);
| 71 bufpos = buffer; 72 bufrem = 0; 73 74 if (filebehave == FILE_GZIP) 75 nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 76 else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 77 nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 78 switch (bzerr) { 79 case BZ_OK: 80 case BZ_STREAM_END: 81 /* No problem, nr will be okay */ 82 break; 83 case BZ_DATA_ERROR_MAGIC: 84 /* 85 * As opposed to gzread(), which simply returns the 86 * plain file data, if it is not in the correct 87 * compressed format, BZ2_bzRead() instead aborts. 88 * 89 * So, just restart at the beginning of the file again, 90 * and use plain reads from now on. 91 */ 92 BZ2_bzReadClose(&bzerr, bzbufdesc); 93 bzbufdesc = NULL; 94 if (lseek(f->fd, 0, SEEK_SET) == -1) 95 return (-1); 96 nr = read(f->fd, buffer, MAXBUFSIZ); 97 break; 98 default: 99 /* Make sure we exit with an error */ 100 nr = -1; 101 } 102 } else 103 nr = read(f->fd, buffer, MAXBUFSIZ); 104 105 if (nr < 0) 106 return (-1); 107 108 bufrem = nr; 109 return (0);
|
89} 90
| 110} 111
|
91/* 92 * Returns true if the file position is a EOF, returns false 93 * otherwise. 94 */
| |
95static inline int
| 112static inline int
|
96grep_feof(struct file *f)
| 113grep_lnbufgrow(size_t newlen)
|
97{ 98
| 114{ 115
|
99 switch (filebehave) { 100 case FILE_STDIO: 101 return (feof_unlocked(f->f)); 102 case FILE_GZIP: 103 return (gzeof(f->gzf)); 104 case FILE_BZIP: 105 return (bzerr == BZ_STREAM_END);
| 116 if (lnbuflen < newlen) { 117 lnbuf = grep_realloc(lnbuf, newlen); 118 lnbuflen = newlen;
|
106 }
| 119 }
|
107 return (1);
| 120 121 return (0);
|
108} 109
| 122} 123
|
110/* 111 * At the first call, fills in an internal buffer and checks if the given 112 * file is a binary file and sets the binary flag accordingly. Then returns 113 * a single line and sets len to the length of the returned line. 114 * At any other call returns a single line either from the internal buffer 115 * or from the file if the buffer is exhausted and sets len to the length 116 * of the line. 117 */
| |
118char *
| 124char *
|
119grep_fgetln(struct file *f, size_t *len)
| 125grep_fgetln(struct file *f, size_t *lenp)
|
120{
| 126{
|
121 struct stat st; 122 size_t bufsiz, i = 0; 123 int ch = 0;
| 127 unsigned char *p; 128 char *ret; 129 size_t len; 130 size_t off; 131 ptrdiff_t diff;
|
124
| 132
|
125 /* Fill in the buffer if it is empty. */ 126 if (binbufptr == NULL) {
| 133 /* Fill the buffer, if necessary */ 134 if (bufrem == 0 && grep_refill(f) != 0) 135 goto error;
|
127
| 136
|
128 /* Only pre-read to the buffer if we need the binary check. */ 129 if (binbehave != BINFILE_TEXT) { 130 if (f->stdin) 131 st.st_size = MAXBUFSIZ; 132 else if (stat(fname, &st) != 0) 133 err(2, NULL); 134 /* no need to allocate buffer. */ 135 if (st.st_size == 0) 136 return (NULL);
| 137 if (bufrem == 0) { 138 /* Return zero length to indicate EOF */ 139 *lenp = 0; 140 return (bufpos); 141 }
|
137
| 142
|
138 bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ? 139 (st.st_size / 2) : MAXBUFSIZ; 140 141 binbuf = grep_malloc(sizeof(char) * bufsiz); 142 143 while (i < bufsiz) { 144 ch = grep_fgetc(f); 145 if (ch == EOF) 146 break; 147 binbuf[i++] = ch; 148 if ((ch == '\n') && lbflag) 149 break; 150 } 151 152 f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? 153 '\0' : '\200', i - 1) != NULL; 154 } 155 binbufsiz = i; 156 binbufptr = binbuf;
| 143 /* Look for a newline in the remaining part of the buffer */ 144 if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { 145 ++p; /* advance over newline */ 146 ret = bufpos; 147 len = p - bufpos; 148 bufrem -= len; 149 bufpos = p; 150 *lenp = len; 151 return (ret);
|
157 } 158
| 152 } 153
|
159 /* Read a line whether from the buffer or from the file itself. */ 160 for (i = 0; !(grep_feof(f) && 161 (binbufptr == &binbuf[binbufsiz])); i++) { 162 if (binbufptr == &binbuf[binbufsiz]) { 163 ch = grep_fgetc(f); 164 } else { 165 ch = binbufptr[0]; 166 binbufptr++; 167 } 168 if (i >= lnbuflen) { 169 lnbuflen *= 2; 170 lnbuf = grep_realloc(lnbuf, ++lnbuflen); 171 } 172 if ((ch == '\n') || (ch == EOF)) { 173 lnbuf[i] = '\0';
| 154 /* We have to copy the current buffered data to the line buffer */ 155 for (len = bufrem, off = 0; ; len += bufrem) { 156 /* Make sure there is room for more data */ 157 if (grep_lnbufgrow(len + LNBUFBUMP)) 158 goto error; 159 memcpy(lnbuf + off, bufpos, len - off); 160 off = len; 161 if (grep_refill(f) != 0) 162 goto error; 163 if (bufrem == 0) 164 /* EOF: return partial line */
|
174 break;
| 165 break;
|
175 } else 176 lnbuf[i] = ch;
| 166 if ((p = memchr(bufpos, '\n', bufrem)) == NULL) 167 continue; 168 /* got it: finish up the line (like code above) */ 169 ++p; 170 diff = p - bufpos; 171 len += diff; 172 if (grep_lnbufgrow(len)) 173 goto error; 174 memcpy(lnbuf + off, bufpos, diff); 175 bufrem -= diff; 176 bufpos = p; 177 break;
|
177 }
| 178 }
|
178 if (grep_feof(f) && (i == 0) && (ch != '\n')) 179 return (NULL); 180 *len = i;
| 179 *lenp = len;
|
181 return (lnbuf);
| 180 return (lnbuf);
|
| 181 182error: 183 *lenp = 0; 184 return (NULL);
|
182} 183
| 185} 186
|
184/* 185 * Opens the standard input for processing. 186 */ 187struct file * 188grep_stdin_open(void)
| 187static inline struct file * 188grep_file_init(struct file *f)
|
189{
| 189{
|
190 struct file *f;
| |
191
| 190
|
192 /* Processing stdin implies --line-buffered for tail -f to work. */ 193 lbflag = true;
| 191 if (filebehave == FILE_GZIP && 192 (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 193 goto error;
|
194
| 194
|
195 snprintf(fname, sizeof fname, "%s", getstr(1));
| 195 if (filebehave == FILE_BZIP && 196 (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 197 goto error;
|
196
| 198
|
197 f = grep_malloc(sizeof *f);
| 199 /* Fill read buffer, also catches errors early */ 200 if (grep_refill(f) != 0) 201 goto error;
|
198
| 202
|
199 binbuf = NULL; 200 if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) { 201 flockfile(f->f); 202 f->stdin = true; 203 return (f); 204 }
| 203 /* Check for binary stuff, if necessary */ 204 if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) 205 f->binary = true;
|
205
| 206
|
| 207 return (f); 208error: 209 close(f->fd);
|
206 free(f); 207 return (NULL); 208} 209 210/*
| 210 free(f); 211 return (NULL); 212} 213 214/*
|
211 * Opens a normal, a gzipped or a bzip2 compressed file for processing.
| 215 * Opens a file for processing.
|
212 */ 213struct file * 214grep_open(const char *path) 215{ 216 struct file *f; 217
| 216 */ 217struct file * 218grep_open(const char *path) 219{ 220 struct file *f; 221
|
218 snprintf(fname, sizeof fname, "%s", path); 219
| |
220 f = grep_malloc(sizeof *f);
| 222 f = grep_malloc(sizeof *f);
|
221 222 binbuf = NULL; 223 f->stdin = false; 224 switch (filebehave) { 225 case FILE_STDIO: 226 if ((f->f = fopen(path, "r")) != NULL) { 227 flockfile(f->f); 228 return (f); 229 } 230 break; 231 case FILE_GZIP: 232 if ((f->gzf = gzopen(fname, "r")) != NULL) 233 return (f); 234 break; 235 case FILE_BZIP: 236 if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL) 237 return (f); 238 break;
| 223 memset(f, 0, sizeof *f); 224 if (path == NULL) { 225 /* Processing stdin implies --line-buffered. */ 226 lbflag = true; 227 f->fd = STDIN_FILENO; 228 } else if ((f->fd = open(path, O_RDONLY)) == -1) { 229 free(f); 230 return (NULL);
|
239 } 240
| 231 } 232
|
241 free(f); 242 return (NULL);
| 233 return (grep_file_init(f));
|
243} 244 245/*
| 234} 235 236/*
|
246 * Closes a normal, a gzipped or a bzip2 compressed file.
| 237 * Closes a file.
|
247 */ 248void 249grep_close(struct file *f) 250{ 251
| 238 */ 239void 240grep_close(struct file *f) 241{ 242
|
252 switch (filebehave) { 253 case FILE_STDIO: 254 funlockfile(f->f); 255 fclose(f->f); 256 break; 257 case FILE_GZIP: 258 gzclose(f->gzf); 259 break; 260 case FILE_BZIP: 261 BZ2_bzclose(f->bzf); 262 break; 263 }
| 243 close(f->fd);
|
264
| 244
|
265 /* Reset read buffer for the file we are closing */ 266 binbufptr = NULL; 267 free(binbuf);
| 245 /* Reset read buffer and line buffer */ 246 bufpos = buffer; 247 bufrem = 0; 248 249 free(lnbuf); 250 lnbuf = NULL; 251 lnbuflen = 0;
|
268}
| 252}
|