file.c revision 211364
1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211364 2010-08-15 22:15:04Z gabor $"); 32 33#include <sys/param.h> 34#include <sys/types.h> 35#include <sys/stat.h> 36 37#include <bzlib.h> 38#include <err.h> 39#include <errno.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <string.h> 43#include <unistd.h> 44#include <wchar.h> 45#include <wctype.h> 46#include <zlib.h> 47 48#include "grep.h" 49 50static char fname[MAXPATHLEN]; /* file name */ 51 52#define MAXBUFSIZ (16 * 1024) 53#define PREREAD_M 0.2 54 55/* Some global variables for the buffering and reading. */ 56static char *lnbuf; 57static size_t lnbuflen; 58static unsigned char *binbuf; 59static int binbufsiz; 60unsigned char *binbufptr; 61static int bzerr; 62 63#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \ 64 (ch != L'\b') && (ch != L'\0')) 65 66/* 67 * Returns a single character according to the file type. 68 * Returns -1 on failure. 69 */ 70static inline int 71grep_fgetc(struct file *f) 72{ 73 unsigned char c; 74 75 switch (filebehave) { 76 case FILE_STDIO: 77 return (getc_unlocked(f->f)); 78 case FILE_GZIP: 79 return (gzgetc(f->gzf)); 80 case FILE_BZIP: 81 BZ2_bzRead(&bzerr, f->bzf, &c, 1); 82 if (bzerr == BZ_STREAM_END) 83 return (-1); 84 else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK) 85 errx(2, "%s", getstr(2)); 86 return (c); 87 } 88 return (-1); 89} 90 91/* 92 * Returns true if the file position is a EOF, returns false 93 * otherwise. 94 */ 95static inline int 96grep_feof(struct file *f) 97{ 98 99 switch (filebehave) { 100 case FILE_STDIO: 101 return (feof_unlocked(f->f)); 102 case FILE_GZIP: 103 return (gzeof(f->gzf)); 104 case FILE_BZIP: 105 return (bzerr == BZ_STREAM_END); 106 } 107 return (1); 108} 109 110/* 111 * At the first call, fills in an internal buffer and checks if the given 112 * file is a binary file and sets the binary flag accordingly. Then returns 113 * a single line and sets len to the length of the returned line. 114 * At any other call returns a single line either from the internal buffer 115 * or from the file if the buffer is exhausted and sets len to the length 116 * of the line. 117 */ 118char * 119grep_fgetln(struct file *f, size_t *len) 120{ 121 struct stat st; 122 size_t bufsiz, i = 0; 123 int ch = 0; 124 125 /* Fill in the buffer if it is empty. */ 126 if (binbufptr == NULL) { 127 128 /* Only pre-read to the buffer if we need the binary check. */ 129 if (binbehave != BINFILE_TEXT) { 130 if (f->stdin) 131 st.st_size = MAXBUFSIZ; 132 else if (stat(fname, &st) != 0) 133 err(2, NULL); 134 /* no need to allocate buffer. */ 135 if (st.st_size == 0) 136 return (NULL); 137 138 bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ? 139 (st.st_size / 2) : MAXBUFSIZ; 140 141 binbuf = grep_malloc(sizeof(char) * bufsiz); 142 143 while (i < bufsiz) { 144 ch = grep_fgetc(f); 145 if (ch == EOF) 146 break; 147 binbuf[i++] = ch; 148 if ((ch == '\n') && lbflag) 149 break; 150 } 151 152 f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? 153 '\0' : '\200', i - 1) != NULL; 154 } 155 binbufsiz = i; 156 binbufptr = binbuf; 157 } 158 159 /* Read a line whether from the buffer or from the file itself. */ 160 for (i = 0; !(grep_feof(f) && 161 (binbufptr == &binbuf[binbufsiz])); i++) { 162 if (binbufptr == &binbuf[binbufsiz]) { 163 ch = grep_fgetc(f); 164 } else { 165 ch = binbufptr[0]; 166 binbufptr++; 167 } 168 if (i >= lnbuflen) { 169 lnbuflen *= 2; 170 lnbuf = grep_realloc(lnbuf, ++lnbuflen); 171 } 172 if ((ch == '\n') || (ch == EOF)) { 173 lnbuf[i] = '\0'; 174 break; 175 } else 176 lnbuf[i] = ch; 177 } 178 if (grep_feof(f) && (i == 0) && (ch != '\n')) 179 return (NULL); 180 *len = i; 181 return (lnbuf); 182} 183 184/* 185 * Opens the standard input for processing. 186 */ 187struct file * 188grep_stdin_open(void) 189{ 190 struct file *f; 191 192 /* Processing stdin implies --line-buffered for tail -f to work. */ 193 lbflag = true; 194 195 snprintf(fname, sizeof fname, "%s", getstr(1)); 196 197 f = grep_malloc(sizeof *f); 198 199 binbuf = NULL; 200 if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) { 201 flockfile(f->f); 202 f->stdin = true; 203 return (f); 204 } 205 206 free(f); 207 return (NULL); 208} 209 210/* 211 * Opens a normal, a gzipped or a bzip2 compressed file for processing. 212 */ 213struct file * 214grep_open(const char *path) 215{ 216 struct file *f; 217 218 snprintf(fname, sizeof fname, "%s", path); 219 220 f = grep_malloc(sizeof *f); 221 222 binbuf = NULL; 223 f->stdin = false; 224 switch (filebehave) { 225 case FILE_STDIO: 226 if ((f->f = fopen(path, "r")) != NULL) { 227 flockfile(f->f); 228 return (f); 229 } 230 break; 231 case FILE_GZIP: 232 if ((f->gzf = gzopen(fname, "r")) != NULL) 233 return (f); 234 break; 235 case FILE_BZIP: 236 if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL) 237 return (f); 238 break; 239 } 240 241 free(f); 242 return (NULL); 243} 244 245/* 246 * Closes a normal, a gzipped or a bzip2 compressed file. 247 */ 248void 249grep_close(struct file *f) 250{ 251 252 switch (filebehave) { 253 case FILE_STDIO: 254 funlockfile(f->f); 255 fclose(f->f); 256 break; 257 case FILE_GZIP: 258 gzclose(f->gzf); 259 break; 260 case FILE_BZIP: 261 BZ2_bzclose(f->bzf); 262 break; 263 } 264 265 /* Reset read buffer for the file we are closing */ 266 binbufptr = NULL; 267 free(binbuf); 268} 269