/* file.c revision 210389 */
1/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2 3/*- 4 * Copyright (c) 1999 James Howard and Dag-Erling Co�dan Sm�rgrav 5 * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 
28 */ 29 30#include <sys/cdefs.h> 31__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 210389 2010-07-22 19:11:57Z gabor $"); 32 33#include <sys/param.h> 34#include <sys/types.h> 35#include <sys/stat.h> 36 37#include <bzlib.h> 38#include <err.h> 39#include <errno.h> 40#include <stdio.h> 41#include <stdlib.h> 42#include <string.h> 43#include <unistd.h> 44#include <wchar.h> 45#include <wctype.h> 46#include <zlib.h> 47 48#include "grep.h" 49 50static char fname[MAXPATHLEN]; /* file name */ 51 52#define MAXBUFSIZ (16 * 1024) 53#define PREREAD_M 0.2 54 55/* Some global variables for the buffering and reading. */ 56static char *lnbuf; 57static size_t lnbuflen; 58static unsigned char *binbuf; 59static int binbufsiz; 60unsigned char *binbufptr; 61static int bzerr; 62 63#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \ 64 (ch != L'\b') && (ch != L'\0')) 65 66/* 67 * Returns a single character according to the file type. 68 * Returns -1 on failure. 69 */ 70int 71grep_fgetc(struct file *f) 72{ 73 unsigned char c; 74 75 switch (filebehave) { 76 case FILE_STDIO: 77 return (fgetc(f->f)); 78 case FILE_GZIP: 79 return (gzgetc(f->gzf)); 80 case FILE_BZIP: 81 BZ2_bzRead(&bzerr, f->bzf, &c, 1); 82 if (bzerr == BZ_STREAM_END) 83 return (-1); 84 else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK) 85 errx(2, "%s", getstr(2)); 86 return (c); 87 } 88 return (-1); 89} 90 91/* 92 * Returns true if the file position is a EOF, returns false 93 * otherwise. 94 */ 95int 96grep_feof(struct file *f) 97{ 98 99 switch (filebehave) { 100 case FILE_STDIO: 101 return (feof(f->f)); 102 case FILE_GZIP: 103 return (gzeof(f->gzf)); 104 case FILE_BZIP: 105 return (bzerr == BZ_STREAM_END); 106 } 107 return (1); 108} 109 110/* 111 * At the first call, fills in an internal buffer and checks if the given 112 * file is a binary file and sets the binary flag accordingly. Then returns 113 * a single line and sets len to the length of the returned line. 
114 * At any other call returns a single line either from the internal buffer 115 * or from the file if the buffer is exhausted and sets len to the length 116 * of the line. 117 */ 118char * 119grep_fgetln(struct file *f, size_t *len) 120{ 121 struct stat st; 122 size_t bufsiz, i = 0; 123 int ch = 0; 124 125 /* Fill in the buffer if it is empty. */ 126 if (binbufptr == NULL) { 127 128 /* Only pre-read to the buffer if we need the binary check. */ 129 if (binbehave != BINFILE_TEXT) { 130 if (f->stdin) 131 st.st_size = MAXBUFSIZ; 132 else if (stat(fname, &st) != 0) 133 err(2, NULL); 134 135 bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ? 136 (st.st_size / 2) : MAXBUFSIZ; 137 138 binbuf = grep_malloc(sizeof(char) * bufsiz); 139 140 while (i < bufsiz) { 141 ch = grep_fgetc(f); 142 if (ch == EOF) 143 break; 144 binbuf[i++] = ch; 145 } 146 147 f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? 148 '\0' : '\200', i - 1) != NULL; 149 } 150 binbufsiz = i; 151 binbufptr = binbuf; 152 } 153 154 /* Read a line whether from the buffer or from the file itself. */ 155 for (i = 0; !(grep_feof(f) && 156 (binbufptr == &binbuf[binbufsiz])); i++) { 157 if (binbufptr == &binbuf[binbufsiz]) { 158 ch = grep_fgetc(f); 159 } else { 160 ch = binbufptr[0]; 161 binbufptr++; 162 } 163 if (i >= lnbuflen) { 164 lnbuflen *= 2; 165 lnbuf = grep_realloc(lnbuf, ++lnbuflen); 166 } 167 if ((ch == '\n') || (ch == EOF)) { 168 lnbuf[i] = '\0'; 169 break; 170 } else 171 lnbuf[i] = ch; 172 } 173 if (grep_feof(f) && (i == 0) && (ch != '\n')) 174 return (NULL); 175 *len = i; 176 return (lnbuf); 177} 178 179/* 180 * Opens the standard input for processing. 
181 */ 182struct file * 183grep_stdin_open(void) 184{ 185 struct file *f; 186 187 snprintf(fname, sizeof fname, "%s", getstr(1)); 188 189 f = grep_malloc(sizeof *f); 190 191 if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) { 192 f->stdin = true; 193 return (f); 194 } 195 196 free(f); 197 return (NULL); 198} 199 200/* 201 * Opens a normal, a gzipped or a bzip2 compressed file for processing. 202 */ 203struct file * 204grep_open(const char *path) 205{ 206 struct file *f; 207 208 snprintf(fname, sizeof fname, "%s", path); 209 210 f = grep_malloc(sizeof *f); 211 212 f->stdin = false; 213 switch (filebehave) { 214 case FILE_STDIO: 215 if ((f->f = fopen(path, "r")) != NULL) 216 return (f); 217 break; 218 case FILE_GZIP: 219 if ((f->gzf = gzopen(fname, "r")) != NULL) 220 return (f); 221 break; 222 case FILE_BZIP: 223 if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL) 224 return (f); 225 break; 226 } 227 228 free(f); 229 return (NULL); 230} 231 232/* 233 * Closes a normal, a gzipped or a bzip2 compressed file. 234 */ 235void 236grep_close(struct file *f) 237{ 238 239 switch (filebehave) { 240 case FILE_STDIO: 241 fclose(f->f); 242 break; 243 case FILE_GZIP: 244 gzclose(f->gzf); 245 break; 246 case FILE_BZIP: 247 BZ2_bzclose(f->bzf); 248 break; 249 } 250 251 /* Reset read buffer for the file we are closing */ 252 binbufptr = NULL; 253 free(binbuf); 254 255} 256