file.c revision 210389
1210389Sgabor/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 2210389Sgabor 3210389Sgabor/*- 4210389Sgabor * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav 5210389Sgabor * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org> 6210389Sgabor * All rights reserved. 7210389Sgabor * 8210389Sgabor * Redistribution and use in source and binary forms, with or without 9210389Sgabor * modification, are permitted provided that the following conditions 10210389Sgabor * are met: 11210389Sgabor * 1. Redistributions of source code must retain the above copyright 12210389Sgabor * notice, this list of conditions and the following disclaimer. 13210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 14210389Sgabor * notice, this list of conditions and the following disclaimer in the 15210389Sgabor * documentation and/or other materials provided with the distribution. 16210389Sgabor * 17210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27210389Sgabor * SUCH DAMAGE. 
28210389Sgabor */ 29210389Sgabor 30210389Sgabor#include <sys/cdefs.h> 31210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 210389 2010-07-22 19:11:57Z gabor $"); 32210389Sgabor 33210389Sgabor#include <sys/param.h> 34210389Sgabor#include <sys/types.h> 35210389Sgabor#include <sys/stat.h> 36210389Sgabor 37210389Sgabor#include <bzlib.h> 38210389Sgabor#include <err.h> 39210389Sgabor#include <errno.h> 40210389Sgabor#include <stdio.h> 41210389Sgabor#include <stdlib.h> 42210389Sgabor#include <string.h> 43210389Sgabor#include <unistd.h> 44210389Sgabor#include <wchar.h> 45210389Sgabor#include <wctype.h> 46210389Sgabor#include <zlib.h> 47210389Sgabor 48210389Sgabor#include "grep.h" 49210389Sgabor 50210389Sgaborstatic char fname[MAXPATHLEN]; /* file name */ 51210389Sgabor 52210389Sgabor#define MAXBUFSIZ (16 * 1024) 53210389Sgabor#define PREREAD_M 0.2 54210389Sgabor 55210389Sgabor/* Some global variables for the buffering and reading. */ 56210389Sgaborstatic char *lnbuf; 57210389Sgaborstatic size_t lnbuflen; 58210389Sgaborstatic unsigned char *binbuf; 59210389Sgaborstatic int binbufsiz; 60210389Sgaborunsigned char *binbufptr; 61210389Sgaborstatic int bzerr; 62210389Sgabor 63210389Sgabor#define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \ 64210389Sgabor (ch != L'\b') && (ch != L'\0')) 65210389Sgabor 66210389Sgabor/* 67210389Sgabor * Returns a single character according to the file type. 68210389Sgabor * Returns -1 on failure. 
69210389Sgabor */ 70210389Sgaborint 71210389Sgaborgrep_fgetc(struct file *f) 72210389Sgabor{ 73210389Sgabor unsigned char c; 74210389Sgabor 75210389Sgabor switch (filebehave) { 76210389Sgabor case FILE_STDIO: 77210389Sgabor return (fgetc(f->f)); 78210389Sgabor case FILE_GZIP: 79210389Sgabor return (gzgetc(f->gzf)); 80210389Sgabor case FILE_BZIP: 81210389Sgabor BZ2_bzRead(&bzerr, f->bzf, &c, 1); 82210389Sgabor if (bzerr == BZ_STREAM_END) 83210389Sgabor return (-1); 84210389Sgabor else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK) 85210389Sgabor errx(2, "%s", getstr(2)); 86210389Sgabor return (c); 87210389Sgabor } 88210389Sgabor return (-1); 89210389Sgabor} 90210389Sgabor 91210389Sgabor/* 92210389Sgabor * Returns true if the file position is a EOF, returns false 93210389Sgabor * otherwise. 94210389Sgabor */ 95210389Sgaborint 96210389Sgaborgrep_feof(struct file *f) 97210389Sgabor{ 98210389Sgabor 99210389Sgabor switch (filebehave) { 100210389Sgabor case FILE_STDIO: 101210389Sgabor return (feof(f->f)); 102210389Sgabor case FILE_GZIP: 103210389Sgabor return (gzeof(f->gzf)); 104210389Sgabor case FILE_BZIP: 105210389Sgabor return (bzerr == BZ_STREAM_END); 106210389Sgabor } 107210389Sgabor return (1); 108210389Sgabor} 109210389Sgabor 110210389Sgabor/* 111210389Sgabor * At the first call, fills in an internal buffer and checks if the given 112210389Sgabor * file is a binary file and sets the binary flag accordingly. Then returns 113210389Sgabor * a single line and sets len to the length of the returned line. 114210389Sgabor * At any other call returns a single line either from the internal buffer 115210389Sgabor * or from the file if the buffer is exhausted and sets len to the length 116210389Sgabor * of the line. 
117210389Sgabor */ 118210389Sgaborchar * 119210389Sgaborgrep_fgetln(struct file *f, size_t *len) 120210389Sgabor{ 121210389Sgabor struct stat st; 122210389Sgabor size_t bufsiz, i = 0; 123210389Sgabor int ch = 0; 124210389Sgabor 125210389Sgabor /* Fill in the buffer if it is empty. */ 126210389Sgabor if (binbufptr == NULL) { 127210389Sgabor 128210389Sgabor /* Only pre-read to the buffer if we need the binary check. */ 129210389Sgabor if (binbehave != BINFILE_TEXT) { 130210389Sgabor if (f->stdin) 131210389Sgabor st.st_size = MAXBUFSIZ; 132210389Sgabor else if (stat(fname, &st) != 0) 133210389Sgabor err(2, NULL); 134210389Sgabor 135210389Sgabor bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ? 136210389Sgabor (st.st_size / 2) : MAXBUFSIZ; 137210389Sgabor 138210389Sgabor binbuf = grep_malloc(sizeof(char) * bufsiz); 139210389Sgabor 140210389Sgabor while (i < bufsiz) { 141210389Sgabor ch = grep_fgetc(f); 142210389Sgabor if (ch == EOF) 143210389Sgabor break; 144210389Sgabor binbuf[i++] = ch; 145210389Sgabor } 146210389Sgabor 147210389Sgabor f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ? 148210389Sgabor '\0' : '\200', i - 1) != NULL; 149210389Sgabor } 150210389Sgabor binbufsiz = i; 151210389Sgabor binbufptr = binbuf; 152210389Sgabor } 153210389Sgabor 154210389Sgabor /* Read a line whether from the buffer or from the file itself. 
*/ 155210389Sgabor for (i = 0; !(grep_feof(f) && 156210389Sgabor (binbufptr == &binbuf[binbufsiz])); i++) { 157210389Sgabor if (binbufptr == &binbuf[binbufsiz]) { 158210389Sgabor ch = grep_fgetc(f); 159210389Sgabor } else { 160210389Sgabor ch = binbufptr[0]; 161210389Sgabor binbufptr++; 162210389Sgabor } 163210389Sgabor if (i >= lnbuflen) { 164210389Sgabor lnbuflen *= 2; 165210389Sgabor lnbuf = grep_realloc(lnbuf, ++lnbuflen); 166210389Sgabor } 167210389Sgabor if ((ch == '\n') || (ch == EOF)) { 168210389Sgabor lnbuf[i] = '\0'; 169210389Sgabor break; 170210389Sgabor } else 171210389Sgabor lnbuf[i] = ch; 172210389Sgabor } 173210389Sgabor if (grep_feof(f) && (i == 0) && (ch != '\n')) 174210389Sgabor return (NULL); 175210389Sgabor *len = i; 176210389Sgabor return (lnbuf); 177210389Sgabor} 178210389Sgabor 179210389Sgabor/* 180210389Sgabor * Opens the standard input for processing. 181210389Sgabor */ 182210389Sgaborstruct file * 183210389Sgaborgrep_stdin_open(void) 184210389Sgabor{ 185210389Sgabor struct file *f; 186210389Sgabor 187210389Sgabor snprintf(fname, sizeof fname, "%s", getstr(1)); 188210389Sgabor 189210389Sgabor f = grep_malloc(sizeof *f); 190210389Sgabor 191210389Sgabor if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) { 192210389Sgabor f->stdin = true; 193210389Sgabor return (f); 194210389Sgabor } 195210389Sgabor 196210389Sgabor free(f); 197210389Sgabor return (NULL); 198210389Sgabor} 199210389Sgabor 200210389Sgabor/* 201210389Sgabor * Opens a normal, a gzipped or a bzip2 compressed file for processing. 
202210389Sgabor */ 203210389Sgaborstruct file * 204210389Sgaborgrep_open(const char *path) 205210389Sgabor{ 206210389Sgabor struct file *f; 207210389Sgabor 208210389Sgabor snprintf(fname, sizeof fname, "%s", path); 209210389Sgabor 210210389Sgabor f = grep_malloc(sizeof *f); 211210389Sgabor 212210389Sgabor f->stdin = false; 213210389Sgabor switch (filebehave) { 214210389Sgabor case FILE_STDIO: 215210389Sgabor if ((f->f = fopen(path, "r")) != NULL) 216210389Sgabor return (f); 217210389Sgabor break; 218210389Sgabor case FILE_GZIP: 219210389Sgabor if ((f->gzf = gzopen(fname, "r")) != NULL) 220210389Sgabor return (f); 221210389Sgabor break; 222210389Sgabor case FILE_BZIP: 223210389Sgabor if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL) 224210389Sgabor return (f); 225210389Sgabor break; 226210389Sgabor } 227210389Sgabor 228210389Sgabor free(f); 229210389Sgabor return (NULL); 230210389Sgabor} 231210389Sgabor 232210389Sgabor/* 233210389Sgabor * Closes a normal, a gzipped or a bzip2 compressed file. 234210389Sgabor */ 235210389Sgaborvoid 236210389Sgaborgrep_close(struct file *f) 237210389Sgabor{ 238210389Sgabor 239210389Sgabor switch (filebehave) { 240210389Sgabor case FILE_STDIO: 241210389Sgabor fclose(f->f); 242210389Sgabor break; 243210389Sgabor case FILE_GZIP: 244210389Sgabor gzclose(f->gzf); 245210389Sgabor break; 246210389Sgabor case FILE_BZIP: 247210389Sgabor BZ2_bzclose(f->bzf); 248210389Sgabor break; 249210389Sgabor } 250210389Sgabor 251210389Sgabor /* Reset read buffer for the file we are closing */ 252210389Sgabor binbufptr = NULL; 253210389Sgabor free(binbuf); 254210389Sgabor 255210389Sgabor} 256