file.c revision 220422
1220422Sgabor/* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */ 2220422Sgabor/* $FreeBSD: head/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $ */ 3210389Sgabor/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 4210389Sgabor 5210389Sgabor/*- 6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7211463Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8211463Sgabor * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> 9210389Sgabor * All rights reserved. 10210389Sgabor * 11210389Sgabor * Redistribution and use in source and binary forms, with or without 12210389Sgabor * modification, are permitted provided that the following conditions 13210389Sgabor * are met: 14210389Sgabor * 1. Redistributions of source code must retain the above copyright 15210389Sgabor * notice, this list of conditions and the following disclaimer. 16210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 17210389Sgabor * notice, this list of conditions and the following disclaimer in the 18210389Sgabor * documentation and/or other materials provided with the distribution. 19210389Sgabor * 20210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30210389Sgabor * SUCH DAMAGE. 31210389Sgabor */ 32210389Sgabor 33210389Sgabor#include <sys/cdefs.h> 34210389Sgabor__FBSDID("$FreeBSD: head/usr.bin/grep/file.c 220422 2011-04-07 13:03:35Z gabor $"); 35210389Sgabor 36210389Sgabor#include <sys/param.h> 37210389Sgabor#include <sys/types.h> 38210389Sgabor#include <sys/stat.h> 39210389Sgabor 40210389Sgabor#include <bzlib.h> 41210389Sgabor#include <err.h> 42210389Sgabor#include <errno.h> 43211463Sgabor#include <fcntl.h> 44211463Sgabor#include <stddef.h> 45210389Sgabor#include <stdlib.h> 46210389Sgabor#include <string.h> 47210389Sgabor#include <unistd.h> 48210389Sgabor#include <wchar.h> 49210389Sgabor#include <wctype.h> 50210389Sgabor#include <zlib.h> 51210389Sgabor 52210389Sgabor#include "grep.h" 53210389Sgabor 54211463Sgabor#define MAXBUFSIZ (32 * 1024) 55211463Sgabor#define LNBUFBUMP 80 56210389Sgabor 57211463Sgaborstatic gzFile gzbufdesc; 58211463Sgaborstatic BZFILE* bzbufdesc; 59210389Sgabor 60211463Sgaborstatic unsigned char buffer[MAXBUFSIZ]; 61211463Sgaborstatic unsigned char *bufpos; 62211463Sgaborstatic size_t bufrem; 63210389Sgabor 64211463Sgaborstatic unsigned char *lnbuf; 65211463Sgaborstatic size_t lnbuflen; 66210389Sgabor 67211364Sgaborstatic inline int 68211463Sgaborgrep_refill(struct file *f) 69210389Sgabor{ 70211463Sgabor ssize_t nr; 71211463Sgabor int bzerr; 72210389Sgabor 73211463Sgabor bufpos = buffer; 74211463Sgabor bufrem = 0; 75211463Sgabor 76211463Sgabor if (filebehave == FILE_GZIP) 77211463Sgabor nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 78211463Sgabor else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 79211463Sgabor nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 80211463Sgabor switch (bzerr) { 81211463Sgabor case BZ_OK: 82211463Sgabor case BZ_STREAM_END: 83211463Sgabor /* No problem, nr will be okay */ 84211463Sgabor break; 85211463Sgabor case BZ_DATA_ERROR_MAGIC: 86211463Sgabor /* 87211463Sgabor * As opposed to gzread(), which simply returns the 88211463Sgabor * plain file data, if it is not in the correct 89211463Sgabor * compressed format, BZ2_bzRead() instead aborts. 90211463Sgabor * 91211463Sgabor * So, just restart at the beginning of the file again, 92211463Sgabor * and use plain reads from now on. 93211463Sgabor */ 94211463Sgabor BZ2_bzReadClose(&bzerr, bzbufdesc); 95211463Sgabor bzbufdesc = NULL; 96211463Sgabor if (lseek(f->fd, 0, SEEK_SET) == -1) 97211463Sgabor return (-1); 98211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 99211463Sgabor break; 100211463Sgabor default: 101211463Sgabor /* Make sure we exit with an error */ 102211463Sgabor nr = -1; 103211463Sgabor } 104211463Sgabor } else 105211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 106211463Sgabor 107211463Sgabor if (nr < 0) 108211463Sgabor return (-1); 109211463Sgabor 110211463Sgabor bufrem = nr; 111211463Sgabor return (0); 112210389Sgabor} 113210389Sgabor 114211364Sgaborstatic inline int 115211463Sgaborgrep_lnbufgrow(size_t newlen) 116210389Sgabor{ 117210389Sgabor 118211463Sgabor if (lnbuflen < newlen) { 119211463Sgabor lnbuf = grep_realloc(lnbuf, newlen); 120211463Sgabor lnbuflen = newlen; 121210389Sgabor } 122211463Sgabor 123211463Sgabor return (0); 124210389Sgabor} 125210389Sgabor 126210389Sgaborchar * 127211463Sgaborgrep_fgetln(struct file *f, size_t *lenp) 128210389Sgabor{ 129211463Sgabor unsigned char *p; 130211463Sgabor char *ret; 131211463Sgabor size_t len; 132211463Sgabor size_t off; 133211463Sgabor ptrdiff_t diff; 134210389Sgabor 135211463Sgabor /* Fill the buffer, if necessary */ 136211463Sgabor if (bufrem == 0 && grep_refill(f) != 0) 137211463Sgabor goto error; 138210389Sgabor 139211463Sgabor if (bufrem == 0) { 140211463Sgabor /* Return zero length to indicate EOF */ 141211463Sgabor *lenp = 0; 142211463Sgabor return (bufpos); 143211463Sgabor } 144210389Sgabor 145211463Sgabor /* Look for a newline in the remaining part of the buffer */ 146211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { 147211463Sgabor ++p; /* advance over newline */ 148211463Sgabor ret = bufpos; 149211463Sgabor len = p - bufpos; 150211463Sgabor bufrem -= len; 151211463Sgabor bufpos = p; 152211463Sgabor *lenp = len; 153211463Sgabor return (ret); 154210389Sgabor } 155210389Sgabor 156211463Sgabor /* We have to copy the current buffered data to the line buffer */ 157211463Sgabor for (len = bufrem, off = 0; ; len += bufrem) { 158211463Sgabor /* Make sure there is room for more data */ 159211463Sgabor if (grep_lnbufgrow(len + LNBUFBUMP)) 160211463Sgabor goto error; 161211463Sgabor memcpy(lnbuf + off, bufpos, len - off); 162211463Sgabor off = len; 163211463Sgabor if (grep_refill(f) != 0) 164211463Sgabor goto error; 165211463Sgabor if (bufrem == 0) 166211463Sgabor /* EOF: return partial line */ 167210389Sgabor break; 168211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) == NULL) 169211463Sgabor continue; 170211463Sgabor /* got it: finish up the line (like code above) */ 171211463Sgabor ++p; 172211463Sgabor diff = p - bufpos; 173211463Sgabor len += diff; 174211463Sgabor if (grep_lnbufgrow(len)) 175211463Sgabor goto error; 176211463Sgabor memcpy(lnbuf + off, bufpos, diff); 177211463Sgabor bufrem -= diff; 178211463Sgabor bufpos = p; 179211463Sgabor break; 180210389Sgabor } 181211463Sgabor *lenp = len; 182210389Sgabor return (lnbuf); 183211463Sgabor 184211463Sgaborerror: 185211463Sgabor *lenp = 0; 186211463Sgabor return (NULL); 187210389Sgabor} 188210389Sgabor 189211463Sgaborstatic inline struct file * 190211463Sgaborgrep_file_init(struct file *f) 191210389Sgabor{ 192210389Sgabor 193211463Sgabor if (filebehave == FILE_GZIP && 194211463Sgabor (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 195211463Sgabor goto error; 196211364Sgabor 197211463Sgabor if (filebehave == FILE_BZIP && 198211463Sgabor (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 199211463Sgabor goto error; 200210389Sgabor 201211463Sgabor /* Fill read buffer, also catches errors early */ 202211463Sgabor if (grep_refill(f) != 0) 203211463Sgabor goto error; 204210389Sgabor 205211463Sgabor /* Check for binary stuff, if necessary */ 206211463Sgabor if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) 207211463Sgabor f->binary = true; 208210389Sgabor 209211463Sgabor return (f); 210211463Sgaborerror: 211211463Sgabor close(f->fd); 212210389Sgabor free(f); 213210389Sgabor return (NULL); 214210389Sgabor} 215210389Sgabor 216210389Sgabor/* 217211463Sgabor * Opens a file for processing. 218210389Sgabor */ 219210389Sgaborstruct file * 220210389Sgaborgrep_open(const char *path) 221210389Sgabor{ 222210389Sgabor struct file *f; 223210389Sgabor 224210389Sgabor f = grep_malloc(sizeof *f); 225211463Sgabor memset(f, 0, sizeof *f); 226211463Sgabor if (path == NULL) { 227211463Sgabor /* Processing stdin implies --line-buffered. */ 228211463Sgabor lbflag = true; 229211463Sgabor f->fd = STDIN_FILENO; 230211463Sgabor } else if ((f->fd = open(path, O_RDONLY)) == -1) { 231211463Sgabor free(f); 232211463Sgabor return (NULL); 233210389Sgabor } 234210389Sgabor 235211463Sgabor return (grep_file_init(f)); 236210389Sgabor} 237210389Sgabor 238210389Sgabor/* 239211463Sgabor * Closes a file. 240210389Sgabor */ 241210389Sgaborvoid 242210389Sgaborgrep_close(struct file *f) 243210389Sgabor{ 244210389Sgabor 245211463Sgabor close(f->fd); 246210389Sgabor 247211463Sgabor /* Reset read buffer and line buffer */ 248211463Sgabor bufpos = buffer; 249211463Sgabor bufrem = 0; 250211463Sgabor 251211463Sgabor free(lnbuf); 252211463Sgabor lnbuf = NULL; 253211463Sgabor lnbuflen = 0; 254210389Sgabor} 255