/*	$NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $	*/
/*	$FreeBSD$	*/
/*	$OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $	*/

/*-
 * Copyright (c) 1999 James Howard and Dag-Erling Coïdan Smørgrav
 * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
 * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <lzma.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <wchar.h>
#include <wctype.h>
#include <zlib.h>

#ifndef WITHOUT_BZIP2
#include <bzlib.h>
#endif

#include "grep.h"

/* Size in bytes of the read buffer and of each read/decompress request. */
#define	MAXBUFSIZ	(32 * 1024)
/* Extra slack requested when the line buffer is grown in grep_fgetln(). */
#define	LNBUFBUMP	80

/*
 * Decompressor state.  All of it is file-scope, so this module handles
 * one open file at a time.
 */
static gzFile gzbufdesc;			/* zlib handle from gzdopen() */
static lzma_stream lstrm = LZMA_STREAM_INIT;	/* liblzma decoder stream */
#ifndef WITHOUT_BZIP2
static BZFILE* bzbufdesc;			/* bzip2 handle from BZ2_bzdopen() */
#endif

/* Read buffer: either grep_malloc()ed (MAXBUFSIZ bytes) or an mmap()ed file. */
static unsigned char *buffer;
static unsigned char *bufpos;	/* next unread byte inside buffer */
static size_t bufrem;		/* number of unread bytes at bufpos */
static size_t fsiz;		/* length of the mapping, kept for munmap() */

/* Line buffer used when a line does not fit in one buffer fill. */
static unsigned char *lnbuf;
static size_t lnbuflen;		/* current allocated size of lnbuf */
75210389Sgabor 76211364Sgaborstatic inline int 77211463Sgaborgrep_refill(struct file *f) 78210389Sgabor{ 79211463Sgabor ssize_t nr; 80210389Sgabor 81226261Sgabor if (filebehave == FILE_MMAP) 82226261Sgabor return (0); 83226261Sgabor 84211463Sgabor bufpos = buffer; 85211463Sgabor bufrem = 0; 86211463Sgabor 87226573Sgabor if (filebehave == FILE_GZIP) { 88211463Sgabor nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 89226573Sgabor#ifndef WITHOUT_BZIP2 90226573Sgabor } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 91226573Sgabor int bzerr; 92226573Sgabor 93211463Sgabor nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 94211463Sgabor switch (bzerr) { 95211463Sgabor case BZ_OK: 96211463Sgabor case BZ_STREAM_END: 97211463Sgabor /* No problem, nr will be okay */ 98211463Sgabor break; 99211463Sgabor case BZ_DATA_ERROR_MAGIC: 100211463Sgabor /* 101211463Sgabor * As opposed to gzread(), which simply returns the 102211463Sgabor * plain file data, if it is not in the correct 103211463Sgabor * compressed format, BZ2_bzRead() instead aborts. 104211463Sgabor * 105211463Sgabor * So, just restart at the beginning of the file again, 106211463Sgabor * and use plain reads from now on. 107211463Sgabor */ 108211463Sgabor BZ2_bzReadClose(&bzerr, bzbufdesc); 109211463Sgabor bzbufdesc = NULL; 110211463Sgabor if (lseek(f->fd, 0, SEEK_SET) == -1) 111211463Sgabor return (-1); 112211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 113211463Sgabor break; 114211463Sgabor default: 115211463Sgabor /* Make sure we exit with an error */ 116211463Sgabor nr = -1; 117211463Sgabor } 118226573Sgabor#endif 119226261Sgabor } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { 120226261Sgabor lzma_action action = LZMA_RUN; 121226261Sgabor uint8_t in_buf[MAXBUFSIZ]; 122226261Sgabor lzma_ret ret; 123226261Sgabor 124226261Sgabor ret = (filebehave == FILE_XZ) ? 
125226261Sgabor lzma_stream_decoder(&lstrm, UINT64_MAX, 126226261Sgabor LZMA_CONCATENATED) : 127226261Sgabor lzma_alone_decoder(&lstrm, UINT64_MAX); 128226261Sgabor 129226261Sgabor if (ret != LZMA_OK) 130226261Sgabor return (-1); 131226261Sgabor 132226261Sgabor lstrm.next_out = buffer; 133226261Sgabor lstrm.avail_out = MAXBUFSIZ; 134226261Sgabor lstrm.next_in = in_buf; 135226261Sgabor nr = read(f->fd, in_buf, MAXBUFSIZ); 136226261Sgabor 137226261Sgabor if (nr < 0) 138226261Sgabor return (-1); 139226261Sgabor else if (nr == 0) 140226261Sgabor action = LZMA_FINISH; 141226261Sgabor 142226261Sgabor lstrm.avail_in = nr; 143226261Sgabor ret = lzma_code(&lstrm, action); 144226261Sgabor 145226261Sgabor if (ret != LZMA_OK && ret != LZMA_STREAM_END) 146226261Sgabor return (-1); 147226261Sgabor bufrem = MAXBUFSIZ - lstrm.avail_out; 148226261Sgabor return (0); 149211463Sgabor } else 150211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 151211463Sgabor 152211463Sgabor if (nr < 0) 153211463Sgabor return (-1); 154211463Sgabor 155211463Sgabor bufrem = nr; 156211463Sgabor return (0); 157210389Sgabor} 158210389Sgabor 159211364Sgaborstatic inline int 160211463Sgaborgrep_lnbufgrow(size_t newlen) 161210389Sgabor{ 162210389Sgabor 163211463Sgabor if (lnbuflen < newlen) { 164211463Sgabor lnbuf = grep_realloc(lnbuf, newlen); 165211463Sgabor lnbuflen = newlen; 166210389Sgabor } 167211463Sgabor 168211463Sgabor return (0); 169210389Sgabor} 170210389Sgabor 171210389Sgaborchar * 172211463Sgaborgrep_fgetln(struct file *f, size_t *lenp) 173210389Sgabor{ 174211463Sgabor unsigned char *p; 175211463Sgabor char *ret; 176211463Sgabor size_t len; 177211463Sgabor size_t off; 178211463Sgabor ptrdiff_t diff; 179210389Sgabor 180211463Sgabor /* Fill the buffer, if necessary */ 181211463Sgabor if (bufrem == 0 && grep_refill(f) != 0) 182211463Sgabor goto error; 183210389Sgabor 184211463Sgabor if (bufrem == 0) { 185211463Sgabor /* Return zero length to indicate EOF */ 186211463Sgabor *lenp = 0; 187211463Sgabor 
return (bufpos); 188211463Sgabor } 189210389Sgabor 190211463Sgabor /* Look for a newline in the remaining part of the buffer */ 191211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { 192211463Sgabor ++p; /* advance over newline */ 193211463Sgabor ret = bufpos; 194211463Sgabor len = p - bufpos; 195211463Sgabor bufrem -= len; 196211463Sgabor bufpos = p; 197211463Sgabor *lenp = len; 198211463Sgabor return (ret); 199210389Sgabor } 200210389Sgabor 201211463Sgabor /* We have to copy the current buffered data to the line buffer */ 202211463Sgabor for (len = bufrem, off = 0; ; len += bufrem) { 203211463Sgabor /* Make sure there is room for more data */ 204211463Sgabor if (grep_lnbufgrow(len + LNBUFBUMP)) 205211463Sgabor goto error; 206211463Sgabor memcpy(lnbuf + off, bufpos, len - off); 207211463Sgabor off = len; 208211463Sgabor if (grep_refill(f) != 0) 209211463Sgabor goto error; 210211463Sgabor if (bufrem == 0) 211211463Sgabor /* EOF: return partial line */ 212210389Sgabor break; 213211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) == NULL) 214211463Sgabor continue; 215211463Sgabor /* got it: finish up the line (like code above) */ 216211463Sgabor ++p; 217211463Sgabor diff = p - bufpos; 218211463Sgabor len += diff; 219211463Sgabor if (grep_lnbufgrow(len)) 220211463Sgabor goto error; 221211463Sgabor memcpy(lnbuf + off, bufpos, diff); 222211463Sgabor bufrem -= diff; 223211463Sgabor bufpos = p; 224211463Sgabor break; 225210389Sgabor } 226211463Sgabor *lenp = len; 227210389Sgabor return (lnbuf); 228211463Sgabor 229211463Sgaborerror: 230211463Sgabor *lenp = 0; 231211463Sgabor return (NULL); 232210389Sgabor} 233210389Sgabor 234226261Sgabor/* 235226261Sgabor * Opens a file for processing. 
236226261Sgabor */ 237226261Sgaborstruct file * 238226261Sgaborgrep_open(const char *path) 239210389Sgabor{ 240226261Sgabor struct file *f; 241210389Sgabor 242226261Sgabor f = grep_malloc(sizeof *f); 243226261Sgabor memset(f, 0, sizeof *f); 244226261Sgabor if (path == NULL) { 245226261Sgabor /* Processing stdin implies --line-buffered. */ 246226261Sgabor lbflag = true; 247226261Sgabor f->fd = STDIN_FILENO; 248226261Sgabor } else if ((f->fd = open(path, O_RDONLY)) == -1) 249226261Sgabor goto error1; 250226261Sgabor 251226261Sgabor if (filebehave == FILE_MMAP) { 252226261Sgabor struct stat st; 253226261Sgabor 254226261Sgabor if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) || 255226261Sgabor (!S_ISREG(st.st_mode))) 256226261Sgabor filebehave = FILE_STDIO; 257226261Sgabor else { 258226261Sgabor int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC; 259226261Sgabor#ifdef MAP_PREFAULT_READ 260226261Sgabor flags |= MAP_PREFAULT_READ; 261226261Sgabor#endif 262226261Sgabor fsiz = st.st_size; 263226261Sgabor buffer = mmap(NULL, fsiz, PROT_READ, flags, 264226261Sgabor f->fd, (off_t)0); 265226261Sgabor if (buffer == MAP_FAILED) 266226261Sgabor filebehave = FILE_STDIO; 267226261Sgabor else { 268226261Sgabor bufrem = st.st_size; 269226261Sgabor bufpos = buffer; 270226261Sgabor madvise(buffer, st.st_size, MADV_SEQUENTIAL); 271226261Sgabor } 272226261Sgabor } 273226261Sgabor } 274226261Sgabor 275226261Sgabor if ((buffer == NULL) || (buffer == MAP_FAILED)) 276226261Sgabor buffer = grep_malloc(MAXBUFSIZ); 277226261Sgabor 278211463Sgabor if (filebehave == FILE_GZIP && 279211463Sgabor (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 280226261Sgabor goto error2; 281211364Sgabor 282226573Sgabor#ifndef WITHOUT_BZIP2 283211463Sgabor if (filebehave == FILE_BZIP && 284211463Sgabor (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 285226261Sgabor goto error2; 286226573Sgabor#endif 287210389Sgabor 288211463Sgabor /* Fill read buffer, also catches errors early */ 289226261Sgabor if (bufrem == 0 && 
grep_refill(f) != 0) 290226261Sgabor goto error2; 291210389Sgabor 292211463Sgabor /* Check for binary stuff, if necessary */ 293211463Sgabor if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) 294226261Sgabor f->binary = true; 295210389Sgabor 296211463Sgabor return (f); 297226261Sgabor 298226261Sgaborerror2: 299211463Sgabor close(f->fd); 300226261Sgaborerror1: 301210389Sgabor free(f); 302210389Sgabor return (NULL); 303210389Sgabor} 304210389Sgabor 305210389Sgabor/* 306211463Sgabor * Closes a file. 307210389Sgabor */ 308210389Sgaborvoid 309210389Sgaborgrep_close(struct file *f) 310210389Sgabor{ 311210389Sgabor 312211463Sgabor close(f->fd); 313210389Sgabor 314211463Sgabor /* Reset read buffer and line buffer */ 315226261Sgabor if (filebehave == FILE_MMAP) { 316226261Sgabor munmap(buffer, fsiz); 317226261Sgabor buffer = NULL; 318226261Sgabor } 319211463Sgabor bufpos = buffer; 320211463Sgabor bufrem = 0; 321211463Sgabor 322211463Sgabor free(lnbuf); 323211463Sgabor lnbuf = NULL; 324211463Sgabor lnbuflen = 0; 325210389Sgabor} 326