1220422Sgabor/* $NetBSD: file.c,v 1.5 2011/02/16 18:35:39 joerg Exp $ */ 2220422Sgabor/* $FreeBSD$ */ 3210389Sgabor/* $OpenBSD: file.c,v 1.11 2010/07/02 20:48:48 nicm Exp $ */ 4210389Sgabor 5210389Sgabor/*- 6211496Sdes * Copyright (c) 1999 James Howard and Dag-Erling Co��dan Sm��rgrav 7211463Sgabor * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org> 8211463Sgabor * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com> 9210389Sgabor * All rights reserved. 10210389Sgabor * 11210389Sgabor * Redistribution and use in source and binary forms, with or without 12210389Sgabor * modification, are permitted provided that the following conditions 13210389Sgabor * are met: 14210389Sgabor * 1. Redistributions of source code must retain the above copyright 15210389Sgabor * notice, this list of conditions and the following disclaimer. 16210389Sgabor * 2. Redistributions in binary form must reproduce the above copyright 17210389Sgabor * notice, this list of conditions and the following disclaimer in the 18210389Sgabor * documentation and/or other materials provided with the distribution. 19210389Sgabor * 20210389Sgabor * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21210389Sgabor * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22210389Sgabor * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23210389Sgabor * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24210389Sgabor * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25210389Sgabor * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26210389Sgabor * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27210389Sgabor * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28210389Sgabor * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29210389Sgabor * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30210389Sgabor * SUCH DAMAGE. 31210389Sgabor */ 32210389Sgabor 33210389Sgabor#include <sys/cdefs.h> 34210389Sgabor__FBSDID("$FreeBSD$"); 35210389Sgabor 36210389Sgabor#include <sys/param.h> 37226035Sgabor#include <sys/mman.h> 38226035Sgabor#include <sys/stat.h> 39210389Sgabor#include <sys/types.h> 40210389Sgabor 41210389Sgabor#include <err.h> 42210389Sgabor#include <errno.h> 43211463Sgabor#include <fcntl.h> 44211463Sgabor#include <stddef.h> 45210389Sgabor#include <stdlib.h> 46210389Sgabor#include <string.h> 47210389Sgabor#include <unistd.h> 48210389Sgabor#include <wchar.h> 49210389Sgabor#include <wctype.h> 50210389Sgabor#include <zlib.h> 51210389Sgabor 52245171Sobrien#ifndef WITHOUT_LZMA 53245171Sobrien#include <lzma.h> 54245171Sobrien#endif 55245171Sobrien 56226271Sgabor#ifndef WITHOUT_BZIP2 57226271Sgabor#include <bzlib.h> 58226271Sgabor#endif 59226271Sgabor 60210389Sgabor#include "grep.h" 61210389Sgabor 62211463Sgabor#define MAXBUFSIZ (32 * 1024) 63211463Sgabor#define LNBUFBUMP 80 64210389Sgabor 65211463Sgaborstatic gzFile gzbufdesc; 66245171Sobrien#ifndef WITHOUT_LZMA 67226271Sgaborstatic lzma_stream lstrm = LZMA_STREAM_INIT; 68245171Sobrien#endif 69226271Sgabor#ifndef WITHOUT_BZIP2 70211463Sgaborstatic BZFILE* bzbufdesc; 71226271Sgabor#endif 72210389Sgabor 73226035Sgaborstatic unsigned char *buffer; 74211463Sgaborstatic unsigned char *bufpos; 75211463Sgaborstatic size_t bufrem; 76226035Sgaborstatic size_t fsiz; 77210389Sgabor 78211463Sgaborstatic unsigned char *lnbuf; 79211463Sgaborstatic size_t lnbuflen; 80210389Sgabor 81211364Sgaborstatic inline int 82211463Sgaborgrep_refill(struct file *f) 83210389Sgabor{ 84211463Sgabor ssize_t nr; 85210389Sgabor 86226035Sgabor if (filebehave == FILE_MMAP) 87226035Sgabor return (0); 88226035Sgabor 89211463Sgabor bufpos = buffer; 90211463Sgabor bufrem = 0; 91211463Sgabor 92226271Sgabor if (filebehave == FILE_GZIP) { 93211463Sgabor nr = gzread(gzbufdesc, buffer, MAXBUFSIZ); 94226271Sgabor#ifndef WITHOUT_BZIP2 95226271Sgabor } else if (filebehave == FILE_BZIP && bzbufdesc != NULL) { 96226271Sgabor int bzerr; 97226271Sgabor 98211463Sgabor nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ); 99211463Sgabor switch (bzerr) { 100211463Sgabor case BZ_OK: 101211463Sgabor case BZ_STREAM_END: 102211463Sgabor /* No problem, nr will be okay */ 103211463Sgabor break; 104211463Sgabor case BZ_DATA_ERROR_MAGIC: 105211463Sgabor /* 106211463Sgabor * As opposed to gzread(), which simply returns the 107211463Sgabor * plain file data, if it is not in the correct 108211463Sgabor * compressed format, BZ2_bzRead() instead aborts. 109211463Sgabor * 110211463Sgabor * So, just restart at the beginning of the file again, 111211463Sgabor * and use plain reads from now on. 112211463Sgabor */ 113211463Sgabor BZ2_bzReadClose(&bzerr, bzbufdesc); 114211463Sgabor bzbufdesc = NULL; 115211463Sgabor if (lseek(f->fd, 0, SEEK_SET) == -1) 116211463Sgabor return (-1); 117211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 118211463Sgabor break; 119211463Sgabor default: 120211463Sgabor /* Make sure we exit with an error */ 121211463Sgabor nr = -1; 122211463Sgabor } 123226271Sgabor#endif 124245171Sobrien#ifndef WITHOUT_LZMA 125226035Sgabor } else if ((filebehave == FILE_XZ) || (filebehave == FILE_LZMA)) { 126226035Sgabor lzma_action action = LZMA_RUN; 127226035Sgabor uint8_t in_buf[MAXBUFSIZ]; 128226035Sgabor lzma_ret ret; 129226035Sgabor 130226035Sgabor ret = (filebehave == FILE_XZ) ? 131226035Sgabor lzma_stream_decoder(&lstrm, UINT64_MAX, 132226035Sgabor LZMA_CONCATENATED) : 133226035Sgabor lzma_alone_decoder(&lstrm, UINT64_MAX); 134226035Sgabor 135226035Sgabor if (ret != LZMA_OK) 136226035Sgabor return (-1); 137226035Sgabor 138226035Sgabor lstrm.next_out = buffer; 139226035Sgabor lstrm.avail_out = MAXBUFSIZ; 140226035Sgabor lstrm.next_in = in_buf; 141226035Sgabor nr = read(f->fd, in_buf, MAXBUFSIZ); 142226035Sgabor 143226035Sgabor if (nr < 0) 144226035Sgabor return (-1); 145226035Sgabor else if (nr == 0) 146226035Sgabor action = LZMA_FINISH; 147226035Sgabor 148226035Sgabor lstrm.avail_in = nr; 149226035Sgabor ret = lzma_code(&lstrm, action); 150226035Sgabor 151226035Sgabor if (ret != LZMA_OK && ret != LZMA_STREAM_END) 152226035Sgabor return (-1); 153226035Sgabor bufrem = MAXBUFSIZ - lstrm.avail_out; 154226035Sgabor return (0); 155245171Sobrien#endif /* WIHTOUT_LZMA */ 156211463Sgabor } else 157211463Sgabor nr = read(f->fd, buffer, MAXBUFSIZ); 158211463Sgabor 159211463Sgabor if (nr < 0) 160211463Sgabor return (-1); 161211463Sgabor 162211463Sgabor bufrem = nr; 163211463Sgabor return (0); 164210389Sgabor} 165210389Sgabor 166211364Sgaborstatic inline int 167211463Sgaborgrep_lnbufgrow(size_t newlen) 168210389Sgabor{ 169210389Sgabor 170211463Sgabor if (lnbuflen < newlen) { 171211463Sgabor lnbuf = grep_realloc(lnbuf, newlen); 172211463Sgabor lnbuflen = newlen; 173210389Sgabor } 174211463Sgabor 175211463Sgabor return (0); 176210389Sgabor} 177210389Sgabor 178210389Sgaborchar * 179211463Sgaborgrep_fgetln(struct file *f, size_t *lenp) 180210389Sgabor{ 181211463Sgabor unsigned char *p; 182211463Sgabor char *ret; 183211463Sgabor size_t len; 184211463Sgabor size_t off; 185211463Sgabor ptrdiff_t diff; 186210389Sgabor 187211463Sgabor /* Fill the buffer, if necessary */ 188211463Sgabor if (bufrem == 0 && grep_refill(f) != 0) 189211463Sgabor goto error; 190210389Sgabor 191211463Sgabor if (bufrem == 0) { 192211463Sgabor /* Return zero length to indicate EOF */ 193211463Sgabor *lenp = 0; 194211463Sgabor return (bufpos); 195211463Sgabor } 196210389Sgabor 197211463Sgabor /* Look for a newline in the remaining part of the buffer */ 198211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) != NULL) { 199211463Sgabor ++p; /* advance over newline */ 200211463Sgabor ret = bufpos; 201211463Sgabor len = p - bufpos; 202211463Sgabor bufrem -= len; 203211463Sgabor bufpos = p; 204211463Sgabor *lenp = len; 205211463Sgabor return (ret); 206210389Sgabor } 207210389Sgabor 208211463Sgabor /* We have to copy the current buffered data to the line buffer */ 209211463Sgabor for (len = bufrem, off = 0; ; len += bufrem) { 210211463Sgabor /* Make sure there is room for more data */ 211211463Sgabor if (grep_lnbufgrow(len + LNBUFBUMP)) 212211463Sgabor goto error; 213211463Sgabor memcpy(lnbuf + off, bufpos, len - off); 214211463Sgabor off = len; 215211463Sgabor if (grep_refill(f) != 0) 216211463Sgabor goto error; 217211463Sgabor if (bufrem == 0) 218211463Sgabor /* EOF: return partial line */ 219210389Sgabor break; 220211463Sgabor if ((p = memchr(bufpos, '\n', bufrem)) == NULL) 221211463Sgabor continue; 222211463Sgabor /* got it: finish up the line (like code above) */ 223211463Sgabor ++p; 224211463Sgabor diff = p - bufpos; 225211463Sgabor len += diff; 226211463Sgabor if (grep_lnbufgrow(len)) 227211463Sgabor goto error; 228211463Sgabor memcpy(lnbuf + off, bufpos, diff); 229211463Sgabor bufrem -= diff; 230211463Sgabor bufpos = p; 231211463Sgabor break; 232210389Sgabor } 233211463Sgabor *lenp = len; 234210389Sgabor return (lnbuf); 235211463Sgabor 236211463Sgaborerror: 237211463Sgabor *lenp = 0; 238211463Sgabor return (NULL); 239210389Sgabor} 240210389Sgabor 241226035Sgabor/* 242226035Sgabor * Opens a file for processing. 243226035Sgabor */ 244226035Sgaborstruct file * 245226035Sgaborgrep_open(const char *path) 246210389Sgabor{ 247226035Sgabor struct file *f; 248210389Sgabor 249226035Sgabor f = grep_malloc(sizeof *f); 250226035Sgabor memset(f, 0, sizeof *f); 251226035Sgabor if (path == NULL) { 252226035Sgabor /* Processing stdin implies --line-buffered. */ 253226035Sgabor lbflag = true; 254226035Sgabor f->fd = STDIN_FILENO; 255226035Sgabor } else if ((f->fd = open(path, O_RDONLY)) == -1) 256226035Sgabor goto error1; 257226035Sgabor 258226035Sgabor if (filebehave == FILE_MMAP) { 259226035Sgabor struct stat st; 260226035Sgabor 261226035Sgabor if ((fstat(f->fd, &st) == -1) || (st.st_size > OFF_MAX) || 262226035Sgabor (!S_ISREG(st.st_mode))) 263226035Sgabor filebehave = FILE_STDIO; 264226035Sgabor else { 265226035Sgabor int flags = MAP_PRIVATE | MAP_NOCORE | MAP_NOSYNC; 266226035Sgabor#ifdef MAP_PREFAULT_READ 267226035Sgabor flags |= MAP_PREFAULT_READ; 268226035Sgabor#endif 269226035Sgabor fsiz = st.st_size; 270226035Sgabor buffer = mmap(NULL, fsiz, PROT_READ, flags, 271226035Sgabor f->fd, (off_t)0); 272226035Sgabor if (buffer == MAP_FAILED) 273226035Sgabor filebehave = FILE_STDIO; 274226035Sgabor else { 275226035Sgabor bufrem = st.st_size; 276226035Sgabor bufpos = buffer; 277226035Sgabor madvise(buffer, st.st_size, MADV_SEQUENTIAL); 278226035Sgabor } 279226035Sgabor } 280226035Sgabor } 281226035Sgabor 282226035Sgabor if ((buffer == NULL) || (buffer == MAP_FAILED)) 283226035Sgabor buffer = grep_malloc(MAXBUFSIZ); 284226035Sgabor 285211463Sgabor if (filebehave == FILE_GZIP && 286211463Sgabor (gzbufdesc = gzdopen(f->fd, "r")) == NULL) 287226035Sgabor goto error2; 288211364Sgabor 289226271Sgabor#ifndef WITHOUT_BZIP2 290211463Sgabor if (filebehave == FILE_BZIP && 291211463Sgabor (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL) 292226035Sgabor goto error2; 293226271Sgabor#endif 294210389Sgabor 295211463Sgabor /* Fill read buffer, also catches errors early */ 296226035Sgabor if (bufrem == 0 && grep_refill(f) != 0) 297226035Sgabor goto error2; 298210389Sgabor 299211463Sgabor /* Check for binary stuff, if necessary */ 300211463Sgabor if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL) 301226035Sgabor f->binary = true; 302210389Sgabor 303211463Sgabor return (f); 304226035Sgabor 305226035Sgaborerror2: 306211463Sgabor close(f->fd); 307226035Sgaborerror1: 308210389Sgabor free(f); 309210389Sgabor return (NULL); 310210389Sgabor} 311210389Sgabor 312210389Sgabor/* 313211463Sgabor * Closes a file. 314210389Sgabor */ 315210389Sgaborvoid 316210389Sgaborgrep_close(struct file *f) 317210389Sgabor{ 318210389Sgabor 319211463Sgabor close(f->fd); 320210389Sgabor 321211463Sgabor /* Reset read buffer and line buffer */ 322226035Sgabor if (filebehave == FILE_MMAP) { 323226035Sgabor munmap(buffer, fsiz); 324226035Sgabor buffer = NULL; 325226035Sgabor } 326211463Sgabor bufpos = buffer; 327211463Sgabor bufrem = 0; 328211463Sgabor 329211463Sgabor free(lnbuf); 330211463Sgabor lnbuf = NULL; 331211463Sgabor lnbuflen = 0; 332210389Sgabor} 333