Deleted Added
full compact
5c5,6
< * Copyright (C) 2008-2009 Gabor Kovesdan <gabor@FreeBSD.org>
---
> * Copyright (C) 2008-2010 Gabor Kovesdan <gabor@FreeBSD.org>
> * Copyright (C) 2010 Dimitry Andric <dimitry@andric.com>
31c32
< __FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211364 2010-08-15 22:15:04Z gabor $");
---
> __FBSDID("$FreeBSD: head/usr.bin/grep/file.c 211463 2010-08-18 17:40:10Z gabor $");
40c41,42
< #include <stdio.h>
---
> #include <fcntl.h>
> #include <stddef.h>
50c52,53
< static char fname[MAXPATHLEN]; /* file name */
---
> #define MAXBUFSIZ (32 * 1024)
> #define LNBUFBUMP 80
52,53c55,56
< #define MAXBUFSIZ (16 * 1024)
< #define PREREAD_M 0.2
---
> static gzFile gzbufdesc;
> static BZFILE* bzbufdesc;
55,61c58,60
< /* Some global variables for the buffering and reading. */
< static char *lnbuf;
< static size_t lnbuflen;
< static unsigned char *binbuf;
< static int binbufsiz;
< unsigned char *binbufptr;
< static int bzerr;
---
> static unsigned char buffer[MAXBUFSIZ];
> static unsigned char *bufpos;
> static size_t bufrem;
63,64c62,63
< #define iswbinary(ch) (!iswspace((ch)) && iswcntrl((ch)) && \
< (ch != L'\b') && (ch != L'\0'))
---
> static unsigned char *lnbuf;
> static size_t lnbuflen;
66,69d64
< /*
< * Returns a single character according to the file type.
< * Returns -1 on failure.
< */
71c66
< grep_fgetc(struct file *f)
---
> grep_refill(struct file *f)
73c68,69
< unsigned char c;
---
> ssize_t nr;
> int bzerr;
75,88c71,109
< switch (filebehave) {
< case FILE_STDIO:
< return (getc_unlocked(f->f));
< case FILE_GZIP:
< return (gzgetc(f->gzf));
< case FILE_BZIP:
< BZ2_bzRead(&bzerr, f->bzf, &c, 1);
< if (bzerr == BZ_STREAM_END)
< return (-1);
< else if (bzerr != BZ_SEQUENCE_ERROR && bzerr != BZ_OK)
< errx(2, "%s", getstr(2));
< return (c);
< }
< return (-1);
---
> bufpos = buffer;
> bufrem = 0;
>
> if (filebehave == FILE_GZIP)
> nr = gzread(gzbufdesc, buffer, MAXBUFSIZ);
> else if (filebehave == FILE_BZIP && bzbufdesc != NULL) {
> nr = BZ2_bzRead(&bzerr, bzbufdesc, buffer, MAXBUFSIZ);
> switch (bzerr) {
> case BZ_OK:
> case BZ_STREAM_END:
> /* No problem, nr will be okay */
> break;
> case BZ_DATA_ERROR_MAGIC:
> /*
> * As opposed to gzread(), which simply returns the
> * plain file data, if it is not in the correct
> * compressed format, BZ2_bzRead() instead aborts.
> *
> * So, just restart at the beginning of the file again,
> * and use plain reads from now on.
> */
> BZ2_bzReadClose(&bzerr, bzbufdesc);
> bzbufdesc = NULL;
> if (lseek(f->fd, 0, SEEK_SET) == -1)
> return (-1);
> nr = read(f->fd, buffer, MAXBUFSIZ);
> break;
> default:
> /* Make sure we exit with an error */
> nr = -1;
> }
> } else
> nr = read(f->fd, buffer, MAXBUFSIZ);
>
> if (nr < 0)
> return (-1);
>
> bufrem = nr;
> return (0);
91,94d111
< /*
< * Returns true if the file position is a EOF, returns false
< * otherwise.
< */
96c113
< grep_feof(struct file *f)
---
> grep_lnbufgrow(size_t newlen)
99,105c116,118
< switch (filebehave) {
< case FILE_STDIO:
< return (feof_unlocked(f->f));
< case FILE_GZIP:
< return (gzeof(f->gzf));
< case FILE_BZIP:
< return (bzerr == BZ_STREAM_END);
---
> if (lnbuflen < newlen) {
> lnbuf = grep_realloc(lnbuf, newlen);
> lnbuflen = newlen;
107c120,121
< return (1);
---
>
> return (0);
110,117d123
< /*
< * At the first call, fills in an internal buffer and checks if the given
< * file is a binary file and sets the binary flag accordingly. Then returns
< * a single line and sets len to the length of the returned line.
< * At any other call returns a single line either from the internal buffer
< * or from the file if the buffer is exhausted and sets len to the length
< * of the line.
< */
119c125
< grep_fgetln(struct file *f, size_t *len)
---
> grep_fgetln(struct file *f, size_t *lenp)
121,123c127,131
< struct stat st;
< size_t bufsiz, i = 0;
< int ch = 0;
---
> unsigned char *p;
> char *ret;
> size_t len;
> size_t off;
> ptrdiff_t diff;
125,126c133,135
< /* Fill in the buffer if it is empty. */
< if (binbufptr == NULL) {
---
> /* Fill the buffer, if necessary */
> if (bufrem == 0 && grep_refill(f) != 0)
> goto error;
128,136c137,141
< /* Only pre-read to the buffer if we need the binary check. */
< if (binbehave != BINFILE_TEXT) {
< if (f->stdin)
< st.st_size = MAXBUFSIZ;
< else if (stat(fname, &st) != 0)
< err(2, NULL);
< /* no need to allocate buffer. */
< if (st.st_size == 0)
< return (NULL);
---
> if (bufrem == 0) {
> /* Return zero length to indicate EOF */
> *lenp = 0;
> return (bufpos);
> }
138,156c143,151
< bufsiz = (MAXBUFSIZ > (st.st_size * PREREAD_M)) ?
< (st.st_size / 2) : MAXBUFSIZ;
<
< binbuf = grep_malloc(sizeof(char) * bufsiz);
<
< while (i < bufsiz) {
< ch = grep_fgetc(f);
< if (ch == EOF)
< break;
< binbuf[i++] = ch;
< if ((ch == '\n') && lbflag)
< break;
< }
<
< f->binary = memchr(binbuf, (filebehave != FILE_GZIP) ?
< '\0' : '\200', i - 1) != NULL;
< }
< binbufsiz = i;
< binbufptr = binbuf;
---
> /* Look for a newline in the remaining part of the buffer */
> if ((p = memchr(bufpos, '\n', bufrem)) != NULL) {
> ++p; /* advance over newline */
> ret = bufpos;
> len = p - bufpos;
> bufrem -= len;
> bufpos = p;
> *lenp = len;
> return (ret);
159,173c154,164
< /* Read a line whether from the buffer or from the file itself. */
< for (i = 0; !(grep_feof(f) &&
< (binbufptr == &binbuf[binbufsiz])); i++) {
< if (binbufptr == &binbuf[binbufsiz]) {
< ch = grep_fgetc(f);
< } else {
< ch = binbufptr[0];
< binbufptr++;
< }
< if (i >= lnbuflen) {
< lnbuflen *= 2;
< lnbuf = grep_realloc(lnbuf, ++lnbuflen);
< }
< if ((ch == '\n') || (ch == EOF)) {
< lnbuf[i] = '\0';
---
> /* We have to copy the current buffered data to the line buffer */
> for (len = bufrem, off = 0; ; len += bufrem) {
> /* Make sure there is room for more data */
> if (grep_lnbufgrow(len + LNBUFBUMP))
> goto error;
> memcpy(lnbuf + off, bufpos, len - off);
> off = len;
> if (grep_refill(f) != 0)
> goto error;
> if (bufrem == 0)
> /* EOF: return partial line */
175,176c166,177
< } else
< lnbuf[i] = ch;
---
> if ((p = memchr(bufpos, '\n', bufrem)) == NULL)
> continue;
> /* got it: finish up the line (like code above) */
> ++p;
> diff = p - bufpos;
> len += diff;
> if (grep_lnbufgrow(len))
> goto error;
> memcpy(lnbuf + off, bufpos, diff);
> bufrem -= diff;
> bufpos = p;
> break;
178,180c179
< if (grep_feof(f) && (i == 0) && (ch != '\n'))
< return (NULL);
< *len = i;
---
> *lenp = len;
181a181,184
>
> error:
> *lenp = 0;
> return (NULL);
184,188c187,188
< /*
< * Opens the standard input for processing.
< */
< struct file *
< grep_stdin_open(void)
---
> static inline struct file *
> grep_file_init(struct file *f)
190d189
< struct file *f;
192,193c191,193
< /* Processing stdin implies --line-buffered for tail -f to work. */
< lbflag = true;
---
> if (filebehave == FILE_GZIP &&
> (gzbufdesc = gzdopen(f->fd, "r")) == NULL)
> goto error;
195c195,197
< snprintf(fname, sizeof fname, "%s", getstr(1));
---
> if (filebehave == FILE_BZIP &&
> (bzbufdesc = BZ2_bzdopen(f->fd, "r")) == NULL)
> goto error;
197c199,201
< f = grep_malloc(sizeof *f);
---
> /* Fill read buffer, also catches errors early */
> if (grep_refill(f) != 0)
> goto error;
199,204c203,205
< binbuf = NULL;
< if ((f->f = fdopen(STDIN_FILENO, "r")) != NULL) {
< flockfile(f->f);
< f->stdin = true;
< return (f);
< }
---
> /* Check for binary stuff, if necessary */
> if (binbehave != BINFILE_TEXT && memchr(bufpos, '\0', bufrem) != NULL)
> f->binary = true;
205a207,209
> return (f);
> error:
> close(f->fd);
211c215
< * Opens a normal, a gzipped or a bzip2 compressed file for processing.
---
> * Opens a file for processing.
218,219d221
< snprintf(fname, sizeof fname, "%s", path);
<
221,238c223,230
<
< binbuf = NULL;
< f->stdin = false;
< switch (filebehave) {
< case FILE_STDIO:
< if ((f->f = fopen(path, "r")) != NULL) {
< flockfile(f->f);
< return (f);
< }
< break;
< case FILE_GZIP:
< if ((f->gzf = gzopen(fname, "r")) != NULL)
< return (f);
< break;
< case FILE_BZIP:
< if ((f->bzf = BZ2_bzopen(fname, "r")) != NULL)
< return (f);
< break;
---
> memset(f, 0, sizeof *f);
> if (path == NULL) {
> /* Processing stdin implies --line-buffered. */
> lbflag = true;
> f->fd = STDIN_FILENO;
> } else if ((f->fd = open(path, O_RDONLY)) == -1) {
> free(f);
> return (NULL);
241,242c233
< free(f);
< return (NULL);
---
> return (grep_file_init(f));
246c237
< * Closes a normal, a gzipped or a bzip2 compressed file.
---
> * Closes a file.
252,263c243
< switch (filebehave) {
< case FILE_STDIO:
< funlockfile(f->f);
< fclose(f->f);
< break;
< case FILE_GZIP:
< gzclose(f->gzf);
< break;
< case FILE_BZIP:
< BZ2_bzclose(f->bzf);
< break;
< }
---
> close(f->fd);
265,267c245,251
< /* Reset read buffer for the file we are closing */
< binbufptr = NULL;
< free(binbuf);
---
> /* Reset read buffer and line buffer */
> bufpos = buffer;
> bufrem = 0;
>
> free(lnbuf);
> lnbuf = NULL;
> lnbuflen = 0;