1/*-
2 * Copyright (c) 2008 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer
10 *    in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "lafe_platform.h"
28__FBSDID("$FreeBSD$");
29
30#include <errno.h>
31#include <stdio.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include "err.h"
36#include "line_reader.h"
37
38#if defined(_WIN32) && !defined(__CYGWIN__) && !defined(__BORLANDC__)
39#define strdup _strdup
40#endif
41
/*
 * Read lines from a file and hand them back to the caller one at a
 * time.  If nullSeparator is set, lines are terminated with zero
 * bytes; otherwise, they're terminated with newlines or carriage
 * returns.
 *
 * This uses a self-sizing buffer to handle arbitrarily-long lines.
 */
/*
 * State for one line-oriented input stream.
 *
 * The four buffer pointers all refer into the single allocation that
 * starts at `buff`.
 */
struct lafe_line_reader {
	/* Input stream; set to NULL once end-of-file has been seen. */
	FILE *f;
	/* Start of the allocated line buffer. */
	char *buff;
	/* One past the last byte of data currently held in the buffer. */
	char *buff_end;
	/* First byte of the line currently being assembled. */
	char *line_start;
	/* Scan position while searching for the end of the line. */
	char *line_end;
	/* Not referenced by the code visible in this file. */
	char *p;
	/* Private copy of the input name, used in error messages. */
	char *pathname;
	/* Allocated size of `buff`, in bytes. */
	size_t buff_length;
	/* Non-zero: lines end at zero bytes rather than at CR/LF. */
	int nullSeparator;
	/* Not referenced by the code visible in this file. */
	int ret;
};
57
58struct lafe_line_reader *
59lafe_line_reader(const char *pathname, int nullSeparator)
60{
61	struct lafe_line_reader *lr;
62
63	lr = calloc(1, sizeof(*lr));
64	if (lr == NULL)
65		lafe_errc(1, ENOMEM, "Can't open %s", pathname);
66
67	lr->nullSeparator = nullSeparator;
68	lr->pathname = strdup(pathname);
69
70	if (strcmp(pathname, "-") == 0)
71		lr->f = stdin;
72	else
73		lr->f = fopen(pathname, "r");
74	if (lr->f == NULL)
75		lafe_errc(1, errno, "Couldn't open %s", pathname);
76	lr->buff_length = 8192;
77	lr->buff = malloc(lr->buff_length);
78	if (lr->buff == NULL)
79		lafe_errc(1, ENOMEM, "Can't read %s", pathname);
80	lr->line_start = lr->line_end = lr->buff_end = lr->buff;
81
82	return (lr);
83}
84
85const char *
86lafe_line_reader_next(struct lafe_line_reader *lr)
87{
88	size_t bytes_wanted, bytes_read, new_buff_size;
89	char *line_start, *p;
90
91	for (;;) {
92		/* If there's a line in the buffer, return it immediately. */
93		while (lr->line_end < lr->buff_end) {
94			if (lr->nullSeparator) {
95				if (*lr->line_end == '\0') {
96					line_start = lr->line_start;
97					lr->line_start = lr->line_end + 1;
98					lr->line_end = lr->line_start;
99					return (line_start);
100				}
101			} else if (*lr->line_end == '\x0a' || *lr->line_end == '\x0d') {
102				*lr->line_end = '\0';
103				line_start = lr->line_start;
104				lr->line_start = lr->line_end + 1;
105				lr->line_end = lr->line_start;
106				if (line_start[0] != '\0')
107					return (line_start);
108			}
109			lr->line_end++;
110		}
111
112		/* If we're at end-of-file, process the final data. */
113		if (lr->f == NULL) {
114			/* If there's more text, return one last line. */
115			if (lr->line_end > lr->line_start) {
116				*lr->line_end = '\0';
117				line_start = lr->line_start;
118				lr->line_start = lr->line_end + 1;
119				lr->line_end = lr->line_start;
120				return (line_start);
121			}
122			/* Otherwise, we're done. */
123			return (NULL);
124		}
125
126		/* Buffer only has part of a line. */
127		if (lr->line_start > lr->buff) {
128			/* Move a leftover fractional line to the beginning. */
129			memmove(lr->buff, lr->line_start,
130			    lr->buff_end - lr->line_start);
131			lr->buff_end -= lr->line_start - lr->buff;
132			lr->line_end -= lr->line_start - lr->buff;
133			lr->line_start = lr->buff;
134		} else {
135			/* Line is too big; enlarge the buffer. */
136			new_buff_size = lr->buff_length * 2;
137			if (new_buff_size <= lr->buff_length)
138				lafe_errc(1, ENOMEM,
139				    "Line too long in %s", lr->pathname);
140			lr->buff_length = new_buff_size;
141			p = realloc(lr->buff, new_buff_size);
142			if (p == NULL)
143				lafe_errc(1, ENOMEM,
144				    "Line too long in %s", lr->pathname);
145			lr->buff_end = p + (lr->buff_end - lr->buff);
146			lr->line_end = p + (lr->line_end - lr->buff);
147			lr->line_start = lr->buff = p;
148		}
149
150		/* Get some more data into the buffer. */
151		bytes_wanted = lr->buff + lr->buff_length - lr->buff_end;
152		bytes_read = fread(lr->buff_end, 1, bytes_wanted, lr->f);
153		lr->buff_end += bytes_read;
154
155		if (ferror(lr->f))
156			lafe_errc(1, errno, "Can't read %s", lr->pathname);
157		if (feof(lr->f)) {
158			if (lr->f != stdin)
159				fclose(lr->f);
160			lr->f = NULL;
161		}
162	}
163}
164
165void
166lafe_line_reader_free(struct lafe_line_reader *lr)
167{
168	free(lr->buff);
169	free(lr->pathname);
170	free(lr);
171}
172