archive_read_open_filename.c revision 228761
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_open_filename.c 201093 2009-12-28 02:28:44Z kientzle $");
28
29#ifdef HAVE_SYS_STAT_H
30#include <sys/stat.h>
31#endif
32#ifdef HAVE_ERRNO_H
33#include <errno.h>
34#endif
35#ifdef HAVE_FCNTL_H
36#include <fcntl.h>
37#endif
38#ifdef HAVE_IO_H
39#include <io.h>
40#endif
41#ifdef HAVE_STDLIB_H
42#include <stdlib.h>
43#endif
44#ifdef HAVE_STRING_H
45#include <string.h>
46#endif
47#ifdef HAVE_UNISTD_H
48#include <unistd.h>
49#endif
50
51#include "archive.h"
52
53#ifndef O_BINARY
54#define O_BINARY 0
55#endif
56
/*
 * Private state shared by the read, skip and close callbacks below.
 *
 * The structure is over-allocated so that the file name can be stored
 * inline after the fixed members; the one-byte array at the end provides
 * room for the terminating NUL, which is why it has to stay last.
 */
struct read_file_data {
	/* Descriptor the callbacks read from. */
	int	 fd;
	/* Size of 'buffer' and of every read() request. */
	size_t	 block_size;
	/* Scratch buffer handed back to the caller by file_read(). */
	void	*buffer;
	/* Mode bits for opened file. */
	mode_t	 st_mode;
	/* Non-zero while lseek()-based skipping is allowed. */
	char	 can_skip;
	/* Name of the opened file, "" for stdin.  Must be last! */
	char	 filename[1];
};
65
/*
 * Client callbacks handed to archive_read_open2() by
 * archive_read_open_filename().  The skip callback's signature depends
 * on the API version the library is built for.
 */
static ssize_t	file_read(struct archive *a, void *client_data,
		    const void **buff);
#if ARCHIVE_API_VERSION >= 2
static off_t	file_skip(struct archive *a, void *client_data, off_t request);
#else
static ssize_t	file_skip(struct archive *a, void *client_data, size_t request);
#endif
static int	file_close(struct archive *a, void *client_data);
73
/*
 * Alias for archive_read_open_filename(): forwards all three arguments
 * unchanged and returns whatever that function returns.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
80
81int
82archive_read_open_filename(struct archive *a, const char *filename,
83    size_t block_size)
84{
85	struct stat st;
86	struct read_file_data *mine;
87	void *b;
88	int fd;
89
90	archive_clear_error(a);
91	if (filename == NULL || filename[0] == '\0') {
92		/* We used to invoke archive_read_open_fd(a,0,block_size)
93		 * here, but that doesn't (and shouldn't) handle the
94		 * end-of-file flush when reading stdout from a pipe.
95		 * Basically, read_open_fd() is intended for folks who
96		 * are willing to handle such details themselves.  This
97		 * API is intended to be a little smarter for folks who
98		 * want easy handling of the common case.
99		 */
100		filename = ""; /* Normalize NULL to "" */
101		fd = 0;
102#if defined(__CYGWIN__) || defined(_WIN32)
103		setmode(0, O_BINARY);
104#endif
105	} else {
106		fd = open(filename, O_RDONLY | O_BINARY);
107		if (fd < 0) {
108			archive_set_error(a, errno,
109			    "Failed to open '%s'", filename);
110			return (ARCHIVE_FATAL);
111		}
112	}
113	if (fstat(fd, &st) != 0) {
114		archive_set_error(a, errno, "Can't stat '%s'", filename);
115		return (ARCHIVE_FATAL);
116	}
117
118	mine = (struct read_file_data *)calloc(1,
119	    sizeof(*mine) + strlen(filename));
120	b = malloc(block_size);
121	if (mine == NULL || b == NULL) {
122		archive_set_error(a, ENOMEM, "No memory");
123		free(mine);
124		free(b);
125		return (ARCHIVE_FATAL);
126	}
127	strcpy(mine->filename, filename);
128	mine->block_size = block_size;
129	mine->buffer = b;
130	mine->fd = fd;
131	/* Remember mode so close can decide whether to flush. */
132	mine->st_mode = st.st_mode;
133	/* If we're reading a file from disk, ensure that we don't
134	   overwrite it with an extracted file. */
135	if (S_ISREG(st.st_mode)) {
136		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
137		/*
138		 * Enabling skip here is a performance optimization
139		 * for anything that supports lseek().  On FreeBSD
140		 * (and probably many other systems), only regular
141		 * files and raw disk devices support lseek() (on
142		 * other input types, lseek() returns success but
143		 * doesn't actually change the file pointer, which
144		 * just completely screws up the position-tracking
145		 * logic).  In addition, I've yet to find a portable
146		 * way to determine if a device is a raw disk device.
147		 * So I don't see a way to do much better than to only
148		 * enable this optimization for regular files.
149		 */
150		mine->can_skip = 1;
151	}
152	return (archive_read_open2(a, mine,
153		NULL, file_read, file_skip, file_close));
154}
155
156static ssize_t
157file_read(struct archive *a, void *client_data, const void **buff)
158{
159	struct read_file_data *mine = (struct read_file_data *)client_data;
160	ssize_t bytes_read;
161
162	*buff = mine->buffer;
163	for (;;) {
164		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
165		if (bytes_read < 0) {
166			if (errno == EINTR)
167				continue;
168			else if (mine->filename[0] == '\0')
169				archive_set_error(a, errno, "Error reading stdin");
170			else
171				archive_set_error(a, errno, "Error reading '%s'",
172				    mine->filename);
173		}
174		return (bytes_read);
175	}
176}
177
178#if ARCHIVE_API_VERSION < 2
179static ssize_t
180file_skip(struct archive *a, void *client_data, size_t request)
181#else
182static off_t
183file_skip(struct archive *a, void *client_data, off_t request)
184#endif
185{
186	struct read_file_data *mine = (struct read_file_data *)client_data;
187	off_t old_offset, new_offset;
188
189	if (!mine->can_skip) /* We can't skip, so ... */
190		return (0); /* ... skip zero bytes. */
191
192	/* Reduce request to the next smallest multiple of block_size */
193	request = (request / mine->block_size) * mine->block_size;
194	if (request == 0)
195		return (0);
196
197	/*
198	 * Hurray for lazy evaluation: if the first lseek fails, the second
199	 * one will not be executed.
200	 */
201	if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
202	    ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
203	{
204		/* If skip failed once, it will probably fail again. */
205		mine->can_skip = 0;
206
207		if (errno == ESPIPE)
208		{
209			/*
210			 * Failure to lseek() can be caused by the file
211			 * descriptor pointing to a pipe, socket or FIFO.
212			 * Return 0 here, so the compression layer will use
213			 * read()s instead to advance the file descriptor.
214			 * It's slower of course, but works as well.
215			 */
216			return (0);
217		}
218		/*
219		 * There's been an error other than ESPIPE. This is most
220		 * likely caused by a programmer error (too large request)
221		 * or a corrupted archive file.
222		 */
223		if (mine->filename[0] == '\0')
224			/*
225			 * Should never get here, since lseek() on stdin ought
226			 * to return an ESPIPE error.
227			 */
228			archive_set_error(a, errno, "Error seeking in stdin");
229		else
230			archive_set_error(a, errno, "Error seeking in '%s'",
231			    mine->filename);
232		return (-1);
233	}
234	return (new_offset - old_offset);
235}
236
237static int
238file_close(struct archive *a, void *client_data)
239{
240	struct read_file_data *mine = (struct read_file_data *)client_data;
241
242	(void)a; /* UNUSED */
243
244	/* Only flush and close if open succeeded. */
245	if (mine->fd >= 0) {
246		/*
247		 * Sometimes, we should flush the input before closing.
248		 *   Regular files: faster to just close without flush.
249		 *   Devices: must not flush (user might need to
250		 *      read the "next" item on a non-rewind device).
251		 *   Pipes and sockets:  must flush (otherwise, the
252		 *      program feeding the pipe or socket may complain).
253		 * Here, I flush everything except for regular files and
254		 * device nodes.
255		 */
256		if (!S_ISREG(mine->st_mode)
257		    && !S_ISCHR(mine->st_mode)
258		    && !S_ISBLK(mine->st_mode)) {
259			ssize_t bytesRead;
260			do {
261				bytesRead = read(mine->fd, mine->buffer,
262				    mine->block_size);
263			} while (bytesRead > 0);
264		}
265		/* If a named file was opened, then it needs to be closed. */
266		if (mine->filename[0] != '\0')
267			close(mine->fd);
268	}
269	free(mine->buffer);
270	free(mine);
271	return (ARCHIVE_OK);
272}
273