1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27229592Smm__FBSDID("$FreeBSD$");
28228753Smm
29228753Smm#ifdef HAVE_SYS_STAT_H
30228753Smm#include <sys/stat.h>
31228753Smm#endif
32228753Smm#ifdef HAVE_ERRNO_H
33228753Smm#include <errno.h>
34228753Smm#endif
35228753Smm#ifdef HAVE_FCNTL_H
36228753Smm#include <fcntl.h>
37228753Smm#endif
38228753Smm#ifdef HAVE_IO_H
39228753Smm#include <io.h>
40228753Smm#endif
41228753Smm#ifdef HAVE_STDLIB_H
42228753Smm#include <stdlib.h>
43228753Smm#endif
44228753Smm#ifdef HAVE_STRING_H
45228753Smm#include <string.h>
46228753Smm#endif
47228753Smm#ifdef HAVE_UNISTD_H
48228753Smm#include <unistd.h>
49228753Smm#endif
50228753Smm
51228753Smm#include "archive.h"
52228753Smm
/*
 * Platforms without a text/binary distinction do not define O_BINARY;
 * give it a neutral value so it can be OR'ed into open() flags anyway.
 */
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
56228753Smm
/*
 * Per-archive state for the filename-based reader.  One instance is
 * allocated in archive_read_open_filename() with extra room after the
 * struct for the file name, and released by file_close().
 */
struct read_file_data {
	/* Descriptor being read; 0 (stdin) when no name was supplied. */
	int	 fd;
	/* Size in bytes of 'buffer' and of each read() request. */
	size_t	 block_size;
	/* Heap buffer handed back to the caller by file_read(). */
	void	*buffer;
	/* Mode bits for opened file. */
	mode_t	 st_mode;
	/* Non-zero while this file supports skipping via lseek(). */
	char	 can_skip;
	/*
	 * NUL-terminated name, "" for stdin.  Must be last: the
	 * allocation is over-sized so the full name fits here.
	 */
	char	 filename[1];
};
65228753Smm
/*
 * Client callbacks handed to archive_read_open2().  The signature of
 * file_skip() depends on the libarchive API version being built.
 */
static int	file_close(struct archive *a, void *client_data);
static ssize_t	file_read(struct archive *a, void *client_data,
		    const void **buff);
#if ARCHIVE_API_VERSION < 2
static ssize_t	file_skip(struct archive *a, void *client_data,
		    size_t request);
#else
static off_t	file_skip(struct archive *a, void *client_data,
		    off_t request);
#endif
73228753Smm
/*
 * Alternate entry point for archive_read_open_filename(): the
 * arguments are forwarded untouched and its result is returned as-is.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
80228753Smm
81228753Smmint
82228753Smmarchive_read_open_filename(struct archive *a, const char *filename,
83228753Smm    size_t block_size)
84228753Smm{
85228753Smm	struct stat st;
86228753Smm	struct read_file_data *mine;
87228753Smm	void *b;
88228753Smm	int fd;
89228753Smm
90228753Smm	archive_clear_error(a);
91228753Smm	if (filename == NULL || filename[0] == '\0') {
92228753Smm		/* We used to invoke archive_read_open_fd(a,0,block_size)
93228753Smm		 * here, but that doesn't (and shouldn't) handle the
94228753Smm		 * end-of-file flush when reading stdout from a pipe.
95228753Smm		 * Basically, read_open_fd() is intended for folks who
96228753Smm		 * are willing to handle such details themselves.  This
97228753Smm		 * API is intended to be a little smarter for folks who
98228753Smm		 * want easy handling of the common case.
99228753Smm		 */
100228753Smm		filename = ""; /* Normalize NULL to "" */
101228753Smm		fd = 0;
102228753Smm#if defined(__CYGWIN__) || defined(_WIN32)
103228753Smm		setmode(0, O_BINARY);
104228753Smm#endif
105228753Smm	} else {
106228753Smm		fd = open(filename, O_RDONLY | O_BINARY);
107228753Smm		if (fd < 0) {
108228753Smm			archive_set_error(a, errno,
109228753Smm			    "Failed to open '%s'", filename);
110228753Smm			return (ARCHIVE_FATAL);
111228753Smm		}
112228753Smm	}
113228753Smm	if (fstat(fd, &st) != 0) {
114228753Smm		archive_set_error(a, errno, "Can't stat '%s'", filename);
115228753Smm		return (ARCHIVE_FATAL);
116228753Smm	}
117228753Smm
118228753Smm	mine = (struct read_file_data *)calloc(1,
119228753Smm	    sizeof(*mine) + strlen(filename));
120228753Smm	b = malloc(block_size);
121228753Smm	if (mine == NULL || b == NULL) {
122228753Smm		archive_set_error(a, ENOMEM, "No memory");
123228753Smm		free(mine);
124228753Smm		free(b);
125228753Smm		return (ARCHIVE_FATAL);
126228753Smm	}
127228753Smm	strcpy(mine->filename, filename);
128228753Smm	mine->block_size = block_size;
129228753Smm	mine->buffer = b;
130228753Smm	mine->fd = fd;
131228753Smm	/* Remember mode so close can decide whether to flush. */
132228753Smm	mine->st_mode = st.st_mode;
133228753Smm	/* If we're reading a file from disk, ensure that we don't
134228753Smm	   overwrite it with an extracted file. */
135228753Smm	if (S_ISREG(st.st_mode)) {
136228753Smm		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
137228753Smm		/*
138228753Smm		 * Enabling skip here is a performance optimization
139228753Smm		 * for anything that supports lseek().  On FreeBSD
140228753Smm		 * (and probably many other systems), only regular
141228753Smm		 * files and raw disk devices support lseek() (on
142228753Smm		 * other input types, lseek() returns success but
143228753Smm		 * doesn't actually change the file pointer, which
144228753Smm		 * just completely screws up the position-tracking
145228753Smm		 * logic).  In addition, I've yet to find a portable
146228753Smm		 * way to determine if a device is a raw disk device.
147228753Smm		 * So I don't see a way to do much better than to only
148228753Smm		 * enable this optimization for regular files.
149228753Smm		 */
150228753Smm		mine->can_skip = 1;
151228753Smm	}
152228753Smm	return (archive_read_open2(a, mine,
153228753Smm		NULL, file_read, file_skip, file_close));
154228753Smm}
155228753Smm
156228753Smmstatic ssize_t
157228753Smmfile_read(struct archive *a, void *client_data, const void **buff)
158228753Smm{
159228753Smm	struct read_file_data *mine = (struct read_file_data *)client_data;
160228753Smm	ssize_t bytes_read;
161228753Smm
162228753Smm	*buff = mine->buffer;
163228753Smm	for (;;) {
164228753Smm		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
165228753Smm		if (bytes_read < 0) {
166228753Smm			if (errno == EINTR)
167228753Smm				continue;
168228753Smm			else if (mine->filename[0] == '\0')
169228753Smm				archive_set_error(a, errno, "Error reading stdin");
170228753Smm			else
171228753Smm				archive_set_error(a, errno, "Error reading '%s'",
172228753Smm				    mine->filename);
173228753Smm		}
174228753Smm		return (bytes_read);
175228753Smm	}
176228753Smm}
177228753Smm
178228753Smm#if ARCHIVE_API_VERSION < 2
179228753Smmstatic ssize_t
180228753Smmfile_skip(struct archive *a, void *client_data, size_t request)
181228753Smm#else
182228753Smmstatic off_t
183228753Smmfile_skip(struct archive *a, void *client_data, off_t request)
184228753Smm#endif
185228753Smm{
186228753Smm	struct read_file_data *mine = (struct read_file_data *)client_data;
187228753Smm	off_t old_offset, new_offset;
188228753Smm
189228753Smm	if (!mine->can_skip) /* We can't skip, so ... */
190228753Smm		return (0); /* ... skip zero bytes. */
191228753Smm
192228753Smm	/* Reduce request to the next smallest multiple of block_size */
193228753Smm	request = (request / mine->block_size) * mine->block_size;
194228753Smm	if (request == 0)
195228753Smm		return (0);
196228753Smm
197228753Smm	/*
198228753Smm	 * Hurray for lazy evaluation: if the first lseek fails, the second
199228753Smm	 * one will not be executed.
200228753Smm	 */
201228753Smm	if (((old_offset = lseek(mine->fd, 0, SEEK_CUR)) < 0) ||
202228753Smm	    ((new_offset = lseek(mine->fd, request, SEEK_CUR)) < 0))
203228753Smm	{
204228753Smm		/* If skip failed once, it will probably fail again. */
205228753Smm		mine->can_skip = 0;
206228753Smm
207228753Smm		if (errno == ESPIPE)
208228753Smm		{
209228753Smm			/*
210228753Smm			 * Failure to lseek() can be caused by the file
211228753Smm			 * descriptor pointing to a pipe, socket or FIFO.
212228753Smm			 * Return 0 here, so the compression layer will use
213228753Smm			 * read()s instead to advance the file descriptor.
214228753Smm			 * It's slower of course, but works as well.
215228753Smm			 */
216228753Smm			return (0);
217228753Smm		}
218228753Smm		/*
219228753Smm		 * There's been an error other than ESPIPE. This is most
220228753Smm		 * likely caused by a programmer error (too large request)
221228753Smm		 * or a corrupted archive file.
222228753Smm		 */
223228753Smm		if (mine->filename[0] == '\0')
224228753Smm			/*
225228753Smm			 * Should never get here, since lseek() on stdin ought
226228753Smm			 * to return an ESPIPE error.
227228753Smm			 */
228228753Smm			archive_set_error(a, errno, "Error seeking in stdin");
229228753Smm		else
230228753Smm			archive_set_error(a, errno, "Error seeking in '%s'",
231228753Smm			    mine->filename);
232228753Smm		return (-1);
233228753Smm	}
234228753Smm	return (new_offset - old_offset);
235228753Smm}
236228753Smm
237228753Smmstatic int
238228753Smmfile_close(struct archive *a, void *client_data)
239228753Smm{
240228753Smm	struct read_file_data *mine = (struct read_file_data *)client_data;
241228753Smm
242228753Smm	(void)a; /* UNUSED */
243228753Smm
244228753Smm	/* Only flush and close if open succeeded. */
245228753Smm	if (mine->fd >= 0) {
246228753Smm		/*
247228753Smm		 * Sometimes, we should flush the input before closing.
248228753Smm		 *   Regular files: faster to just close without flush.
249228753Smm		 *   Devices: must not flush (user might need to
250228753Smm		 *      read the "next" item on a non-rewind device).
251228753Smm		 *   Pipes and sockets:  must flush (otherwise, the
252228753Smm		 *      program feeding the pipe or socket may complain).
253228753Smm		 * Here, I flush everything except for regular files and
254228753Smm		 * device nodes.
255228753Smm		 */
256228753Smm		if (!S_ISREG(mine->st_mode)
257228753Smm		    && !S_ISCHR(mine->st_mode)
258228753Smm		    && !S_ISBLK(mine->st_mode)) {
259228753Smm			ssize_t bytesRead;
260228753Smm			do {
261228753Smm				bytesRead = read(mine->fd, mine->buffer,
262228753Smm				    mine->block_size);
263228753Smm			} while (bytesRead > 0);
264228753Smm		}
265228753Smm		/* If a named file was opened, then it needs to be closed. */
266228753Smm		if (mine->filename[0] != '\0')
267228753Smm			close(mine->fd);
268228753Smm	}
269228753Smm	free(mine->buffer);
270228753Smm	free(mine);
271228753Smm	return (ARCHIVE_OK);
272228753Smm}
273