archive_read_open_filename.c revision 358926
1/*-
2 * Copyright (c) 2003-2010 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/archive_read_open_filename.c 358926 2020-03-13 01:05:55Z mm $");
28
29#ifdef HAVE_SYS_IOCTL_H
30#include <sys/ioctl.h>
31#endif
32#ifdef HAVE_SYS_STAT_H
33#include <sys/stat.h>
34#endif
35#ifdef HAVE_ERRNO_H
36#include <errno.h>
37#endif
38#ifdef HAVE_FCNTL_H
39#include <fcntl.h>
40#endif
41#ifdef HAVE_IO_H
42#include <io.h>
43#endif
44#ifdef HAVE_STDLIB_H
45#include <stdlib.h>
46#endif
47#ifdef HAVE_STRING_H
48#include <string.h>
49#endif
50#ifdef HAVE_UNISTD_H
51#include <unistd.h>
52#endif
53#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
54#include <sys/disk.h>
55#elif defined(__NetBSD__) || defined(__OpenBSD__)
56#include <sys/disklabel.h>
57#include <sys/dkio.h>
58#elif defined(__DragonFly__)
59#include <sys/diskslice.h>
60#endif
61
62#include "archive.h"
63#include "archive_private.h"
64#include "archive_string.h"
65
/*
 * Fallbacks for open() flags that some platforms do not define.
 * Defining them as 0 lets the code below OR them into the flag set
 * unconditionally; on such platforms they simply have no effect.
 */
#ifndef O_BINARY
#define O_BINARY 0
#endif
#ifndef O_CLOEXEC
#define O_CLOEXEC	0
#endif
72
/*
 * Per-file client data handed to the read callbacks.
 *
 * One of these is allocated for every filename registered with the
 * archive.  The structure is over-allocated so that the filename can
 * be stored inline after the fixed fields; that is why the filename
 * union has to be the final member.
 */
struct read_file_data {
	/* Open descriptor, or -1 while the file is not open. */
	int	 fd;
	/* Size in bytes of the read buffer (may be adjusted in file_open). */
	size_t	 block_size;
	/* Read buffer of block_size bytes; allocated in file_open. */
	void	*buffer;
	mode_t	 st_mode;  /* Mode bits for opened file. */
	/* Non-zero when skip requests may be served with lseek(). */
	char	 use_lseek;
	/* Which member of 'filename' (if any) is meaningful. */
	enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type;
	union {
		char	 m[1];/* MBS filename. */
		wchar_t	 w[1];/* WCS filename. */
	} filename; /* Must be last! */
};
85
/*
 * Callbacks registered with the archive by the open functions below,
 * plus the helpers they share (file_close2, file_skip_lseek).
 */
static int	file_open(struct archive *, void *);
static int	file_close(struct archive *, void *);
static int file_close2(struct archive *, void *);
static int file_switch(struct archive *, void *, void *);
static ssize_t	file_read(struct archive *, void *, const void **buff);
static int64_t	file_seek(struct archive *, void *, int64_t request, int);
static int64_t	file_skip(struct archive *, void *, int64_t request);
static int64_t	file_skip_lseek(struct archive *, void *, int64_t request);
94
/*
 * Alias for archive_read_open_filename(); the arguments are passed
 * through untouched and its result is returned as-is.
 */
int
archive_read_open_file(struct archive *a, const char *filename,
    size_t block_size)
{
	int status;

	status = archive_read_open_filename(a, filename, block_size);
	return (status);
}
101
/*
 * Open a single file for reading by wrapping it in a NULL-terminated
 * one-element list and delegating to archive_read_open_filenames().
 */
int
archive_read_open_filename(struct archive *a, const char *filename,
    size_t block_size)
{
	const char *single[2];

	single[1] = NULL;
	single[0] = filename;
	return (archive_read_open_filenames(a, single, block_size));
}
111
112int
113archive_read_open_filenames(struct archive *a, const char **filenames,
114    size_t block_size)
115{
116	struct read_file_data *mine;
117	const char *filename = NULL;
118	if (filenames)
119		filename = *(filenames++);
120
121	archive_clear_error(a);
122	do
123	{
124		if (filename == NULL)
125			filename = "";
126		mine = (struct read_file_data *)calloc(1,
127			sizeof(*mine) + strlen(filename));
128		if (mine == NULL)
129			goto no_memory;
130		strcpy(mine->filename.m, filename);
131		mine->block_size = block_size;
132		mine->fd = -1;
133		mine->buffer = NULL;
134		mine->st_mode = mine->use_lseek = 0;
135		if (filename == NULL || filename[0] == '\0') {
136			mine->filename_type = FNT_STDIN;
137		} else
138			mine->filename_type = FNT_MBS;
139		if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
140			return (ARCHIVE_FATAL);
141		if (filenames == NULL)
142			break;
143		filename = *(filenames++);
144	} while (filename != NULL && filename[0] != '\0');
145	archive_read_set_open_callback(a, file_open);
146	archive_read_set_read_callback(a, file_read);
147	archive_read_set_skip_callback(a, file_skip);
148	archive_read_set_close_callback(a, file_close);
149	archive_read_set_switch_callback(a, file_switch);
150	archive_read_set_seek_callback(a, file_seek);
151
152	return (archive_read_open1(a));
153no_memory:
154	archive_set_error(a, ENOMEM, "No memory");
155	return (ARCHIVE_FATAL);
156}
157
158int
159archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename,
160    size_t block_size)
161{
162	struct read_file_data *mine = (struct read_file_data *)calloc(1,
163		sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t));
164	if (!mine)
165	{
166		archive_set_error(a, ENOMEM, "No memory");
167		return (ARCHIVE_FATAL);
168	}
169	mine->fd = -1;
170	mine->block_size = block_size;
171
172	if (wfilename == NULL || wfilename[0] == L'\0') {
173		mine->filename_type = FNT_STDIN;
174	} else {
175#if defined(_WIN32) && !defined(__CYGWIN__)
176		mine->filename_type = FNT_WCS;
177		wcscpy(mine->filename.w, wfilename);
178#else
179		/*
180		 * POSIX system does not support a wchar_t interface for
181		 * open() system call, so we have to translate a wchar_t
182		 * filename to multi-byte one and use it.
183		 */
184		struct archive_string fn;
185
186		archive_string_init(&fn);
187		if (archive_string_append_from_wcs(&fn, wfilename,
188		    wcslen(wfilename)) != 0) {
189			if (errno == ENOMEM)
190				archive_set_error(a, errno,
191				    "Can't allocate memory");
192			else
193				archive_set_error(a, EINVAL,
194				    "Failed to convert a wide-character"
195				    " filename to a multi-byte filename");
196			archive_string_free(&fn);
197			free(mine);
198			return (ARCHIVE_FATAL);
199		}
200		mine->filename_type = FNT_MBS;
201		strcpy(mine->filename.m, fn.s);
202		archive_string_free(&fn);
203#endif
204	}
205	if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK))
206		return (ARCHIVE_FATAL);
207	archive_read_set_open_callback(a, file_open);
208	archive_read_set_read_callback(a, file_read);
209	archive_read_set_skip_callback(a, file_skip);
210	archive_read_set_close_callback(a, file_close);
211	archive_read_set_switch_callback(a, file_switch);
212	archive_read_set_seek_callback(a, file_seek);
213
214	return (archive_read_open1(a));
215}
216
217static int
218file_open(struct archive *a, void *client_data)
219{
220	struct stat st;
221	struct read_file_data *mine = (struct read_file_data *)client_data;
222	void *buffer;
223	const char *filename = NULL;
224#if defined(_WIN32) && !defined(__CYGWIN__)
225	const wchar_t *wfilename = NULL;
226#endif
227	int fd = -1;
228	int is_disk_like = 0;
229#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
230	off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */
231#elif defined(__NetBSD__) || defined(__OpenBSD__)
232	struct disklabel dl;
233#elif defined(__DragonFly__)
234	struct partinfo pi;
235#endif
236
237	archive_clear_error(a);
238	if (mine->filename_type == FNT_STDIN) {
239		/* We used to delegate stdin support by
240		 * directly calling archive_read_open_fd(a,0,block_size)
241		 * here, but that doesn't (and shouldn't) handle the
242		 * end-of-file flush when reading stdout from a pipe.
243		 * Basically, read_open_fd() is intended for folks who
244		 * are willing to handle such details themselves.  This
245		 * API is intended to be a little smarter for folks who
246		 * want easy handling of the common case.
247		 */
248		fd = 0;
249#if defined(__CYGWIN__) || defined(_WIN32)
250		setmode(0, O_BINARY);
251#endif
252		filename = "";
253	} else if (mine->filename_type == FNT_MBS) {
254		filename = mine->filename.m;
255		fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC);
256		__archive_ensure_cloexec_flag(fd);
257		if (fd < 0) {
258			archive_set_error(a, errno,
259			    "Failed to open '%s'", filename);
260			return (ARCHIVE_FATAL);
261		}
262	} else {
263#if defined(_WIN32) && !defined(__CYGWIN__)
264		wfilename = mine->filename.w;
265		fd = _wopen(wfilename, O_RDONLY | O_BINARY);
266		if (fd < 0 && errno == ENOENT) {
267			wchar_t *fullpath;
268			fullpath = __la_win_permissive_name_w(wfilename);
269			if (fullpath != NULL) {
270				fd = _wopen(fullpath, O_RDONLY | O_BINARY);
271				free(fullpath);
272			}
273		}
274		if (fd < 0) {
275			archive_set_error(a, errno,
276			    "Failed to open '%S'", wfilename);
277			return (ARCHIVE_FATAL);
278		}
279#else
280		archive_set_error(a, ARCHIVE_ERRNO_MISC,
281		    "Unexpedted operation in archive_read_open_filename");
282		goto fail;
283#endif
284	}
285	if (fstat(fd, &st) != 0) {
286#if defined(_WIN32) && !defined(__CYGWIN__)
287		if (mine->filename_type == FNT_WCS)
288			archive_set_error(a, errno, "Can't stat '%S'",
289			    wfilename);
290		else
291#endif
292			archive_set_error(a, errno, "Can't stat '%s'",
293			    filename);
294		goto fail;
295	}
296
297	/*
298	 * Determine whether the input looks like a disk device or a
299	 * tape device.  The results are used below to select an I/O
300	 * strategy:
301	 *  = "disk-like" devices support arbitrary lseek() and will
302	 *    support I/O requests of any size.  So we get easy skipping
303	 *    and can cheat on block sizes to get better performance.
304	 *  = "tape-like" devices require strict blocking and use
305	 *    specialized ioctls for seeking.
306	 *  = "socket-like" devices cannot seek at all but can improve
307	 *    performance by using nonblocking I/O to read "whatever is
308	 *    available right now".
309	 *
310	 * Right now, we only specially recognize disk-like devices,
311	 * but it should be straightforward to add probes and strategy
312	 * here for tape-like and socket-like devices.
313	 */
314	if (S_ISREG(st.st_mode)) {
315		/* Safety:  Tell the extractor not to overwrite the input. */
316		archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino);
317		/* Regular files act like disks. */
318		is_disk_like = 1;
319	}
320#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
321	/* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */
322	else if (S_ISCHR(st.st_mode) &&
323	    ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 &&
324	    mediasize > 0) {
325		is_disk_like = 1;
326	}
327#elif defined(__NetBSD__) || defined(__OpenBSD__)
328	/* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */
329	else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) &&
330	    ioctl(fd, DIOCGDINFO, &dl) == 0 &&
331	    dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) {
332		is_disk_like = 1;
333	}
334#elif defined(__DragonFly__)
335	/* DragonFly BSD:  if it supports DIOCGPART ioctl, it's disk-like. */
336	else if (S_ISCHR(st.st_mode) &&
337	    ioctl(fd, DIOCGPART, &pi) == 0 &&
338	    pi.media_size > 0) {
339		is_disk_like = 1;
340	}
341#elif defined(__linux__)
342	/* Linux:  All block devices are disk-like. */
343	else if (S_ISBLK(st.st_mode) &&
344	    lseek(fd, 0, SEEK_CUR) == 0 &&
345	    lseek(fd, 0, SEEK_SET) == 0 &&
346	    lseek(fd, 0, SEEK_END) > 0 &&
347	    lseek(fd, 0, SEEK_SET) == 0) {
348		is_disk_like = 1;
349	}
350#endif
351	/* TODO: Add an "is_tape_like" variable and appropriate tests. */
352
353	/* Disk-like devices prefer power-of-two block sizes.  */
354	/* Use provided block_size as a guide so users have some control. */
355	if (is_disk_like) {
356		size_t new_block_size = 64 * 1024;
357		while (new_block_size < mine->block_size
358		    && new_block_size < 64 * 1024 * 1024)
359			new_block_size *= 2;
360		mine->block_size = new_block_size;
361	}
362	buffer = malloc(mine->block_size);
363	if (buffer == NULL) {
364		archive_set_error(a, ENOMEM, "No memory");
365		goto fail;
366	}
367	mine->buffer = buffer;
368	mine->fd = fd;
369	/* Remember mode so close can decide whether to flush. */
370	mine->st_mode = st.st_mode;
371
372	/* Disk-like inputs can use lseek(). */
373	if (is_disk_like)
374		mine->use_lseek = 1;
375
376	return (ARCHIVE_OK);
377fail:
378	/*
379	 * Don't close file descriptors not opened or ones pointing referring
380	 * to `FNT_STDIN`.
381	 */
382	if (fd != -1 && fd != 0)
383		close(fd);
384	return (ARCHIVE_FATAL);
385}
386
387static ssize_t
388file_read(struct archive *a, void *client_data, const void **buff)
389{
390	struct read_file_data *mine = (struct read_file_data *)client_data;
391	ssize_t bytes_read;
392
393	/* TODO: If a recent lseek() operation has left us
394	 * mis-aligned, read and return a short block to try to get
395	 * us back in alignment. */
396
397	/* TODO: Someday, try mmap() here; if that succeeds, give
398	 * the entire file to libarchive as a single block.  That
399	 * could be a lot faster than block-by-block manual I/O. */
400
401	/* TODO: We might be able to improve performance on pipes and
402	 * sockets by setting non-blocking I/O and just accepting
403	 * whatever we get here instead of waiting for a full block
404	 * worth of data. */
405
406	*buff = mine->buffer;
407	for (;;) {
408		bytes_read = read(mine->fd, mine->buffer, mine->block_size);
409		if (bytes_read < 0) {
410			if (errno == EINTR)
411				continue;
412			else if (mine->filename_type == FNT_STDIN)
413				archive_set_error(a, errno,
414				    "Error reading stdin");
415			else if (mine->filename_type == FNT_MBS)
416				archive_set_error(a, errno,
417				    "Error reading '%s'", mine->filename.m);
418			else
419				archive_set_error(a, errno,
420				    "Error reading '%S'", mine->filename.w);
421		}
422		return (bytes_read);
423	}
424}
425
426/*
427 * Regular files and disk-like block devices can use simple lseek
428 * without needing to round the request to the block size.
429 *
430 * TODO: This can leave future reads mis-aligned.  Since we know the
431 * offset here, we should store it and use it in file_read() above
432 * to determine whether we should perform a short read to get back
433 * into alignment.  Long series of mis-aligned reads can negatively
434 * impact disk throughput.  (Of course, the performance impact should
435 * be carefully tested; extra code complexity is only worthwhile if
436 * it does provide measurable improvement.)
437 *
438 * TODO: Be lazy about the actual seek.  There are a few pathological
439 * cases where libarchive makes a bunch of seek requests in a row
440 * without any intervening reads.  This isn't a huge performance
441 * problem, since the kernel handles seeks lazily already, but
442 * it would be very slightly faster if we simply remembered the
443 * seek request here and then actually performed the seek at the
444 * top of the read callback above.
445 */
446static int64_t
447file_skip_lseek(struct archive *a, void *client_data, int64_t request)
448{
449	struct read_file_data *mine = (struct read_file_data *)client_data;
450#if defined(_WIN32) && !defined(__CYGWIN__)
451	/* We use _lseeki64() on Windows. */
452	int64_t old_offset, new_offset;
453#else
454	off_t old_offset, new_offset;
455#endif
456
457	/* We use off_t here because lseek() is declared that way. */
458
459	/* TODO: Deal with case where off_t isn't 64 bits.
460	 * This shouldn't be a problem on Linux or other POSIX
461	 * systems, since the configuration logic for libarchive
462	 * tries to obtain a 64-bit off_t.
463	 */
464	if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 &&
465	    (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0)
466		return (new_offset - old_offset);
467
468	/* If lseek() fails, don't bother trying again. */
469	mine->use_lseek = 0;
470
471	/* Let libarchive recover with read+discard */
472	if (errno == ESPIPE)
473		return (0);
474
475	/* If the input is corrupted or truncated, fail. */
476	if (mine->filename_type == FNT_STDIN)
477		archive_set_error(a, errno, "Error seeking in stdin");
478	else if (mine->filename_type == FNT_MBS)
479		archive_set_error(a, errno, "Error seeking in '%s'",
480		    mine->filename.m);
481	else
482		archive_set_error(a, errno, "Error seeking in '%S'",
483		    mine->filename.w);
484	return (-1);
485}
486
487
488/*
489 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to
490 * accelerate operation on tape drives.
491 */
492
493static int64_t
494file_skip(struct archive *a, void *client_data, int64_t request)
495{
496	struct read_file_data *mine = (struct read_file_data *)client_data;
497
498	/* Delegate skip requests. */
499	if (mine->use_lseek)
500		return (file_skip_lseek(a, client_data, request));
501
502	/* If we can't skip, return 0; libarchive will read+discard instead. */
503	return (0);
504}
505
506/*
507 * TODO: Store the offset and use it in the read callback.
508 */
509static int64_t
510file_seek(struct archive *a, void *client_data, int64_t request, int whence)
511{
512	struct read_file_data *mine = (struct read_file_data *)client_data;
513	int64_t r;
514
515	/* We use off_t here because lseek() is declared that way. */
516	/* See above for notes about when off_t is less than 64 bits. */
517	r = lseek(mine->fd, request, whence);
518	if (r >= 0)
519		return r;
520
521	/* If the input is corrupted or truncated, fail. */
522	if (mine->filename_type == FNT_STDIN)
523		archive_set_error(a, errno, "Error seeking in stdin");
524	else if (mine->filename_type == FNT_MBS)
525		archive_set_error(a, errno, "Error seeking in '%s'",
526		    mine->filename.m);
527	else
528		archive_set_error(a, errno, "Error seeking in '%S'",
529		    mine->filename.w);
530	return (ARCHIVE_FATAL);
531}
532
533static int
534file_close2(struct archive *a, void *client_data)
535{
536	struct read_file_data *mine = (struct read_file_data *)client_data;
537
538	(void)a; /* UNUSED */
539
540	/* Only flush and close if open succeeded. */
541	if (mine->fd >= 0) {
542		/*
543		 * Sometimes, we should flush the input before closing.
544		 *   Regular files: faster to just close without flush.
545		 *   Disk-like devices:  Ditto.
546		 *   Tapes: must not flush (user might need to
547		 *      read the "next" item on a non-rewind device).
548		 *   Pipes and sockets:  must flush (otherwise, the
549		 *      program feeding the pipe or socket may complain).
550		 * Here, I flush everything except for regular files and
551		 * device nodes.
552		 */
553		if (!S_ISREG(mine->st_mode)
554		    && !S_ISCHR(mine->st_mode)
555		    && !S_ISBLK(mine->st_mode)) {
556			ssize_t bytesRead;
557			do {
558				bytesRead = read(mine->fd, mine->buffer,
559				    mine->block_size);
560			} while (bytesRead > 0);
561		}
562		/* If a named file was opened, then it needs to be closed. */
563		if (mine->filename_type != FNT_STDIN)
564			close(mine->fd);
565	}
566	free(mine->buffer);
567	mine->buffer = NULL;
568	mine->fd = -1;
569	return (ARCHIVE_OK);
570}
571
572static int
573file_close(struct archive *a, void *client_data)
574{
575	struct read_file_data *mine = (struct read_file_data *)client_data;
576	file_close2(a, client_data);
577	free(mine);
578	return (ARCHIVE_OK);
579}
580
/*
 * Switch callback: shut down the input described by client_data1
 * (keeping its record allocated) and open the one described by
 * client_data2.  Returns whatever file_open() reports.
 */
static int
file_switch(struct archive *a, void *client_data1, void *client_data2)
{
	int status;

	(void)file_close2(a, client_data1);
	status = file_open(a, client_data2);
	return (status);
}
587