/* archive_read_open_filename.c revision 358926 */
1/*- 2 * Copyright (c) 2003-2010 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24 */ 25 26#include "archive_platform.h" 27__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/archive_read_open_filename.c 358926 2020-03-13 01:05:55Z mm $"); 28 29#ifdef HAVE_SYS_IOCTL_H 30#include <sys/ioctl.h> 31#endif 32#ifdef HAVE_SYS_STAT_H 33#include <sys/stat.h> 34#endif 35#ifdef HAVE_ERRNO_H 36#include <errno.h> 37#endif 38#ifdef HAVE_FCNTL_H 39#include <fcntl.h> 40#endif 41#ifdef HAVE_IO_H 42#include <io.h> 43#endif 44#ifdef HAVE_STDLIB_H 45#include <stdlib.h> 46#endif 47#ifdef HAVE_STRING_H 48#include <string.h> 49#endif 50#ifdef HAVE_UNISTD_H 51#include <unistd.h> 52#endif 53#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 54#include <sys/disk.h> 55#elif defined(__NetBSD__) || defined(__OpenBSD__) 56#include <sys/disklabel.h> 57#include <sys/dkio.h> 58#elif defined(__DragonFly__) 59#include <sys/diskslice.h> 60#endif 61 62#include "archive.h" 63#include "archive_private.h" 64#include "archive_string.h" 65 66#ifndef O_BINARY 67#define O_BINARY 0 68#endif 69#ifndef O_CLOEXEC 70#define O_CLOEXEC 0 71#endif 72 73struct read_file_data { 74 int fd; 75 size_t block_size; 76 void *buffer; 77 mode_t st_mode; /* Mode bits for opened file. */ 78 char use_lseek; 79 enum fnt_e { FNT_STDIN, FNT_MBS, FNT_WCS } filename_type; 80 union { 81 char m[1];/* MBS filename. */ 82 wchar_t w[1];/* WCS filename. */ 83 } filename; /* Must be last! 
*/ 84}; 85 86static int file_open(struct archive *, void *); 87static int file_close(struct archive *, void *); 88static int file_close2(struct archive *, void *); 89static int file_switch(struct archive *, void *, void *); 90static ssize_t file_read(struct archive *, void *, const void **buff); 91static int64_t file_seek(struct archive *, void *, int64_t request, int); 92static int64_t file_skip(struct archive *, void *, int64_t request); 93static int64_t file_skip_lseek(struct archive *, void *, int64_t request); 94 95int 96archive_read_open_file(struct archive *a, const char *filename, 97 size_t block_size) 98{ 99 return (archive_read_open_filename(a, filename, block_size)); 100} 101 102int 103archive_read_open_filename(struct archive *a, const char *filename, 104 size_t block_size) 105{ 106 const char *filenames[2]; 107 filenames[0] = filename; 108 filenames[1] = NULL; 109 return archive_read_open_filenames(a, filenames, block_size); 110} 111 112int 113archive_read_open_filenames(struct archive *a, const char **filenames, 114 size_t block_size) 115{ 116 struct read_file_data *mine; 117 const char *filename = NULL; 118 if (filenames) 119 filename = *(filenames++); 120 121 archive_clear_error(a); 122 do 123 { 124 if (filename == NULL) 125 filename = ""; 126 mine = (struct read_file_data *)calloc(1, 127 sizeof(*mine) + strlen(filename)); 128 if (mine == NULL) 129 goto no_memory; 130 strcpy(mine->filename.m, filename); 131 mine->block_size = block_size; 132 mine->fd = -1; 133 mine->buffer = NULL; 134 mine->st_mode = mine->use_lseek = 0; 135 if (filename == NULL || filename[0] == '\0') { 136 mine->filename_type = FNT_STDIN; 137 } else 138 mine->filename_type = FNT_MBS; 139 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 140 return (ARCHIVE_FATAL); 141 if (filenames == NULL) 142 break; 143 filename = *(filenames++); 144 } while (filename != NULL && filename[0] != '\0'); 145 archive_read_set_open_callback(a, file_open); 146 
archive_read_set_read_callback(a, file_read); 147 archive_read_set_skip_callback(a, file_skip); 148 archive_read_set_close_callback(a, file_close); 149 archive_read_set_switch_callback(a, file_switch); 150 archive_read_set_seek_callback(a, file_seek); 151 152 return (archive_read_open1(a)); 153no_memory: 154 archive_set_error(a, ENOMEM, "No memory"); 155 return (ARCHIVE_FATAL); 156} 157 158int 159archive_read_open_filename_w(struct archive *a, const wchar_t *wfilename, 160 size_t block_size) 161{ 162 struct read_file_data *mine = (struct read_file_data *)calloc(1, 163 sizeof(*mine) + wcslen(wfilename) * sizeof(wchar_t)); 164 if (!mine) 165 { 166 archive_set_error(a, ENOMEM, "No memory"); 167 return (ARCHIVE_FATAL); 168 } 169 mine->fd = -1; 170 mine->block_size = block_size; 171 172 if (wfilename == NULL || wfilename[0] == L'\0') { 173 mine->filename_type = FNT_STDIN; 174 } else { 175#if defined(_WIN32) && !defined(__CYGWIN__) 176 mine->filename_type = FNT_WCS; 177 wcscpy(mine->filename.w, wfilename); 178#else 179 /* 180 * POSIX system does not support a wchar_t interface for 181 * open() system call, so we have to translate a wchar_t 182 * filename to multi-byte one and use it. 
183 */ 184 struct archive_string fn; 185 186 archive_string_init(&fn); 187 if (archive_string_append_from_wcs(&fn, wfilename, 188 wcslen(wfilename)) != 0) { 189 if (errno == ENOMEM) 190 archive_set_error(a, errno, 191 "Can't allocate memory"); 192 else 193 archive_set_error(a, EINVAL, 194 "Failed to convert a wide-character" 195 " filename to a multi-byte filename"); 196 archive_string_free(&fn); 197 free(mine); 198 return (ARCHIVE_FATAL); 199 } 200 mine->filename_type = FNT_MBS; 201 strcpy(mine->filename.m, fn.s); 202 archive_string_free(&fn); 203#endif 204 } 205 if (archive_read_append_callback_data(a, mine) != (ARCHIVE_OK)) 206 return (ARCHIVE_FATAL); 207 archive_read_set_open_callback(a, file_open); 208 archive_read_set_read_callback(a, file_read); 209 archive_read_set_skip_callback(a, file_skip); 210 archive_read_set_close_callback(a, file_close); 211 archive_read_set_switch_callback(a, file_switch); 212 archive_read_set_seek_callback(a, file_seek); 213 214 return (archive_read_open1(a)); 215} 216 217static int 218file_open(struct archive *a, void *client_data) 219{ 220 struct stat st; 221 struct read_file_data *mine = (struct read_file_data *)client_data; 222 void *buffer; 223 const char *filename = NULL; 224#if defined(_WIN32) && !defined(__CYGWIN__) 225 const wchar_t *wfilename = NULL; 226#endif 227 int fd = -1; 228 int is_disk_like = 0; 229#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 230 off_t mediasize = 0; /* FreeBSD-specific, so off_t okay here. */ 231#elif defined(__NetBSD__) || defined(__OpenBSD__) 232 struct disklabel dl; 233#elif defined(__DragonFly__) 234 struct partinfo pi; 235#endif 236 237 archive_clear_error(a); 238 if (mine->filename_type == FNT_STDIN) { 239 /* We used to delegate stdin support by 240 * directly calling archive_read_open_fd(a,0,block_size) 241 * here, but that doesn't (and shouldn't) handle the 242 * end-of-file flush when reading stdout from a pipe. 
243 * Basically, read_open_fd() is intended for folks who 244 * are willing to handle such details themselves. This 245 * API is intended to be a little smarter for folks who 246 * want easy handling of the common case. 247 */ 248 fd = 0; 249#if defined(__CYGWIN__) || defined(_WIN32) 250 setmode(0, O_BINARY); 251#endif 252 filename = ""; 253 } else if (mine->filename_type == FNT_MBS) { 254 filename = mine->filename.m; 255 fd = open(filename, O_RDONLY | O_BINARY | O_CLOEXEC); 256 __archive_ensure_cloexec_flag(fd); 257 if (fd < 0) { 258 archive_set_error(a, errno, 259 "Failed to open '%s'", filename); 260 return (ARCHIVE_FATAL); 261 } 262 } else { 263#if defined(_WIN32) && !defined(__CYGWIN__) 264 wfilename = mine->filename.w; 265 fd = _wopen(wfilename, O_RDONLY | O_BINARY); 266 if (fd < 0 && errno == ENOENT) { 267 wchar_t *fullpath; 268 fullpath = __la_win_permissive_name_w(wfilename); 269 if (fullpath != NULL) { 270 fd = _wopen(fullpath, O_RDONLY | O_BINARY); 271 free(fullpath); 272 } 273 } 274 if (fd < 0) { 275 archive_set_error(a, errno, 276 "Failed to open '%S'", wfilename); 277 return (ARCHIVE_FATAL); 278 } 279#else 280 archive_set_error(a, ARCHIVE_ERRNO_MISC, 281 "Unexpedted operation in archive_read_open_filename"); 282 goto fail; 283#endif 284 } 285 if (fstat(fd, &st) != 0) { 286#if defined(_WIN32) && !defined(__CYGWIN__) 287 if (mine->filename_type == FNT_WCS) 288 archive_set_error(a, errno, "Can't stat '%S'", 289 wfilename); 290 else 291#endif 292 archive_set_error(a, errno, "Can't stat '%s'", 293 filename); 294 goto fail; 295 } 296 297 /* 298 * Determine whether the input looks like a disk device or a 299 * tape device. The results are used below to select an I/O 300 * strategy: 301 * = "disk-like" devices support arbitrary lseek() and will 302 * support I/O requests of any size. So we get easy skipping 303 * and can cheat on block sizes to get better performance. 
304 * = "tape-like" devices require strict blocking and use 305 * specialized ioctls for seeking. 306 * = "socket-like" devices cannot seek at all but can improve 307 * performance by using nonblocking I/O to read "whatever is 308 * available right now". 309 * 310 * Right now, we only specially recognize disk-like devices, 311 * but it should be straightforward to add probes and strategy 312 * here for tape-like and socket-like devices. 313 */ 314 if (S_ISREG(st.st_mode)) { 315 /* Safety: Tell the extractor not to overwrite the input. */ 316 archive_read_extract_set_skip_file(a, st.st_dev, st.st_ino); 317 /* Regular files act like disks. */ 318 is_disk_like = 1; 319 } 320#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) 321 /* FreeBSD: if it supports DIOCGMEDIASIZE ioctl, it's disk-like. */ 322 else if (S_ISCHR(st.st_mode) && 323 ioctl(fd, DIOCGMEDIASIZE, &mediasize) == 0 && 324 mediasize > 0) { 325 is_disk_like = 1; 326 } 327#elif defined(__NetBSD__) || defined(__OpenBSD__) 328 /* Net/OpenBSD: if it supports DIOCGDINFO ioctl, it's disk-like. */ 329 else if ((S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) && 330 ioctl(fd, DIOCGDINFO, &dl) == 0 && 331 dl.d_partitions[DISKPART(st.st_rdev)].p_size > 0) { 332 is_disk_like = 1; 333 } 334#elif defined(__DragonFly__) 335 /* DragonFly BSD: if it supports DIOCGPART ioctl, it's disk-like. */ 336 else if (S_ISCHR(st.st_mode) && 337 ioctl(fd, DIOCGPART, &pi) == 0 && 338 pi.media_size > 0) { 339 is_disk_like = 1; 340 } 341#elif defined(__linux__) 342 /* Linux: All block devices are disk-like. */ 343 else if (S_ISBLK(st.st_mode) && 344 lseek(fd, 0, SEEK_CUR) == 0 && 345 lseek(fd, 0, SEEK_SET) == 0 && 346 lseek(fd, 0, SEEK_END) > 0 && 347 lseek(fd, 0, SEEK_SET) == 0) { 348 is_disk_like = 1; 349 } 350#endif 351 /* TODO: Add an "is_tape_like" variable and appropriate tests. */ 352 353 /* Disk-like devices prefer power-of-two block sizes. */ 354 /* Use provided block_size as a guide so users have some control. 
*/ 355 if (is_disk_like) { 356 size_t new_block_size = 64 * 1024; 357 while (new_block_size < mine->block_size 358 && new_block_size < 64 * 1024 * 1024) 359 new_block_size *= 2; 360 mine->block_size = new_block_size; 361 } 362 buffer = malloc(mine->block_size); 363 if (buffer == NULL) { 364 archive_set_error(a, ENOMEM, "No memory"); 365 goto fail; 366 } 367 mine->buffer = buffer; 368 mine->fd = fd; 369 /* Remember mode so close can decide whether to flush. */ 370 mine->st_mode = st.st_mode; 371 372 /* Disk-like inputs can use lseek(). */ 373 if (is_disk_like) 374 mine->use_lseek = 1; 375 376 return (ARCHIVE_OK); 377fail: 378 /* 379 * Don't close file descriptors not opened or ones pointing referring 380 * to `FNT_STDIN`. 381 */ 382 if (fd != -1 && fd != 0) 383 close(fd); 384 return (ARCHIVE_FATAL); 385} 386 387static ssize_t 388file_read(struct archive *a, void *client_data, const void **buff) 389{ 390 struct read_file_data *mine = (struct read_file_data *)client_data; 391 ssize_t bytes_read; 392 393 /* TODO: If a recent lseek() operation has left us 394 * mis-aligned, read and return a short block to try to get 395 * us back in alignment. */ 396 397 /* TODO: Someday, try mmap() here; if that succeeds, give 398 * the entire file to libarchive as a single block. That 399 * could be a lot faster than block-by-block manual I/O. */ 400 401 /* TODO: We might be able to improve performance on pipes and 402 * sockets by setting non-blocking I/O and just accepting 403 * whatever we get here instead of waiting for a full block 404 * worth of data. 
*/ 405 406 *buff = mine->buffer; 407 for (;;) { 408 bytes_read = read(mine->fd, mine->buffer, mine->block_size); 409 if (bytes_read < 0) { 410 if (errno == EINTR) 411 continue; 412 else if (mine->filename_type == FNT_STDIN) 413 archive_set_error(a, errno, 414 "Error reading stdin"); 415 else if (mine->filename_type == FNT_MBS) 416 archive_set_error(a, errno, 417 "Error reading '%s'", mine->filename.m); 418 else 419 archive_set_error(a, errno, 420 "Error reading '%S'", mine->filename.w); 421 } 422 return (bytes_read); 423 } 424} 425 426/* 427 * Regular files and disk-like block devices can use simple lseek 428 * without needing to round the request to the block size. 429 * 430 * TODO: This can leave future reads mis-aligned. Since we know the 431 * offset here, we should store it and use it in file_read() above 432 * to determine whether we should perform a short read to get back 433 * into alignment. Long series of mis-aligned reads can negatively 434 * impact disk throughput. (Of course, the performance impact should 435 * be carefully tested; extra code complexity is only worthwhile if 436 * it does provide measurable improvement.) 437 * 438 * TODO: Be lazy about the actual seek. There are a few pathological 439 * cases where libarchive makes a bunch of seek requests in a row 440 * without any intervening reads. This isn't a huge performance 441 * problem, since the kernel handles seeks lazily already, but 442 * it would be very slightly faster if we simply remembered the 443 * seek request here and then actually performed the seek at the 444 * top of the read callback above. 445 */ 446static int64_t 447file_skip_lseek(struct archive *a, void *client_data, int64_t request) 448{ 449 struct read_file_data *mine = (struct read_file_data *)client_data; 450#if defined(_WIN32) && !defined(__CYGWIN__) 451 /* We use _lseeki64() on Windows. 
*/ 452 int64_t old_offset, new_offset; 453#else 454 off_t old_offset, new_offset; 455#endif 456 457 /* We use off_t here because lseek() is declared that way. */ 458 459 /* TODO: Deal with case where off_t isn't 64 bits. 460 * This shouldn't be a problem on Linux or other POSIX 461 * systems, since the configuration logic for libarchive 462 * tries to obtain a 64-bit off_t. 463 */ 464 if ((old_offset = lseek(mine->fd, 0, SEEK_CUR)) >= 0 && 465 (new_offset = lseek(mine->fd, request, SEEK_CUR)) >= 0) 466 return (new_offset - old_offset); 467 468 /* If lseek() fails, don't bother trying again. */ 469 mine->use_lseek = 0; 470 471 /* Let libarchive recover with read+discard */ 472 if (errno == ESPIPE) 473 return (0); 474 475 /* If the input is corrupted or truncated, fail. */ 476 if (mine->filename_type == FNT_STDIN) 477 archive_set_error(a, errno, "Error seeking in stdin"); 478 else if (mine->filename_type == FNT_MBS) 479 archive_set_error(a, errno, "Error seeking in '%s'", 480 mine->filename.m); 481 else 482 archive_set_error(a, errno, "Error seeking in '%S'", 483 mine->filename.w); 484 return (-1); 485} 486 487 488/* 489 * TODO: Implement another file_skip_XXXX that uses MTIO ioctls to 490 * accelerate operation on tape drives. 491 */ 492 493static int64_t 494file_skip(struct archive *a, void *client_data, int64_t request) 495{ 496 struct read_file_data *mine = (struct read_file_data *)client_data; 497 498 /* Delegate skip requests. */ 499 if (mine->use_lseek) 500 return (file_skip_lseek(a, client_data, request)); 501 502 /* If we can't skip, return 0; libarchive will read+discard instead. */ 503 return (0); 504} 505 506/* 507 * TODO: Store the offset and use it in the read callback. 508 */ 509static int64_t 510file_seek(struct archive *a, void *client_data, int64_t request, int whence) 511{ 512 struct read_file_data *mine = (struct read_file_data *)client_data; 513 int64_t r; 514 515 /* We use off_t here because lseek() is declared that way. 
*/ 516 /* See above for notes about when off_t is less than 64 bits. */ 517 r = lseek(mine->fd, request, whence); 518 if (r >= 0) 519 return r; 520 521 /* If the input is corrupted or truncated, fail. */ 522 if (mine->filename_type == FNT_STDIN) 523 archive_set_error(a, errno, "Error seeking in stdin"); 524 else if (mine->filename_type == FNT_MBS) 525 archive_set_error(a, errno, "Error seeking in '%s'", 526 mine->filename.m); 527 else 528 archive_set_error(a, errno, "Error seeking in '%S'", 529 mine->filename.w); 530 return (ARCHIVE_FATAL); 531} 532 533static int 534file_close2(struct archive *a, void *client_data) 535{ 536 struct read_file_data *mine = (struct read_file_data *)client_data; 537 538 (void)a; /* UNUSED */ 539 540 /* Only flush and close if open succeeded. */ 541 if (mine->fd >= 0) { 542 /* 543 * Sometimes, we should flush the input before closing. 544 * Regular files: faster to just close without flush. 545 * Disk-like devices: Ditto. 546 * Tapes: must not flush (user might need to 547 * read the "next" item on a non-rewind device). 548 * Pipes and sockets: must flush (otherwise, the 549 * program feeding the pipe or socket may complain). 550 * Here, I flush everything except for regular files and 551 * device nodes. 552 */ 553 if (!S_ISREG(mine->st_mode) 554 && !S_ISCHR(mine->st_mode) 555 && !S_ISBLK(mine->st_mode)) { 556 ssize_t bytesRead; 557 do { 558 bytesRead = read(mine->fd, mine->buffer, 559 mine->block_size); 560 } while (bytesRead > 0); 561 } 562 /* If a named file was opened, then it needs to be closed. 
*/ 563 if (mine->filename_type != FNT_STDIN) 564 close(mine->fd); 565 } 566 free(mine->buffer); 567 mine->buffer = NULL; 568 mine->fd = -1; 569 return (ARCHIVE_OK); 570} 571 572static int 573file_close(struct archive *a, void *client_data) 574{ 575 struct read_file_data *mine = (struct read_file_data *)client_data; 576 file_close2(a, client_data); 577 free(mine); 578 return (ARCHIVE_OK); 579} 580 581static int 582file_switch(struct archive *a, void *client_data1, void *client_data2) 583{ 584 file_close2(a, client_data1); 585 return file_open(a, client_data2); 586} 587