/* unzip.c revision 180124 */
1/*- 2 * Copyright (c) 2007-2008 Dag-Erling Co�dan Sm�rgrav 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD: head/usr.bin/unzip/unzip.c 180124 2008-06-30 17:11:27Z des $ 28 * 29 * This file would be much shorter if we didn't care about command-line 30 * compatibility with Info-ZIP's UnZip, which requires us to duplicate 31 * parts of libarchive in order to gain more detailed control of its 32 * behaviour for the purpose of implementing the -n, -o, -L and -a 33 * options. 
34 */ 35 36#include <sys/queue.h> 37#include <sys/stat.h> 38 39#include <ctype.h> 40#include <errno.h> 41#include <fcntl.h> 42#include <fnmatch.h> 43#include <stdarg.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48 49#include <archive.h> 50#include <archive_entry.h> 51 52/* command-line options */ 53static int a_opt; /* convert EOL */ 54static const char *d_arg; /* directory */ 55static int j_opt; /* junk directories */ 56static int L_opt; /* lowercase names */ 57static int l_opt; /* list */ 58static int n_opt; /* never overwrite */ 59static int o_opt; /* always overwrite */ 60static int q_opt; /* quiet */ 61static int t_opt; /* test */ 62static int u_opt; /* update */ 63 64/* time when unzip started */ 65static time_t now; 66 67/* debug flag */ 68static int unzip_debug; 69 70/* running on tty? */ 71static int tty; 72 73/* error flag for -t */ 74static int test_failed; 75 76/* convenience macro */ 77/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */ 78#define ac(call) \ 79 do { \ 80 int acret = (call); \ 81 if (acret != ARCHIVE_OK) \ 82 errorx("%s", archive_error_string(a)); \ 83 } while (0) 84 85/* 86 * Indicates that last info() did not end with EOL. This helps error() et 87 * al. avoid printing an error message on the same line as an incomplete 88 * informational message. 89 */ 90static int noeol; 91 92/* fatal error message + errno */ 93static void 94error(const char *fmt, ...) 95{ 96 va_list ap; 97 98 if (noeol) 99 fprintf(stdout, "\n"); 100 fflush(stdout); 101 fprintf(stderr, "unzip: "); 102 va_start(ap, fmt); 103 vfprintf(stderr, fmt, ap); 104 va_end(ap); 105 fprintf(stderr, ": %s\n", strerror(errno)); 106 exit(1); 107} 108 109/* fatal error message, no errno */ 110static void 111errorx(const char *fmt, ...) 
112{ 113 va_list ap; 114 115 if (noeol) 116 fprintf(stdout, "\n"); 117 fflush(stdout); 118 fprintf(stderr, "unzip: "); 119 va_start(ap, fmt); 120 vfprintf(stderr, fmt, ap); 121 va_end(ap); 122 fprintf(stderr, "\n"); 123 exit(1); 124} 125 126#if 0 127/* non-fatal error message + errno */ 128static void 129warning(const char *fmt, ...) 130{ 131 va_list ap; 132 133 if (noeol) 134 fprintf(stdout, "\n"); 135 fflush(stdout); 136 fprintf(stderr, "unzip: "); 137 va_start(ap, fmt); 138 vfprintf(stderr, fmt, ap); 139 va_end(ap); 140 fprintf(stderr, ": %s\n", strerror(errno)); 141} 142#endif 143 144/* non-fatal error message, no errno */ 145static void 146warningx(const char *fmt, ...) 147{ 148 va_list ap; 149 150 if (noeol) 151 fprintf(stdout, "\n"); 152 fflush(stdout); 153 fprintf(stderr, "unzip: "); 154 va_start(ap, fmt); 155 vfprintf(stderr, fmt, ap); 156 va_end(ap); 157 fprintf(stderr, "\n"); 158} 159 160/* informational message (if not -q) */ 161static void 162info(const char *fmt, ...) 163{ 164 va_list ap; 165 int i; 166 167 if (q_opt && !unzip_debug) 168 return; 169 va_start(ap, fmt); 170 vfprintf(stdout, fmt, ap); 171 va_end(ap); 172 fflush(stdout); 173 174 for (i = 0; fmt[i] != '\0'; ++i) 175 /* nothing */ ; 176 noeol = !(i && fmt[i - 1] == '\n'); 177} 178 179/* debug message (if unzip_debug) */ 180static void 181debug(const char *fmt, ...) 
182{ 183 va_list ap; 184 int i; 185 186 if (!unzip_debug) 187 return; 188 va_start(ap, fmt); 189 vfprintf(stderr, fmt, ap); 190 va_end(ap); 191 fflush(stderr); 192 193 for (i = 0; fmt[i] != '\0'; ++i) 194 /* nothing */ ; 195 noeol = !(i && fmt[i - 1] == '\n'); 196} 197 198/* duplicate a path name, possibly converting to lower case */ 199static char * 200pathdup(const char *path) 201{ 202 char *str; 203 int len; 204 205 len = strlen(path); 206 while (len && path[len - 1] == '/') 207 len--; 208 if ((str = malloc(len + 1)) == NULL) { 209 errno = ENOMEM; 210 error("malloc()"); 211 } 212 for (int i = 0; i < len; ++i) 213 str[i] = L_opt ? tolower(path[i]) : path[i]; 214 str[len] = '\0'; 215 216 return (str); 217} 218 219/* concatenate two path names */ 220static char * 221pathcat(const char *prefix, const char *path) 222{ 223 char *str; 224 int prelen, len; 225 226 prelen = prefix ? strlen(prefix) + 1 : 0; 227 len = strlen(path) + 1; 228 if ((str = malloc(prelen + len)) == NULL) { 229 errno = ENOMEM; 230 error("malloc()"); 231 } 232 if (prefix) { 233 memcpy(str, prefix, prelen); /* includes zero */ 234 str[prelen - 1] = '/'; /* splat zero */ 235 } 236 memcpy(str + prelen, path, len); /* includes zero */ 237 238 return (str); 239} 240 241/* 242 * Pattern lists for include / exclude processing 243 */ 244struct pattern { 245 STAILQ_ENTRY(pattern) link; 246 char pattern[]; 247}; 248 249STAILQ_HEAD(pattern_list, pattern); 250static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include); 251static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude); 252 253/* 254 * Add an entry to a pattern list 255 */ 256static void 257add_pattern(struct pattern_list *list, const char *pattern) 258{ 259 struct pattern *entry; 260 int len; 261 262 debug("adding pattern '%s'\n", pattern); 263 len = strlen(pattern); 264 if ((entry = malloc(sizeof *entry + len + 1)) == NULL) { 265 errno = ENOMEM; 266 error("malloc()"); 267 } 268 memset(&entry->link, 0, sizeof entry->link); 269 
memcpy(entry->pattern, pattern, len + 1); 270 STAILQ_INSERT_TAIL(list, entry, link); 271} 272 273/* 274 * Match a string against a list of patterns 275 */ 276static int 277match_pattern(struct pattern_list *list, const char *str) 278{ 279 struct pattern *entry; 280 281 STAILQ_FOREACH(entry, list, link) { 282 if (fnmatch(entry->pattern, str, 0) == 0) 283 return (1); 284 } 285 return (0); 286} 287 288/* 289 * Verify that a given pathname is in the include list and not in the 290 * exclude list. 291 */ 292static int 293accept_pathname(const char *pathname) 294{ 295 296 if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname)) 297 return (0); 298 if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname)) 299 return (0); 300 return (1); 301} 302 303/* 304 * Create the specified directory with the specified mode, taking certain 305 * precautions on they way. 306 */ 307static void 308make_dir(const char *path, int mode) 309{ 310 struct stat sb; 311 312 if (lstat(path, &sb) == 0) { 313 if (S_ISDIR(sb.st_mode)) 314 return; 315 /* 316 * Normally, we should either ask the user about removing 317 * the non-directory of the same name as a directory we 318 * wish to create, or respect the -n or -o command-line 319 * options. However, this may lead to a later failure or 320 * even compromise (if this non-directory happens to be a 321 * symlink to somewhere unsafe), so we don't. 322 */ 323 324 /* 325 * Don't check unlink() result; failure will cause mkdir() 326 * to fail later, which we will catch. 327 */ 328 (void)unlink(path); 329 } 330 if (mkdir(path, mode) != 0 && errno != EEXIST) 331 error("mkdir('%s')", path); 332} 333 334/* 335 * Ensure that all directories leading up to (but not including) the 336 * specified path exist. 
337 * 338 * XXX inefficient + modifies the file in-place 339 */ 340static void 341make_parent(char *path) 342{ 343 struct stat sb; 344 char *sep; 345 346 sep = strrchr(path, '/'); 347 if (sep == NULL || sep == path) 348 return; 349 *sep = '\0'; 350 if (lstat(path, &sb) == 0) { 351 if (S_ISDIR(sb.st_mode)) { 352 *sep = '/'; 353 return; 354 } 355 unlink(path); 356 } 357 make_parent(path); 358 mkdir(path, 0755); 359 *sep = '/'; 360 361#if 0 362 for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) { 363 /* root in case of absolute d_arg */ 364 if (sep == path) 365 continue; 366 *sep = '\0'; 367 make_dir(path, 0755); 368 *sep = '/'; 369 } 370#endif 371} 372 373/* 374 * Extract a directory. 375 */ 376static void 377extract_dir(struct archive *a, struct archive_entry *e, const char *path) 378{ 379 int mode; 380 381 mode = archive_entry_filetype(e) & 0777; 382 if (mode == 0) 383 mode = 0755; 384 385 /* 386 * Some zipfiles contain directories with weird permissions such 387 * as 0644 or 0444. This can cause strange issues such as being 388 * unable to extract files into the directory we just created, or 389 * the user being unable to remove the directory later without 390 * first manually changing its permissions. Therefore, we whack 391 * the permissions into shape, assuming that the user wants full 392 * access and that anyone who gets read access also gets execute 393 * access. 394 */ 395 mode |= 0700; 396 if (mode & 0040) 397 mode |= 0010; 398 if (mode & 0004) 399 mode |= 0001; 400 401 info("d %s\n", path); 402 make_dir(path, mode); 403 ac(archive_read_data_skip(a)); 404} 405 406static unsigned char buffer[8192]; 407static char spinner[] = { '|', '/', '-', '\\' }; 408 409/* 410 * Extract a regular file. 
411 */ 412static void 413extract_file(struct archive *a, struct archive_entry *e, const char *path) 414{ 415 int mode; 416 time_t mtime; 417 struct stat sb; 418 struct timeval tv[2]; 419 int cr, fd, text, warn; 420 ssize_t len; 421 unsigned char *p, *q, *end; 422 423 mode = archive_entry_filetype(e) & 0777; 424 if (mode == 0) 425 mode = 0644; 426 mtime = archive_entry_mtime(e); 427 428 /* look for existing file of same name */ 429 if (lstat(path, &sb) == 0) { 430 if (u_opt) { 431 /* check if up-to-date */ 432 if (S_ISREG(sb.st_mode) && sb.st_mtime > mtime) 433 return; 434 (void)unlink(path); 435 } else if (o_opt) { 436 /* overwrite */ 437 (void)unlink(path); 438 } else if (n_opt) { 439 /* do not overwrite */ 440 return; 441 } else { 442 /* XXX ask user */ 443 errorx("not implemented"); 444 } 445 } 446 447 if ((fd = open(path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0) 448 error("open('%s')", path); 449 450 /* loop over file contents and write to disk */ 451 info("x %s", path); 452 text = a_opt; 453 warn = 0; 454 cr = 0; 455 for (int n = 0; ; n++) { 456 if (tty && (n % 4) == 0) 457 info(" %c\b\b", spinner[(n / 4) % sizeof spinner]); 458 459 len = archive_read_data(a, buffer, sizeof buffer); 460 461 if (len < 0) 462 ac(len); 463 464 /* left over CR from previous buffer */ 465 if (a_opt && cr) { 466 if (len == 0 || buffer[0] != '\n') 467 if (write(fd, "\r", 1) != 1) 468 error("write('%s')", path); 469 cr = 0; 470 } 471 472 /* EOF */ 473 if (len == 0) 474 break; 475 end = buffer + len; 476 477 /* 478 * Detect whether this is a text file. The correct way to 479 * do this is to check the least significant bit of the 480 * "internal file attributes" field of the corresponding 481 * file header in the central directory, but libarchive 482 * does not read the central directory, so we have to 483 * guess by looking for non-ASCII characters in the 484 * buffer. Hopefully we won't guess wrong. If we do 485 * guess wrong, we print a warning message later. 
486 */ 487 if (a_opt && n == 0) { 488 for (p = buffer; p < end; ++p) { 489 if (!isascii((unsigned char)*p)) { 490 text = 0; 491 break; 492 } 493 } 494 } 495 496 /* simple case */ 497 if (!a_opt || !text) { 498 if (write(fd, buffer, len) != len) 499 error("write('%s')", path); 500 continue; 501 } 502 503 /* hard case: convert \r\n to \n (sigh...) */ 504 for (p = buffer; p < end; p = q + 1) { 505 for (q = p; q < end; q++) { 506 if (!warn && !isascii(*q)) { 507 warningx("%s may be corrupted due" 508 " to weak text file detection" 509 " heuristic", path); 510 warn = 1; 511 } 512 if (q[0] != '\r') 513 continue; 514 if (&q[1] == end) { 515 cr = 1; 516 break; 517 } 518 if (q[1] == '\n') 519 break; 520 } 521 if (write(fd, p, q - p) != q - p) 522 error("write('%s')", path); 523 } 524 } 525 if (tty) 526 info(" \b\b"); 527 if (text) 528 info(" (text)"); 529 info("\n"); 530 531 /* set access and modification time */ 532 tv[0].tv_sec = now; 533 tv[0].tv_usec = 0; 534 tv[1].tv_sec = mtime; 535 tv[1].tv_usec = 0; 536 if (futimes(fd, tv) != 0) 537 error("utimes('%s')", path); 538 if (close(fd) != 0) 539 error("close('%s')", path); 540} 541 542/* 543 * Extract a zipfile entry: first perform some sanity checks to ensure 544 * that it is either a directory or a regular file and that the path is 545 * not absolute and does not try to break out of the current directory; 546 * then call either extract_dir() or extract_file() as appropriate. 547 * 548 * This is complicated a bit by the various ways in which we need to 549 * manipulate the path name. Case conversion (if requested by the -L 550 * option) happens first, but the include / exclude patterns are applied 551 * to the full converted path name, before the directory part of the path 552 * is removed in accordance with the -j option. Sanity checks are 553 * intentionally done earlier than they need to be, so the user will get a 554 * warning about insecure paths even for files or directories which 555 * wouldn't be extracted anyway. 
556 */ 557static void 558extract(struct archive *a, struct archive_entry *e) 559{ 560 char *pathname, *realpathname; 561 mode_t filetype; 562 char *p, *q; 563 564 pathname = pathdup(archive_entry_pathname(e)); 565 filetype = archive_entry_filetype(e); 566 567 /* sanity checks */ 568 if (pathname[0] == '/' || 569 strncmp(pathname, "../", 3) == 0 || 570 strstr(pathname, "/../") != NULL) { 571 warningx("skipping insecure entry '%s'", pathname); 572 ac(archive_read_data_skip(a)); 573 free(pathname); 574 return; 575 } 576 577 /* I don't think this can happen in a zipfile.. */ 578 if (!S_ISDIR(filetype) && !S_ISREG(filetype)) { 579 warningx("skipping non-regular entry '%s'", pathname); 580 ac(archive_read_data_skip(a)); 581 free(pathname); 582 return; 583 } 584 585 /* skip directories in -j case */ 586 if (S_ISDIR(filetype) && j_opt) { 587 ac(archive_read_data_skip(a)); 588 free(pathname); 589 return; 590 } 591 592 /* apply include / exclude patterns */ 593 if (!accept_pathname(pathname)) { 594 ac(archive_read_data_skip(a)); 595 free(pathname); 596 return; 597 } 598 599 /* apply -j and -d */ 600 if (j_opt) { 601 for (p = q = pathname; *p; ++p) 602 if (*p == '/') 603 q = p + 1; 604 realpathname = pathcat(d_arg, q); 605 } else { 606 realpathname = pathcat(d_arg, pathname); 607 } 608 609 /* ensure that parent directory exists */ 610 make_parent(realpathname); 611 612 if (S_ISDIR(filetype)) 613 extract_dir(a, e, realpathname); 614 else 615 extract_file(a, e, realpathname); 616 617 free(realpathname); 618 free(pathname); 619} 620 621/* 622 * Print the name of an entry to stdout. 
623 */ 624static void 625list(struct archive *a, struct archive_entry *e) 626{ 627 628 printf("%s\n", archive_entry_pathname(e)); 629 ac(archive_read_data_skip(a)); 630} 631 632/* 633 * Extract to memory to check CRC 634 */ 635static void 636test(struct archive *a, struct archive_entry *e) 637{ 638 ssize_t len; 639 640 if (S_ISDIR(archive_entry_filetype(e))) 641 return; 642 643 info("%s ", archive_entry_pathname(e)); 644 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0) 645 /* nothing */; 646 if (len < 0) { 647 info("%s\n", archive_error_string(a)); 648 ++test_failed; 649 } else { 650 info("OK\n"); 651 } 652 653 /* shouldn't be necessary, but it doesn't hurt */ 654 ac(archive_read_data_skip(a)); 655} 656 657 658/* 659 * Main loop: open the zipfile, iterate over its contents and decide what 660 * to do with each entry. 661 */ 662static void 663unzip(const char *fn) 664{ 665 struct archive *a; 666 struct archive_entry *e; 667 int fd, ret; 668 669 if ((fd = open(fn, O_RDONLY)) < 0) 670 error("%s", fn); 671 672 a = archive_read_new(); 673 ac(archive_read_support_format_zip(a)); 674 ac(archive_read_open_fd(a, fd, 8192)); 675 676 for (;;) { 677 ret = archive_read_next_header(a, &e); 678 if (ret == ARCHIVE_EOF) 679 break; 680 ac(ret); 681 if (t_opt) 682 test(a, e); 683 else if (l_opt) 684 list(a, e); 685 else 686 extract(a, e); 687 } 688 689 ac(archive_read_close(a)); 690 (void)archive_read_finish(a); 691 if (close(fd) != 0) 692 error("%s", fn); 693 694 if (t_opt && test_failed) 695 errorx("%d checksum error(s) found.", test_failed); 696} 697 698static void 699usage(void) 700{ 701 702 fprintf(stderr, "usage: unzip [-ajLlnoqtu] [-d dir] zipfile\n"); 703 exit(1); 704} 705 706static int 707getopts(int argc, char *argv[]) 708{ 709 int opt; 710 711 optreset = optind = 1; 712 while ((opt = getopt(argc, argv, "ad:jLlnoqtux:")) != -1) 713 switch (opt) { 714 case 'a': 715 a_opt = 1; 716 break; 717 case 'd': 718 d_arg = optarg; 719 break; 720 case 'j': 721 j_opt = 1; 
722 break; 723 case 'L': 724 L_opt = 1; 725 break; 726 case 'l': 727 l_opt = 1; 728 break; 729 case 'n': 730 n_opt = 1; 731 break; 732 case 'o': 733 o_opt = 1; 734 break; 735 case 'q': 736 q_opt = 1; 737 break; 738 case 't': 739 t_opt = 1; 740 break; 741 case 'u': 742 u_opt = 1; 743 break; 744 case 'x': 745 add_pattern(&exclude, optarg); 746 break; 747 default: 748 usage(); 749 } 750 751 return (optind); 752} 753 754int 755main(int argc, char *argv[]) 756{ 757 const char *zipfile; 758 int nopts; 759 760 if (isatty(STDOUT_FILENO)) 761 tty = 1; 762 763 if (getenv("UNZIP_DEBUG") != NULL) 764 unzip_debug = 1; 765 for (int i = 0; i < argc; ++i) 766 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n'); 767 768 /* 769 * Info-ZIP's unzip(1) expects certain options to come before the 770 * zipfile name, and others to come after - though it does not 771 * enforce this. For simplicity, we accept *all* options both 772 * before and after the zipfile name. 773 */ 774 nopts = getopts(argc, argv); 775 776 if (argc <= nopts) 777 usage(); 778 zipfile = argv[nopts++]; 779 780 while (nopts < argc && *argv[nopts] != '-') 781 add_pattern(&include, argv[nopts++]); 782 783 nopts--; /* fake argv[0] */ 784 nopts += getopts(argc - nopts, argv + nopts); 785 786 if (n_opt + o_opt + u_opt > 1) 787 errorx("-n, -o and -u are contradictory"); 788 789 time(&now); 790 791 unzip(zipfile); 792 793 exit(0); 794} 795