unzip.c revision 180124
1139313Sjeff/*- 2139313Sjeff * Copyright (c) 2007-2008 Dag-Erling Co�dan Sm�rgrav 3187580Sjeff * All rights reserved. 4139313Sjeff * 5139313Sjeff * Redistribution and use in source and binary forms, with or without 6139313Sjeff * modification, are permitted provided that the following conditions 7139313Sjeff * are met: 8139313Sjeff * 1. Redistributions of source code must retain the above copyright 9139313Sjeff * notice, this list of conditions and the following disclaimer 10139313Sjeff * in this position and unchanged. 11139313Sjeff * 2. Redistributions in binary form must reproduce the above copyright 12139313Sjeff * notice, this list of conditions and the following disclaimer in the 13139313Sjeff * documentation and/or other materials provided with the distribution. 14139313Sjeff * 15139313Sjeff * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16139313Sjeff * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17139313Sjeff * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18139313Sjeff * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19139313Sjeff * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20139313Sjeff * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21139313Sjeff * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22139313Sjeff * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23139313Sjeff * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24139313Sjeff * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25139313Sjeff * SUCH DAMAGE. 26139313Sjeff * 27139313Sjeff * $FreeBSD: head/usr.bin/unzip/unzip.c 180124 2008-06-30 17:11:27Z des $ 28139313Sjeff * 29139313Sjeff * This file would be much shorter if we didn't care about command-line 30139313Sjeff * compatibility with Info-ZIP's UnZip, which requires us to duplicate 31187358Sjeff * parts of libarchive in order to gain more detailed control of its 32139313Sjeff * behaviour for the purpose of implementing the -n, -o, -L and -a 33139313Sjeff * options. 34139366Sarr */ 35173743Ssam 36173743Ssam#include <sys/queue.h> 37173743Ssam#include <sys/stat.h> 38173743Ssam 39173743Ssam#include <ctype.h> 40173743Ssam#include <errno.h> 41173743Ssam#include <fcntl.h> 42173743Ssam#include <fnmatch.h> 43173743Ssam#include <stdarg.h> 44173743Ssam#include <stdio.h> 45175306Skris#include <stdlib.h> 46175306Skris#include <string.h> 47168940Skris#include <unistd.h> 48168940Skris 49168940Skris#include <archive.h> 50168940Skris#include <archive_entry.h> 51168940Skris 52168940Skris/* command-line options */ 53168940Skrisstatic int a_opt; /* convert EOL */ 54168940Skrisstatic const char *d_arg; /* directory */ 55168940Skrisstatic int j_opt; /* junk directories */ 56139366Sarrstatic int L_opt; /* lowercase names */ 57187358Sjeffstatic int l_opt; /* list */ 58187358Sjeffstatic int n_opt; /* never overwrite */ 59139366Sarrstatic int o_opt; /* always overwrite */ 60139366Sarrstatic int q_opt; /* quiet */ 61187358Sjeffstatic int t_opt; /* test */ 62187358Sjeffstatic int u_opt; /* update */ 63187358Sjeff 64187358Sjeff/* time when unzip started */ 65168940Skrisstatic time_t now; 66168940Skris 67168940Skris/* debug flag */ 68139313Sjeffstatic int unzip_debug; 69187358Sjeff 70187358Sjeff/* running on tty? */ 71187358Sjeffstatic int tty; 72187358Sjeff 73187358Sjeff/* error flag for -t */ 74187358Sjeffstatic int test_failed; 75187358Sjeff 76187358Sjeff/* convenience macro */ 77187358Sjeff/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */ 78187358Sjeff#define ac(call) \ 79187358Sjeff do { \ 80187358Sjeff int acret = (call); \ 81187358Sjeff if (acret != ARCHIVE_OK) \ 82187358Sjeff errorx("%s", archive_error_string(a)); \ 83187358Sjeff } while (0) 84187358Sjeff 85187358Sjeff/* 86187358Sjeff * Indicates that last info() did not end with EOL. This helps error() et 87187358Sjeff * al. avoid printing an error message on the same line as an incomplete 88187358Sjeff * informational message. 89187379Sjeff */ 90187358Sjeffstatic int noeol; 91187358Sjeff 92187358Sjeff/* fatal error message + errno */ 93187358Sjeffstatic void 94187358Sjefferror(const char *fmt, ...) 95187358Sjeff{ 96187358Sjeff va_list ap; 97187358Sjeff 98187358Sjeff if (noeol) 99187358Sjeff fprintf(stdout, "\n"); 100187358Sjeff fflush(stdout); 101187358Sjeff fprintf(stderr, "unzip: "); 102187358Sjeff va_start(ap, fmt); 103187358Sjeff vfprintf(stderr, fmt, ap); 104187358Sjeff va_end(ap); 105187358Sjeff fprintf(stderr, ": %s\n", strerror(errno)); 106187358Sjeff exit(1); 107187358Sjeff} 108187358Sjeff 109187358Sjeff/* fatal error message, no errno */ 110187358Sjeffstatic void 111187358Sjefferrorx(const char *fmt, ...) 112187358Sjeff{ 113187358Sjeff va_list ap; 114187358Sjeff 115187358Sjeff if (noeol) 116187358Sjeff fprintf(stdout, "\n"); 117187358Sjeff fflush(stdout); 118187358Sjeff fprintf(stderr, "unzip: "); 119187358Sjeff va_start(ap, fmt); 120139313Sjeff vfprintf(stderr, fmt, ap); 121139313Sjeff va_end(ap); 122187358Sjeff fprintf(stderr, "\n"); 123187358Sjeff exit(1); 124187358Sjeff} 125187358Sjeff 126173743Ssam#if 0 127139313Sjeff/* non-fatal error message + errno */ 128187379Sjeffstatic void 129187379Sjeffwarning(const char *fmt, ...) 130187379Sjeff{ 131187379Sjeff va_list ap; 132187379Sjeff 133187379Sjeff if (noeol) 134187358Sjeff fprintf(stdout, "\n"); 135187358Sjeff fflush(stdout); 136187358Sjeff fprintf(stderr, "unzip: "); 137187358Sjeff va_start(ap, fmt); 138187358Sjeff vfprintf(stderr, fmt, ap); 139187358Sjeff va_end(ap); 140187358Sjeff fprintf(stderr, ": %s\n", strerror(errno)); 141187358Sjeff} 142187358Sjeff#endif 143187358Sjeff 144187358Sjeff/* non-fatal error message, no errno */ 145187358Sjeffstatic void 146187358Sjeffwarningx(const char *fmt, ...) 147187358Sjeff{ 148187358Sjeff va_list ap; 149187358Sjeff 150187358Sjeff if (noeol) 151139313Sjeff fprintf(stdout, "\n"); 152187580Sjeff fflush(stdout); 153187580Sjeff fprintf(stderr, "unzip: "); 154187580Sjeff va_start(ap, fmt); 155139313Sjeff vfprintf(stderr, fmt, ap); 156187580Sjeff va_end(ap); 157139313Sjeff fprintf(stderr, "\n"); 158139313Sjeff} 159187580Sjeff 160139313Sjeff/* informational message (if not -q) */ 161187580Sjeffstatic void 162187580Sjeffinfo(const char *fmt, ...) 163187580Sjeff{ 164187580Sjeff va_list ap; 165139313Sjeff int i; 166139313Sjeff 167139313Sjeff if (q_opt && !unzip_debug) 168139313Sjeff return; 169187471Sjeff va_start(ap, fmt); 170187471Sjeff vfprintf(stdout, fmt, ap); 171139313Sjeff va_end(ap); 172139313Sjeff fflush(stdout); 173187471Sjeff 174187471Sjeff for (i = 0; fmt[i] != '\0'; ++i) 175139313Sjeff /* nothing */ ; 176139313Sjeff noeol = !(i && fmt[i - 1] == '\n'); 177139313Sjeff} 178139313Sjeff 179139313Sjeff/* debug message (if unzip_debug) */ 180139313Sjeffstatic void 181139313Sjeffdebug(const char *fmt, ...) 182187471Sjeff{ 183187471Sjeff va_list ap; 184187471Sjeff int i; 185187471Sjeff 186187471Sjeff if (!unzip_debug) 187187471Sjeff return; 188187471Sjeff va_start(ap, fmt); 189187471Sjeff vfprintf(stderr, fmt, ap); 190187471Sjeff va_end(ap); 191187471Sjeff fflush(stderr); 192187471Sjeff 193187471Sjeff for (i = 0; fmt[i] != '\0'; ++i) 194187471Sjeff /* nothing */ ; 195187471Sjeff noeol = !(i && fmt[i - 1] == '\n'); 196139313Sjeff} 197139313Sjeff 198139313Sjeff/* duplicate a path name, possibly converting to lower case */ 199139313Sjeffstatic char * 200139313Sjeffpathdup(const char *path) 201139313Sjeff{ 202139313Sjeff char *str; 203139313Sjeff int len; 204139313Sjeff 205139313Sjeff len = strlen(path); 206139313Sjeff while (len && path[len - 1] == '/') 207139313Sjeff len--; 208139313Sjeff if ((str = malloc(len + 1)) == NULL) { 209139313Sjeff errno = ENOMEM; 210139313Sjeff error("malloc()"); 211139313Sjeff } 212139313Sjeff for (int i = 0; i < len; ++i) 213187358Sjeff str[i] = L_opt ? tolower(path[i]) : path[i]; 214187358Sjeff str[len] = '\0'; 215139313Sjeff 216187358Sjeff return (str); 217187358Sjeff} 218187358Sjeff 219187358Sjeff/* concatenate two path names */ 220139313Sjeffstatic char * 221139313Sjeffpathcat(const char *prefix, const char *path) 222139313Sjeff{ 223139313Sjeff char *str; 224139313Sjeff int prelen, len; 225139313Sjeff 226139313Sjeff prelen = prefix ? strlen(prefix) + 1 : 0; 227139313Sjeff len = strlen(path) + 1; 228139313Sjeff if ((str = malloc(prelen + len)) == NULL) { 229139313Sjeff errno = ENOMEM; 230139313Sjeff error("malloc()"); 231139313Sjeff } 232139313Sjeff if (prefix) { 233139313Sjeff memcpy(str, prefix, prelen); /* includes zero */ 234139313Sjeff str[prelen - 1] = '/'; /* splat zero */ 235139313Sjeff } 236139313Sjeff memcpy(str + prelen, path, len); /* includes zero */ 237187358Sjeff 238187358Sjeff return (str); 239139313Sjeff} 240139313Sjeff 241139313Sjeff/* 242139313Sjeff * Pattern lists for include / exclude processing 243139313Sjeff */ 244139313Sjeffstruct pattern { 245139313Sjeff STAILQ_ENTRY(pattern) link; 246187358Sjeff char pattern[]; 247139313Sjeff}; 248139313Sjeff 249139313SjeffSTAILQ_HEAD(pattern_list, pattern); 250139313Sjeffstatic struct pattern_list include = STAILQ_HEAD_INITIALIZER(include); 251139313Sjeffstatic struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude); 252139313Sjeff 253139313Sjeff/* 254139313Sjeff * Add an entry to a pattern list 255139313Sjeff */ 256139313Sjeffstatic void 257139313Sjeffadd_pattern(struct pattern_list *list, const char *pattern) 258139313Sjeff{ 259139313Sjeff struct pattern *entry; 260139313Sjeff int len; 261139313Sjeff 262139313Sjeff debug("adding pattern '%s'\n", pattern); 263139313Sjeff len = strlen(pattern); 264139313Sjeff if ((entry = malloc(sizeof *entry + len + 1)) == NULL) { 265139313Sjeff errno = ENOMEM; 266139313Sjeff error("malloc()"); 267139313Sjeff } 268139313Sjeff memset(&entry->link, 0, sizeof entry->link); 269139313Sjeff memcpy(entry->pattern, pattern, len + 1); 270139313Sjeff STAILQ_INSERT_TAIL(list, entry, link); 271139313Sjeff} 272139313Sjeff 273139313Sjeff/* 274187358Sjeff * Match a string against a list of patterns 275187358Sjeff */ 276139313Sjeffstatic int 277139313Sjeffmatch_pattern(struct pattern_list *list, const char *str) 278187358Sjeff{ 279187358Sjeff struct pattern *entry; 280139313Sjeff 281139313Sjeff STAILQ_FOREACH(entry, list, link) { 282139313Sjeff if (fnmatch(entry->pattern, str, 0) == 0) 283139313Sjeff return (1); 284139313Sjeff } 285139313Sjeff return (0); 286139313Sjeff} 287187358Sjeff 288187358Sjeff/* 289187358Sjeff * Verify that a given pathname is in the include list and not in the 290187358Sjeff * exclude list. 291139313Sjeff */ 292187358Sjeffstatic int 293187358Sjeffaccept_pathname(const char *pathname) 294139313Sjeff{ 295139313Sjeff 296139313Sjeff if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname)) 297139313Sjeff return (0); 298139313Sjeff if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname)) 299139313Sjeff return (0); 300139313Sjeff return (1); 301187358Sjeff} 302139313Sjeff 303139313Sjeff/* 304187358Sjeff * Create the specified directory with the specified mode, taking certain 305139313Sjeff * precautions on they way. 306139313Sjeff */ 307139313Sjeffstatic void 308139313Sjeffmake_dir(const char *path, int mode) 309139313Sjeff{ 310139313Sjeff struct stat sb; 311139313Sjeff 312139313Sjeff if (lstat(path, &sb) == 0) { 313139313Sjeff if (S_ISDIR(sb.st_mode)) 314139313Sjeff return; 315139313Sjeff /* 316187359Sjeff * Normally, we should either ask the user about removing 317187359Sjeff * the non-directory of the same name as a directory we 318187359Sjeff * wish to create, or respect the -n or -o command-line 319187359Sjeff * options. However, this may lead to a later failure or 320187359Sjeff * even compromise (if this non-directory happens to be a 321187359Sjeff * symlink to somewhere unsafe), so we don't. 322187359Sjeff */ 323187359Sjeff 324187359Sjeff /* 325187359Sjeff * Don't check unlink() result; failure will cause mkdir() 326187359Sjeff * to fail later, which we will catch. 327187359Sjeff */ 328187359Sjeff (void)unlink(path); 329187359Sjeff } 330187359Sjeff if (mkdir(path, mode) != 0 && errno != EEXIST) 331187359Sjeff error("mkdir('%s')", path); 332187359Sjeff} 333187359Sjeff 334187359Sjeff/* 335187359Sjeff * Ensure that all directories leading up to (but not including) the 336187359Sjeff * specified path exist. 337187359Sjeff * 338187359Sjeff * XXX inefficient + modifies the file in-place 339187376Sjeff */ 340187376Sjeffstatic void 341187376Sjeffmake_parent(char *path) 342187376Sjeff{ 343187376Sjeff struct stat sb; 344187359Sjeff char *sep; 345187359Sjeff 346187359Sjeff sep = strrchr(path, '/'); 347187359Sjeff if (sep == NULL || sep == path) 348187359Sjeff return; 349187359Sjeff *sep = '\0'; 350187359Sjeff if (lstat(path, &sb) == 0) { 351187359Sjeff if (S_ISDIR(sb.st_mode)) { 352187359Sjeff *sep = '/'; 353187359Sjeff return; 354187359Sjeff } 355187359Sjeff unlink(path); 356187359Sjeff } 357187359Sjeff make_parent(path); 358187359Sjeff mkdir(path, 0755); 359187359Sjeff *sep = '/'; 360187359Sjeff 361187359Sjeff#if 0 362187359Sjeff for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) { 363187359Sjeff /* root in case of absolute d_arg */ 364187359Sjeff if (sep == path) 365187359Sjeff continue; 366187359Sjeff *sep = '\0'; 367187359Sjeff make_dir(path, 0755); 368187359Sjeff *sep = '/'; 369187359Sjeff } 370187359Sjeff#endif 371187359Sjeff} 372187359Sjeff 373187359Sjeff/* 374187359Sjeff * Extract a directory. 375187359Sjeff */ 376187359Sjeffstatic void 377187359Sjeffextract_dir(struct archive *a, struct archive_entry *e, const char *path) 378187359Sjeff{ 379187359Sjeff int mode; 380187359Sjeff 381187359Sjeff mode = archive_entry_filetype(e) & 0777; 382187359Sjeff if (mode == 0) 383187359Sjeff mode = 0755; 384187359Sjeff 385187359Sjeff /* 386187359Sjeff * Some zipfiles contain directories with weird permissions such 387187359Sjeff * as 0644 or 0444. This can cause strange issues such as being 388187359Sjeff * unable to extract files into the directory we just created, or 389187359Sjeff * the user being unable to remove the directory later without 390187359Sjeff * first manually changing its permissions. Therefore, we whack 391187359Sjeff * the permissions into shape, assuming that the user wants full 392187359Sjeff * access and that anyone who gets read access also gets execute 393187359Sjeff * access. 394187359Sjeff */ 395187359Sjeff mode |= 0700; 396187359Sjeff if (mode & 0040) 397187359Sjeff mode |= 0010; 398187359Sjeff if (mode & 0004) 399187359Sjeff mode |= 0001; 400187359Sjeff 401187359Sjeff info("d %s\n", path); 402187359Sjeff make_dir(path, mode); 403187359Sjeff ac(archive_read_data_skip(a)); 404187359Sjeff} 405187359Sjeff 406187359Sjeffstatic unsigned char buffer[8192]; 407187359Sjeffstatic char spinner[] = { '|', '/', '-', '\\' }; 408187359Sjeff 409187376Sjeff/* 410187376Sjeff * Extract a regular file. 411187359Sjeff */ 412187376Sjeffstatic void 413187376Sjeffextract_file(struct archive *a, struct archive_entry *e, const char *path) 414187376Sjeff{ 415187376Sjeff int mode; 416187376Sjeff time_t mtime; 417187376Sjeff struct stat sb; 418187376Sjeff struct timeval tv[2]; 419187376Sjeff int cr, fd, text, warn; 420187376Sjeff ssize_t len; 421187376Sjeff unsigned char *p, *q, *end; 422187376Sjeff 423187376Sjeff mode = archive_entry_filetype(e) & 0777; 424187376Sjeff if (mode == 0) 425187359Sjeff mode = 0644; 426187359Sjeff mtime = archive_entry_mtime(e); 427187359Sjeff 428187359Sjeff /* look for existing file of same name */ 429187359Sjeff if (lstat(path, &sb) == 0) { 430187359Sjeff if (u_opt) { 431187359Sjeff /* check if up-to-date */ 432187359Sjeff if (S_ISREG(sb.st_mode) && sb.st_mtime > mtime) 433187359Sjeff return; 434187359Sjeff (void)unlink(path); 435187359Sjeff } else if (o_opt) { 436187359Sjeff /* overwrite */ 437187359Sjeff (void)unlink(path); 438187359Sjeff } else if (n_opt) { 439187376Sjeff /* do not overwrite */ 440187376Sjeff return; 441187376Sjeff } else { 442187376Sjeff /* XXX ask user */ 443187376Sjeff errorx("not implemented"); 444187376Sjeff } 445187376Sjeff } 446187376Sjeff 447187376Sjeff if ((fd = open(path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0) 448187376Sjeff error("open('%s')", path); 449187376Sjeff 450187580Sjeff /* loop over file contents and write to disk */ 451187376Sjeff info("x %s", path); 452187376Sjeff text = a_opt; 453187376Sjeff warn = 0; 454187376Sjeff cr = 0; 455187376Sjeff for (int n = 0; ; n++) { 456187376Sjeff if (tty && (n % 4) == 0) 457187376Sjeff info(" %c\b\b", spinner[(n / 4) % sizeof spinner]); 458187376Sjeff 459187376Sjeff len = archive_read_data(a, buffer, sizeof buffer); 460187376Sjeff 461187376Sjeff if (len < 0) 462187376Sjeff ac(len); 463187376Sjeff 464187376Sjeff /* left over CR from previous buffer */ 465187376Sjeff if (a_opt && cr) { 466187376Sjeff if (len == 0 || buffer[0] != '\n') 467187376Sjeff if (write(fd, "\r", 1) != 1) 468187376Sjeff error("write('%s')", path); 469187376Sjeff cr = 0; 470187376Sjeff } 471187376Sjeff 472187376Sjeff /* EOF */ 473187580Sjeff if (len == 0) 474187580Sjeff break; 475187580Sjeff end = buffer + len; 476187580Sjeff 477187580Sjeff /* 478187580Sjeff * Detect whether this is a text file. The correct way to 479187580Sjeff * do this is to check the least significant bit of the 480187580Sjeff * "internal file attributes" field of the corresponding 481187580Sjeff * file header in the central directory, but libarchive 482187580Sjeff * does not read the central directory, so we have to 483187580Sjeff * guess by looking for non-ASCII characters in the 484187580Sjeff * buffer. Hopefully we won't guess wrong. If we do 485187580Sjeff * guess wrong, we print a warning message later. 486187580Sjeff */ 487187580Sjeff if (a_opt && n == 0) { 488187580Sjeff for (p = buffer; p < end; ++p) { 489187376Sjeff if (!isascii((unsigned char)*p)) { 490187376Sjeff text = 0; 491187376Sjeff break; 492187376Sjeff } 493187376Sjeff } 494187376Sjeff } 495187376Sjeff 496187376Sjeff /* simple case */ 497187376Sjeff if (!a_opt || !text) { 498187376Sjeff if (write(fd, buffer, len) != len) 499187376Sjeff error("write('%s')", path); 500187376Sjeff continue; 501187376Sjeff } 502187376Sjeff 503187376Sjeff /* hard case: convert \r\n to \n (sigh...) */ 504187376Sjeff for (p = buffer; p < end; p = q + 1) { 505187376Sjeff for (q = p; q < end; q++) { 506187376Sjeff if (!warn && !isascii(*q)) { 507187376Sjeff warningx("%s may be corrupted due" 508187376Sjeff " to weak text file detection" 509187376Sjeff " heuristic", path); 510187376Sjeff warn = 1; 511187376Sjeff } 512187376Sjeff if (q[0] != '\r') 513187376Sjeff continue; 514187376Sjeff if (&q[1] == end) { 515187376Sjeff cr = 1; 516187376Sjeff break; 517139313Sjeff } 518139313Sjeff if (q[1] == '\n') 519139313Sjeff break; 520139313Sjeff } 521139313Sjeff if (write(fd, p, q - p) != q - p) 522139313Sjeff error("write('%s')", path); 523187358Sjeff } 524187358Sjeff } 525139313Sjeff if (tty) 526187358Sjeff info(" \b\b"); 527139313Sjeff if (text) 528139313Sjeff info(" (text)"); 529139313Sjeff info("\n"); 530139313Sjeff 531139313Sjeff /* set access and modification time */ 532139313Sjeff tv[0].tv_sec = now; 533139313Sjeff tv[0].tv_usec = 0; 534139313Sjeff tv[1].tv_sec = mtime; 535139313Sjeff tv[1].tv_usec = 0; 536139313Sjeff if (futimes(fd, tv) != 0) 537139313Sjeff error("utimes('%s')", path); 538139313Sjeff if (close(fd) != 0) 539139313Sjeff error("close('%s')", path); 540139313Sjeff} 541139313Sjeff 542139313Sjeff/* 543139313Sjeff * Extract a zipfile entry: first perform some sanity checks to ensure 544139313Sjeff * that it is either a directory or a regular file and that the path is 545139313Sjeff * not absolute and does not try to break out of the current directory; 546139313Sjeff * then call either extract_dir() or extract_file() as appropriate. 547139313Sjeff * 548187358Sjeff * This is complicated a bit by the various ways in which we need to 549139313Sjeff * manipulate the path name. Case conversion (if requested by the -L 550187358Sjeff * option) happens first, but the include / exclude patterns are applied 551187358Sjeff * to the full converted path name, before the directory part of the path 552187358Sjeff * is removed in accordance with the -j option. Sanity checks are 553187358Sjeff * intentionally done earlier than they need to be, so the user will get a 554139313Sjeff * warning about insecure paths even for files or directories which 555139313Sjeff * wouldn't be extracted anyway. 556139313Sjeff */ 557139313Sjeffstatic void 558139313Sjeffextract(struct archive *a, struct archive_entry *e) 559139313Sjeff{ 560139313Sjeff char *pathname, *realpathname; 561139313Sjeff mode_t filetype; 562139313Sjeff char *p, *q; 563139313Sjeff 564139313Sjeff pathname = pathdup(archive_entry_pathname(e)); 565139313Sjeff filetype = archive_entry_filetype(e); 566139313Sjeff 567139313Sjeff /* sanity checks */ 568139313Sjeff if (pathname[0] == '/' || 569139313Sjeff strncmp(pathname, "../", 3) == 0 || 570139313Sjeff strstr(pathname, "/../") != NULL) { 571139313Sjeff warningx("skipping insecure entry '%s'", pathname); 572139313Sjeff ac(archive_read_data_skip(a)); 573139313Sjeff free(pathname); 574139313Sjeff return; 575139313Sjeff } 576139313Sjeff 577139313Sjeff /* I don't think this can happen in a zipfile.. */ 578139313Sjeff if (!S_ISDIR(filetype) && !S_ISREG(filetype)) { 579139313Sjeff warningx("skipping non-regular entry '%s'", pathname); 580139313Sjeff ac(archive_read_data_skip(a)); 581139313Sjeff free(pathname); 582139313Sjeff return; 583139313Sjeff } 584139313Sjeff 585139313Sjeff /* skip directories in -j case */ 586139313Sjeff if (S_ISDIR(filetype) && j_opt) { 587139313Sjeff ac(archive_read_data_skip(a)); 588139313Sjeff free(pathname); 589139313Sjeff return; 590139313Sjeff } 591139313Sjeff 592187358Sjeff /* apply include / exclude patterns */ 593139313Sjeff if (!accept_pathname(pathname)) { 594139313Sjeff ac(archive_read_data_skip(a)); 595139313Sjeff free(pathname); 596139313Sjeff return; 597139313Sjeff } 598139313Sjeff 599139313Sjeff /* apply -j and -d */ 600139313Sjeff if (j_opt) { 601139313Sjeff for (p = q = pathname; *p; ++p) 602187358Sjeff if (*p == '/') 603139313Sjeff q = p + 1; 604139313Sjeff realpathname = pathcat(d_arg, q); 605139313Sjeff } else { 606139313Sjeff realpathname = pathcat(d_arg, pathname); 607139313Sjeff } 608139313Sjeff 609139313Sjeff /* ensure that parent directory exists */ 610139313Sjeff make_parent(realpathname); 611139313Sjeff 612139313Sjeff if (S_ISDIR(filetype)) 613139313Sjeff extract_dir(a, e, realpathname); 614187358Sjeff else 615139313Sjeff extract_file(a, e, realpathname); 616187358Sjeff 617139313Sjeff free(realpathname); 618139313Sjeff free(pathname); 619187358Sjeff} 620139313Sjeff 621139313Sjeff/* 622139313Sjeff * Print the name of an entry to stdout. 623187376Sjeff */ 624173743Ssamstatic void 625139313Sjefflist(struct archive *a, struct archive_entry *e) 626139313Sjeff{ 627139313Sjeff 628139313Sjeff printf("%s\n", archive_entry_pathname(e)); 629139313Sjeff ac(archive_read_data_skip(a)); 630187358Sjeff} 631187358Sjeff 632187358Sjeff/* 633187358Sjeff * Extract to memory to check CRC 634187358Sjeff */ 635187358Sjeffstatic void 636139313Sjefftest(struct archive *a, struct archive_entry *e) 637139313Sjeff{ 638187358Sjeff ssize_t len; 639187358Sjeff 640187358Sjeff if (S_ISDIR(archive_entry_filetype(e))) 641187358Sjeff return; 642187358Sjeff 643187358Sjeff info("%s ", archive_entry_pathname(e)); 644187358Sjeff while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0) 645139313Sjeff /* nothing */; 646187358Sjeff if (len < 0) { 647139313Sjeff info("%s\n", archive_error_string(a)); 648139313Sjeff ++test_failed; 649139313Sjeff } else { 650187358Sjeff info("OK\n"); 651139313Sjeff } 652139313Sjeff 653139313Sjeff /* shouldn't be necessary, but it doesn't hurt */ 654187358Sjeff ac(archive_read_data_skip(a)); 655139313Sjeff} 656139313Sjeff 657187358Sjeff 658187358Sjeff/* 659187358Sjeff * Main loop: open the zipfile, iterate over its contents and decide what 660187358Sjeff * to do with each entry. 661187358Sjeff */ 662187358Sjeffstatic void 663187358Sjeffunzip(const char *fn) 664187358Sjeff{ 665187358Sjeff struct archive *a; 666187358Sjeff struct archive_entry *e; 667139313Sjeff int fd, ret; 668139313Sjeff 669139313Sjeff if ((fd = open(fn, O_RDONLY)) < 0) 670187358Sjeff error("%s", fn); 671187358Sjeff 672187358Sjeff a = archive_read_new(); 673187358Sjeff ac(archive_read_support_format_zip(a)); 674187358Sjeff ac(archive_read_open_fd(a, fd, 8192)); 675187358Sjeff 676139313Sjeff for (;;) { 677139313Sjeff ret = archive_read_next_header(a, &e); 678139313Sjeff if (ret == ARCHIVE_EOF) 679139313Sjeff break; 680139313Sjeff ac(ret); 681139313Sjeff if (t_opt) 682139313Sjeff test(a, e); 683139313Sjeff else if (l_opt) 684139313Sjeff list(a, e); 685139313Sjeff else 686139313Sjeff extract(a, e); 687139313Sjeff } 688139313Sjeff 689139313Sjeff ac(archive_read_close(a)); 690139313Sjeff (void)archive_read_finish(a); 691187358Sjeff if (close(fd) != 0) 692187358Sjeff error("%s", fn); 693187358Sjeff 694187358Sjeff if (t_opt && test_failed) 695187358Sjeff errorx("%d checksum error(s) found.", test_failed); 696187358Sjeff} 697139313Sjeff 698139313Sjeffstatic void 699187358Sjeffusage(void) 700187358Sjeff{ 701187358Sjeff 702139313Sjeff fprintf(stderr, "usage: unzip [-ajLlnoqtu] [-d dir] zipfile\n"); 703139313Sjeff exit(1); 704187358Sjeff} 705187379Sjeff 706187379Sjeffstatic int 707187376Sjeffgetopts(int argc, char *argv[]) 708187379Sjeff{ 709187358Sjeff int opt; 710139313Sjeff 711187358Sjeff optreset = optind = 1; 712139313Sjeff while ((opt = getopt(argc, argv, "ad:jLlnoqtux:")) != -1) 713139313Sjeff switch (opt) { 714187358Sjeff case 'a': 715187358Sjeff a_opt = 1; 716187358Sjeff break; 717139313Sjeff case 'd': 718139313Sjeff d_arg = optarg; 719187358Sjeff break; 720187358Sjeff case 'j': 721139313Sjeff j_opt = 1; 722187376Sjeff break; 723187358Sjeff case 'L': 724187358Sjeff L_opt = 1; 725187358Sjeff break; 726187358Sjeff case 'l': 727166209Sjeff l_opt = 1; 728166209Sjeff break; 729166209Sjeff case 'n': 730187358Sjeff n_opt = 1; 731139313Sjeff break; 732187358Sjeff case 'o': 733187379Sjeff o_opt = 1; 734187358Sjeff break; 735139313Sjeff case 'q': 736139313Sjeff q_opt = 1; 737139313Sjeff break; 738187358Sjeff case 't': 739187358Sjeff t_opt = 1; 740187358Sjeff break; 741187358Sjeff case 'u': 742187358Sjeff u_opt = 1; 743187358Sjeff break; 744139313Sjeff case 'x': 745139313Sjeff add_pattern(&exclude, optarg); 746187358Sjeff break; 747187358Sjeff default: 748187358Sjeff usage(); 749187358Sjeff } 750187376Sjeff 751187471Sjeff return (optind); 752187471Sjeff} 753187471Sjeff 754187471Sjeffint 755187471Sjeffmain(int argc, char *argv[]) 756187358Sjeff{ 757187358Sjeff const char *zipfile; 758187358Sjeff int nopts; 759139313Sjeff 760139313Sjeff if (isatty(STDOUT_FILENO)) 761187358Sjeff tty = 1; 762187379Sjeff 763187358Sjeff if (getenv("UNZIP_DEBUG") != NULL) 764139313Sjeff unzip_debug = 1; 765139313Sjeff for (int i = 0; i < argc; ++i) 766187358Sjeff debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n'); 767187358Sjeff 768187358Sjeff /* 769187358Sjeff * Info-ZIP's unzip(1) expects certain options to come before the 770187358Sjeff * zipfile name, and others to come after - though it does not 771187358Sjeff * enforce this. For simplicity, we accept *all* options both 772187358Sjeff * before and after the zipfile name. 773187358Sjeff */ 774139313Sjeff nopts = getopts(argc, argv); 775139313Sjeff 776139313Sjeff if (argc <= nopts) 777139313Sjeff usage(); 778187358Sjeff zipfile = argv[nopts++]; 779187358Sjeff 780187358Sjeff while (nopts < argc && *argv[nopts] != '-') 781139313Sjeff add_pattern(&include, argv[nopts++]); 782139313Sjeff 783187376Sjeff nopts--; /* fake argv[0] */ 784187376Sjeff nopts += getopts(argc - nopts, argv + nopts); 785187376Sjeff 786187376Sjeff if (n_opt + o_opt + u_opt > 1) 787139313Sjeff errorx("-n, -o and -u are contradictory"); 788187358Sjeff 789187358Sjeff time(&now); 790139313Sjeff 791187358Sjeff unzip(zipfile); 792187155Sjhb 793187358Sjeff exit(0); 794187358Sjeff} 795187359Sjeff