/* unzip.c revision 225736 */
1223637Sbz/*- 2126353Smlaier * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org> 3126353Smlaier * Copyright (c) 2007-2008 Dag-Erling Co�dan Sm�rgrav 4126353Smlaier * All rights reserved. 5126353Smlaier * 6126353Smlaier * Redistribution and use in source and binary forms, with or without 7126353Smlaier * modification, are permitted provided that the following conditions 8126353Smlaier * are met: 9126353Smlaier * 1. Redistributions of source code must retain the above copyright 10126353Smlaier * notice, this list of conditions and the following disclaimer 11126353Smlaier * in this position and unchanged. 12126353Smlaier * 2. Redistributions in binary form must reproduce the above copyright 13126353Smlaier * notice, this list of conditions and the following disclaimer in the 14126353Smlaier * documentation and/or other materials provided with the distribution. 15126353Smlaier * 16126353Smlaier * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17126353Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18126353Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19126353Smlaier * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20126353Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21126353Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22126353Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23126353Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24126353Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25126353Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26126353Smlaier * SUCH DAMAGE. 
27126353Smlaier * 28126353Smlaier * $FreeBSD: stable/9/usr.bin/unzip/unzip.c 214137 2010-10-21 17:05:15Z glebius $ 29126353Smlaier * 30126353Smlaier * This file would be much shorter if we didn't care about command-line 31126353Smlaier * compatibility with Info-ZIP's UnZip, which requires us to duplicate 32126353Smlaier * parts of libarchive in order to gain more detailed control of its 33127082Sobrien * behaviour for the purpose of implementing the -n, -o, -L and -a 34127082Sobrien * options. 35127082Sobrien */ 36126353Smlaier 37130617Smlaier#include <sys/queue.h> 38126353Smlaier#include <sys/stat.h> 39126353Smlaier 40223637Sbz#include <ctype.h> 41223637Sbz#include <errno.h> 42126353Smlaier#include <fcntl.h> 43126353Smlaier#include <fnmatch.h> 44126353Smlaier#include <stdarg.h> 45126353Smlaier#include <stdio.h> 46126353Smlaier#include <stdlib.h> 47126353Smlaier#include <string.h> 48126353Smlaier#include <unistd.h> 49126353Smlaier 50223637Sbz#include <archive.h> 51126353Smlaier#include <archive_entry.h> 52126353Smlaier 53126353Smlaier/* command-line options */ 54127024Smlaierstatic int a_opt; /* convert EOL */ 55223637Sbzstatic int C_opt; /* match case-insensitively */ 56126355Smlaierstatic int c_opt; /* extract to stdout */ 57126355Smlaierstatic const char *d_arg; /* directory */ 58126353Smlaierstatic int f_opt; /* update existing files only */ 59126355Smlaierstatic int j_opt; /* junk directories */ 60130617Smlaierstatic int L_opt; /* lowercase names */ 61126353Smlaierstatic int n_opt; /* never overwrite */ 62126353Smlaierstatic int o_opt; /* always overwrite */ 63130617Smlaierstatic int p_opt; /* extract to stdout, quiet */ 64126353Smlaierstatic int q_opt; /* quiet */ 65126353Smlaierstatic int t_opt; /* test */ 66130617Smlaierstatic int u_opt; /* update */ 67130617Smlaierstatic int v_opt; /* verbose/list */ 68126353Smlaier 69223637Sbz/* time when unzip started */ 70126353Smlaierstatic time_t now; 71126353Smlaier 72126353Smlaier/* debug flag */ 
73126353Smlaierstatic int unzip_debug; 74126353Smlaier 75126353Smlaier/* running on tty? */ 76126353Smlaierstatic int tty; 77126353Smlaier 78126353Smlaier/* convenience macro */ 79126353Smlaier/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */ 80130617Smlaier#define ac(call) \ 81130617Smlaier do { \ 82130617Smlaier int acret = (call); \ 83223637Sbz if (acret != ARCHIVE_OK) \ 84130617Smlaier errorx("%s", archive_error_string(a)); \ 85223637Sbz } while (0) 86126353Smlaier 87130617Smlaier/* 88130617Smlaier * Indicates that last info() did not end with EOL. This helps error() et 89171172Smlaier * al. avoid printing an error message on the same line as an incomplete 90130617Smlaier * informational message. 91130617Smlaier */ 92130617Smlaierstatic int noeol; 93126353Smlaier 94223637Sbz/* fatal error message + errno */ 95126353Smlaierstatic void 96126353Smlaiererror(const char *fmt, ...) 97126353Smlaier{ 98126353Smlaier va_list ap; 99171172Smlaier 100171172Smlaier if (noeol) 101130617Smlaier fprintf(stdout, "\n"); 102130617Smlaier fflush(stdout); 103130617Smlaier fprintf(stderr, "unzip: "); 104130617Smlaier va_start(ap, fmt); 105130617Smlaier vfprintf(stderr, fmt, ap); 106130617Smlaier va_end(ap); 107126353Smlaier fprintf(stderr, ": %s\n", strerror(errno)); 108130617Smlaier exit(1); 109130617Smlaier} 110130617Smlaier 111130617Smlaier/* fatal error message, no errno */ 112130617Smlaierstatic void 113130617Smlaiererrorx(const char *fmt, ...) 
114130617Smlaier{ 115130617Smlaier va_list ap; 116130617Smlaier 117130617Smlaier if (noeol) 118130617Smlaier fprintf(stdout, "\n"); 119171172Smlaier fflush(stdout); 120171172Smlaier fprintf(stderr, "unzip: "); 121171172Smlaier va_start(ap, fmt); 122130617Smlaier vfprintf(stderr, fmt, ap); 123130617Smlaier va_end(ap); 124126353Smlaier fprintf(stderr, "\n"); 125126353Smlaier exit(1); 126126353Smlaier} 127126353Smlaier 128126353Smlaier#if 0 129126353Smlaier/* non-fatal error message + errno */ 130126353Smlaierstatic void 131126353Smlaierwarning(const char *fmt, ...) 132126353Smlaier{ 133126353Smlaier va_list ap; 134126353Smlaier 135126353Smlaier if (noeol) 136126353Smlaier fprintf(stdout, "\n"); 137126353Smlaier fflush(stdout); 138126353Smlaier fprintf(stderr, "unzip: "); 139126353Smlaier va_start(ap, fmt); 140126353Smlaier vfprintf(stderr, fmt, ap); 141126353Smlaier va_end(ap); 142126353Smlaier fprintf(stderr, ": %s\n", strerror(errno)); 143126353Smlaier} 144126353Smlaier#endif 145126353Smlaier 146126353Smlaier/* non-fatal error message, no errno */ 147126353Smlaierstatic void 148126353Smlaierwarningx(const char *fmt, ...) 149126353Smlaier{ 150126353Smlaier va_list ap; 151126353Smlaier 152126353Smlaier if (noeol) 153126353Smlaier fprintf(stdout, "\n"); 154126353Smlaier fflush(stdout); 155126353Smlaier fprintf(stderr, "unzip: "); 156126353Smlaier va_start(ap, fmt); 157126353Smlaier vfprintf(stderr, fmt, ap); 158126353Smlaier va_end(ap); 159126353Smlaier fprintf(stderr, "\n"); 160126353Smlaier} 161126353Smlaier 162126353Smlaier/* informational message (if not -q) */ 163126353Smlaierstatic void 164127024Smlaierinfo(const char *fmt, ...) 
165127024Smlaier{ 166127024Smlaier va_list ap; 167126353Smlaier 168127024Smlaier if (q_opt && !unzip_debug) 169126353Smlaier return; 170126353Smlaier va_start(ap, fmt); 171171172Smlaier vfprintf(stdout, fmt, ap); 172223637Sbz va_end(ap); 173223637Sbz fflush(stdout); 174126353Smlaier 175126353Smlaier if (*fmt == '\0') 176126353Smlaier noeol = 1; 177126353Smlaier else 178126353Smlaier noeol = fmt[strlen(fmt) - 1] != '\n'; 179126353Smlaier} 180126353Smlaier 181126353Smlaier/* debug message (if unzip_debug) */ 182126353Smlaierstatic void 183126353Smlaierdebug(const char *fmt, ...) 184126353Smlaier{ 185126353Smlaier va_list ap; 186126353Smlaier 187126353Smlaier if (!unzip_debug) 188126353Smlaier return; 189126353Smlaier va_start(ap, fmt); 190126353Smlaier vfprintf(stderr, fmt, ap); 191126353Smlaier va_end(ap); 192126353Smlaier fflush(stderr); 193126353Smlaier 194126353Smlaier if (*fmt == '\0') 195130617Smlaier noeol = 1; 196223637Sbz else 197223637Sbz noeol = fmt[strlen(fmt) - 1] != '\n'; 198223637Sbz} 199223637Sbz 200223637Sbz/* duplicate a path name, possibly converting to lower case */ 201223637Sbzstatic char * 202130617Smlaierpathdup(const char *path) 203130617Smlaier{ 204130617Smlaier char *str; 205130617Smlaier size_t i, len; 206130617Smlaier 207130617Smlaier len = strlen(path); 208130617Smlaier while (len && path[len - 1] == '/') 209130617Smlaier len--; 210130617Smlaier if ((str = malloc(len + 1)) == NULL) { 211130617Smlaier errno = ENOMEM; 212130617Smlaier error("malloc()"); 213130617Smlaier } 214130617Smlaier if (L_opt) { 215126353Smlaier for (i = 0; i < len; ++i) 216223637Sbz str[i] = tolower((unsigned char)path[i]); 217223637Sbz } else { 218223637Sbz memcpy(str, path, len); 219223637Sbz } 220223637Sbz str[len] = '\0'; 221223637Sbz 222223637Sbz return (str); 223223637Sbz} 224223637Sbz 225223637Sbz/* concatenate two path names */ 226223637Sbzstatic char * 227223637Sbzpathcat(const char *prefix, const char *path) 228223637Sbz{ 229223637Sbz char *str; 
230223637Sbz size_t prelen, len; 231223637Sbz 232223637Sbz prelen = prefix ? strlen(prefix) + 1 : 0; 233223637Sbz len = strlen(path) + 1; 234223637Sbz if ((str = malloc(prelen + len)) == NULL) { 235223637Sbz errno = ENOMEM; 236223637Sbz error("malloc()"); 237223637Sbz } 238223637Sbz if (prefix) { 239223637Sbz memcpy(str, prefix, prelen); /* includes zero */ 240223637Sbz str[prelen - 1] = '/'; /* splat zero */ 241223637Sbz } 242223637Sbz memcpy(str + prelen, path, len); /* includes zero */ 243223637Sbz 244223637Sbz return (str); 245223637Sbz} 246223637Sbz 247223637Sbz/* 248223637Sbz * Pattern lists for include / exclude processing 249223637Sbz */ 250223637Sbzstruct pattern { 251223637Sbz STAILQ_ENTRY(pattern) link; 252223637Sbz char pattern[]; 253223637Sbz}; 254223637Sbz 255223637SbzSTAILQ_HEAD(pattern_list, pattern); 256223637Sbzstatic struct pattern_list include = STAILQ_HEAD_INITIALIZER(include); 257223637Sbzstatic struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude); 258223637Sbz 259223637Sbz/* 260223637Sbz * Add an entry to a pattern list 261126353Smlaier */ 262126353Smlaierstatic void 263126353Smlaieradd_pattern(struct pattern_list *list, const char *pattern) 264126353Smlaier{ 265126353Smlaier struct pattern *entry; 266126353Smlaier size_t len; 267126353Smlaier 268126353Smlaier debug("adding pattern '%s'\n", pattern); 269126353Smlaier len = strlen(pattern); 270126353Smlaier if ((entry = malloc(sizeof *entry + len + 1)) == NULL) { 271126353Smlaier errno = ENOMEM; 272130617Smlaier error("malloc()"); 273126353Smlaier } 274126353Smlaier memcpy(entry->pattern, pattern, len + 1); 275126353Smlaier STAILQ_INSERT_TAIL(list, entry, link); 276130617Smlaier} 277126353Smlaier 278130617Smlaier/* 279130617Smlaier * Match a string against a list of patterns 280130617Smlaier */ 281130617Smlaierstatic int 282130617Smlaiermatch_pattern(struct pattern_list *list, const char *str) 283130617Smlaier{ 284130617Smlaier struct pattern *entry; 285130617Smlaier 286126353Smlaier 
STAILQ_FOREACH(entry, list, link) { 287126353Smlaier if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0) 288126353Smlaier return (1); 289126353Smlaier } 290130617Smlaier return (0); 291130617Smlaier} 292130617Smlaier 293130617Smlaier/* 294130617Smlaier * Verify that a given pathname is in the include list and not in the 295130617Smlaier * exclude list. 296130617Smlaier */ 297130617Smlaierstatic int 298130617Smlaieraccept_pathname(const char *pathname) 299130617Smlaier{ 300130617Smlaier 301130617Smlaier if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname)) 302130617Smlaier return (0); 303130617Smlaier if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname)) 304171172Smlaier return (0); 305126353Smlaier return (1); 306171172Smlaier} 307171172Smlaier 308171172Smlaier/* 309171172Smlaier * Create the specified directory with the specified mode, taking certain 310171172Smlaier * precautions on they way. 311171172Smlaier */ 312171172Smlaierstatic void 313171172Smlaiermake_dir(const char *path, int mode) 314171172Smlaier{ 315171172Smlaier struct stat sb; 316171172Smlaier 317171172Smlaier if (lstat(path, &sb) == 0) { 318171172Smlaier if (S_ISDIR(sb.st_mode)) 319171172Smlaier return; 320171172Smlaier /* 321171172Smlaier * Normally, we should either ask the user about removing 322171172Smlaier * the non-directory of the same name as a directory we 323171172Smlaier * wish to create, or respect the -n or -o command-line 324126353Smlaier * options. However, this may lead to a later failure or 325126353Smlaier * even compromise (if this non-directory happens to be a 326130617Smlaier * symlink to somewhere unsafe), so we don't. 327126353Smlaier */ 328126353Smlaier 329126353Smlaier /* 330130617Smlaier * Don't check unlink() result; failure will cause mkdir() 331130617Smlaier * to fail later, which we will catch. 
332126353Smlaier */ 333130617Smlaier (void)unlink(path); 334130617Smlaier } 335130617Smlaier if (mkdir(path, mode) != 0 && errno != EEXIST) 336126353Smlaier error("mkdir('%s')", path); 337126353Smlaier} 338126353Smlaier 339126353Smlaier/* 340126353Smlaier * Ensure that all directories leading up to (but not including) the 341126353Smlaier * specified path exist. 342130617Smlaier * 343130617Smlaier * XXX inefficient + modifies the file in-place 344171172Smlaier */ 345130617Smlaierstatic void 346130617Smlaiermake_parent(char *path) 347130617Smlaier{ 348126353Smlaier struct stat sb; 349171172Smlaier char *sep; 350145840Smlaier 351171172Smlaier sep = strrchr(path, '/'); 352126353Smlaier if (sep == NULL || sep == path) 353126353Smlaier return; 354171172Smlaier *sep = '\0'; 355145840Smlaier if (lstat(path, &sb) == 0) { 356171172Smlaier if (S_ISDIR(sb.st_mode)) { 357126353Smlaier *sep = '/'; 358126353Smlaier return; 359130617Smlaier } 360130617Smlaier unlink(path); 361171172Smlaier } 362145840Smlaier make_parent(path); 363171172Smlaier mkdir(path, 0755); 364130617Smlaier *sep = '/'; 365126353Smlaier 366126353Smlaier#if 0 367126353Smlaier for (sep = path; (sep = strchr(sep, '/')) != NULL; sep++) { 368126353Smlaier /* root in case of absolute d_arg */ 369130617Smlaier if (sep == path) 370126353Smlaier continue; 371171172Smlaier *sep = '\0'; 372130617Smlaier make_dir(path, 0755); 373130617Smlaier *sep = '/'; 374126353Smlaier } 375126353Smlaier#endif 376126353Smlaier} 377126353Smlaier 378126353Smlaier/* 379126353Smlaier * Extract a directory. 
380126353Smlaier */ 381126353Smlaierstatic void 382126353Smlaierextract_dir(struct archive *a, struct archive_entry *e, const char *path) 383126353Smlaier{ 384126353Smlaier int mode; 385171172Smlaier 386126353Smlaier mode = archive_entry_mode(e) & 0777; 387130617Smlaier if (mode == 0) 388130617Smlaier mode = 0755; 389171172Smlaier 390171172Smlaier /* 391171172Smlaier * Some zipfiles contain directories with weird permissions such 392171172Smlaier * as 0644 or 0444. This can cause strange issues such as being 393171172Smlaier * unable to extract files into the directory we just created, or 394130617Smlaier * the user being unable to remove the directory later without 395126353Smlaier * first manually changing its permissions. Therefore, we whack 396126353Smlaier * the permissions into shape, assuming that the user wants full 397130617Smlaier * access and that anyone who gets read access also gets execute 398130617Smlaier * access. 399130617Smlaier */ 400130617Smlaier mode |= 0700; 401130617Smlaier if (mode & 0040) 402130617Smlaier mode |= 0010; 403130617Smlaier if (mode & 0004) 404130617Smlaier mode |= 0001; 405130617Smlaier 406130617Smlaier info("d %s\n", path); 407130617Smlaier make_dir(path, mode); 408130617Smlaier ac(archive_read_data_skip(a)); 409134578Smlaier} 410134578Smlaier 411134578Smlaierstatic unsigned char buffer[8192]; 412130617Smlaierstatic char spinner[] = { '|', '/', '-', '\\' }; 413134578Smlaier 414130617Smlaierstatic int 415130617Smlaierhandle_existing_file(char **path) 416126353Smlaier{ 417130617Smlaier size_t alen; 418126353Smlaier ssize_t len; 419130617Smlaier char buf[4]; 420130617Smlaier 421130617Smlaier for (;;) { 422130617Smlaier fprintf(stderr, 423126353Smlaier "replace %s? 
[y]es, [n]o, [A]ll, [N]one, [r]ename: ", 424126353Smlaier *path); 425130617Smlaier if (fgets(buf, sizeof(buf), stdin) == 0) { 426130617Smlaier clearerr(stdin); 427130617Smlaier printf("NULL\n(EOF or read error, " 428130617Smlaier "treating as \"[N]one\"...)\n"); 429130617Smlaier n_opt = 1; 430130617Smlaier return -1; 431130617Smlaier } 432130617Smlaier switch (*buf) { 433130617Smlaier case 'A': 434130617Smlaier o_opt = 1; 435130617Smlaier /* FALLTHROUGH */ 436130617Smlaier case 'y': 437130617Smlaier case 'Y': 438130617Smlaier (void)unlink(*path); 439130617Smlaier return 1; 440130617Smlaier case 'N': 441130617Smlaier n_opt = 1; 442130617Smlaier /* FALLTHROUGH */ 443130617Smlaier case 'n': 444130617Smlaier return -1; 445130617Smlaier case 'r': 446130617Smlaier case 'R': 447130617Smlaier printf("New name: "); 448130617Smlaier fflush(stdout); 449130617Smlaier free(*path); 450130617Smlaier *path = NULL; 451130617Smlaier alen = 0; 452130617Smlaier len = getdelim(path, &alen, '\n', stdin); 453130617Smlaier if ((*path)[len - 1] == '\n') 454130617Smlaier (*path)[len - 1] = '\0'; 455130617Smlaier return 0; 456130617Smlaier default: 457130617Smlaier break; 458130617Smlaier } 459130617Smlaier } 460130617Smlaier} 461130617Smlaier 462130617Smlaier/* 463130617Smlaier * Extract a regular file. 
464126353Smlaier */ 465130617Smlaierstatic void 466130617Smlaierextract_file(struct archive *a, struct archive_entry *e, char **path) 467126353Smlaier{ 468126353Smlaier int mode; 469126353Smlaier time_t mtime; 470126353Smlaier struct stat sb; 471130617Smlaier struct timeval tv[2]; 472130617Smlaier int cr, fd, text, warn, check; 473130617Smlaier ssize_t len; 474130617Smlaier unsigned char *p, *q, *end; 475126353Smlaier 476126353Smlaier mode = archive_entry_mode(e) & 0777; 477130617Smlaier if (mode == 0) 478130617Smlaier mode = 0644; 479130617Smlaier mtime = archive_entry_mtime(e); 480130617Smlaier 481130617Smlaier /* look for existing file of same name */ 482134578Smlaierrecheck: 483134578Smlaier if (lstat(*path, &sb) == 0) { 484134578Smlaier if (u_opt || f_opt) { 485130617Smlaier /* check if up-to-date */ 486130617Smlaier if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime) 487130617Smlaier return; 488130617Smlaier (void)unlink(*path); 489130617Smlaier } else if (o_opt) { 490130617Smlaier /* overwrite */ 491134578Smlaier (void)unlink(*path); 492134578Smlaier } else if (n_opt) { 493134578Smlaier /* do not overwrite */ 494134578Smlaier return; 495134578Smlaier } else { 496134578Smlaier check = handle_existing_file(path); 497134578Smlaier if (check == 0) 498134578Smlaier goto recheck; 499130617Smlaier if (check == -1) 500134578Smlaier return; /* do not overwrite */ 501145840Smlaier } 502145840Smlaier } else { 503130617Smlaier if (f_opt) 504134578Smlaier return; 505134578Smlaier } 506134578Smlaier 507134578Smlaier if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0) 508130617Smlaier error("open('%s')", *path); 509130617Smlaier 510134578Smlaier /* loop over file contents and write to disk */ 511130617Smlaier info(" extracting: %s", *path); 512130617Smlaier text = a_opt; 513130617Smlaier warn = 0; 514130617Smlaier cr = 0; 515130617Smlaier for (int n = 0; ; n++) { 516130617Smlaier if (tty && (n % 4) == 0) 517130617Smlaier info(" %c\b\b", spinner[(n / 4) % sizeof 
spinner]); 518130617Smlaier 519130617Smlaier len = archive_read_data(a, buffer, sizeof buffer); 520130617Smlaier 521130617Smlaier if (len < 0) 522130617Smlaier ac(len); 523130617Smlaier 524130617Smlaier /* left over CR from previous buffer */ 525130617Smlaier if (a_opt && cr) { 526130617Smlaier if (len == 0 || buffer[0] != '\n') 527130617Smlaier if (write(fd, "\r", 1) != 1) 528130617Smlaier error("write('%s')", *path); 529130617Smlaier cr = 0; 530126353Smlaier } 531130617Smlaier 532130617Smlaier /* EOF */ 533130617Smlaier if (len == 0) 534130617Smlaier break; 535130617Smlaier end = buffer + len; 536130617Smlaier 537130617Smlaier /* 538130617Smlaier * Detect whether this is a text file. The correct way to 539130617Smlaier * do this is to check the least significant bit of the 540130617Smlaier * "internal file attributes" field of the corresponding 541130617Smlaier * file header in the central directory, but libarchive 542130617Smlaier * does not read the central directory, so we have to 543130617Smlaier * guess by looking for non-ASCII characters in the 544130617Smlaier * buffer. Hopefully we won't guess wrong. If we do 545130617Smlaier * guess wrong, we print a warning message later. 546130617Smlaier */ 547130617Smlaier if (a_opt && n == 0) { 548130617Smlaier for (p = buffer; p < end; ++p) { 549130617Smlaier if (!isascii((unsigned char)*p)) { 550130617Smlaier text = 0; 551130617Smlaier break; 552130617Smlaier } 553130617Smlaier } 554130617Smlaier } 555130617Smlaier 556130617Smlaier /* simple case */ 557130617Smlaier if (!a_opt || !text) { 558130617Smlaier if (write(fd, buffer, len) != len) 559130617Smlaier error("write('%s')", *path); 560130617Smlaier continue; 561130617Smlaier } 562130617Smlaier 563130617Smlaier /* hard case: convert \r\n to \n (sigh...) 
*/ 564130617Smlaier for (p = buffer; p < end; p = q + 1) { 565130617Smlaier for (q = p; q < end; q++) { 566130617Smlaier if (!warn && !isascii(*q)) { 567130617Smlaier warningx("%s may be corrupted due" 568130617Smlaier " to weak text file detection" 569130617Smlaier " heuristic", *path); 570130617Smlaier warn = 1; 571130617Smlaier } 572130617Smlaier if (q[0] != '\r') 573130617Smlaier continue; 574130617Smlaier if (&q[1] == end) { 575130617Smlaier cr = 1; 576130617Smlaier break; 577130617Smlaier } 578130617Smlaier if (q[1] == '\n') 579134578Smlaier break; 580134578Smlaier } 581134578Smlaier if (write(fd, p, q - p) != q - p) 582134578Smlaier error("write('%s')", *path); 583130617Smlaier } 584134578Smlaier } 585130617Smlaier if (tty) 586130617Smlaier info(" \b\b"); 587130617Smlaier if (text) 588130617Smlaier info(" (text)"); 589130617Smlaier info("\n"); 590130617Smlaier 591130617Smlaier /* set access and modification time */ 592130617Smlaier tv[0].tv_sec = now; 593130617Smlaier tv[0].tv_usec = 0; 594130617Smlaier tv[1].tv_sec = mtime; 595130617Smlaier tv[1].tv_usec = 0; 596130617Smlaier if (futimes(fd, tv) != 0) 597130617Smlaier error("utimes('%s')", *path); 598130617Smlaier if (close(fd) != 0) 599130617Smlaier error("close('%s')", *path); 600130617Smlaier} 601130617Smlaier 602130617Smlaier/* 603130617Smlaier * Extract a zipfile entry: first perform some sanity checks to ensure 604130617Smlaier * that it is either a directory or a regular file and that the path is 605130617Smlaier * not absolute and does not try to break out of the current directory; 606130617Smlaier * then call either extract_dir() or extract_file() as appropriate. 607130617Smlaier * 608130617Smlaier * This is complicated a bit by the various ways in which we need to 609130617Smlaier * manipulate the path name. 
Case conversion (if requested by the -L 610130617Smlaier * option) happens first, but the include / exclude patterns are applied 611223637Sbz * to the full converted path name, before the directory part of the path 612134578Smlaier * is removed in accordance with the -j option. Sanity checks are 613223637Sbz * intentionally done earlier than they need to be, so the user will get a 614223637Sbz * warning about insecure paths even for files or directories which 615134578Smlaier * wouldn't be extracted anyway. 616134578Smlaier */ 617134578Smlaierstatic void 618134578Smlaierextract(struct archive *a, struct archive_entry *e) 619134578Smlaier{ 620134578Smlaier char *pathname, *realpathname; 621130617Smlaier mode_t filetype; 622130617Smlaier char *p, *q; 623134578Smlaier 624130617Smlaier pathname = pathdup(archive_entry_pathname(e)); 625130617Smlaier filetype = archive_entry_filetype(e); 626130617Smlaier 627130617Smlaier /* sanity checks */ 628130617Smlaier if (pathname[0] == '/' || 629130617Smlaier strncmp(pathname, "../", 3) == 0 || 630130617Smlaier strstr(pathname, "/../") != NULL) { 631130617Smlaier warningx("skipping insecure entry '%s'", pathname); 632223637Sbz ac(archive_read_data_skip(a)); 633223637Sbz free(pathname); 634223637Sbz return; 635223637Sbz } 636223637Sbz 637223637Sbz /* I don't think this can happen in a zipfile.. 
*/ 638223637Sbz if (!S_ISDIR(filetype) && !S_ISREG(filetype)) { 639223637Sbz warningx("skipping non-regular entry '%s'", pathname); 640223637Sbz ac(archive_read_data_skip(a)); 641223637Sbz free(pathname); 642223637Sbz return; 643223637Sbz } 644130617Smlaier 645126353Smlaier /* skip directories in -j case */ 646126353Smlaier if (S_ISDIR(filetype) && j_opt) { 647223637Sbz ac(archive_read_data_skip(a)); 648130617Smlaier free(pathname); 649156744Smlaier return; 650223637Sbz } 651126353Smlaier 652223637Sbz /* apply include / exclude patterns */ 653223637Sbz if (!accept_pathname(pathname)) { 654130617Smlaier ac(archive_read_data_skip(a)); 655130617Smlaier free(pathname); 656223637Sbz return; 657126353Smlaier } 658126353Smlaier 659126353Smlaier /* apply -j and -d */ 660126353Smlaier if (j_opt) { 661126353Smlaier for (p = q = pathname; *p; ++p) 662145840Smlaier if (*p == '/') 663145840Smlaier q = p + 1; 664126353Smlaier realpathname = pathcat(d_arg, q); 665126353Smlaier } else { 666126353Smlaier realpathname = pathcat(d_arg, pathname); 667126353Smlaier } 668126353Smlaier 669171172Smlaier /* ensure that parent directory exists */ 670171172Smlaier make_parent(realpathname); 671171172Smlaier 672223637Sbz if (S_ISDIR(filetype)) 673223637Sbz extract_dir(a, e, realpathname); 674223637Sbz else 675126353Smlaier extract_file(a, e, &realpathname); 676145840Smlaier 677145840Smlaier free(realpathname); 678126353Smlaier free(pathname); 679126353Smlaier} 680145840Smlaier 681130617Smlaierstatic void 682126353Smlaierextract_stdout(struct archive *a, struct archive_entry *e) 683130617Smlaier{ 684130617Smlaier char *pathname; 685130617Smlaier mode_t filetype; 686126353Smlaier int cr, text, warn; 687126353Smlaier ssize_t len; 688126353Smlaier unsigned char *p, *q, *end; 689126353Smlaier 690126353Smlaier pathname = pathdup(archive_entry_pathname(e)); 691126353Smlaier filetype = archive_entry_filetype(e); 692126353Smlaier 693126353Smlaier /* I don't think this can happen in a zipfile.. 
*/ 694126353Smlaier if (!S_ISDIR(filetype) && !S_ISREG(filetype)) { 695126353Smlaier warningx("skipping non-regular entry '%s'", pathname); 696223637Sbz ac(archive_read_data_skip(a)); 697223637Sbz free(pathname); 698223637Sbz return; 699223637Sbz } 700223637Sbz 701223637Sbz /* skip directories in -j case */ 702223637Sbz if (S_ISDIR(filetype)) { 703223637Sbz ac(archive_read_data_skip(a)); 704126353Smlaier free(pathname); 705126353Smlaier return; 706126353Smlaier } 707126353Smlaier 708126353Smlaier /* apply include / exclude patterns */ 709126353Smlaier if (!accept_pathname(pathname)) { 710223637Sbz ac(archive_read_data_skip(a)); 711126353Smlaier free(pathname); 712126353Smlaier return; 713145840Smlaier } 714126353Smlaier 715126353Smlaier if (c_opt) 716130617Smlaier info("x %s\n", pathname); 717126353Smlaier 718126353Smlaier text = a_opt; 719126353Smlaier warn = 0; 720126353Smlaier cr = 0; 721126353Smlaier for (int n = 0; ; n++) { 722126353Smlaier len = archive_read_data(a, buffer, sizeof buffer); 723130617Smlaier 724126353Smlaier if (len < 0) 725126353Smlaier ac(len); 726126353Smlaier 727126353Smlaier /* left over CR from previous buffer */ 728126353Smlaier if (a_opt && cr) { 729130617Smlaier if (len == 0 || buffer[0] != '\n') { 730130617Smlaier if (fwrite("\r", 1, 1, stderr) != 1) 731130617Smlaier error("write('%s')", pathname); 732126353Smlaier } 733126353Smlaier cr = 0; 734126353Smlaier } 735130617Smlaier 736130617Smlaier /* EOF */ 737130617Smlaier if (len == 0) 738130617Smlaier break; 739130617Smlaier end = buffer + len; 740130617Smlaier 741223637Sbz /* 742130617Smlaier * Detect whether this is a text file. 
The correct way to 743130617Smlaier * do this is to check the least significant bit of the 744130617Smlaier * "internal file attributes" field of the corresponding 745130617Smlaier * file header in the central directory, but libarchive 746130617Smlaier * does not read the central directory, so we have to 747130617Smlaier * guess by looking for non-ASCII characters in the 748130617Smlaier * buffer. Hopefully we won't guess wrong. If we do 749130617Smlaier * guess wrong, we print a warning message later. 750130617Smlaier */ 751130617Smlaier if (a_opt && n == 0) { 752130617Smlaier for (p = buffer; p < end; ++p) { 753130617Smlaier if (!isascii((unsigned char)*p)) { 754130617Smlaier text = 0; 755130617Smlaier break; 756171172Smlaier } 757130617Smlaier } 758130617Smlaier } 759130617Smlaier 760130617Smlaier /* simple case */ 761130617Smlaier if (!a_opt || !text) { 762130617Smlaier if (fwrite(buffer, 1, len, stdout) != (size_t)len) 763130617Smlaier error("write('%s')", pathname); 764130617Smlaier continue; 765126353Smlaier } 766130617Smlaier 767145840Smlaier /* hard case: convert \r\n to \n (sigh...) */ 768136141Smlaier for (p = buffer; p < end; p = q + 1) { 769223637Sbz for (q = p; q < end; q++) { 770223637Sbz if (!warn && !isascii(*q)) { 771223637Sbz warningx("%s may be corrupted due" 772223637Sbz " to weak text file detection" 773136141Smlaier " heuristic", pathname); 774136141Smlaier warn = 1; 775126353Smlaier } 776136141Smlaier if (q[0] != '\r') 777126353Smlaier continue; 778126353Smlaier if (&q[1] == end) { 779126353Smlaier cr = 1; 780126353Smlaier break; 781171172Smlaier } 782130617Smlaier if (q[1] == '\n') 783130617Smlaier break; 784130617Smlaier } 785126353Smlaier if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p)) 786126353Smlaier error("write('%s')", pathname); 787126353Smlaier } 788126353Smlaier } 789126353Smlaier 790130617Smlaier free(pathname); 791130617Smlaier} 792171172Smlaier 793171172Smlaier/* 794126353Smlaier * Print the name of an entry to stdout. 
795126353Smlaier */ 796126353Smlaierstatic void 797223637Sbzlist(struct archive *a, struct archive_entry *e) 798223637Sbz{ 799223637Sbz char buf[20]; 800223637Sbz time_t mtime; 801223637Sbz 802126353Smlaier mtime = archive_entry_mtime(e); 803126353Smlaier strftime(buf, sizeof(buf), "%m-%d-%g %R", localtime(&mtime)); 804130617Smlaier 805130617Smlaier if (v_opt == 1) { 806130617Smlaier printf(" %8ju %s %s\n", 807130617Smlaier (uintmax_t)archive_entry_size(e), 808130617Smlaier buf, archive_entry_pathname(e)); 809130617Smlaier } else if (v_opt == 2) { 810126353Smlaier printf("%8ju Stored %7ju 0%% %s %08x %s\n", 811223637Sbz (uintmax_t)archive_entry_size(e), 812126353Smlaier (uintmax_t)archive_entry_size(e), 813126353Smlaier buf, 814126353Smlaier 0U, 815223637Sbz archive_entry_pathname(e)); 816126353Smlaier } 817 ac(archive_read_data_skip(a)); 818} 819 820/* 821 * Extract to memory to check CRC 822 */ 823static int 824test(struct archive *a, struct archive_entry *e) 825{ 826 ssize_t len; 827 int error_count; 828 829 error_count = 0; 830 if (S_ISDIR(archive_entry_filetype(e))) 831 return 0; 832 833 info(" testing: %s\t", archive_entry_pathname(e)); 834 while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0) 835 /* nothing */; 836 if (len < 0) { 837 info(" %s\n", archive_error_string(a)); 838 ++error_count; 839 } else { 840 info(" OK\n"); 841 } 842 843 /* shouldn't be necessary, but it doesn't hurt */ 844 ac(archive_read_data_skip(a)); 845 846 return error_count; 847} 848 849 850/* 851 * Main loop: open the zipfile, iterate over its contents and decide what 852 * to do with each entry. 
853 */ 854static void 855unzip(const char *fn) 856{ 857 struct archive *a; 858 struct archive_entry *e; 859 int fd, ret; 860 uintmax_t total_size, file_count, error_count; 861 862 if (strcmp(fn, "-") == 0) 863 fd = STDIN_FILENO; 864 else if ((fd = open(fn, O_RDONLY)) < 0) 865 error("%s", fn); 866 867 if ((a = archive_read_new()) == NULL) 868 error("archive_read_new failed"); 869 870 ac(archive_read_support_format_zip(a)); 871 ac(archive_read_open_fd(a, fd, 8192)); 872 873 if (!p_opt && !q_opt) 874 printf("Archive: %s\n", fn); 875 if (v_opt == 1) { 876 printf(" Length Date Time Name\n"); 877 printf(" -------- ---- ---- ----\n"); 878 } else if (v_opt == 2) { 879 printf(" Length Method Size Ratio Date Time CRC-32 Name\n"); 880 printf("-------- ------ ------- ----- ---- ---- ------ ----\n"); 881 } 882 883 total_size = 0; 884 file_count = 0; 885 error_count = 0; 886 for (;;) { 887 ret = archive_read_next_header(a, &e); 888 if (ret == ARCHIVE_EOF) 889 break; 890 ac(ret); 891 if (t_opt) 892 error_count += test(a, e); 893 else if (v_opt) 894 list(a, e); 895 else if (p_opt || c_opt) 896 extract_stdout(a, e); 897 else 898 extract(a, e); 899 900 total_size += archive_entry_size(e); 901 ++file_count; 902 } 903 904 if (v_opt == 1) { 905 printf(" -------- -------\n"); 906 printf(" %8ju %ju file%s\n", 907 total_size, file_count, file_count != 1 ? "s" : ""); 908 } else if (v_opt == 2) { 909 printf("-------- ------- --- -------\n"); 910 printf("%8ju %7ju 0%% %ju file%s\n", 911 total_size, total_size, file_count, 912 file_count != 1 ? 
"s" : ""); 913 } 914 915 ac(archive_read_close(a)); 916 (void)archive_read_finish(a); 917 918 if (fd != STDIN_FILENO && close(fd) != 0) 919 error("%s", fn); 920 921 if (t_opt) { 922 if (error_count > 0) { 923 errorx("%d checksum error(s) found.", error_count); 924 } 925 else { 926 printf("No errors detected in compressed data of %s.\n", 927 fn); 928 } 929 } 930} 931 932static void 933usage(void) 934{ 935 936 fprintf(stderr, "usage: unzip [-aCcfjLlnopqtuv] [-d dir] [-x pattern] zipfile\n"); 937 exit(1); 938} 939 940static int 941getopts(int argc, char *argv[]) 942{ 943 int opt; 944 945 optreset = optind = 1; 946 while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvx:")) != -1) 947 switch (opt) { 948 case 'a': 949 a_opt = 1; 950 break; 951 case 'C': 952 C_opt = 1; 953 break; 954 case 'c': 955 c_opt = 1; 956 break; 957 case 'd': 958 d_arg = optarg; 959 break; 960 case 'f': 961 f_opt = 1; 962 break; 963 case 'j': 964 j_opt = 1; 965 break; 966 case 'L': 967 L_opt = 1; 968 break; 969 case 'l': 970 if (v_opt == 0) 971 v_opt = 1; 972 break; 973 case 'n': 974 n_opt = 1; 975 break; 976 case 'o': 977 o_opt = 1; 978 q_opt = 1; 979 break; 980 case 'p': 981 p_opt = 1; 982 break; 983 case 'q': 984 q_opt = 1; 985 break; 986 case 't': 987 t_opt = 1; 988 break; 989 case 'u': 990 u_opt = 1; 991 break; 992 case 'v': 993 v_opt = 2; 994 break; 995 case 'x': 996 add_pattern(&exclude, optarg); 997 break; 998 default: 999 usage(); 1000 } 1001 1002 return (optind); 1003} 1004 1005int 1006main(int argc, char *argv[]) 1007{ 1008 const char *zipfile; 1009 int nopts; 1010 1011 if (isatty(STDOUT_FILENO)) 1012 tty = 1; 1013 1014 if (getenv("UNZIP_DEBUG") != NULL) 1015 unzip_debug = 1; 1016 for (int i = 0; i < argc; ++i) 1017 debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n'); 1018 1019 /* 1020 * Info-ZIP's unzip(1) expects certain options to come before the 1021 * zipfile name, and others to come after - though it does not 1022 * enforce this. 
For simplicity, we accept *all* options both 1023 * before and after the zipfile name. 1024 */ 1025 nopts = getopts(argc, argv); 1026 1027 if (argc <= nopts) 1028 usage(); 1029 zipfile = argv[nopts++]; 1030 1031 while (nopts < argc && *argv[nopts] != '-') 1032 add_pattern(&include, argv[nopts++]); 1033 1034 nopts--; /* fake argv[0] */ 1035 nopts += getopts(argc - nopts, argv + nopts); 1036 1037 if (n_opt + o_opt + u_opt > 1) 1038 errorx("-n, -o and -u are contradictory"); 1039 1040 time(&now); 1041 1042 unzip(zipfile); 1043 1044 exit(0); 1045} 1046