unzip.c revision 225736
1223637Sbz/*-
2126353Smlaier * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org>
 * Copyright (c) 2007-2008 Dag-Erling Coïdan Smørgrav
4126353Smlaier * All rights reserved.
5126353Smlaier *
6126353Smlaier * Redistribution and use in source and binary forms, with or without
7126353Smlaier * modification, are permitted provided that the following conditions
8126353Smlaier * are met:
9126353Smlaier * 1. Redistributions of source code must retain the above copyright
10126353Smlaier *    notice, this list of conditions and the following disclaimer
11126353Smlaier *    in this position and unchanged.
12126353Smlaier * 2. Redistributions in binary form must reproduce the above copyright
13126353Smlaier *    notice, this list of conditions and the following disclaimer in the
14126353Smlaier *    documentation and/or other materials provided with the distribution.
15126353Smlaier *
16126353Smlaier * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17126353Smlaier * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18126353Smlaier * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19126353Smlaier * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20126353Smlaier * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21126353Smlaier * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22126353Smlaier * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23126353Smlaier * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24126353Smlaier * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25126353Smlaier * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26126353Smlaier * SUCH DAMAGE.
27126353Smlaier *
28126353Smlaier * $FreeBSD: stable/9/usr.bin/unzip/unzip.c 214137 2010-10-21 17:05:15Z glebius $
29126353Smlaier *
30126353Smlaier * This file would be much shorter if we didn't care about command-line
31126353Smlaier * compatibility with Info-ZIP's UnZip, which requires us to duplicate
32126353Smlaier * parts of libarchive in order to gain more detailed control of its
33127082Sobrien * behaviour for the purpose of implementing the -n, -o, -L and -a
34127082Sobrien * options.
35127082Sobrien */
36126353Smlaier
#include <sys/queue.h>
#include <sys/stat.h>

#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <fnmatch.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include <archive.h>
#include <archive_entry.h>
52126353Smlaier
/*
 * Command-line option state.  All of these have static storage duration
 * and therefore start out as zero / NULL.
 */
static int		 a_opt;		/* convert EOL */
static int		 C_opt;		/* match case-insensitively */
static int		 c_opt;		/* extract to stdout */
static const char	*d_arg;		/* directory */
static int		 f_opt;		/* update existing files only */
static int		 j_opt;		/* junk directories */
static int		 L_opt;		/* lowercase names */
static int		 n_opt;		/* never overwrite */
static int		 o_opt;		/* always overwrite */
static int		 p_opt;		/* extract to stdout, quiet */
static int		 q_opt;		/* quiet */
static int		 t_opt;		/* test */
static int		 u_opt;		/* update */
static int		 v_opt;		/* verbose/list */

/* time when unzip started; used as the access time of extracted files */
static time_t		 now;

/* debug flag: enables debug() output and overrides -q in info() */
static int		 unzip_debug;

/* running on tty?  (enables the progress spinner while extracting) */
static int		 tty;
/*
 * Convenience macro for calls that report status the libarchive way:
 * evaluates `call' exactly once and, unless the result is ARCHIVE_OK,
 * terminates through errorx() with the archive's error string.  It
 * expects a variable named `a' (the archive handle) to be in scope at the
 * point of use.
 */
/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
#define ac(call)						\
	do {							\
		int ac_status = (call);				\
		if (ac_status != ARCHIVE_OK)			\
			errorx("%s", archive_error_string(a));	\
	} while (0)

/*
 * Non-zero when the last message printed by info() (or debug()) did not
 * end with a newline.  The error and warning routines consult it so that
 * their message does not end up on the same line as an incomplete
 * informational message.
 */
static int noeol;
93126353Smlaier
94223637Sbz/* fatal error message + errno */
95126353Smlaierstatic void
96126353Smlaiererror(const char *fmt, ...)
97126353Smlaier{
98126353Smlaier	va_list ap;
99171172Smlaier
100171172Smlaier	if (noeol)
101130617Smlaier		fprintf(stdout, "\n");
102130617Smlaier	fflush(stdout);
103130617Smlaier	fprintf(stderr, "unzip: ");
104130617Smlaier	va_start(ap, fmt);
105130617Smlaier	vfprintf(stderr, fmt, ap);
106130617Smlaier	va_end(ap);
107126353Smlaier	fprintf(stderr, ": %s\n", strerror(errno));
108130617Smlaier	exit(1);
109130617Smlaier}
110130617Smlaier
111130617Smlaier/* fatal error message, no errno */
112130617Smlaierstatic void
113130617Smlaiererrorx(const char *fmt, ...)
114130617Smlaier{
115130617Smlaier	va_list ap;
116130617Smlaier
117130617Smlaier	if (noeol)
118130617Smlaier		fprintf(stdout, "\n");
119171172Smlaier	fflush(stdout);
120171172Smlaier	fprintf(stderr, "unzip: ");
121171172Smlaier	va_start(ap, fmt);
122130617Smlaier	vfprintf(stderr, fmt, ap);
123130617Smlaier	va_end(ap);
124126353Smlaier	fprintf(stderr, "\n");
125126353Smlaier	exit(1);
126126353Smlaier}
127126353Smlaier
#if 0
/*
 * Non-fatal error message + errno (currently compiled out).
 *
 * Same output format as error(), but returns to the caller instead of
 * exiting.
 */
static void
warning(const char *fmt, ...)
{
	va_list ap;
	int saved_errno;

	/* the stdio calls below may modify errno, so remember it first */
	saved_errno = errno;

	if (noeol)
		fprintf(stdout, "\n");
	fflush(stdout);
	fprintf(stderr, "unzip: ");
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, ": %s\n", strerror(saved_errno));
}
#endif
145126353Smlaier
146126353Smlaier/* non-fatal error message, no errno */
147126353Smlaierstatic void
148126353Smlaierwarningx(const char *fmt, ...)
149126353Smlaier{
150126353Smlaier	va_list ap;
151126353Smlaier
152126353Smlaier	if (noeol)
153126353Smlaier		fprintf(stdout, "\n");
154126353Smlaier	fflush(stdout);
155126353Smlaier	fprintf(stderr, "unzip: ");
156126353Smlaier	va_start(ap, fmt);
157126353Smlaier	vfprintf(stderr, fmt, ap);
158126353Smlaier	va_end(ap);
159126353Smlaier	fprintf(stderr, "\n");
160126353Smlaier}
161126353Smlaier
162126353Smlaier/* informational message (if not -q) */
163126353Smlaierstatic void
164127024Smlaierinfo(const char *fmt, ...)
165127024Smlaier{
166127024Smlaier	va_list ap;
167126353Smlaier
168127024Smlaier	if (q_opt && !unzip_debug)
169126353Smlaier		return;
170126353Smlaier	va_start(ap, fmt);
171171172Smlaier	vfprintf(stdout, fmt, ap);
172223637Sbz	va_end(ap);
173223637Sbz	fflush(stdout);
174126353Smlaier
175126353Smlaier	if (*fmt == '\0')
176126353Smlaier		noeol = 1;
177126353Smlaier	else
178126353Smlaier		noeol = fmt[strlen(fmt) - 1] != '\n';
179126353Smlaier}
180126353Smlaier
181126353Smlaier/* debug message (if unzip_debug) */
182126353Smlaierstatic void
183126353Smlaierdebug(const char *fmt, ...)
184126353Smlaier{
185126353Smlaier	va_list ap;
186126353Smlaier
187126353Smlaier	if (!unzip_debug)
188126353Smlaier		return;
189126353Smlaier	va_start(ap, fmt);
190126353Smlaier	vfprintf(stderr, fmt, ap);
191126353Smlaier	va_end(ap);
192126353Smlaier	fflush(stderr);
193126353Smlaier
194126353Smlaier	if (*fmt == '\0')
195130617Smlaier		noeol = 1;
196223637Sbz	else
197223637Sbz		noeol = fmt[strlen(fmt) - 1] != '\n';
198223637Sbz}
199223637Sbz
200223637Sbz/* duplicate a path name, possibly converting to lower case */
201223637Sbzstatic char *
202130617Smlaierpathdup(const char *path)
203130617Smlaier{
204130617Smlaier	char *str;
205130617Smlaier	size_t i, len;
206130617Smlaier
207130617Smlaier	len = strlen(path);
208130617Smlaier	while (len && path[len - 1] == '/')
209130617Smlaier		len--;
210130617Smlaier	if ((str = malloc(len + 1)) == NULL) {
211130617Smlaier		errno = ENOMEM;
212130617Smlaier		error("malloc()");
213130617Smlaier	}
214130617Smlaier	if (L_opt) {
215126353Smlaier		for (i = 0; i < len; ++i)
216223637Sbz			str[i] = tolower((unsigned char)path[i]);
217223637Sbz	} else {
218223637Sbz		memcpy(str, path, len);
219223637Sbz	}
220223637Sbz	str[len] = '\0';
221223637Sbz
222223637Sbz	return (str);
223223637Sbz}
224223637Sbz
/*
 * Concatenate two path names as "prefix/path".  A NULL prefix yields a
 * plain copy of path.
 *
 * Returns a freshly malloc()ed string which the caller must free().
 * Allocation failure is fatal.
 */
static char *
pathcat(const char *prefix, const char *path)
{
	char *joined;
	size_t pathsz, presz;

	pathsz = strlen(path) + 1;		/* path plus its NUL */
	presz = 0;
	if (prefix != NULL)
		presz = strlen(prefix) + 1;	/* prefix plus the '/' */

	joined = malloc(presz + pathsz);
	if (joined == NULL) {
		errno = ENOMEM;
		error("malloc()");
	}

	if (prefix != NULL) {
		memcpy(joined, prefix, presz - 1);
		joined[presz - 1] = '/';
	}
	memcpy(joined + presz, path, pathsz);	/* includes zero */

	return (joined);
}
246223637Sbz
247223637Sbz/*
248223637Sbz * Pattern lists for include / exclude processing
249223637Sbz */
250223637Sbzstruct pattern {
251223637Sbz	STAILQ_ENTRY(pattern) link;
252223637Sbz	char pattern[];
253223637Sbz};
254223637Sbz
255223637SbzSTAILQ_HEAD(pattern_list, pattern);
256223637Sbzstatic struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
257223637Sbzstatic struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
258223637Sbz
259223637Sbz/*
260223637Sbz * Add an entry to a pattern list
261126353Smlaier */
262126353Smlaierstatic void
263126353Smlaieradd_pattern(struct pattern_list *list, const char *pattern)
264126353Smlaier{
265126353Smlaier	struct pattern *entry;
266126353Smlaier	size_t len;
267126353Smlaier
268126353Smlaier	debug("adding pattern '%s'\n", pattern);
269126353Smlaier	len = strlen(pattern);
270126353Smlaier	if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
271126353Smlaier		errno = ENOMEM;
272130617Smlaier		error("malloc()");
273126353Smlaier	}
274126353Smlaier	memcpy(entry->pattern, pattern, len + 1);
275126353Smlaier	STAILQ_INSERT_TAIL(list, entry, link);
276130617Smlaier}
277126353Smlaier
278130617Smlaier/*
279130617Smlaier * Match a string against a list of patterns
280130617Smlaier */
281130617Smlaierstatic int
282130617Smlaiermatch_pattern(struct pattern_list *list, const char *str)
283130617Smlaier{
284130617Smlaier	struct pattern *entry;
285130617Smlaier
286126353Smlaier	STAILQ_FOREACH(entry, list, link) {
287126353Smlaier		if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
288126353Smlaier			return (1);
289126353Smlaier	}
290130617Smlaier	return (0);
291130617Smlaier}
292130617Smlaier
293130617Smlaier/*
294130617Smlaier * Verify that a given pathname is in the include list and not in the
295130617Smlaier * exclude list.
296130617Smlaier */
297130617Smlaierstatic int
298130617Smlaieraccept_pathname(const char *pathname)
299130617Smlaier{
300130617Smlaier
301130617Smlaier	if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
302130617Smlaier		return (0);
303130617Smlaier	if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
304171172Smlaier		return (0);
305126353Smlaier	return (1);
306171172Smlaier}
307171172Smlaier
/*
 * Create the specified directory with the specified mode, taking certain
 * precautions on the way.
 *
 * An existing directory is left untouched.  An existing non-directory is
 * removed first.  A mkdir() failure other than EEXIST is fatal.
 */
static void
make_dir(const char *path, int mode)
{
	struct stat st;

	if (lstat(path, &st) == 0) {
		if (S_ISDIR(st.st_mode))
			return;

		/*
		 * Something that is not a directory is in the way.
		 * Normally we should either ask the user about removing
		 * it or respect the -n or -o command-line options.
		 * However, this may lead to a later failure or even
		 * compromise (if this non-directory happens to be a
		 * symlink to somewhere unsafe), so we don't.
		 *
		 * The unlink() result is deliberately not checked; failure
		 * will cause mkdir() to fail below, which we do catch.
		 */
		(void)unlink(path);
	}

	if (mkdir(path, mode) == 0 || errno == EEXIST)
		return;
	error("mkdir('%s')", path);
}
338126353Smlaier
/*
 * Ensure that all directories leading up to (but not including) the
 * specified path exist.
 *
 * Works from the deepest component upwards: the path is cut at its last
 * slash, and unless that prefix is already a directory, the function
 * recurses to create the prefix's own parents and then creates the prefix
 * with mode 0755.  A non-directory found where a directory is needed is
 * unlinked first.  The results of unlink() and mkdir() are not checked
 * here.
 *
 * XXX inefficient + modifies the path in-place (the slash is temporarily
 * overwritten, and restored before returning)
 */
static void
make_parent(char *path)
{
	struct stat st;
	char *slash;
	int exists;

	slash = strrchr(path, '/');
	/* no directory part, or the only slash is the leading root slash */
	if (slash == NULL || slash == path)
		return;

	*slash = '\0';
	exists = (lstat(path, &st) == 0);
	if (!exists || !S_ISDIR(st.st_mode)) {
		if (exists)
			(void)unlink(path);
		make_parent(path);
		(void)mkdir(path, 0755);
	}
	*slash = '/';
}
377126353Smlaier
/*
 * Extract a directory: announce it, create it at `path' with a sanitized
 * version of the mode recorded in the archive entry, and skip whatever
 * data the entry carries.
 */
static void
extract_dir(struct archive *a, struct archive_entry *e, const char *path)
{
	int mode;

	mode = archive_entry_mode(e) & 0777;
	if (mode == 0)
		mode = 0755;

	/*
	 * Some zipfiles contain directories with weird permissions such
	 * as 0644 or 0444.  This can cause strange issues such as being
	 * unable to extract files into the directory we just created, or
	 * the user being unable to remove the directory later without
	 * first manually changing its permissions.  Therefore, we whack
	 * the permissions into shape: the owner always gets full access,
	 * and group / other get execute access wherever they have read
	 * access (shifting the two read bits right by two positions turns
	 * them into the matching execute bits).
	 */
	mode |= 0700 | ((mode & 0044) >> 2);

	info("d %s\n", path);
	make_dir(path, mode);
	ac(archive_read_data_skip(a));
}
410134578Smlaier
/* scratch buffer shared by the routines that read entry data */
static unsigned char buffer[8192];
/* progress indicator frames; read-only, hence const */
static const char spinner[] = { '|', '/', '-', '\\' };
413134578Smlaier
414130617Smlaierstatic int
415130617Smlaierhandle_existing_file(char **path)
416126353Smlaier{
417130617Smlaier	size_t alen;
418126353Smlaier	ssize_t len;
419130617Smlaier	char buf[4];
420130617Smlaier
421130617Smlaier	for (;;) {
422130617Smlaier		fprintf(stderr,
423126353Smlaier		    "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
424126353Smlaier		    *path);
425130617Smlaier		if (fgets(buf, sizeof(buf), stdin) == 0) {
426130617Smlaier			clearerr(stdin);
427130617Smlaier			printf("NULL\n(EOF or read error, "
428130617Smlaier			    "treating as \"[N]one\"...)\n");
429130617Smlaier			n_opt = 1;
430130617Smlaier			return -1;
431130617Smlaier		}
432130617Smlaier		switch (*buf) {
433130617Smlaier		case 'A':
434130617Smlaier			o_opt = 1;
435130617Smlaier			/* FALLTHROUGH */
436130617Smlaier		case 'y':
437130617Smlaier		case 'Y':
438130617Smlaier			(void)unlink(*path);
439130617Smlaier			return 1;
440130617Smlaier		case 'N':
441130617Smlaier			n_opt = 1;
442130617Smlaier			/* FALLTHROUGH */
443130617Smlaier		case 'n':
444130617Smlaier			return -1;
445130617Smlaier		case 'r':
446130617Smlaier		case 'R':
447130617Smlaier			printf("New name: ");
448130617Smlaier			fflush(stdout);
449130617Smlaier			free(*path);
450130617Smlaier			*path = NULL;
451130617Smlaier			alen = 0;
452130617Smlaier			len = getdelim(path, &alen, '\n', stdin);
453130617Smlaier			if ((*path)[len - 1] == '\n')
454130617Smlaier				(*path)[len - 1] = '\0';
455130617Smlaier			return 0;
456130617Smlaier		default:
457130617Smlaier			break;
458130617Smlaier		}
459130617Smlaier	}
460130617Smlaier}
461130617Smlaier
462130617Smlaier/*
463130617Smlaier * Extract a regular file.
464126353Smlaier */
465130617Smlaierstatic void
466130617Smlaierextract_file(struct archive *a, struct archive_entry *e, char **path)
467126353Smlaier{
468126353Smlaier	int mode;
469126353Smlaier	time_t mtime;
470126353Smlaier	struct stat sb;
471130617Smlaier	struct timeval tv[2];
472130617Smlaier	int cr, fd, text, warn, check;
473130617Smlaier	ssize_t len;
474130617Smlaier	unsigned char *p, *q, *end;
475126353Smlaier
476126353Smlaier	mode = archive_entry_mode(e) & 0777;
477130617Smlaier	if (mode == 0)
478130617Smlaier		mode = 0644;
479130617Smlaier	mtime = archive_entry_mtime(e);
480130617Smlaier
481130617Smlaier	/* look for existing file of same name */
482134578Smlaierrecheck:
483134578Smlaier	if (lstat(*path, &sb) == 0) {
484134578Smlaier		if (u_opt || f_opt) {
485130617Smlaier			/* check if up-to-date */
486130617Smlaier			if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime)
487130617Smlaier				return;
488130617Smlaier			(void)unlink(*path);
489130617Smlaier		} else if (o_opt) {
490130617Smlaier			/* overwrite */
491134578Smlaier			(void)unlink(*path);
492134578Smlaier		} else if (n_opt) {
493134578Smlaier			/* do not overwrite */
494134578Smlaier			return;
495134578Smlaier		} else {
496134578Smlaier			check = handle_existing_file(path);
497134578Smlaier			if (check == 0)
498134578Smlaier				goto recheck;
499130617Smlaier			if (check == -1)
500134578Smlaier				return; /* do not overwrite */
501145840Smlaier		}
502145840Smlaier	} else {
503130617Smlaier		if (f_opt)
504134578Smlaier			return;
505134578Smlaier	}
506134578Smlaier
507134578Smlaier	if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
508130617Smlaier		error("open('%s')", *path);
509130617Smlaier
510134578Smlaier	/* loop over file contents and write to disk */
511130617Smlaier	info(" extracting: %s", *path);
512130617Smlaier	text = a_opt;
513130617Smlaier	warn = 0;
514130617Smlaier	cr = 0;
515130617Smlaier	for (int n = 0; ; n++) {
516130617Smlaier		if (tty && (n % 4) == 0)
517130617Smlaier			info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
518130617Smlaier
519130617Smlaier		len = archive_read_data(a, buffer, sizeof buffer);
520130617Smlaier
521130617Smlaier		if (len < 0)
522130617Smlaier			ac(len);
523130617Smlaier
524130617Smlaier		/* left over CR from previous buffer */
525130617Smlaier		if (a_opt && cr) {
526130617Smlaier			if (len == 0 || buffer[0] != '\n')
527130617Smlaier				if (write(fd, "\r", 1) != 1)
528130617Smlaier					error("write('%s')", *path);
529130617Smlaier			cr = 0;
530126353Smlaier		}
531130617Smlaier
532130617Smlaier		/* EOF */
533130617Smlaier		if (len == 0)
534130617Smlaier			break;
535130617Smlaier		end = buffer + len;
536130617Smlaier
537130617Smlaier		/*
538130617Smlaier		 * Detect whether this is a text file.  The correct way to
539130617Smlaier		 * do this is to check the least significant bit of the
540130617Smlaier		 * "internal file attributes" field of the corresponding
541130617Smlaier		 * file header in the central directory, but libarchive
542130617Smlaier		 * does not read the central directory, so we have to
543130617Smlaier		 * guess by looking for non-ASCII characters in the
544130617Smlaier		 * buffer.  Hopefully we won't guess wrong.  If we do
545130617Smlaier		 * guess wrong, we print a warning message later.
546130617Smlaier		 */
547130617Smlaier		if (a_opt && n == 0) {
548130617Smlaier			for (p = buffer; p < end; ++p) {
549130617Smlaier				if (!isascii((unsigned char)*p)) {
550130617Smlaier					text = 0;
551130617Smlaier					break;
552130617Smlaier				}
553130617Smlaier			}
554130617Smlaier		}
555130617Smlaier
556130617Smlaier		/* simple case */
557130617Smlaier		if (!a_opt || !text) {
558130617Smlaier			if (write(fd, buffer, len) != len)
559130617Smlaier				error("write('%s')", *path);
560130617Smlaier			continue;
561130617Smlaier		}
562130617Smlaier
563130617Smlaier		/* hard case: convert \r\n to \n (sigh...) */
564130617Smlaier		for (p = buffer; p < end; p = q + 1) {
565130617Smlaier			for (q = p; q < end; q++) {
566130617Smlaier				if (!warn && !isascii(*q)) {
567130617Smlaier					warningx("%s may be corrupted due"
568130617Smlaier					    " to weak text file detection"
569130617Smlaier					    " heuristic", *path);
570130617Smlaier					warn = 1;
571130617Smlaier				}
572130617Smlaier				if (q[0] != '\r')
573130617Smlaier					continue;
574130617Smlaier				if (&q[1] == end) {
575130617Smlaier					cr = 1;
576130617Smlaier					break;
577130617Smlaier				}
578130617Smlaier				if (q[1] == '\n')
579134578Smlaier					break;
580134578Smlaier			}
581134578Smlaier			if (write(fd, p, q - p) != q - p)
582134578Smlaier				error("write('%s')", *path);
583130617Smlaier		}
584134578Smlaier	}
585130617Smlaier	if (tty)
586130617Smlaier		info("  \b\b");
587130617Smlaier	if (text)
588130617Smlaier		info(" (text)");
589130617Smlaier	info("\n");
590130617Smlaier
591130617Smlaier	/* set access and modification time */
592130617Smlaier	tv[0].tv_sec = now;
593130617Smlaier	tv[0].tv_usec = 0;
594130617Smlaier	tv[1].tv_sec = mtime;
595130617Smlaier	tv[1].tv_usec = 0;
596130617Smlaier	if (futimes(fd, tv) != 0)
597130617Smlaier		error("utimes('%s')", *path);
598130617Smlaier	if (close(fd) != 0)
599130617Smlaier		error("close('%s')", *path);
600130617Smlaier}
601130617Smlaier
602130617Smlaier/*
603130617Smlaier * Extract a zipfile entry: first perform some sanity checks to ensure
604130617Smlaier * that it is either a directory or a regular file and that the path is
605130617Smlaier * not absolute and does not try to break out of the current directory;
606130617Smlaier * then call either extract_dir() or extract_file() as appropriate.
607130617Smlaier *
608130617Smlaier * This is complicated a bit by the various ways in which we need to
609130617Smlaier * manipulate the path name.  Case conversion (if requested by the -L
610130617Smlaier * option) happens first, but the include / exclude patterns are applied
611223637Sbz * to the full converted path name, before the directory part of the path
612134578Smlaier * is removed in accordance with the -j option.  Sanity checks are
613223637Sbz * intentionally done earlier than they need to be, so the user will get a
614223637Sbz * warning about insecure paths even for files or directories which
615134578Smlaier * wouldn't be extracted anyway.
616134578Smlaier */
617134578Smlaierstatic void
618134578Smlaierextract(struct archive *a, struct archive_entry *e)
619134578Smlaier{
620134578Smlaier	char *pathname, *realpathname;
621130617Smlaier	mode_t filetype;
622130617Smlaier	char *p, *q;
623134578Smlaier
624130617Smlaier	pathname = pathdup(archive_entry_pathname(e));
625130617Smlaier	filetype = archive_entry_filetype(e);
626130617Smlaier
627130617Smlaier	/* sanity checks */
628130617Smlaier	if (pathname[0] == '/' ||
629130617Smlaier	    strncmp(pathname, "../", 3) == 0 ||
630130617Smlaier	    strstr(pathname, "/../") != NULL) {
631130617Smlaier		warningx("skipping insecure entry '%s'", pathname);
632223637Sbz		ac(archive_read_data_skip(a));
633223637Sbz		free(pathname);
634223637Sbz		return;
635223637Sbz	}
636223637Sbz
637223637Sbz	/* I don't think this can happen in a zipfile.. */
638223637Sbz	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
639223637Sbz		warningx("skipping non-regular entry '%s'", pathname);
640223637Sbz		ac(archive_read_data_skip(a));
641223637Sbz		free(pathname);
642223637Sbz		return;
643223637Sbz	}
644130617Smlaier
645126353Smlaier	/* skip directories in -j case */
646126353Smlaier	if (S_ISDIR(filetype) && j_opt) {
647223637Sbz		ac(archive_read_data_skip(a));
648130617Smlaier		free(pathname);
649156744Smlaier		return;
650223637Sbz	}
651126353Smlaier
652223637Sbz	/* apply include / exclude patterns */
653223637Sbz	if (!accept_pathname(pathname)) {
654130617Smlaier		ac(archive_read_data_skip(a));
655130617Smlaier		free(pathname);
656223637Sbz		return;
657126353Smlaier	}
658126353Smlaier
659126353Smlaier	/* apply -j and -d */
660126353Smlaier	if (j_opt) {
661126353Smlaier		for (p = q = pathname; *p; ++p)
662145840Smlaier			if (*p == '/')
663145840Smlaier				q = p + 1;
664126353Smlaier		realpathname = pathcat(d_arg, q);
665126353Smlaier	} else {
666126353Smlaier		realpathname = pathcat(d_arg, pathname);
667126353Smlaier	}
668126353Smlaier
669171172Smlaier	/* ensure that parent directory exists */
670171172Smlaier	make_parent(realpathname);
671171172Smlaier
672223637Sbz	if (S_ISDIR(filetype))
673223637Sbz		extract_dir(a, e, realpathname);
674223637Sbz	else
675126353Smlaier		extract_file(a, e, &realpathname);
676145840Smlaier
677145840Smlaier	free(realpathname);
678126353Smlaier	free(pathname);
679126353Smlaier}
680145840Smlaier
681130617Smlaierstatic void
682126353Smlaierextract_stdout(struct archive *a, struct archive_entry *e)
683130617Smlaier{
684130617Smlaier	char *pathname;
685130617Smlaier	mode_t filetype;
686126353Smlaier	int cr, text, warn;
687126353Smlaier	ssize_t len;
688126353Smlaier	unsigned char *p, *q, *end;
689126353Smlaier
690126353Smlaier	pathname = pathdup(archive_entry_pathname(e));
691126353Smlaier	filetype = archive_entry_filetype(e);
692126353Smlaier
693126353Smlaier	/* I don't think this can happen in a zipfile.. */
694126353Smlaier	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
695126353Smlaier		warningx("skipping non-regular entry '%s'", pathname);
696223637Sbz		ac(archive_read_data_skip(a));
697223637Sbz		free(pathname);
698223637Sbz		return;
699223637Sbz	}
700223637Sbz
701223637Sbz	/* skip directories in -j case */
702223637Sbz	if (S_ISDIR(filetype)) {
703223637Sbz		ac(archive_read_data_skip(a));
704126353Smlaier		free(pathname);
705126353Smlaier		return;
706126353Smlaier	}
707126353Smlaier
708126353Smlaier	/* apply include / exclude patterns */
709126353Smlaier	if (!accept_pathname(pathname)) {
710223637Sbz		ac(archive_read_data_skip(a));
711126353Smlaier		free(pathname);
712126353Smlaier		return;
713145840Smlaier	}
714126353Smlaier
715126353Smlaier	if (c_opt)
716130617Smlaier		info("x %s\n", pathname);
717126353Smlaier
718126353Smlaier	text = a_opt;
719126353Smlaier	warn = 0;
720126353Smlaier	cr = 0;
721126353Smlaier	for (int n = 0; ; n++) {
722126353Smlaier		len = archive_read_data(a, buffer, sizeof buffer);
723130617Smlaier
724126353Smlaier		if (len < 0)
725126353Smlaier			ac(len);
726126353Smlaier
727126353Smlaier		/* left over CR from previous buffer */
728126353Smlaier		if (a_opt && cr) {
729130617Smlaier			if (len == 0 || buffer[0] != '\n') {
730130617Smlaier				if (fwrite("\r", 1, 1, stderr) != 1)
731130617Smlaier					error("write('%s')", pathname);
732126353Smlaier			}
733126353Smlaier			cr = 0;
734126353Smlaier		}
735130617Smlaier
736130617Smlaier		/* EOF */
737130617Smlaier		if (len == 0)
738130617Smlaier			break;
739130617Smlaier		end = buffer + len;
740130617Smlaier
741223637Sbz		/*
742130617Smlaier		 * Detect whether this is a text file.  The correct way to
743130617Smlaier		 * do this is to check the least significant bit of the
744130617Smlaier		 * "internal file attributes" field of the corresponding
745130617Smlaier		 * file header in the central directory, but libarchive
746130617Smlaier		 * does not read the central directory, so we have to
747130617Smlaier		 * guess by looking for non-ASCII characters in the
748130617Smlaier		 * buffer.  Hopefully we won't guess wrong.  If we do
749130617Smlaier		 * guess wrong, we print a warning message later.
750130617Smlaier		 */
751130617Smlaier		if (a_opt && n == 0) {
752130617Smlaier			for (p = buffer; p < end; ++p) {
753130617Smlaier				if (!isascii((unsigned char)*p)) {
754130617Smlaier					text = 0;
755130617Smlaier					break;
756171172Smlaier				}
757130617Smlaier			}
758130617Smlaier		}
759130617Smlaier
760130617Smlaier		/* simple case */
761130617Smlaier		if (!a_opt || !text) {
762130617Smlaier			if (fwrite(buffer, 1, len, stdout) != (size_t)len)
763130617Smlaier				error("write('%s')", pathname);
764130617Smlaier			continue;
765126353Smlaier		}
766130617Smlaier
767145840Smlaier		/* hard case: convert \r\n to \n (sigh...) */
768136141Smlaier		for (p = buffer; p < end; p = q + 1) {
769223637Sbz			for (q = p; q < end; q++) {
770223637Sbz				if (!warn && !isascii(*q)) {
771223637Sbz					warningx("%s may be corrupted due"
772223637Sbz					    " to weak text file detection"
773136141Smlaier					    " heuristic", pathname);
774136141Smlaier					warn = 1;
775126353Smlaier				}
776136141Smlaier				if (q[0] != '\r')
777126353Smlaier					continue;
778126353Smlaier				if (&q[1] == end) {
779126353Smlaier					cr = 1;
780126353Smlaier					break;
781171172Smlaier				}
782130617Smlaier				if (q[1] == '\n')
783130617Smlaier					break;
784130617Smlaier			}
785126353Smlaier			if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p))
786126353Smlaier				error("write('%s')", pathname);
787126353Smlaier		}
788126353Smlaier	}
789126353Smlaier
790130617Smlaier	free(pathname);
791130617Smlaier}
792171172Smlaier
793171172Smlaier/*
794126353Smlaier * Print the name of an entry to stdout.
795126353Smlaier */
796126353Smlaierstatic void
797223637Sbzlist(struct archive *a, struct archive_entry *e)
798223637Sbz{
799223637Sbz	char buf[20];
800223637Sbz	time_t mtime;
801223637Sbz
802126353Smlaier	mtime = archive_entry_mtime(e);
803126353Smlaier	strftime(buf, sizeof(buf), "%m-%d-%g %R", localtime(&mtime));
804130617Smlaier
805130617Smlaier	if (v_opt == 1) {
806130617Smlaier		printf(" %8ju  %s   %s\n",
807130617Smlaier		    (uintmax_t)archive_entry_size(e),
808130617Smlaier		    buf, archive_entry_pathname(e));
809130617Smlaier	} else if (v_opt == 2) {
810126353Smlaier		printf("%8ju  Stored  %7ju   0%%  %s  %08x  %s\n",
811223637Sbz		    (uintmax_t)archive_entry_size(e),
812126353Smlaier		    (uintmax_t)archive_entry_size(e),
813126353Smlaier		    buf,
814126353Smlaier		    0U,
815223637Sbz		    archive_entry_pathname(e));
816126353Smlaier	}
817	ac(archive_read_data_skip(a));
818}
819
820/*
821 * Extract to memory to check CRC
822 */
823static int
824test(struct archive *a, struct archive_entry *e)
825{
826	ssize_t len;
827	int error_count;
828
829	error_count = 0;
830	if (S_ISDIR(archive_entry_filetype(e)))
831		return 0;
832
833	info("    testing: %s\t", archive_entry_pathname(e));
834	while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
835		/* nothing */;
836	if (len < 0) {
837		info(" %s\n", archive_error_string(a));
838		++error_count;
839	} else {
840		info(" OK\n");
841	}
842
843	/* shouldn't be necessary, but it doesn't hurt */
844	ac(archive_read_data_skip(a));
845
846	return error_count;
847}
848
849
850/*
851 * Main loop: open the zipfile, iterate over its contents and decide what
852 * to do with each entry.
853 */
854static void
855unzip(const char *fn)
856{
857	struct archive *a;
858	struct archive_entry *e;
859	int fd, ret;
860	uintmax_t total_size, file_count, error_count;
861
862	if (strcmp(fn, "-") == 0)
863		fd = STDIN_FILENO;
864	else if ((fd = open(fn, O_RDONLY)) < 0)
865		error("%s", fn);
866
867	if ((a = archive_read_new()) == NULL)
868		error("archive_read_new failed");
869
870	ac(archive_read_support_format_zip(a));
871	ac(archive_read_open_fd(a, fd, 8192));
872
873	if (!p_opt && !q_opt)
874		printf("Archive:  %s\n", fn);
875	if (v_opt == 1) {
876		printf("  Length     Date   Time    Name\n");
877		printf(" --------    ----   ----    ----\n");
878	} else if (v_opt == 2) {
879		printf(" Length   Method    Size  Ratio   Date   Time   CRC-32    Name\n");
880		printf("--------  ------  ------- -----   ----   ----   ------    ----\n");
881	}
882
883	total_size = 0;
884	file_count = 0;
885	error_count = 0;
886	for (;;) {
887		ret = archive_read_next_header(a, &e);
888		if (ret == ARCHIVE_EOF)
889			break;
890		ac(ret);
891		if (t_opt)
892			error_count += test(a, e);
893		else if (v_opt)
894			list(a, e);
895		else if (p_opt || c_opt)
896			extract_stdout(a, e);
897		else
898			extract(a, e);
899
900		total_size += archive_entry_size(e);
901		++file_count;
902	}
903
904	if (v_opt == 1) {
905		printf(" --------                   -------\n");
906		printf(" %8ju                   %ju file%s\n",
907		    total_size, file_count, file_count != 1 ? "s" : "");
908	} else if (v_opt == 2) {
909		printf("--------          -------  ---                            -------\n");
910		printf("%8ju          %7ju   0%%                            %ju file%s\n",
911		    total_size, total_size, file_count,
912		    file_count != 1 ? "s" : "");
913	}
914
915	ac(archive_read_close(a));
916	(void)archive_read_finish(a);
917
918	if (fd != STDIN_FILENO && close(fd) != 0)
919		error("%s", fn);
920
921	if (t_opt) {
922		if (error_count > 0) {
923			errorx("%d checksum error(s) found.", error_count);
924		}
925		else {
926			printf("No errors detected in compressed data of %s.\n",
927			       fn);
928		}
929	}
930}
931
/*
 * Write the command-line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unzip [-aCcfjLlnopqtuv] [-d dir] [-x pattern] zipfile\n";

	fputs(synopsis, stderr);
	exit(1);
}
939
940static int
941getopts(int argc, char *argv[])
942{
943	int opt;
944
945	optreset = optind = 1;
946	while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvx:")) != -1)
947		switch (opt) {
948		case 'a':
949			a_opt = 1;
950			break;
951		case 'C':
952			C_opt = 1;
953			break;
954		case 'c':
955			c_opt = 1;
956			break;
957		case 'd':
958			d_arg = optarg;
959			break;
960		case 'f':
961			f_opt = 1;
962			break;
963		case 'j':
964			j_opt = 1;
965			break;
966		case 'L':
967			L_opt = 1;
968			break;
969		case 'l':
970			if (v_opt == 0)
971				v_opt = 1;
972			break;
973		case 'n':
974			n_opt = 1;
975			break;
976		case 'o':
977			o_opt = 1;
978			q_opt = 1;
979			break;
980		case 'p':
981			p_opt = 1;
982			break;
983		case 'q':
984			q_opt = 1;
985			break;
986		case 't':
987			t_opt = 1;
988			break;
989		case 'u':
990			u_opt = 1;
991			break;
992		case 'v':
993			v_opt = 2;
994			break;
995		case 'x':
996			add_pattern(&exclude, optarg);
997			break;
998		default:
999			usage();
1000		}
1001
1002	return (optind);
1003}
1004
1005int
1006main(int argc, char *argv[])
1007{
1008	const char *zipfile;
1009	int nopts;
1010
1011	if (isatty(STDOUT_FILENO))
1012		tty = 1;
1013
1014	if (getenv("UNZIP_DEBUG") != NULL)
1015		unzip_debug = 1;
1016	for (int i = 0; i < argc; ++i)
1017		debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
1018
1019	/*
1020	 * Info-ZIP's unzip(1) expects certain options to come before the
1021	 * zipfile name, and others to come after - though it does not
1022	 * enforce this.  For simplicity, we accept *all* options both
1023	 * before and after the zipfile name.
1024	 */
1025	nopts = getopts(argc, argv);
1026
1027	if (argc <= nopts)
1028		usage();
1029	zipfile = argv[nopts++];
1030
1031	while (nopts < argc && *argv[nopts] != '-')
1032		add_pattern(&include, argv[nopts++]);
1033
1034	nopts--; /* fake argv[0] */
1035	nopts += getopts(argc - nopts, argv + nopts);
1036
1037	if (n_opt + o_opt + u_opt > 1)
1038		errorx("-n, -o and -u are contradictory");
1039
1040	time(&now);
1041
1042	unzip(zipfile);
1043
1044	exit(0);
1045}
1046