1175154Sdes/*-
2196981Srdivacky * Copyright (c) 2009 Joerg Sonnenberger <joerg@NetBSD.org>
3234311Sdes * Copyright (c) 2007-2008 Dag-Erling Smørgrav
4175154Sdes * All rights reserved.
5175154Sdes *
6175154Sdes * Redistribution and use in source and binary forms, with or without
7175154Sdes * modification, are permitted provided that the following conditions
8175154Sdes * are met:
9175154Sdes * 1. Redistributions of source code must retain the above copyright
10175154Sdes *    notice, this list of conditions and the following disclaimer
11175154Sdes *    in this position and unchanged.
12175154Sdes * 2. Redistributions in binary form must reproduce the above copyright
13175154Sdes *    notice, this list of conditions and the following disclaimer in the
14175154Sdes *    documentation and/or other materials provided with the distribution.
15175154Sdes *
16175154Sdes * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17175154Sdes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18175154Sdes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19175154Sdes * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20175154Sdes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21175154Sdes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22175154Sdes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23175154Sdes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24175154Sdes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25175154Sdes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26175154Sdes * SUCH DAMAGE.
27175154Sdes *
28175154Sdes * $FreeBSD$
29175154Sdes *
30175154Sdes * This file would be much shorter if we didn't care about command-line
31175154Sdes * compatibility with Info-ZIP's UnZip, which requires us to duplicate
32175154Sdes * parts of libarchive in order to gain more detailed control of its
33175154Sdes * behaviour for the purpose of implementing the -n, -o, -L and -a
34175154Sdes * options.
35175154Sdes */
36175154Sdes
37175154Sdes#include <sys/queue.h>
38175154Sdes#include <sys/stat.h>
39175154Sdes
40175154Sdes#include <ctype.h>
41175154Sdes#include <errno.h>
42175154Sdes#include <fcntl.h>
43175154Sdes#include <fnmatch.h>
44175154Sdes#include <stdarg.h>
45175154Sdes#include <stdio.h>
46175154Sdes#include <stdlib.h>
47175154Sdes#include <string.h>
48175154Sdes#include <unistd.h>
49175154Sdes
50175154Sdes#include <archive.h>
51175154Sdes#include <archive_entry.h>
52175154Sdes
/* command-line options */
static int		 a_opt;		/* convert EOL */
static int		 C_opt;		/* match case-insensitively */
static int		 c_opt;		/* extract to stdout */
static const char	*d_arg;		/* directory */
static int		 f_opt;		/* update existing files only */
static int		 j_opt;		/* junk directories */
static int		 L_opt;		/* lowercase names */
/* n_opt and o_opt are also set interactively by handle_existing_file() */
static int		 n_opt;		/* never overwrite */
static int		 o_opt;		/* always overwrite */
static int		 p_opt;		/* extract to stdout, quiet */
static int		 q_opt;		/* quiet */
static int		 t_opt;		/* test */
static int		 u_opt;		/* update */
static int		 v_opt;		/* verbose/list */
static int		 Z1_opt;	/* zipinfo mode list files only */

/* time when unzip started; used as the access time of extracted files */
static time_t		 now;

/* debug flag; enables debug() output and overrides -q in info() */
static int		 unzip_debug;

/* zipinfo mode */
static int		 zipinfo_mode;

/* running on tty? (enables the progress spinner in extract_file()) */
static int		 tty;
81175154Sdes
/*
 * Convenience macro: evaluate a libarchive call once and, if the result
 * is anything other than ARCHIVE_OK, terminate through errorx() with the
 * message from archive_error_string(a).
 *
 * The macro refers to a variable named `a' that is not one of its
 * arguments, so it can only be used where such a variable is in scope.
 */
/* XXX should differentiate between ARCHIVE_{WARN,FAIL,RETRY} */
#define ac(call)						\
	do {							\
		int acret = (call);				\
		if (acret != ARCHIVE_OK)			\
			errorx("%s", archive_error_string(a));	\
	} while (0)
90175154Sdes
/*
 * Indicates that last info() did not end with EOL.  This helps error() et
 * al. avoid printing an error message on the same line as an incomplete
 * informational message.
 *
 * Written by info() and debug(); read (but never cleared) by error(),
 * errorx() and warningx(), which emit a newline on stdout first when it
 * is non-zero.
 */
static int noeol;
97175154Sdes
98175154Sdes/* fatal error message + errno */
99175154Sdesstatic void
100175154Sdeserror(const char *fmt, ...)
101175154Sdes{
102175154Sdes	va_list ap;
103175154Sdes
104175154Sdes	if (noeol)
105175154Sdes		fprintf(stdout, "\n");
106175154Sdes	fflush(stdout);
107175154Sdes	fprintf(stderr, "unzip: ");
108175154Sdes	va_start(ap, fmt);
109175154Sdes	vfprintf(stderr, fmt, ap);
110175154Sdes	va_end(ap);
111175154Sdes	fprintf(stderr, ": %s\n", strerror(errno));
112175154Sdes	exit(1);
113175154Sdes}
114175154Sdes
115175154Sdes/* fatal error message, no errno */
116175154Sdesstatic void
117175154Sdeserrorx(const char *fmt, ...)
118175154Sdes{
119175154Sdes	va_list ap;
120175154Sdes
121175154Sdes	if (noeol)
122175154Sdes		fprintf(stdout, "\n");
123175154Sdes	fflush(stdout);
124175154Sdes	fprintf(stderr, "unzip: ");
125175154Sdes	va_start(ap, fmt);
126175154Sdes	vfprintf(stderr, fmt, ap);
127175154Sdes	va_end(ap);
128175154Sdes	fprintf(stderr, "\n");
129175154Sdes	exit(1);
130175154Sdes}
131175154Sdes
#if 0
/*
 * Non-fatal error message + errno (currently compiled out).
 *
 * Same output as error() but returns to the caller instead of exiting.
 */
static void
warning(const char *fmt, ...)
{
	va_list ap;
	int saved_errno;

	/* stdio may clobber errno before strerror() is reached; save it */
	saved_errno = errno;

	if (noeol)
		fprintf(stdout, "\n");
	fflush(stdout);
	fprintf(stderr, "unzip: ");
	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
	fprintf(stderr, ": %s\n", strerror(saved_errno));
}
#endif
149175154Sdes
150175154Sdes/* non-fatal error message, no errno */
151175154Sdesstatic void
152175154Sdeswarningx(const char *fmt, ...)
153175154Sdes{
154175154Sdes	va_list ap;
155175154Sdes
156175154Sdes	if (noeol)
157175154Sdes		fprintf(stdout, "\n");
158175154Sdes	fflush(stdout);
159175154Sdes	fprintf(stderr, "unzip: ");
160175154Sdes	va_start(ap, fmt);
161175154Sdes	vfprintf(stderr, fmt, ap);
162175154Sdes	va_end(ap);
163175154Sdes	fprintf(stderr, "\n");
164175154Sdes}
165175154Sdes
166175154Sdes/* informational message (if not -q) */
167175154Sdesstatic void
168175154Sdesinfo(const char *fmt, ...)
169175154Sdes{
170175154Sdes	va_list ap;
171175154Sdes
172175154Sdes	if (q_opt && !unzip_debug)
173175154Sdes		return;
174175154Sdes	va_start(ap, fmt);
175175154Sdes	vfprintf(stdout, fmt, ap);
176175154Sdes	va_end(ap);
177175154Sdes	fflush(stdout);
178175154Sdes
179196981Srdivacky	if (*fmt == '\0')
180196981Srdivacky		noeol = 1;
181196981Srdivacky	else
182196981Srdivacky		noeol = fmt[strlen(fmt) - 1] != '\n';
183175154Sdes}
184175154Sdes
185175154Sdes/* debug message (if unzip_debug) */
186175154Sdesstatic void
187175154Sdesdebug(const char *fmt, ...)
188175154Sdes{
189175154Sdes	va_list ap;
190175154Sdes
191175154Sdes	if (!unzip_debug)
192175154Sdes		return;
193175154Sdes	va_start(ap, fmt);
194175154Sdes	vfprintf(stderr, fmt, ap);
195175154Sdes	va_end(ap);
196175154Sdes	fflush(stderr);
197175154Sdes
198196981Srdivacky	if (*fmt == '\0')
199196981Srdivacky		noeol = 1;
200196981Srdivacky	else
201196981Srdivacky		noeol = fmt[strlen(fmt) - 1] != '\n';
202175154Sdes}
203175154Sdes
204175154Sdes/* duplicate a path name, possibly converting to lower case */
205175154Sdesstatic char *
206175154Sdespathdup(const char *path)
207175154Sdes{
208175154Sdes	char *str;
209196981Srdivacky	size_t i, len;
210175154Sdes
211175154Sdes	len = strlen(path);
212175154Sdes	while (len && path[len - 1] == '/')
213175154Sdes		len--;
214175154Sdes	if ((str = malloc(len + 1)) == NULL) {
215175154Sdes		errno = ENOMEM;
216175154Sdes		error("malloc()");
217175154Sdes	}
218196981Srdivacky	if (L_opt) {
219196981Srdivacky		for (i = 0; i < len; ++i)
220196981Srdivacky			str[i] = tolower((unsigned char)path[i]);
221196981Srdivacky	} else {
222196981Srdivacky		memcpy(str, path, len);
223196981Srdivacky	}
224175154Sdes	str[len] = '\0';
225175154Sdes
226175154Sdes	return (str);
227175154Sdes}
228175154Sdes
/*
 * Join prefix and path with a single '/' into a freshly allocated string.
 * A NULL prefix yields a plain copy of path.  The caller owns the result.
 * Allocation failure is fatal.
 */
static char *
pathcat(const char *prefix, const char *path)
{
	size_t headlen, taillen;
	char *joined, *dst;

	/* headlen counts the prefix plus its separator; zero if no prefix */
	headlen = 0;
	if (prefix != NULL)
		headlen = strlen(prefix) + 1;
	taillen = strlen(path) + 1;		/* includes the NUL */

	joined = malloc(headlen + taillen);
	if (joined == NULL) {
		errno = ENOMEM;
		error("malloc()");
	}

	dst = joined;
	if (prefix != NULL) {
		memcpy(dst, prefix, headlen - 1);
		dst[headlen - 1] = '/';
		dst += headlen;
	}
	memcpy(dst, path, taillen);

	return (joined);
}
250175154Sdes
/*
 * Pattern lists for include / exclude processing
 *
 * Each node stores its pattern text inline in a flexible array member, so
 * a node and its string live in a single allocation (see add_pattern()).
 */
struct pattern {
	STAILQ_ENTRY(pattern) link;	/* tail-queue linkage */
	char pattern[];			/* NUL-terminated fnmatch() pattern */
};

STAILQ_HEAD(pattern_list, pattern);
/* consulted by accept_pathname(): must match include, must not match exclude */
static struct pattern_list include = STAILQ_HEAD_INITIALIZER(include);
static struct pattern_list exclude = STAILQ_HEAD_INITIALIZER(exclude);
262175154Sdes
263175154Sdes/*
264175154Sdes * Add an entry to a pattern list
265175154Sdes */
266175154Sdesstatic void
267175154Sdesadd_pattern(struct pattern_list *list, const char *pattern)
268175154Sdes{
269175154Sdes	struct pattern *entry;
270196981Srdivacky	size_t len;
271175154Sdes
272175154Sdes	debug("adding pattern '%s'\n", pattern);
273175154Sdes	len = strlen(pattern);
274175154Sdes	if ((entry = malloc(sizeof *entry + len + 1)) == NULL) {
275175154Sdes		errno = ENOMEM;
276175154Sdes		error("malloc()");
277175154Sdes	}
278175154Sdes	memcpy(entry->pattern, pattern, len + 1);
279175154Sdes	STAILQ_INSERT_TAIL(list, entry, link);
280175154Sdes}
281175154Sdes
282175154Sdes/*
283175154Sdes * Match a string against a list of patterns
284175154Sdes */
285175154Sdesstatic int
286175154Sdesmatch_pattern(struct pattern_list *list, const char *str)
287175154Sdes{
288175154Sdes	struct pattern *entry;
289175154Sdes
290175154Sdes	STAILQ_FOREACH(entry, list, link) {
291196981Srdivacky		if (fnmatch(entry->pattern, str, C_opt ? FNM_CASEFOLD : 0) == 0)
292175154Sdes			return (1);
293175154Sdes	}
294175154Sdes	return (0);
295175154Sdes}
296175154Sdes
297175154Sdes/*
298175154Sdes * Verify that a given pathname is in the include list and not in the
299175154Sdes * exclude list.
300175154Sdes */
301175154Sdesstatic int
302175154Sdesaccept_pathname(const char *pathname)
303175154Sdes{
304175154Sdes
305175154Sdes	if (!STAILQ_EMPTY(&include) && !match_pattern(&include, pathname))
306175154Sdes		return (0);
307175154Sdes	if (!STAILQ_EMPTY(&exclude) && match_pattern(&exclude, pathname))
308175154Sdes		return (0);
309175154Sdes	return (1);
310175154Sdes}
311175154Sdes
/*
 * Create the specified directory with the specified mode, taking certain
 * precautions on the way.
 *
 * If a directory already exists at path, nothing is done.  If something
 * else exists there, it is removed first.  Any failure to end up with a
 * directory at path is fatal.
 */
static void
make_dir(const char *path, int mode)
{
	struct stat sb;

	if (lstat(path, &sb) == 0) {
		if (S_ISDIR(sb.st_mode))
			return;
		/*
		 * Normally, we should either ask the user about removing
		 * the non-directory of the same name as a directory we
		 * wish to create, or respect the -n or -o command-line
		 * options.  However, this may lead to a later failure or
		 * even compromise (if this non-directory happens to be a
		 * symlink to somewhere unsafe), so we don't.
		 */

		/*
		 * Don't check unlink() result; failure leaves the object
		 * in place, which the EEXIST handling below detects.
		 */
		(void)unlink(path);
	}
	if (mkdir(path, mode) == 0)
		return;
	if (errno != EEXIST)
		error("mkdir('%s')", path);

	/*
	 * EEXIST is only acceptable if what exists is a directory (e.g.
	 * somebody else created it in the meantime).  A non-directory we
	 * failed to unlink above must not be silently accepted.
	 */
	if (lstat(path, &sb) != 0 || !S_ISDIR(sb.st_mode)) {
		errno = EEXIST;
		error("mkdir('%s')", path);
	}
}
342175154Sdes
/*
 * Ensure that all directories leading up to (but not including) the
 * specified path exist.  A non-directory found where a parent directory
 * is needed is removed and replaced.  Failures of unlink() and mkdir()
 * are not checked here.
 *
 * XXX inefficient + temporarily modifies the path string in-place
 */
static void
make_parent(char *path)
{
	struct stat st;
	char *slash;
	int exists;

	slash = strrchr(path, '/');
	/* no parent component, or the parent is the root directory */
	if (slash == NULL || slash == path)
		return;

	/* temporarily truncate the string to the parent's name */
	*slash = '\0';
	exists = (lstat(path, &st) == 0);
	if (!(exists && S_ISDIR(st.st_mode))) {
		if (exists)
			unlink(path);
		/* create grandparents first, then the parent itself */
		make_parent(path);
		mkdir(path, 0755);
	}
	*slash = '/';
}
381175154Sdes
/*
 * Extract a directory entry: create it at path with sanitized
 * permissions and skip whatever data the entry carries.
 */
static void
extract_dir(struct archive *a, struct archive_entry *e, const char *path)
{
	int perm;

	perm = archive_entry_mode(e) & 0777;
	if (perm == 0)
		perm = 0755;

	/*
	 * Zipfiles sometimes record directories with modes like 0644 or
	 * 0444, which would leave us unable to extract into the directory
	 * or leave the user unable to remove it without a manual chmod.
	 * Force full owner access, and grant search permission to group
	 * and other wherever they have read permission: shifting the two
	 * read bits (0040, 0004) right by two yields the matching execute
	 * bits (0010, 0001).
	 */
	perm |= 0700;
	perm |= (perm & 0044) >> 2;

	info("d %s\n", path);
	make_dir(path, perm);
	ac(archive_read_data_skip(a));
}
414175154Sdes
/* scratch buffer filled by archive_read_data() in the extract and test paths */
static unsigned char buffer[8192];
/* progress spinner frames; indexed modulo sizeof spinner, so not NUL-terminated */
static char spinner[] = { '|', '/', '-', '\\' };
417175154Sdes
418203977Sgavinstatic int
419203977Sgavinhandle_existing_file(char **path)
420203977Sgavin{
421203977Sgavin	size_t alen;
422203977Sgavin	ssize_t len;
423203977Sgavin	char buf[4];
424203977Sgavin
425203977Sgavin	for (;;) {
426203977Sgavin		fprintf(stderr,
427203977Sgavin		    "replace %s? [y]es, [n]o, [A]ll, [N]one, [r]ename: ",
428203977Sgavin		    *path);
429230044Skevlo		if (fgets(buf, sizeof(buf), stdin) == NULL) {
430203977Sgavin			clearerr(stdin);
431203977Sgavin			printf("NULL\n(EOF or read error, "
432203977Sgavin			    "treating as \"[N]one\"...)\n");
433203977Sgavin			n_opt = 1;
434203977Sgavin			return -1;
435203977Sgavin		}
436203977Sgavin		switch (*buf) {
437203977Sgavin		case 'A':
438203977Sgavin			o_opt = 1;
439203977Sgavin			/* FALLTHROUGH */
440203977Sgavin		case 'y':
441203977Sgavin		case 'Y':
442203977Sgavin			(void)unlink(*path);
443203977Sgavin			return 1;
444203977Sgavin		case 'N':
445203977Sgavin			n_opt = 1;
446203977Sgavin			/* FALLTHROUGH */
447203977Sgavin		case 'n':
448203977Sgavin			return -1;
449203977Sgavin		case 'r':
450203977Sgavin		case 'R':
451203977Sgavin			printf("New name: ");
452203977Sgavin			fflush(stdout);
453203977Sgavin			free(*path);
454203977Sgavin			*path = NULL;
455203977Sgavin			alen = 0;
456203977Sgavin			len = getdelim(path, &alen, '\n', stdin);
457203977Sgavin			if ((*path)[len - 1] == '\n')
458203977Sgavin				(*path)[len - 1] = '\0';
459203977Sgavin			return 0;
460203977Sgavin		default:
461203977Sgavin			break;
462203977Sgavin		}
463203977Sgavin	}
464203977Sgavin}
465203977Sgavin
466175154Sdes/*
467175154Sdes * Extract a regular file.
468175154Sdes */
469175154Sdesstatic void
470203977Sgavinextract_file(struct archive *a, struct archive_entry *e, char **path)
471175154Sdes{
472175154Sdes	int mode;
473175154Sdes	time_t mtime;
474175154Sdes	struct stat sb;
475175154Sdes	struct timeval tv[2];
476203977Sgavin	int cr, fd, text, warn, check;
477175154Sdes	ssize_t len;
478175154Sdes	unsigned char *p, *q, *end;
479175154Sdes
480201630Skientzle	mode = archive_entry_mode(e) & 0777;
481175154Sdes	if (mode == 0)
482175154Sdes		mode = 0644;
483175154Sdes	mtime = archive_entry_mtime(e);
484175154Sdes
485175154Sdes	/* look for existing file of same name */
486203977Sgavinrecheck:
487203977Sgavin	if (lstat(*path, &sb) == 0) {
488196981Srdivacky		if (u_opt || f_opt) {
489175154Sdes			/* check if up-to-date */
490196981Srdivacky			if (S_ISREG(sb.st_mode) && sb.st_mtime >= mtime)
491175154Sdes				return;
492203977Sgavin			(void)unlink(*path);
493175154Sdes		} else if (o_opt) {
494175154Sdes			/* overwrite */
495203977Sgavin			(void)unlink(*path);
496175154Sdes		} else if (n_opt) {
497175154Sdes			/* do not overwrite */
498175154Sdes			return;
499175154Sdes		} else {
500203977Sgavin			check = handle_existing_file(path);
501203977Sgavin			if (check == 0)
502203977Sgavin				goto recheck;
503203977Sgavin			if (check == -1)
504203977Sgavin				return; /* do not overwrite */
505175154Sdes		}
506196981Srdivacky	} else {
507196981Srdivacky		if (f_opt)
508196981Srdivacky			return;
509175154Sdes	}
510175154Sdes
511203977Sgavin	if ((fd = open(*path, O_RDWR|O_CREAT|O_TRUNC, mode)) < 0)
512203977Sgavin		error("open('%s')", *path);
513175154Sdes
514175154Sdes	/* loop over file contents and write to disk */
515203977Sgavin	info(" extracting: %s", *path);
516175154Sdes	text = a_opt;
517175154Sdes	warn = 0;
518175154Sdes	cr = 0;
519175154Sdes	for (int n = 0; ; n++) {
520175154Sdes		if (tty && (n % 4) == 0)
521175154Sdes			info(" %c\b\b", spinner[(n / 4) % sizeof spinner]);
522175154Sdes
523175154Sdes		len = archive_read_data(a, buffer, sizeof buffer);
524175154Sdes
525175154Sdes		if (len < 0)
526175154Sdes			ac(len);
527175154Sdes
528175154Sdes		/* left over CR from previous buffer */
529175154Sdes		if (a_opt && cr) {
530175154Sdes			if (len == 0 || buffer[0] != '\n')
531175154Sdes				if (write(fd, "\r", 1) != 1)
532203977Sgavin					error("write('%s')", *path);
533175154Sdes			cr = 0;
534175154Sdes		}
535175154Sdes
536175154Sdes		/* EOF */
537175154Sdes		if (len == 0)
538175154Sdes			break;
539175154Sdes		end = buffer + len;
540175154Sdes
541175154Sdes		/*
542175154Sdes		 * Detect whether this is a text file.  The correct way to
543175154Sdes		 * do this is to check the least significant bit of the
544175154Sdes		 * "internal file attributes" field of the corresponding
545175154Sdes		 * file header in the central directory, but libarchive
546175154Sdes		 * does not read the central directory, so we have to
547175154Sdes		 * guess by looking for non-ASCII characters in the
548175154Sdes		 * buffer.  Hopefully we won't guess wrong.  If we do
549175154Sdes		 * guess wrong, we print a warning message later.
550175154Sdes		 */
551175154Sdes		if (a_opt && n == 0) {
552175154Sdes			for (p = buffer; p < end; ++p) {
553175154Sdes				if (!isascii((unsigned char)*p)) {
554175154Sdes					text = 0;
555175154Sdes					break;
556175154Sdes				}
557175154Sdes			}
558175154Sdes		}
559175154Sdes
560175154Sdes		/* simple case */
561175154Sdes		if (!a_opt || !text) {
562175154Sdes			if (write(fd, buffer, len) != len)
563203977Sgavin				error("write('%s')", *path);
564175154Sdes			continue;
565175154Sdes		}
566175154Sdes
567175154Sdes		/* hard case: convert \r\n to \n (sigh...) */
568175154Sdes		for (p = buffer; p < end; p = q + 1) {
569175154Sdes			for (q = p; q < end; q++) {
570175154Sdes				if (!warn && !isascii(*q)) {
571175154Sdes					warningx("%s may be corrupted due"
572175154Sdes					    " to weak text file detection"
573203977Sgavin					    " heuristic", *path);
574175154Sdes					warn = 1;
575175154Sdes				}
576175154Sdes				if (q[0] != '\r')
577175154Sdes					continue;
578175154Sdes				if (&q[1] == end) {
579175154Sdes					cr = 1;
580175154Sdes					break;
581175154Sdes				}
582175154Sdes				if (q[1] == '\n')
583175154Sdes					break;
584175154Sdes			}
585175154Sdes			if (write(fd, p, q - p) != q - p)
586203977Sgavin				error("write('%s')", *path);
587175154Sdes		}
588175154Sdes	}
589175154Sdes	if (tty)
590175154Sdes		info("  \b\b");
591175154Sdes	if (text)
592175154Sdes		info(" (text)");
593175154Sdes	info("\n");
594175154Sdes
595175154Sdes	/* set access and modification time */
596175154Sdes	tv[0].tv_sec = now;
597175154Sdes	tv[0].tv_usec = 0;
598175154Sdes	tv[1].tv_sec = mtime;
599175154Sdes	tv[1].tv_usec = 0;
600175154Sdes	if (futimes(fd, tv) != 0)
601203977Sgavin		error("utimes('%s')", *path);
602175154Sdes	if (close(fd) != 0)
603203977Sgavin		error("close('%s')", *path);
604175154Sdes}
605175154Sdes
606175154Sdes/*
607175154Sdes * Extract a zipfile entry: first perform some sanity checks to ensure
608175154Sdes * that it is either a directory or a regular file and that the path is
609175154Sdes * not absolute and does not try to break out of the current directory;
610175154Sdes * then call either extract_dir() or extract_file() as appropriate.
611175154Sdes *
612175154Sdes * This is complicated a bit by the various ways in which we need to
613175154Sdes * manipulate the path name.  Case conversion (if requested by the -L
614175154Sdes * option) happens first, but the include / exclude patterns are applied
615175154Sdes * to the full converted path name, before the directory part of the path
616175154Sdes * is removed in accordance with the -j option.  Sanity checks are
617175154Sdes * intentionally done earlier than they need to be, so the user will get a
618175154Sdes * warning about insecure paths even for files or directories which
619175154Sdes * wouldn't be extracted anyway.
620175154Sdes */
621175154Sdesstatic void
622175154Sdesextract(struct archive *a, struct archive_entry *e)
623175154Sdes{
624175154Sdes	char *pathname, *realpathname;
625175154Sdes	mode_t filetype;
626175154Sdes	char *p, *q;
627175154Sdes
628175154Sdes	pathname = pathdup(archive_entry_pathname(e));
629175154Sdes	filetype = archive_entry_filetype(e);
630175154Sdes
631175154Sdes	/* sanity checks */
632175154Sdes	if (pathname[0] == '/' ||
633175154Sdes	    strncmp(pathname, "../", 3) == 0 ||
634175154Sdes	    strstr(pathname, "/../") != NULL) {
635175154Sdes		warningx("skipping insecure entry '%s'", pathname);
636175154Sdes		ac(archive_read_data_skip(a));
637175154Sdes		free(pathname);
638175154Sdes		return;
639175154Sdes	}
640175154Sdes
641175154Sdes	/* I don't think this can happen in a zipfile.. */
642175154Sdes	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
643175154Sdes		warningx("skipping non-regular entry '%s'", pathname);
644175154Sdes		ac(archive_read_data_skip(a));
645175154Sdes		free(pathname);
646175154Sdes		return;
647175154Sdes	}
648175154Sdes
649175154Sdes	/* skip directories in -j case */
650175154Sdes	if (S_ISDIR(filetype) && j_opt) {
651175154Sdes		ac(archive_read_data_skip(a));
652175154Sdes		free(pathname);
653175154Sdes		return;
654175154Sdes	}
655175154Sdes
656175154Sdes	/* apply include / exclude patterns */
657175154Sdes	if (!accept_pathname(pathname)) {
658175154Sdes		ac(archive_read_data_skip(a));
659175154Sdes		free(pathname);
660175154Sdes		return;
661175154Sdes	}
662175154Sdes
663175154Sdes	/* apply -j and -d */
664175154Sdes	if (j_opt) {
665175154Sdes		for (p = q = pathname; *p; ++p)
666175154Sdes			if (*p == '/')
667175154Sdes				q = p + 1;
668175154Sdes		realpathname = pathcat(d_arg, q);
669175154Sdes	} else {
670175154Sdes		realpathname = pathcat(d_arg, pathname);
671175154Sdes	}
672175154Sdes
673175154Sdes	/* ensure that parent directory exists */
674175154Sdes	make_parent(realpathname);
675175154Sdes
676175154Sdes	if (S_ISDIR(filetype))
677175154Sdes		extract_dir(a, e, realpathname);
678175154Sdes	else
679203977Sgavin		extract_file(a, e, &realpathname);
680175154Sdes
681175154Sdes	free(realpathname);
682175154Sdes	free(pathname);
683175154Sdes}
684175154Sdes
685196981Srdivackystatic void
686196981Srdivackyextract_stdout(struct archive *a, struct archive_entry *e)
687196981Srdivacky{
688196981Srdivacky	char *pathname;
689196981Srdivacky	mode_t filetype;
690196981Srdivacky	int cr, text, warn;
691196981Srdivacky	ssize_t len;
692196981Srdivacky	unsigned char *p, *q, *end;
693196981Srdivacky
694196981Srdivacky	pathname = pathdup(archive_entry_pathname(e));
695196981Srdivacky	filetype = archive_entry_filetype(e);
696196981Srdivacky
697196981Srdivacky	/* I don't think this can happen in a zipfile.. */
698196981Srdivacky	if (!S_ISDIR(filetype) && !S_ISREG(filetype)) {
699196981Srdivacky		warningx("skipping non-regular entry '%s'", pathname);
700196981Srdivacky		ac(archive_read_data_skip(a));
701196981Srdivacky		free(pathname);
702196981Srdivacky		return;
703196981Srdivacky	}
704196981Srdivacky
705196981Srdivacky	/* skip directories in -j case */
706196981Srdivacky	if (S_ISDIR(filetype)) {
707196981Srdivacky		ac(archive_read_data_skip(a));
708196981Srdivacky		free(pathname);
709196981Srdivacky		return;
710196981Srdivacky	}
711196981Srdivacky
712196981Srdivacky	/* apply include / exclude patterns */
713196981Srdivacky	if (!accept_pathname(pathname)) {
714196981Srdivacky		ac(archive_read_data_skip(a));
715196981Srdivacky		free(pathname);
716196981Srdivacky		return;
717196981Srdivacky	}
718196981Srdivacky
719196981Srdivacky	if (c_opt)
720196981Srdivacky		info("x %s\n", pathname);
721196981Srdivacky
722196981Srdivacky	text = a_opt;
723196981Srdivacky	warn = 0;
724196981Srdivacky	cr = 0;
725196981Srdivacky	for (int n = 0; ; n++) {
726196981Srdivacky		len = archive_read_data(a, buffer, sizeof buffer);
727196981Srdivacky
728196981Srdivacky		if (len < 0)
729196981Srdivacky			ac(len);
730196981Srdivacky
731196981Srdivacky		/* left over CR from previous buffer */
732196981Srdivacky		if (a_opt && cr) {
733196981Srdivacky			if (len == 0 || buffer[0] != '\n') {
734196981Srdivacky				if (fwrite("\r", 1, 1, stderr) != 1)
735196981Srdivacky					error("write('%s')", pathname);
736196981Srdivacky			}
737196981Srdivacky			cr = 0;
738196981Srdivacky		}
739196981Srdivacky
740196981Srdivacky		/* EOF */
741196981Srdivacky		if (len == 0)
742196981Srdivacky			break;
743196981Srdivacky		end = buffer + len;
744196981Srdivacky
745196981Srdivacky		/*
746196981Srdivacky		 * Detect whether this is a text file.  The correct way to
747196981Srdivacky		 * do this is to check the least significant bit of the
748196981Srdivacky		 * "internal file attributes" field of the corresponding
749196981Srdivacky		 * file header in the central directory, but libarchive
750196981Srdivacky		 * does not read the central directory, so we have to
751196981Srdivacky		 * guess by looking for non-ASCII characters in the
752196981Srdivacky		 * buffer.  Hopefully we won't guess wrong.  If we do
753196981Srdivacky		 * guess wrong, we print a warning message later.
754196981Srdivacky		 */
755196981Srdivacky		if (a_opt && n == 0) {
756196981Srdivacky			for (p = buffer; p < end; ++p) {
757196981Srdivacky				if (!isascii((unsigned char)*p)) {
758196981Srdivacky					text = 0;
759196981Srdivacky					break;
760196981Srdivacky				}
761196981Srdivacky			}
762196981Srdivacky		}
763196981Srdivacky
764196981Srdivacky		/* simple case */
765196981Srdivacky		if (!a_opt || !text) {
766196981Srdivacky			if (fwrite(buffer, 1, len, stdout) != (size_t)len)
767196981Srdivacky				error("write('%s')", pathname);
768196981Srdivacky			continue;
769196981Srdivacky		}
770196981Srdivacky
771196981Srdivacky		/* hard case: convert \r\n to \n (sigh...) */
772196981Srdivacky		for (p = buffer; p < end; p = q + 1) {
773196981Srdivacky			for (q = p; q < end; q++) {
774196981Srdivacky				if (!warn && !isascii(*q)) {
775196981Srdivacky					warningx("%s may be corrupted due"
776196981Srdivacky					    " to weak text file detection"
777196981Srdivacky					    " heuristic", pathname);
778196981Srdivacky					warn = 1;
779196981Srdivacky				}
780196981Srdivacky				if (q[0] != '\r')
781196981Srdivacky					continue;
782196981Srdivacky				if (&q[1] == end) {
783196981Srdivacky					cr = 1;
784196981Srdivacky					break;
785196981Srdivacky				}
786196981Srdivacky				if (q[1] == '\n')
787196981Srdivacky					break;
788196981Srdivacky			}
789196981Srdivacky			if (fwrite(p, 1, q - p, stdout) != (size_t)(q - p))
790196981Srdivacky				error("write('%s')", pathname);
791196981Srdivacky		}
792196981Srdivacky	}
793196981Srdivacky
794196981Srdivacky	free(pathname);
795196981Srdivacky}
796196981Srdivacky
797175154Sdes/*
798175154Sdes * Print the name of an entry to stdout.
799175154Sdes */
800175154Sdesstatic void
801175154Sdeslist(struct archive *a, struct archive_entry *e)
802175154Sdes{
803196981Srdivacky	char buf[20];
804196981Srdivacky	time_t mtime;
805175154Sdes
806196981Srdivacky	mtime = archive_entry_mtime(e);
807196981Srdivacky	strftime(buf, sizeof(buf), "%m-%d-%g %R", localtime(&mtime));
808196981Srdivacky
809234206Skevlo	if (!zipinfo_mode) {
810234206Skevlo		if (v_opt == 1) {
811234206Skevlo			printf(" %8ju  %s   %s\n",
812234206Skevlo			    (uintmax_t)archive_entry_size(e),
813234206Skevlo			    buf, archive_entry_pathname(e));
814234206Skevlo		} else if (v_opt == 2) {
815234206Skevlo			printf("%8ju  Stored  %7ju   0%%  %s  %08x  %s\n",
816234206Skevlo			    (uintmax_t)archive_entry_size(e),
817234206Skevlo			    (uintmax_t)archive_entry_size(e),
818234206Skevlo			    buf,
819234206Skevlo			    0U,
820234206Skevlo			    archive_entry_pathname(e));
821234206Skevlo		}
822234206Skevlo	} else {
823234206Skevlo		if (Z1_opt)
824234206Skevlo			printf("%s\n",archive_entry_pathname(e));
825196981Srdivacky	}
826175154Sdes	ac(archive_read_data_skip(a));
827175154Sdes}
828175154Sdes
829175154Sdes/*
830180124Sdes * Extract to memory to check CRC
831180124Sdes */
832196981Srdivackystatic int
833180124Sdestest(struct archive *a, struct archive_entry *e)
834180124Sdes{
835180124Sdes	ssize_t len;
836196981Srdivacky	int error_count;
837180124Sdes
838196981Srdivacky	error_count = 0;
839180124Sdes	if (S_ISDIR(archive_entry_filetype(e)))
840196981Srdivacky		return 0;
841180124Sdes
842196981Srdivacky	info("    testing: %s\t", archive_entry_pathname(e));
843180124Sdes	while ((len = archive_read_data(a, buffer, sizeof buffer)) > 0)
844180124Sdes		/* nothing */;
845180124Sdes	if (len < 0) {
846196981Srdivacky		info(" %s\n", archive_error_string(a));
847196981Srdivacky		++error_count;
848180124Sdes	} else {
849196981Srdivacky		info(" OK\n");
850180124Sdes	}
851180124Sdes
852180124Sdes	/* shouldn't be necessary, but it doesn't hurt */
853180124Sdes	ac(archive_read_data_skip(a));
854196981Srdivacky
855196981Srdivacky	return error_count;
856180124Sdes}
857180124Sdes
858180124Sdes
859180124Sdes/*
860175154Sdes * Main loop: open the zipfile, iterate over its contents and decide what
861175154Sdes * to do with each entry.
862175154Sdes */
863175154Sdesstatic void
864175154Sdesunzip(const char *fn)
865175154Sdes{
866175154Sdes	struct archive *a;
867175154Sdes	struct archive_entry *e;
868236226Sdes	int ret;
869196981Srdivacky	uintmax_t total_size, file_count, error_count;
870175154Sdes
871208957Sdelphij	if ((a = archive_read_new()) == NULL)
872208957Sdelphij		error("archive_read_new failed");
873208957Sdelphij
874175154Sdes	ac(archive_read_support_format_zip(a));
875236226Sdes	ac(archive_read_open_filename(a, fn, 8192));
876175154Sdes
877234206Skevlo	if (!zipinfo_mode) {
878234206Skevlo		if (!p_opt && !q_opt)
879234206Skevlo			printf("Archive:  %s\n", fn);
880234206Skevlo		if (v_opt == 1) {
881234206Skevlo			printf("  Length     Date   Time    Name\n");
882234206Skevlo			printf(" --------    ----   ----    ----\n");
883234206Skevlo		} else if (v_opt == 2) {
884234206Skevlo			printf(" Length   Method    Size  Ratio   Date   Time   CRC-32    Name\n");
885234206Skevlo			printf("--------  ------  ------- -----   ----   ----   ------    ----\n");
886234206Skevlo		}
887196981Srdivacky	}
888196981Srdivacky
889196981Srdivacky	total_size = 0;
890196981Srdivacky	file_count = 0;
891196981Srdivacky	error_count = 0;
892175154Sdes	for (;;) {
893175154Sdes		ret = archive_read_next_header(a, &e);
894175154Sdes		if (ret == ARCHIVE_EOF)
895175154Sdes			break;
896175154Sdes		ac(ret);
897234206Skevlo		if (!zipinfo_mode) {
898234206Skevlo			if (t_opt)
899234206Skevlo				error_count += test(a, e);
900234206Skevlo			else if (v_opt)
901234206Skevlo				list(a, e);
902234206Skevlo			else if (p_opt || c_opt)
903234206Skevlo				extract_stdout(a, e);
904234206Skevlo			else
905234206Skevlo				extract(a, e);
906234206Skevlo		} else {
907234206Skevlo			if (Z1_opt)
908234206Skevlo				list(a, e);
909234206Skevlo		}
910196981Srdivacky
911196981Srdivacky		total_size += archive_entry_size(e);
912196981Srdivacky		++file_count;
913175154Sdes	}
914175154Sdes
915234206Skevlo	if (zipinfo_mode) {
916234206Skevlo		if (v_opt == 1) {
917234206Skevlo			printf(" --------                   -------\n");
918234206Skevlo			printf(" %8ju                   %ju file%s\n",
919234206Skevlo			    total_size, file_count, file_count != 1 ? "s" : "");
920234206Skevlo		} else if (v_opt == 2) {
921234206Skevlo			printf("--------          -------  ---                            -------\n");
922234206Skevlo			printf("%8ju          %7ju   0%%                            %ju file%s\n",
923234206Skevlo			    total_size, total_size, file_count,
924234206Skevlo			    file_count != 1 ? "s" : "");
925234206Skevlo		}
926196981Srdivacky	}
927196981Srdivacky
928175154Sdes	ac(archive_read_close(a));
929248612Smm	(void)archive_read_free(a);
930196981Srdivacky
931196981Srdivacky	if (t_opt) {
932196981Srdivacky		if (error_count > 0) {
933196981Srdivacky			errorx("%d checksum error(s) found.", error_count);
934196981Srdivacky		}
935196981Srdivacky		else {
936196981Srdivacky			printf("No errors detected in compressed data of %s.\n",
937196981Srdivacky			       fn);
938196981Srdivacky		}
939196981Srdivacky	}
940175154Sdes}
941175154Sdes
/*
 * Write the command-line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: unzip [-aCcfjLlnopqtuvZ1] [-d dir] [-x pattern] zipfile\n";

	fputs(synopsis, stderr);
	exit(1);
}
949175154Sdes
950175154Sdesstatic int
951175154Sdesgetopts(int argc, char *argv[])
952175154Sdes{
953175154Sdes	int opt;
954175154Sdes
955175154Sdes	optreset = optind = 1;
956234206Skevlo	while ((opt = getopt(argc, argv, "aCcd:fjLlnopqtuvx:Z1")) != -1)
957175154Sdes		switch (opt) {
958234206Skevlo		case '1':
959234206Skevlo			Z1_opt = 1;
960234206Skevlo			break;
961175154Sdes		case 'a':
962175154Sdes			a_opt = 1;
963175154Sdes			break;
964196981Srdivacky		case 'C':
965196981Srdivacky			C_opt = 1;
966196981Srdivacky			break;
967196981Srdivacky		case 'c':
968196981Srdivacky			c_opt = 1;
969196981Srdivacky			break;
970175154Sdes		case 'd':
971175154Sdes			d_arg = optarg;
972175154Sdes			break;
973196981Srdivacky		case 'f':
974196981Srdivacky			f_opt = 1;
975196981Srdivacky			break;
976175154Sdes		case 'j':
977175154Sdes			j_opt = 1;
978175154Sdes			break;
979175154Sdes		case 'L':
980175154Sdes			L_opt = 1;
981175154Sdes			break;
982175154Sdes		case 'l':
983196981Srdivacky			if (v_opt == 0)
984196981Srdivacky				v_opt = 1;
985175154Sdes			break;
986175154Sdes		case 'n':
987175154Sdes			n_opt = 1;
988175154Sdes			break;
989175154Sdes		case 'o':
990175154Sdes			o_opt = 1;
991196981Srdivacky			q_opt = 1;
992175154Sdes			break;
993196981Srdivacky		case 'p':
994196981Srdivacky			p_opt = 1;
995196981Srdivacky			break;
996175154Sdes		case 'q':
997175154Sdes			q_opt = 1;
998175154Sdes			break;
999180124Sdes		case 't':
1000180124Sdes			t_opt = 1;
1001180124Sdes			break;
1002175154Sdes		case 'u':
1003175154Sdes			u_opt = 1;
1004175154Sdes			break;
1005196981Srdivacky		case 'v':
1006196981Srdivacky			v_opt = 2;
1007196981Srdivacky			break;
1008175154Sdes		case 'x':
1009175154Sdes			add_pattern(&exclude, optarg);
1010175154Sdes			break;
1011234206Skevlo		case 'Z':
1012234206Skevlo			zipinfo_mode = 1;
1013234206Skevlo			break;
1014175154Sdes		default:
1015175154Sdes			usage();
1016175154Sdes		}
1017175154Sdes
1018175154Sdes	return (optind);
1019175154Sdes}
1020175154Sdes
1021175154Sdesint
1022175154Sdesmain(int argc, char *argv[])
1023175154Sdes{
1024175154Sdes	const char *zipfile;
1025175154Sdes	int nopts;
1026175154Sdes
1027175154Sdes	if (isatty(STDOUT_FILENO))
1028175154Sdes		tty = 1;
1029175154Sdes
1030175154Sdes	if (getenv("UNZIP_DEBUG") != NULL)
1031175154Sdes		unzip_debug = 1;
1032175154Sdes	for (int i = 0; i < argc; ++i)
1033175154Sdes		debug("%s%c", argv[i], (i < argc - 1) ? ' ' : '\n');
1034175154Sdes
1035175154Sdes	/*
1036175154Sdes	 * Info-ZIP's unzip(1) expects certain options to come before the
1037175154Sdes	 * zipfile name, and others to come after - though it does not
1038175154Sdes	 * enforce this.  For simplicity, we accept *all* options both
1039175154Sdes	 * before and after the zipfile name.
1040175154Sdes	 */
1041175154Sdes	nopts = getopts(argc, argv);
1042175154Sdes
1043234206Skevlo	/*
1044234206Skevlo	 * When more of the zipinfo mode options are implemented, this
1045234206Skevlo	 * will need to change.
1046234206Skevlo	 */
1047234206Skevlo	if (zipinfo_mode && !Z1_opt) {
1048234206Skevlo		printf("Zipinfo mode needs additional options\n");
1049234206Skevlo		exit(1);
1050234206Skevlo	}
1051234206Skevlo
1052175154Sdes	if (argc <= nopts)
1053175154Sdes		usage();
1054175154Sdes	zipfile = argv[nopts++];
1055175154Sdes
1056236226Sdes	if (strcmp(zipfile, "-") == 0)
1057236226Sdes		zipfile = NULL; /* STDIN */
1058236226Sdes
1059175154Sdes	while (nopts < argc && *argv[nopts] != '-')
1060175154Sdes		add_pattern(&include, argv[nopts++]);
1061175154Sdes
1062175154Sdes	nopts--; /* fake argv[0] */
1063175154Sdes	nopts += getopts(argc - nopts, argv + nopts);
1064175154Sdes
1065175154Sdes	if (n_opt + o_opt + u_opt > 1)
1066175154Sdes		errorx("-n, -o and -u are contradictory");
1067175154Sdes
1068175154Sdes	time(&now);
1069175154Sdes
1070175154Sdes	unzip(zipfile);
1071175154Sdes
1072175154Sdes	exit(0);
1073175154Sdes}
1074