1/*
2 * This file is in the public domain.
3 * Use it as you wish.
4 */
5
6/*
7 * This is a compact tar extraction program using libarchive whose
8 * primary goal is small executable size.  Statically linked, it can
9 * be very small, depending in large part on how cleanly factored your
10 * system libraries are.  Note that this uses the standard libarchive,
11 * without any special recompilation.  The only functional concession
12 * is that this program uses the uid/gid from the archive instead of
13 * doing uname/gname lookups.  (Add a call to
14 * archive_write_disk_set_standard_lookup() to enable uname/gname
15 * lookups, but be aware that this can add 500k or more to a static
16 * executable, depending on the system libraries, since user/group
17 * lookups frequently pull in password, YP/LDAP, networking, and DNS
18 * resolver libraries.)
19 *
20 * To build:
21 * $ gcc -static -Wall -o untar untar.c -larchive
22 * $ strip untar
23 *
24 * NOTE: On some systems, you may need to add additional flags
25 * to ensure that untar.c is compiled the same way as libarchive
26 * was compiled.  In particular, Linux users will probably
27 * have to add -D_FILE_OFFSET_BITS=64 to the command line above.
28 *
29 * For fun, statically compile the following simple hello.c program
30 * using the same flags as for untar and compare the size:
31 *
32 * #include <stdio.h>
33 * int main(int argc, char **argv) {
34 *    printf("hello, world\n");
35 *    return(0);
36 * }
37 *
38 * You may be even more surprised by the compiled size of true.c listed here:
39 *
40 * int main(int argc, char **argv) {
41 *    return (0);
42 * }
43 *
44 * On a slightly customized FreeBSD 5 system that I used around
45 * 2005, hello above compiled to 89k compared to untar of 69k.  So at
46 * that time, libarchive's tar reader and extract-to-disk routines
47 * compiled to less code than printf().
48 *
49 * On my FreeBSD development system today (August, 2009):
50 *  hello: 195024 bytes
51 *  true: 194912 bytes
52 *  untar: 259924 bytes
53 */
54
55#include <sys/types.h>
56__FBSDID("$FreeBSD$");
57
58#include <sys/stat.h>
59
60#include <archive.h>
61#include <archive_entry.h>
62#include <fcntl.h>
63#include <stdio.h>
64#include <stdlib.h>
65#include <string.h>
66#include <unistd.h>
67
/*
 * Forward declarations for the file-local helpers defined after main().
 * errmsg()/msg() write a string to fd 2/fd 1, warn()/fail() build a
 * "<what> failed: <why>" diagnostic (fail() also exits), and usage()
 * prints the synopsis and exits.
 */
static void	errmsg(const char *);
static void	extract(const char *filename, int do_extract, int flags);
static void	fail(const char *, const char *, int);
static int	copy_data(struct archive *, struct archive *);
static void	msg(const char *);
static void	usage(void);
static void	warn(const char *, const char *);

/* Verbosity level: bumped once for every -v seen on the command line. */
static int verbose = 0;
77
78int
79main(int argc, const char **argv)
80{
81	const char *filename = NULL;
82	int compress, flags, mode, opt;
83
84	(void)argc;
85	mode = 'x';
86	verbose = 0;
87	compress = '\0';
88	flags = ARCHIVE_EXTRACT_TIME;
89
90	/* Among other sins, getopt(3) pulls in printf(3). */
91	while (*++argv != NULL && **argv == '-') {
92		const char *p = *argv + 1;
93
94		while ((opt = *p++) != '\0') {
95			switch (opt) {
96			case 'f':
97				if (*p != '\0')
98					filename = p;
99				else
100					filename = *++argv;
101				p += strlen(p);
102				break;
103			case 'p':
104				flags |= ARCHIVE_EXTRACT_PERM;
105				flags |= ARCHIVE_EXTRACT_ACL;
106				flags |= ARCHIVE_EXTRACT_FFLAGS;
107				break;
108			case 't':
109				mode = opt;
110				break;
111			case 'v':
112				verbose++;
113				break;
114			case 'x':
115				mode = opt;
116				break;
117			default:
118				usage();
119			}
120		}
121	}
122
123	switch (mode) {
124	case 't':
125		extract(filename, 0, flags);
126		break;
127	case 'x':
128		extract(filename, 1, flags);
129		break;
130	}
131
132	return (0);
133}
134
135
136static void
137extract(const char *filename, int do_extract, int flags)
138{
139	struct archive *a;
140	struct archive *ext;
141	struct archive_entry *entry;
142	int r;
143
144	a = archive_read_new();
145	ext = archive_write_disk_new();
146	archive_write_disk_set_options(ext, flags);
147	/*
148	 * Note: archive_write_disk_set_standard_lookup() is useful
149	 * here, but it requires library routines that can add 500k or
150	 * more to a static executable.
151	 */
152	archive_read_support_format_tar(a);
153	/*
154	 * On my system, enabling other archive formats adds 20k-30k
155	 * each.  Enabling gzip decompression adds about 20k.
156	 * Enabling bzip2 is more expensive because the libbz2 library
157	 * isn't very well factored.
158	 */
159	if (filename != NULL && strcmp(filename, "-") == 0)
160		filename = NULL;
161	if ((r = archive_read_open_file(a, filename, 10240)))
162		fail("archive_read_open_file()",
163		    archive_error_string(a), r);
164	for (;;) {
165		r = archive_read_next_header(a, &entry);
166		if (r == ARCHIVE_EOF)
167			break;
168		if (r != ARCHIVE_OK)
169			fail("archive_read_next_header()",
170			    archive_error_string(a), 1);
171		if (verbose && do_extract)
172			msg("x ");
173		if (verbose || !do_extract)
174			msg(archive_entry_pathname(entry));
175		if (do_extract) {
176			r = archive_write_header(ext, entry);
177			if (r != ARCHIVE_OK)
178				warn("archive_write_header()",
179				    archive_error_string(ext));
180			else {
181				copy_data(a, ext);
182				r = archive_write_finish_entry(ext);
183				if (r != ARCHIVE_OK)
184					fail("archive_write_finish_entry()",
185					    archive_error_string(ext), 1);
186			}
187
188		}
189		if (verbose || !do_extract)
190			msg("\n");
191	}
192	archive_read_close(a);
193	archive_read_finish(a);
194	exit(0);
195}
196
197static int
198copy_data(struct archive *ar, struct archive *aw)
199{
200	int r;
201	const void *buff;
202	size_t size;
203	off_t offset;
204
205	for (;;) {
206		r = archive_read_data_block(ar, &buff, &size, &offset);
207		if (r == ARCHIVE_EOF)
208			return (ARCHIVE_OK);
209		if (r != ARCHIVE_OK)
210			return (r);
211		r = archive_write_data_block(aw, buff, size, offset);
212		if (r != ARCHIVE_OK) {
213			warn("archive_write_data_block()",
214			    archive_error_string(aw));
215			return (r);
216		}
217	}
218}
219
220/*
221 * These reporting functions use low-level I/O; on some systems, this
222 * is a significant code reduction.  Of course, on many server and
223 * desktop operating systems, malloc() and even crt rely on printf(),
224 * which in turn pulls in most of the rest of stdio, so this is not an
225 * optimization at all there.  (If you're going to pay 100k or more
226 * for printf() anyway, you may as well use it!)
227 */
/*
 * Write the string m to standard output (fd 1).  Short writes are
 * continued until the whole string is out; a NULL m is ignored.  A
 * write error ends the attempt silently, as there is no better place
 * to report it.
 */
static void
msg(const char *m)
{
	size_t left;
	ssize_t n;

	if (m == NULL)
		return;
	left = strlen(m);
	while (left > 0) {
		n = write(1, m, left);
		if (n <= 0)
			return;
		m += n;
		left -= (size_t)n;
	}
}
233
/*
 * Write the string m to standard error (fd 2).  Short writes are
 * continued until the whole string is out; a NULL m is ignored so that
 * a missing error text cannot crash the reporting path.  A write error
 * ends the attempt silently.
 */
static void
errmsg(const char *m)
{
	size_t left;
	ssize_t n;

	if (m == NULL)
		return;
	left = strlen(m);
	while (left > 0) {
		n = write(2, m, left);
		if (n <= 0)
			return;
		m += n;
		left -= (size_t)n;
	}
}
239
/*
 * Print "<f> failed: <m>" followed by a newline on standard error.
 *
 *   f  name of the operation that failed
 *   m  explanation; may be NULL (callers pass archive_error_string()
 *      results straight through), in which case a placeholder is shown
 */
static void
warn(const char *f, const char *m)
{
	errmsg(f);
	errmsg(" failed: ");
	errmsg(m != NULL ? m : "(no error message)");
	errmsg("\n");
}
248
/*
 * Fatal-error helper: report the failure through warn(f, m), then
 * terminate the process with r as the exit status.  Does not return.
 */
static void
fail(const char *f, const char *m, int r)
{
	warn(f, m);
	exit(r);
}
255
/*
 * Print the command synopsis on standard error and exit with status 1.
 * The synopsis lists exactly the options main() accepts.
 */
static void
usage(void)
{
	errmsg("Usage: untar [-ptvx] [-f file]\n");
	exit(1);
}
263