1/* 2 * This file is in the public domain. 3 * Use it as you wish. 4 */ 5 6/* 7 * This is a compact tar extraction program using libarchive whose 8 * primary goal is small executable size. Statically linked, it can 9 * be very small, depending in large part on how cleanly factored your 10 * system libraries are. Note that this uses the standard libarchive, 11 * without any special recompilation. The only functional concession 12 * is that this program uses the uid/gid from the archive instead of 13 * doing uname/gname lookups. (Add a call to 14 * archive_write_disk_set_standard_lookup() to enable uname/gname 15 * lookups, but be aware that this can add 500k or more to a static 16 * executable, depending on the system libraries, since user/group 17 * lookups frequently pull in password, YP/LDAP, networking, and DNS 18 * resolver libraries.) 19 * 20 * To build: 21 * $ gcc -static -Wall -o untar untar.c -larchive 22 * $ strip untar 23 * 24 * NOTE: On some systems, you may need to add additional flags 25 * to ensure that untar.c is compiled the same way as libarchive 26 * was compiled. In particular, Linux users will probably 27 * have to add -D_FILE_OFFSET_BITS=64 to the command line above. 28 * 29 * For fun, statically compile the following simple hello.c program 30 * using the same flags as for untar and compare the size: 31 * 32 * #include <stdio.h> 33 * int main(int argc, char **argv) { 34 * printf("hello, world\n"); 35 * return(0); 36 * } 37 * 38 * You may be even more surprised by the compiled size of true.c listed here: 39 * 40 * int main(int argc, char **argv) { 41 * return (0); 42 * } 43 * 44 * On a slightly customized FreeBSD 5 system that I used around 45 * 2005, hello above compiled to 89k compared to untar of 69k. So at 46 * that time, libarchive's tar reader and extract-to-disk routines 47 * compiled to less code than printf(). 48 * 49 * On my FreeBSD development system today (August, 2009): 50 * hello: 195024 bytes 51 * true: 194912 bytes 52 * untar: 259924 bytes 53 */ 54 55#include <sys/types.h> 56__FBSDID("$FreeBSD$"); 57 58#include <sys/stat.h> 59 60#include <archive.h> 61#include <archive_entry.h> 62#include <fcntl.h> 63#include <stdio.h> 64#include <stdlib.h> 65#include <string.h> 66#include <unistd.h> 67 68static void errmsg(const char *); 69static void extract(const char *filename, int do_extract, int flags); 70static void fail(const char *, const char *, int); 71static int copy_data(struct archive *, struct archive *); 72static void msg(const char *); 73static void usage(void); 74static void warn(const char *, const char *); 75 76static int verbose = 0; 77 78int 79main(int argc, const char **argv) 80{ 81 const char *filename = NULL; 82 int compress, flags, mode, opt; 83 84 (void)argc; 85 mode = 'x'; 86 verbose = 0; 87 compress = '\0'; 88 flags = ARCHIVE_EXTRACT_TIME; 89 90 /* Among other sins, getopt(3) pulls in printf(3). */ 91 while (*++argv != NULL && **argv == '-') { 92 const char *p = *argv + 1; 93 94 while ((opt = *p++) != '\0') { 95 switch (opt) { 96 case 'f': 97 if (*p != '\0') 98 filename = p; 99 else 100 filename = *++argv; 101 p += strlen(p); 102 break; 103 case 'p': 104 flags |= ARCHIVE_EXTRACT_PERM; 105 flags |= ARCHIVE_EXTRACT_ACL; 106 flags |= ARCHIVE_EXTRACT_FFLAGS; 107 break; 108 case 't': 109 mode = opt; 110 break; 111 case 'v': 112 verbose++; 113 break; 114 case 'x': 115 mode = opt; 116 break; 117 default: 118 usage(); 119 } 120 } 121 } 122 123 switch (mode) { 124 case 't': 125 extract(filename, 0, flags); 126 break; 127 case 'x': 128 extract(filename, 1, flags); 129 break; 130 } 131 132 return (0); 133} 134 135 136static void 137extract(const char *filename, int do_extract, int flags) 138{ 139 struct archive *a; 140 struct archive *ext; 141 struct archive_entry *entry; 142 int r; 143 144 a = archive_read_new(); 145 ext = archive_write_disk_new(); 146 archive_write_disk_set_options(ext, flags); 147 /* 148 * Note: archive_write_disk_set_standard_lookup() is useful 149 * here, but it requires library routines that can add 500k or 150 * more to a static executable. 151 */ 152 archive_read_support_format_tar(a); 153 /* 154 * On my system, enabling other archive formats adds 20k-30k 155 * each. Enabling gzip decompression adds about 20k. 156 * Enabling bzip2 is more expensive because the libbz2 library 157 * isn't very well factored. 158 */ 159 if (filename != NULL && strcmp(filename, "-") == 0) 160 filename = NULL; 161 if ((r = archive_read_open_file(a, filename, 10240))) 162 fail("archive_read_open_file()", 163 archive_error_string(a), r); 164 for (;;) { 165 r = archive_read_next_header(a, &entry); 166 if (r == ARCHIVE_EOF) 167 break; 168 if (r != ARCHIVE_OK) 169 fail("archive_read_next_header()", 170 archive_error_string(a), 1); 171 if (verbose && do_extract) 172 msg("x "); 173 if (verbose || !do_extract) 174 msg(archive_entry_pathname(entry)); 175 if (do_extract) { 176 r = archive_write_header(ext, entry); 177 if (r != ARCHIVE_OK) 178 warn("archive_write_header()", 179 archive_error_string(ext)); 180 else { 181 copy_data(a, ext); 182 r = archive_write_finish_entry(ext); 183 if (r != ARCHIVE_OK) 184 fail("archive_write_finish_entry()", 185 archive_error_string(ext), 1); 186 } 187 188 } 189 if (verbose || !do_extract) 190 msg("\n"); 191 } 192 archive_read_close(a); 193 archive_read_finish(a); 194 exit(0); 195} 196 197static int 198copy_data(struct archive *ar, struct archive *aw) 199{ 200 int r; 201 const void *buff; 202 size_t size; 203 off_t offset; 204 205 for (;;) { 206 r = archive_read_data_block(ar, &buff, &size, &offset); 207 if (r == ARCHIVE_EOF) 208 return (ARCHIVE_OK); 209 if (r != ARCHIVE_OK) 210 return (r); 211 r = archive_write_data_block(aw, buff, size, offset); 212 if (r != ARCHIVE_OK) { 213 warn("archive_write_data_block()", 214 archive_error_string(aw)); 215 return (r); 216 } 217 } 218} 219 220/* 221 * These reporting functions use low-level I/O; on some systems, this 222 * is a significant code reduction. Of course, on many server and 223 * desktop operating systems, malloc() and even crt rely on printf(), 224 * which in turn pulls in most of the rest of stdio, so this is not an 225 * optimization at all there. (If you're going to pay 100k or more 226 * for printf() anyway, you may as well use it!) 227 */ 228static void 229msg(const char *m) 230{ 231 write(1, m, strlen(m)); 232} 233 234static void 235errmsg(const char *m) 236{ 237 write(2, m, strlen(m)); 238} 239 240static void 241warn(const char *f, const char *m) 242{ 243 errmsg(f); 244 errmsg(" failed: "); 245 errmsg(m); 246 errmsg("\n"); 247} 248 249static void 250fail(const char *f, const char *m, int r) 251{ 252 warn(f, m); 253 exit(r); 254} 255 256static void 257usage(void) 258{ 259 const char *m = "Usage: untar [-tvx] [-f file] [file]\n"; 260 errmsg(m); 261 exit(1); 262} 263