1/* vi: set sw=4 ts=4: */ 2/* Licensed under GPLv2 or later, see file LICENSE in this tarball for details. 3 * 4 * FIXME: 5 * In privileged mode if uname and gname map to a uid and gid then use the 6 * mapped value instead of the uid/gid values in tar header 7 * 8 * References: 9 * GNU tar and star man pages, 10 * Opengroup's ustar interchange format, 11 * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html 12 */ 13 14#include "libbb.h" 15#include "unarchive.h" 16 17typedef uint32_t aliased_uint32_t FIX_ALIASING; 18typedef off_t aliased_off_t FIX_ALIASING; 19 20 21/* NB: _DESTROYS_ str[len] character! */ 22static unsigned long long getOctal(char *str, int len) 23{ 24 unsigned long long v; 25 char *end; 26 /* NB: leading spaces are allowed. Using strtoull to handle that. 27 * The downside is that we accept e.g. "-123" too :( 28 */ 29 str[len] = '\0'; 30 v = strtoull(str, &end, 8); 31 /* std: "Each numeric field is terminated by one or more 32 * <space> or NUL characters". We must support ' '! */ 33 if (*end != '\0' && *end != ' ') { 34 int8_t first = str[0]; 35 if (!(first & 0x80)) 36 bb_error_msg_and_die("corrupted octal value in tar header"); 37 /* 38 * GNU tar uses "base-256 encoding" for very large numbers. 39 * Encoding is binary, with highest bit always set as a marker 40 * and sign in next-highest bit: 41 * 80 00 .. 00 - zero 42 * bf ff .. ff - largest positive number 43 * ff ff .. ff - minus 1 44 * c0 00 .. 00 - smallest negative number 45 * 46 * Example of tar file with 8914993153 (0x213600001) byte file. 47 * Field starts at offset 7c: 48 * 00070 30 30 30 00 30 30 30 30 30 30 30 00 80 00 00 00 |000.0000000.....| 49 * 00080 00 00 00 02 13 60 00 01 31 31 31 32 30 33 33 36 |.....`..11120336| 50 * 51 * NB: tarballs with NEGATIVE unix times encoded that way were seen! 52 */ 53 v = first; 54 /* Sign-extend using 6th bit: */ 55 v <<= sizeof(unsigned long long)*8 - 7; 56 v = (long long)v >> (sizeof(unsigned long long)*8 - 7); 57 while (--len != 0) 58 v = (v << 8) + (unsigned char) *str++; 59 } 60 return v; 61} 62#define GET_OCTAL(a) getOctal((a), sizeof(a)) 63 64#if ENABLE_FEATURE_TAR_SELINUX 65/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword. 66 * This is what Red Hat's patched version of tar uses. 67 */ 68# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux" 69static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz) 70{ 71 char *buf, *p; 72 char *result; 73 74 p = buf = xmalloc(sz + 1); 75 /* prevent bb_strtou from running off the buffer */ 76 buf[sz] = '\0'; 77 xread(archive_handle->src_fd, buf, sz); 78 archive_handle->offset += sz; 79 80 result = NULL; 81 while (sz != 0) { 82 char *end, *value; 83 unsigned len; 84 85 /* Every record has this format: "LEN NAME=VALUE\n" */ 86 len = bb_strtou(p, &end, 10); 87 /* expect errno to be EINVAL, because the character 88 * following the digits should be a space 89 */ 90 p += len; 91 sz -= len; 92 if ((int)sz < 0 93 || len == 0 94 || errno != EINVAL 95 || *end != ' ' 96 ) { 97 bb_error_msg("malformed extended header, skipped"); 98 // More verbose version: 99 //bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped", 100 // archive_handle->offset - (sz + len)); 101 break; 102 } 103 /* overwrite the terminating newline with NUL 104 * (we do not bother to check that it *was* a newline) 105 */ 106 p[-1] = '\0'; 107 /* Is it selinux security context? */ 108 value = end + 1; 109 if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) { 110 value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1; 111 result = xstrdup(value); 112 break; 113 } 114 } 115 116 free(buf); 117 return result; 118} 119#endif 120 121char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) 122{ 123 file_header_t *file_header = archive_handle->file_header; 124 struct tar_header_t tar; 125 char *cp; 126 int i, sum_u, sum; 127#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY 128 int sum_s; 129#endif 130 int parse_names; 131 132 /* Our "private data" */ 133#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS 134# define p_longname (archive_handle->tar__longname) 135# define p_linkname (archive_handle->tar__linkname) 136#else 137# define p_longname 0 138# define p_linkname 0 139#endif 140 141#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX 142 again: 143#endif 144 /* Align header */ 145 data_align(archive_handle, 512); 146 147 again_after_align: 148 149#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT 150 /* to prevent misdetection of bz2 sig */ 151 *(aliased_uint32_t*)&tar = 0; 152 i = full_read(archive_handle->src_fd, &tar, 512); 153 /* If GNU tar sees EOF in above read, it says: 154 * "tar: A lone zero block at N", where N = kilobyte 155 * where EOF was met (not EOF block, actual EOF!), 156 * and exits with EXIT_SUCCESS. 157 * We will mimic exit(EXIT_SUCCESS), although we will not mimic 158 * the message and we don't check whether we indeed 159 * saw zero block directly before this. */ 160 if (i == 0) { 161 xfunc_error_retval = 0; 162 short_read: 163 bb_error_msg_and_die("short read"); 164 } 165 if (i != 512) { 166 IF_FEATURE_TAR_AUTODETECT(goto autodetect;) 167 goto short_read; 168 } 169 170#else 171 i = 512; 172 xread(archive_handle->src_fd, &tar, i); 173#endif 174 archive_handle->offset += i; 175 176 /* If there is no filename its an empty header */ 177 if (tar.name[0] == 0 && tar.prefix[0] == 0) { 178 if (archive_handle->tar__end) { 179 /* Second consecutive empty header - end of archive. 180 * Read until the end to empty the pipe from gz or bz2 181 */ 182 while (full_read(archive_handle->src_fd, &tar, 512) == 512) 183 continue; 184 return EXIT_FAILURE; 185 } 186 archive_handle->tar__end = 1; 187 return EXIT_SUCCESS; 188 } 189 archive_handle->tar__end = 0; 190 191 /* Check header has valid magic, "ustar" is for the proper tar, 192 * five NULs are for the old tar format */ 193 if (strncmp(tar.magic, "ustar", 5) != 0 194 && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY 195 || memcmp(tar.magic, "\0\0\0\0", 5) != 0) 196 ) { 197#if ENABLE_FEATURE_TAR_AUTODETECT 198 char FAST_FUNC (*get_header_ptr)(archive_handle_t *); 199 uint16_t magic2; 200 201 autodetect: 202 magic2 = *(uint16_t*)tar.name; 203 /* tar gz/bz autodetect: check for gz/bz2 magic. 204 * If we see the magic, and it is the very first block, 205 * we can switch to get_header_tar_gz/bz2/lzma(). 206 * Needs seekable fd. I wish recv(MSG_PEEK) works 207 * on any fd... */ 208# if ENABLE_FEATURE_SEAMLESS_GZ 209 if (magic2 == GZIP_MAGIC) { 210 get_header_ptr = get_header_tar_gz; 211 } else 212# endif 213# if ENABLE_FEATURE_SEAMLESS_BZ2 214 if (magic2 == BZIP2_MAGIC 215 && tar.name[2] == 'h' && isdigit(tar.name[3]) 216 ) { /* bzip2 */ 217 get_header_ptr = get_header_tar_bz2; 218 } else 219# endif 220# if ENABLE_FEATURE_SEAMLESS_XZ 221 //TODO: if (magic2 == XZ_MAGIC1)... 222 //else 223# endif 224 goto err; 225 /* Two different causes for lseek() != 0: 226 * unseekable fd (would like to support that too, but...), 227 * or not first block (false positive, it's not .gz/.bz2!) */ 228 if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) 229 goto err; 230 while (get_header_ptr(archive_handle) == EXIT_SUCCESS) 231 continue; 232 return EXIT_FAILURE; 233 err: 234#endif /* FEATURE_TAR_AUTODETECT */ 235 bb_error_msg_and_die("invalid tar magic"); 236 } 237 238 /* Do checksum on headers. 239 * POSIX says that checksum is done on unsigned bytes, but 240 * Sun and HP-UX gets it wrong... more details in 241 * GNU tar source. */ 242#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY 243 sum_s = ' ' * sizeof(tar.chksum); 244#endif 245 sum_u = ' ' * sizeof(tar.chksum); 246 for (i = 0; i < 148; i++) { 247 sum_u += ((unsigned char*)&tar)[i]; 248#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY 249 sum_s += ((signed char*)&tar)[i]; 250#endif 251 } 252 for (i = 156; i < 512; i++) { 253 sum_u += ((unsigned char*)&tar)[i]; 254#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY 255 sum_s += ((signed char*)&tar)[i]; 256#endif 257 } 258 /* This field does not need special treatment (getOctal) */ 259 { 260 char *endp; /* gcc likes temp var for &endp */ 261 sum = strtoul(tar.chksum, &endp, 8); 262 if ((*endp != '\0' && *endp != ' ') 263 || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) 264 ) { 265 bb_error_msg_and_die("invalid tar header checksum"); 266 } 267 } 268 /* don't use xstrtoul, tar.chksum may have leading spaces */ 269 sum = strtoul(tar.chksum, NULL, 8); 270 if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) { 271 bb_error_msg_and_die("invalid tar header checksum"); 272 } 273 274 /* 0 is reserved for high perf file, treat as normal file */ 275 if (!tar.typeflag) tar.typeflag = '0'; 276 parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7'); 277 278 /* getOctal trashes subsequent field, therefore we call it 279 * on fields in reverse order */ 280 if (tar.devmajor[0]) { 281 char t = tar.prefix[0]; 282 /* we trash prefix[0] here, but we DO need it later! */ 283 unsigned minor = GET_OCTAL(tar.devminor); 284 unsigned major = GET_OCTAL(tar.devmajor); 285 file_header->device = makedev(major, minor); 286 tar.prefix[0] = t; 287 } 288 file_header->link_target = NULL; 289 if (!p_linkname && parse_names && tar.linkname[0]) { 290 file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname)); 291 /* FIXME: what if we have non-link object with link_target? */ 292 /* Will link_target be free()ed? */ 293 } 294#if ENABLE_FEATURE_TAR_UNAME_GNAME 295 file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL; 296 file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL; 297#endif 298 file_header->mtime = GET_OCTAL(tar.mtime); 299 file_header->size = GET_OCTAL(tar.size); 300 file_header->gid = GET_OCTAL(tar.gid); 301 file_header->uid = GET_OCTAL(tar.uid); 302 /* Set bits 0-11 of the files mode */ 303 file_header->mode = 07777 & GET_OCTAL(tar.mode); 304 305 file_header->name = NULL; 306 if (!p_longname && parse_names) { 307 /* we trash mode[0] here, it's ok */ 308 //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain 309 tar.mode[0] = '\0'; 310 if (tar.prefix[0]) { 311 /* and padding[0] */ 312 //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain 313 tar.padding[0] = '\0'; 314 file_header->name = concat_path_file(tar.prefix, tar.name); 315 } else 316 file_header->name = xstrdup(tar.name); 317 } 318 319 /* Set bits 12-15 of the files mode */ 320 /* (typeflag was not trashed because chksum does not use getOctal) */ 321 switch (tar.typeflag) { 322 /* busybox identifies hard links as being regular files with 0 size and a link name */ 323 case '1': 324 file_header->mode |= S_IFREG; 325 break; 326 case '7': 327 /* case 0: */ 328 case '0': 329#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY 330 if (last_char_is(file_header->name, '/')) { 331 goto set_dir; 332 } 333#endif 334 file_header->mode |= S_IFREG; 335 break; 336 case '2': 337 file_header->mode |= S_IFLNK; 338 /* have seen tarballs with size field containing 339 * the size of the link target's name */ 340 size0: 341 file_header->size = 0; 342 break; 343 case '3': 344 file_header->mode |= S_IFCHR; 345 goto size0; /* paranoia */ 346 case '4': 347 file_header->mode |= S_IFBLK; 348 goto size0; 349 case '5': 350 IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:) 351 file_header->mode |= S_IFDIR; 352 goto size0; 353 case '6': 354 file_header->mode |= S_IFIFO; 355 goto size0; 356#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS 357 case 'L': 358 /* free: paranoia: tar with several consecutive longnames */ 359 free(p_longname); 360 /* For paranoia reasons we allocate extra NUL char */ 361 p_longname = xzalloc(file_header->size + 1); 362 /* We read ASCIZ string, including NUL */ 363 xread(archive_handle->src_fd, p_longname, file_header->size); 364 archive_handle->offset += file_header->size; 365 /* return get_header_tar(archive_handle); */ 366 /* gcc 4.1.1 didn't optimize it into jump */ 367 /* so we will do it ourself, this also saves stack */ 368 goto again; 369 case 'K': 370 free(p_linkname); 371 p_linkname = xzalloc(file_header->size + 1); 372 xread(archive_handle->src_fd, p_linkname, file_header->size); 373 archive_handle->offset += file_header->size; 374 /* return get_header_tar(archive_handle); */ 375 goto again; 376 case 'D': /* GNU dump dir */ 377 case 'M': /* Continuation of multi volume archive */ 378 case 'N': /* Old GNU for names > 100 characters */ 379 case 'S': /* Sparse file */ 380 case 'V': /* Volume header */ 381#endif 382#if !ENABLE_FEATURE_TAR_SELINUX 383 case 'g': /* pax global header */ 384 case 'x': /* pax extended header */ 385#else 386 skip_ext_hdr: 387#endif 388 { 389 off_t sz; 390 bb_error_msg("warning: skipping header '%c'", tar.typeflag); 391 sz = (file_header->size + 511) & ~(off_t)511; 392 archive_handle->offset += sz; 393 sz >>= 9; /* sz /= 512 but w/o contortions for signed div */ 394 while (sz--) 395 xread(archive_handle->src_fd, &tar, 512); 396 /* return get_header_tar(archive_handle); */ 397 goto again_after_align; 398 } 399#if ENABLE_FEATURE_TAR_SELINUX 400 case 'g': /* pax global header */ 401 case 'x': { /* pax extended header */ 402 char **pp; 403 if ((uoff_t)file_header->size > 0xfffff) /* paranoia */ 404 goto skip_ext_hdr; 405 pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx; 406 free(*pp); 407 *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size); 408 goto again; 409 } 410#endif 411 default: 412 bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag); 413 } 414 415#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS 416 if (p_longname) { 417 file_header->name = p_longname; 418 p_longname = NULL; 419 } 420 if (p_linkname) { 421 file_header->link_target = p_linkname; 422 p_linkname = NULL; 423 } 424#endif 425 if (strncmp(file_header->name, "/../"+1, 3) == 0 426 || strstr(file_header->name, "/../") 427 ) { 428 bb_error_msg_and_die("name with '..' encountered: '%s'", 429 file_header->name); 430 } 431 432 /* Strip trailing '/' in directories */ 433 /* Must be done after mode is set as '/' is used to check if it's a directory */ 434 cp = last_char_is(file_header->name, '/'); 435 436 if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { 437 archive_handle->action_header(/*archive_handle->*/ file_header); 438 /* Note that we kill the '/' only after action_header() */ 439 /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */ 440 if (cp) 441 *cp = '\0'; 442 archive_handle->action_data(archive_handle); 443 if (archive_handle->accept || archive_handle->reject) 444 llist_add_to(&archive_handle->passed, file_header->name); 445 else /* Caller isn't interested in list of unpacked files */ 446 free(file_header->name); 447 } else { 448 data_skip(archive_handle); 449 free(file_header->name); 450 } 451 archive_handle->offset += file_header->size; 452 453 free(file_header->link_target); 454 /* Do not free(file_header->name)! 455 * It might be inserted in archive_handle->passed - see above */ 456#if ENABLE_FEATURE_TAR_UNAME_GNAME 457 free(file_header->tar__uname); 458 free(file_header->tar__gname); 459#endif 460 return EXIT_SUCCESS; 461} 462