tar.c revision 1.73
1/* $NetBSD: tar.c,v 1.73 2015/12/19 18:28:54 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.73 2015/12/19 18:28:54 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67extern struct stat tst; 68 69/* 70 * Routines for reading, writing and header identify of various versions of tar 71 */ 72 73static int expandname(char *, size_t, char **, size_t *, const char *, size_t); 74static void longlink(ARCHD *, int); 75static uint32_t tar_chksm(char *, int); 76static char *name_split(char *, int); 77static int u32_oct(uintmax_t, char *, int, int); 78static int umax_oct(uintmax_t, char *, int, int); 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-of-volume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? */ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96size_t gnu_name_length; /* ././@LongLink hackery name */ 97size_t gnu_link_length; /* ././@LongLink hackery link */ 98static int gnu_short_trailer; /* gnu short trailer */ 99 100static const char LONG_LINK[] = "././@LongLink"; 101 102#ifdef _PAX_ 103char DEV_0[] = "/dev/rst0"; 104char DEV_1[] = "/dev/rst1"; 105char DEV_4[] = "/dev/rst4"; 106char DEV_5[] = "/dev/rst5"; 107char DEV_7[] = "/dev/rst7"; 108char DEV_8[] = "/dev/rst8"; 109#endif 110 111static int 112check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 113{ 114 uint32_t hdck, blck; 115 116 hdck = asc_u32(hd, hdlen, OCT); 117 blck = tar_chksm(bl, bllen); 118 119 if (hdck != blck) { 120 if (!quiet) 121 tty_warn(0, "Header checksum %o does not match %o", 122 hdck, blck); 123 return -1; 124 } 125 return 0; 126} 127 128 129/* 130 * tar_endwr() 131 * add the tar trailer of two null blocks 132 * Return: 133 * 0 if ok, -1 otherwise (what wr_skip returns) 134 */ 135 136int 137tar_endwr(void) 138{ 139 return wr_skip((off_t)(NULLCNT * BLKMULT)); 140} 141 142/* 143 * tar_endrd() 144 * no cleanup needed here, just return size of trailer (for append) 145 * Return: 146 * size of trailer BLKMULT 147 */ 148 149off_t 150tar_endrd(void) 151{ 152 return (off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT); 153} 154 155/* 156 * tar_trail() 157 * Called to determine if a header block is a valid trailer. We are passed 158 * the block, the in_sync flag (which tells us we are in resync mode; 159 * looking for a valid header), and cnt (which starts at zero) which is 160 * used to count the number of empty blocks we have seen so far. 161 * Return: 162 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 163 * could never contain a header. 164 */ 165 166int 167tar_trail(char *buf, int in_resync, int *cnt) 168{ 169 int i; 170 171 gnu_short_trailer = 0; 172 /* 173 * look for all zero, trailer is two consecutive blocks of zero 174 */ 175 for (i = 0; i < BLKMULT; ++i) { 176 if (buf[i] != '\0') 177 break; 178 } 179 180 /* 181 * if not all zero it is not a trailer, but MIGHT be a header. 182 */ 183 if (i != BLKMULT) 184 return -1; 185 186 /* 187 * When given a zero block, we must be careful! 188 * If we are not in resync mode, check for the trailer. Have to watch 189 * out that we do not mis-identify file data as the trailer, so we do 190 * NOT try to id a trailer during resync mode. During resync mode we 191 * might as well throw this block out since a valid header can NEVER be 192 * a block of all 0 (we must have a valid file name). 193 */ 194 if (!in_resync) { 195 ++*cnt; 196 /* 197 * old GNU tar (up through 1.13) only writes one block of 198 * trailers, so we pretend we got another 199 */ 200 if (is_gnutar) { 201 gnu_short_trailer = 1; 202 ++*cnt; 203 } 204 if (*cnt >= NULLCNT) 205 return 0; 206 } 207 return 1; 208} 209 210/* 211 * u32_oct() 212 * convert an uintmax_t to an octal string. many oddball field 213 * termination characters are used by the various versions of tar in the 214 * different fields. term selects which kind to use. str is '0' padded 215 * at the front to len. we are unable to use only one format as many old 216 * tar readers are very cranky about this. 217 * Return: 218 * 0 if the number fit into the string, -1 otherwise 219 */ 220 221static int 222u32_oct(uintmax_t val, char *str, int len, int term) 223{ 224 char *pt; 225 uint64_t p; 226 227 p = val & TOP_HALF; 228 if (p && p != TOP_HALF) 229 return -1; 230 231 val &= BOTTOM_HALF; 232 233 /* 234 * term selects the appropriate character(s) for the end of the string 235 */ 236 pt = str + len - 1; 237 switch(term) { 238 case 3: 239 *pt-- = '\0'; 240 break; 241 case 2: 242 *pt-- = ' '; 243 *pt-- = '\0'; 244 break; 245 case 1: 246 *pt-- = ' '; 247 break; 248 case 0: 249 default: 250 *pt-- = '\0'; 251 *pt-- = ' '; 252 break; 253 } 254 255 /* 256 * convert and blank pad if there is space 257 */ 258 while (pt >= str) { 259 *pt-- = '0' + (char)(val & 0x7); 260 if ((val = val >> 3) == 0) 261 break; 262 } 263 264 while (pt >= str) 265 *pt-- = '0'; 266 if (val != 0) 267 return -1; 268 return 0; 269} 270 271/* 272 * umax_oct() 273 * convert an unsigned long long to an octal string. one of many oddball 274 * field termination characters are used by the various versions of tar 275 * in the different fields. term selects which kind to use. str is '0' 276 * padded at the front to len. we are unable to use only one format as 277 * many old tar readers are very cranky about this. 278 * Return: 279 * 0 if the number fit into the string, -1 otherwise 280 */ 281 282static int 283umax_oct(uintmax_t val, char *str, int len, int term) 284{ 285 char *pt; 286 287 /* 288 * term selects the appropriate character(s) for the end of the string 289 */ 290 pt = str + len - 1; 291 switch(term) { 292 case 3: 293 *pt-- = '\0'; 294 break; 295 case 2: 296 *pt-- = ' '; 297 *pt-- = '\0'; 298 break; 299 case 1: 300 *pt-- = ' '; 301 break; 302 case 0: 303 default: 304 *pt-- = '\0'; 305 *pt-- = ' '; 306 break; 307 } 308 309 /* 310 * convert and blank pad if there is space 311 */ 312 while (pt >= str) { 313 *pt-- = '0' + (char)(val & 0x7); 314 if ((val = val >> 3) == 0) 315 break; 316 } 317 318 while (pt >= str) 319 *pt-- = '0'; 320 if (val != 0) 321 return -1; 322 return 0; 323} 324 325/* 326 * tar_chksm() 327 * calculate the checksum for a tar block counting the checksum field as 328 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 329 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 330 * pad headers with 0. 331 * Return: 332 * unsigned long checksum 333 */ 334 335static uint32_t 336tar_chksm(char *blk, int len) 337{ 338 char *stop; 339 char *pt; 340 uint32_t chksm = BLNKSUM; /* initial value is checksum field sum */ 341 342 /* 343 * add the part of the block before the checksum field 344 */ 345 pt = blk; 346 stop = blk + CHK_OFFSET; 347 while (pt < stop) 348 chksm += (uint32_t)(*pt++ & 0xff); 349 /* 350 * move past the checksum field and keep going, spec counts the 351 * checksum field as the sum of 8 blanks (which is pre-computed as 352 * BLNKSUM). 353 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding 354 * starts, no point in summing zero's) 355 */ 356 pt += CHK_LEN; 357 stop = blk + len; 358 while (pt < stop) 359 chksm += (uint32_t)(*pt++ & 0xff); 360 return chksm; 361} 362 363/* 364 * Routines for old BSD style tar (also made portable to sysV tar) 365 */ 366 367/* 368 * tar_id() 369 * determine if a block given to us is a valid tar header (and not a USTAR 370 * header). We have to be on the lookout for those pesky blocks of all 371 * zero's. 372 * Return: 373 * 0 if a tar header, -1 otherwise 374 */ 375 376int 377tar_id(char *blk, int size) 378{ 379 HD_TAR *hd; 380 HD_USTAR *uhd; 381 static int is_ustar = -1; 382 383 if (size < BLKMULT) 384 return -1; 385 hd = (HD_TAR *)blk; 386 uhd = (HD_USTAR *)blk; 387 388 /* 389 * check for block of zero's first, a simple and fast test, then make 390 * sure this is not a ustar header by looking for the ustar magic 391 * cookie. We should use TMAGLEN, but some USTAR archive programs are 392 * wrong and create archives missing the \0. Last we check the 393 * checksum. If this is ok we have to assume it is a valid header. 394 */ 395 if (hd->name[0] == '\0') 396 return -1; 397 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) { 398 if (is_ustar == -1) { 399 is_ustar = 1; 400 return -1; 401 } else 402 tty_warn(0, 403 "Busted tar archive: has both ustar and old tar " 404 "records"); 405 } else 406 is_ustar = 0; 407 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 408} 409 410/* 411 * tar_opt() 412 * handle tar format specific -o options 413 * Return: 414 * 0 if ok -1 otherwise 415 */ 416 417int 418tar_opt(void) 419{ 420 OPLIST *opt; 421 422 while ((opt = opt_next()) != NULL) { 423 if (strcmp(opt->name, TAR_OPTION) || 424 strcmp(opt->value, TAR_NODIR)) { 425 tty_warn(1, 426 "Unknown tar format -o option/value pair %s=%s", 427 opt->name, opt->value); 428 tty_warn(1, 429 "%s=%s is the only supported tar format option", 430 TAR_OPTION, TAR_NODIR); 431 return -1; 432 } 433 434 /* 435 * we only support one option, and only when writing 436 */ 437 if ((act != APPND) && (act != ARCHIVE)) { 438 tty_warn(1, "%s=%s is only supported when writing.", 439 opt->name, opt->value); 440 return -1; 441 } 442 tar_nodir = 1; 443 } 444 return 0; 445} 446 447 448/* 449 * tar_rd() 450 * extract the values out of block already determined to be a tar header. 451 * store the values in the ARCHD parameter. 452 * Return: 453 * 0 454 */ 455 456int 457tar_rd(ARCHD *arcn, char *buf) 458{ 459 HD_TAR *hd; 460 char *pt; 461 462 /* 463 * we only get proper sized buffers passed to us 464 */ 465 if (tar_id(buf, BLKMULT) < 0) 466 return -1; 467 memset(arcn, 0, sizeof(*arcn)); 468 arcn->org_name = arcn->name; 469 arcn->pat = NULL; 470 arcn->sb.st_nlink = 1; 471 472 /* 473 * copy out the name and values in the stat buffer 474 */ 475 hd = (HD_TAR *)buf; 476 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 477 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 478 &gnu_name_string, &gnu_name_length, hd->name, 479 sizeof(hd->name)); 480 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 481 &gnu_link_string, &gnu_link_length, hd->linkname, 482 sizeof(hd->linkname)); 483 } 484 arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode,sizeof(hd->mode),OCT) & 485 0xfff); 486 arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); 487 arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); 488 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 489 arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); 490 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 491 492 /* 493 * have to look at the last character, it may be a '/' and that is used 494 * to encode this as a directory 495 */ 496 pt = &(arcn->name[arcn->nlen - 1]); 497 arcn->pad = 0; 498 arcn->skip = 0; 499 switch(hd->linkflag) { 500 case SYMTYPE: 501 /* 502 * symbolic link, need to get the link name and set the type in 503 * the st_mode so -v printing will look correct. 504 */ 505 arcn->type = PAX_SLK; 506 arcn->sb.st_mode |= S_IFLNK; 507 break; 508 case LNKTYPE: 509 /* 510 * hard link, need to get the link name, set the type in the 511 * st_mode and st_nlink so -v printing will look better. 512 */ 513 arcn->type = PAX_HLK; 514 arcn->sb.st_nlink = 2; 515 516 /* 517 * no idea of what type this thing really points at, but 518 * we set something for printing only. 519 */ 520 arcn->sb.st_mode |= S_IFREG; 521 break; 522 case LONGLINKTYPE: 523 case LONGNAMETYPE: 524 /* 525 * GNU long link/file; we tag these here and let the 526 * pax internals deal with it -- too ugly otherwise. 527 */ 528 if (hd->linkflag != LONGLINKTYPE) 529 arcn->type = PAX_GLF; 530 else 531 arcn->type = PAX_GLL; 532 arcn->pad = TAR_PAD(arcn->sb.st_size); 533 arcn->skip = arcn->sb.st_size; 534 break; 535 case AREGTYPE: 536 case REGTYPE: 537 case DIRTYPE: /* see below */ 538 default: 539 /* 540 * If we have a trailing / this is a directory and NOT a file. 541 * Note: V7 tar doesn't actually have DIRTYPE, but it was 542 * reported that V7 archives using USTAR directories do exist. 543 */ 544 if (*pt == '/' || hd->linkflag == DIRTYPE) { 545 /* 546 * it is a directory, set the mode for -v printing 547 */ 548 arcn->type = PAX_DIR; 549 arcn->sb.st_mode |= S_IFDIR; 550 arcn->sb.st_nlink = 2; 551 } else { 552 /* 553 * have a file that will be followed by data. Set the 554 * skip value to the size field and calculate the size 555 * of the padding. 556 */ 557 arcn->type = PAX_REG; 558 arcn->sb.st_mode |= S_IFREG; 559 arcn->pad = TAR_PAD(arcn->sb.st_size); 560 arcn->skip = arcn->sb.st_size; 561 } 562 break; 563 } 564 565 /* 566 * strip off any trailing slash. 567 */ 568 if (*pt == '/') { 569 *pt = '\0'; 570 --arcn->nlen; 571 } 572 return 0; 573} 574 575/* 576 * tar_wr() 577 * write a tar header for the file specified in the ARCHD to the archive. 578 * Have to check for file types that cannot be stored and file names that 579 * are too long. Be careful of the term (last arg) to u32_oct, each field 580 * of tar has it own spec for the termination character(s). 581 * ASSUMED: space after header in header block is zero filled 582 * Return: 583 * 0 if file has data to be written after the header, 1 if file has NO 584 * data to write after the header, -1 if archive write failed 585 */ 586 587int 588tar_wr(ARCHD *arcn) 589{ 590 HD_TAR *hd; 591 int len; 592 uintmax_t mtime; 593 char hdblk[sizeof(HD_TAR)]; 594 595 /* 596 * check for those file system types which tar cannot store 597 */ 598 switch(arcn->type) { 599 case PAX_DIR: 600 /* 601 * user asked that dirs not be written to the archive 602 */ 603 if (tar_nodir) 604 return 1; 605 break; 606 case PAX_CHR: 607 tty_warn(1, "Tar cannot archive a character device %s", 608 arcn->org_name); 609 return 1; 610 case PAX_BLK: 611 tty_warn(1, 612 "Tar cannot archive a block device %s", arcn->org_name); 613 return 1; 614 case PAX_SCK: 615 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 616 return 1; 617 case PAX_FIF: 618 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 619 return 1; 620 case PAX_SLK: 621 case PAX_HLK: 622 case PAX_HRG: 623 if (arcn->ln_nlen > (int)sizeof(hd->linkname)) { 624 tty_warn(1,"Link name too long for tar %s", 625 arcn->ln_name); 626 return 1; 627 } 628 break; 629 case PAX_REG: 630 case PAX_CTG: 631 default: 632 break; 633 } 634 635 /* 636 * check file name len, remember extra char for dirs (the / at the end) 637 */ 638 len = arcn->nlen; 639 if (arcn->type == PAX_DIR) 640 ++len; 641 if (len >= (int)sizeof(hd->name)) { 642 tty_warn(1, "File name too long for tar %s", arcn->name); 643 return 1; 644 } 645 646 /* 647 * copy the data out of the ARCHD into the tar header based on the type 648 * of the file. Remember many tar readers want the unused fields to be 649 * padded with zero. We set the linkflag field (type), the linkname 650 * (or zero if not used),the size, and set the padding (if any) to be 651 * added after the file data (0 for all other types, as they only have 652 * a header) 653 */ 654 memset(hdblk, 0, sizeof(hdblk)); 655 hd = (HD_TAR *)hdblk; 656 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 657 arcn->pad = 0; 658 659 if (arcn->type == PAX_DIR) { 660 /* 661 * directories are the same as files, except have a filename 662 * that ends with a /, we add the slash here. No data follows, 663 * dirs, so no pad. 664 */ 665 hd->linkflag = AREGTYPE; 666 hd->name[len-1] = '/'; 667 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 668 goto out; 669 } else if (arcn->type == PAX_SLK) { 670 /* 671 * no data follows this file, so no pad 672 */ 673 hd->linkflag = SYMTYPE; 674 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 675 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 676 goto out; 677 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 678 /* 679 * no data follows this file, so no pad 680 */ 681 hd->linkflag = LNKTYPE; 682 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 683 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 684 goto out; 685 } else { 686 /* 687 * data follows this file, so set the pad 688 */ 689 hd->linkflag = AREGTYPE; 690 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 691 tty_warn(1,"File is too large for tar %s", 692 arcn->org_name); 693 return 1; 694 } 695 arcn->pad = TAR_PAD(arcn->sb.st_size); 696 } 697 698 /* 699 * copy those fields that are independent of the type 700 */ 701 mtime = tst.st_ino ? tst.st_mtime : arcn->sb.st_mtime; 702 if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 703 u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 704 u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 705 u32_oct(mtime, hd->mtime, sizeof(hd->mtime), 1)) 706 goto out; 707 708 /* 709 * calculate and add the checksum, then write the header. A return of 710 * 0 tells the caller to now write the file data, 1 says no data needs 711 * to be written 712 */ 713 if (u32_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 714 sizeof(hd->chksum), 3)) 715 goto out; /* XXX Something's wrong here 716 * because a zero-byte file can 717 * cause this to be done and 718 * yet the resulting warning 719 * seems incorrect */ 720 721 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 722 return -1; 723 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 724 return -1; 725 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 726 return 0; 727 return 1; 728 729 out: 730 /* 731 * header field is out of range 732 */ 733 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 734 return 1; 735} 736 737/* 738 * Routines for POSIX ustar 739 */ 740 741/* 742 * ustar_strd() 743 * initialization for ustar read 744 * Return: 745 * 0 if ok, -1 otherwise 746 */ 747 748int 749ustar_strd(void) 750{ 751 return 0; 752} 753 754/* 755 * ustar_stwr() 756 * initialization for ustar write 757 * Return: 758 * 0 if ok, -1 otherwise 759 */ 760 761int 762ustar_stwr(void) 763{ 764 return 0; 765} 766 767/* 768 * ustar_id() 769 * determine if a block given to us is a valid ustar header. We have to 770 * be on the lookout for those pesky blocks of all zero's 771 * Return: 772 * 0 if a ustar header, -1 otherwise 773 */ 774 775int 776ustar_id(char *blk, int size) 777{ 778 HD_USTAR *hd; 779 780 if (size < BLKMULT) 781 return -1; 782 hd = (HD_USTAR *)blk; 783 784 /* 785 * check for block of zero's first, a simple and fast test then check 786 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 787 * programs are fouled up and create archives missing the \0. Last we 788 * check the checksum. If ok we have to assume it is a valid header. 789 */ 790 if (hd->name[0] == '\0') 791 return -1; 792 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 793 return -1; 794 /* This is GNU tar */ 795 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 796 !seen_gnu_warning) { 797 seen_gnu_warning = 1; 798 tty_warn(0, 799 "Trying to read GNU tar archive with GNU extensions and end-of-volume checks off"); 800 } 801 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 802} 803 804/* 805 * ustar_rd() 806 * extract the values out of block already determined to be a ustar header. 807 * store the values in the ARCHD parameter. 808 * Return: 809 * 0 810 */ 811 812int 813ustar_rd(ARCHD *arcn, char *buf) 814{ 815 HD_USTAR *hd; 816 char *dest; 817 int cnt; 818 dev_t devmajor; 819 dev_t devminor; 820 821 /* 822 * we only get proper sized buffers 823 */ 824 if (ustar_id(buf, BLKMULT) < 0) 825 return -1; 826 827 memset(arcn, 0, sizeof(*arcn)); 828 arcn->org_name = arcn->name; 829 arcn->pat = NULL; 830 arcn->sb.st_nlink = 1; 831 hd = (HD_USTAR *)buf; 832 833 /* 834 * see if the filename is split into two parts. if, so joint the parts. 835 * we copy the prefix first and add a / between the prefix and name. 836 */ 837 dest = arcn->name; 838 if (*(hd->prefix) != '\0') { 839 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 840 dest += cnt; 841 *dest++ = '/'; 842 cnt++; 843 } else { 844 cnt = 0; 845 } 846 847 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 848 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 849 &gnu_name_string, &gnu_name_length, hd->name, 850 sizeof(hd->name)) + cnt; 851 arcn->ln_nlen = expandname(arcn->ln_name, 852 sizeof(arcn->ln_name), &gnu_link_string, &gnu_link_length, 853 hd->linkname, sizeof(hd->linkname)); 854 } 855 856 /* 857 * follow the spec to the letter. we should only have mode bits, strip 858 * off all other crud we may be passed. 859 */ 860 arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode, sizeof(hd->mode), OCT) & 861 0xfff); 862 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 863 arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); 864 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 865 866 /* 867 * If we can find the ascii names for gname and uname in the password 868 * and group files we will use the uid's and gid they bind. Otherwise 869 * we use the uid and gid values stored in the header. (This is what 870 * the posix spec wants). 871 */ 872 hd->gname[sizeof(hd->gname) - 1] = '\0'; 873 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 874 arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); 875 hd->uname[sizeof(hd->uname) - 1] = '\0'; 876 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 877 arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); 878 879 /* 880 * set the defaults, these may be changed depending on the file type 881 */ 882 arcn->pad = 0; 883 arcn->skip = 0; 884 arcn->sb.st_rdev = (dev_t)0; 885 886 /* 887 * set the mode and PAX type according to the typeflag in the header 888 */ 889 switch(hd->typeflag) { 890 case FIFOTYPE: 891 arcn->type = PAX_FIF; 892 arcn->sb.st_mode |= S_IFIFO; 893 break; 894 case DIRTYPE: 895 arcn->type = PAX_DIR; 896 arcn->sb.st_mode |= S_IFDIR; 897 arcn->sb.st_nlink = 2; 898 899 /* 900 * Some programs that create ustar archives append a '/' 901 * to the pathname for directories. This clearly violates 902 * ustar specs, but we will silently strip it off anyway. 903 */ 904 if (arcn->name[arcn->nlen - 1] == '/') 905 arcn->name[--arcn->nlen] = '\0'; 906 break; 907 case BLKTYPE: 908 case CHRTYPE: 909 /* 910 * this type requires the rdev field to be set. 911 */ 912 if (hd->typeflag == BLKTYPE) { 913 arcn->type = PAX_BLK; 914 arcn->sb.st_mode |= S_IFBLK; 915 } else { 916 arcn->type = PAX_CHR; 917 arcn->sb.st_mode |= S_IFCHR; 918 } 919 devmajor = (dev_t)asc_u32(hd->devmajor,sizeof(hd->devmajor),OCT); 920 devminor = (dev_t)asc_u32(hd->devminor,sizeof(hd->devminor),OCT); 921 arcn->sb.st_rdev = TODEV(devmajor, devminor); 922 break; 923 case SYMTYPE: 924 case LNKTYPE: 925 if (hd->typeflag == SYMTYPE) { 926 arcn->type = PAX_SLK; 927 arcn->sb.st_mode |= S_IFLNK; 928 } else { 929 arcn->type = PAX_HLK; 930 /* 931 * so printing looks better 932 */ 933 arcn->sb.st_mode |= S_IFREG; 934 arcn->sb.st_nlink = 2; 935 } 936 break; 937 case LONGLINKTYPE: 938 case LONGNAMETYPE: 939 if (is_gnutar) { 940 /* 941 * GNU long link/file; we tag these here and let the 942 * pax internals deal with it -- too ugly otherwise. 943 */ 944 if (hd->typeflag != LONGLINKTYPE) 945 arcn->type = PAX_GLF; 946 else 947 arcn->type = PAX_GLL; 948 arcn->pad = TAR_PAD(arcn->sb.st_size); 949 arcn->skip = arcn->sb.st_size; 950 } else { 951 tty_warn(1, "GNU Long %s found in posix ustar archive.", 952 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 953 } 954 break; 955 case FILEXTYPE: 956 case GLOBXTYPE: 957 tty_warn(0, "%s extended headers posix ustar archive." 958 " Extracting as plain files. Following files might be" 959 " in the wrong directory or have wrong attributes.", 960 hd->typeflag == FILEXTYPE ? "File" : "Global"); 961 /*FALLTHROUGH*/ 962 case CONTTYPE: 963 case AREGTYPE: 964 case REGTYPE: 965 default: 966 /* 967 * these types have file data that follows. Set the skip and 968 * pad fields. 969 */ 970 arcn->type = PAX_REG; 971 arcn->pad = TAR_PAD(arcn->sb.st_size); 972 arcn->skip = arcn->sb.st_size; 973 arcn->sb.st_mode |= S_IFREG; 974 break; 975 } 976 return 0; 977} 978 979static int 980expandname(char *buf, size_t len, char **gnu_name, size_t *gnu_length, 981 const char *name, size_t nlen) 982{ 983 if (*gnu_name) { 984 len = strlcpy(buf, *gnu_name, len); 985 free(*gnu_name); 986 *gnu_name = NULL; 987 *gnu_length = 0; 988 } else { 989 if (len > ++nlen) 990 len = nlen; 991 len = strlcpy(buf, name, len); 992 } 993 return len; 994} 995 996static void 997longlink(ARCHD *arcn, int type) 998{ 999 ARCHD larc; 1000 1001 (void)memset(&larc, 0, sizeof(larc)); 1002 1003 larc.type = type; 1004 larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name)); 1005 1006 switch (type) { 1007 case PAX_GLL: 1008 gnu_hack_string = arcn->ln_name; 1009 gnu_hack_len = arcn->ln_nlen + 1; 1010 break; 1011 case PAX_GLF: 1012 gnu_hack_string = arcn->name; 1013 gnu_hack_len = arcn->nlen + 1; 1014 break; 1015 default: 1016 errx(1, "Invalid type in GNU longlink %d", type); 1017 } 1018 1019 /* 1020 * We need a longlink now. 1021 */ 1022 ustar_wr(&larc); 1023} 1024 1025/* 1026 * ustar_wr() 1027 * write a ustar header for the file specified in the ARCHD to the archive 1028 * Have to check for file types that cannot be stored and file names that 1029 * are too long. Be careful of the term (last arg) to u32_oct, we only use 1030 * '\0' for the termination character (this is different than picky tar) 1031 * ASSUMED: space after header in header block is zero filled 1032 * Return: 1033 * 0 if file has data to be written after the header, 1 if file has NO 1034 * data to write after the header, -1 if archive write failed 1035 */ 1036 1037static int 1038size_err(const char *what, ARCHD *arcn) 1039{ 1040 /* 1041 * header field is out of range 1042 */ 1043 tty_warn(1, "Ustar %s header field is too small for %s", 1044 what, arcn->org_name); 1045 return 1; 1046} 1047 1048int 1049ustar_wr(ARCHD *arcn) 1050{ 1051 HD_USTAR *hd; 1052 char *pt; 1053 uintmax_t mtime; 1054 char hdblk[sizeof(HD_USTAR)]; 1055 const char *user, *group; 1056 1057 switch (arcn->type) { 1058 case PAX_SCK: 1059 /* 1060 * check for those file system types ustar cannot store 1061 */ 1062 if (!is_gnutar) 1063 tty_warn(1, "Ustar cannot archive a socket %s", 1064 arcn->org_name); 1065 return 1; 1066 1067 case PAX_SLK: 1068 case PAX_HLK: 1069 case PAX_HRG: 1070 /* 1071 * check the length of the linkname 1072 */ 1073 if (arcn->ln_nlen >= (int)sizeof(hd->linkname)) { 1074 if (is_gnutar) { 1075 longlink(arcn, PAX_GLL); 1076 } else { 1077 tty_warn(1, "Link name too long for ustar %s", 1078 arcn->ln_name); 1079 return 1; 1080 } 1081 } 1082 break; 1083 default: 1084 break; 1085 } 1086 1087 /* 1088 * split the path name into prefix and name fields (if needed). if 1089 * pt != arcn->name, the name has to be split 1090 */ 1091 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1092 if (is_gnutar) { 1093 longlink(arcn, PAX_GLF); 1094 pt = arcn->name; 1095 } else { 1096 tty_warn(1, "File name too long for ustar %s", 1097 arcn->name); 1098 return 1; 1099 } 1100 } 1101 1102 /* 1103 * zero out the header so we don't have to worry about zero fill below 1104 */ 1105 memset(hdblk, 0, sizeof(hdblk)); 1106 hd = (HD_USTAR *)hdblk; 1107 arcn->pad = 0L; 1108 1109 /* 1110 * split the name, or zero out the prefix 1111 */ 1112 if (pt != arcn->name) { 1113 /* 1114 * name was split, pt points at the / where the split is to 1115 * occur, we remove the / and copy the first part to the prefix 1116 */ 1117 *pt = '\0'; 1118 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1119 *pt++ = '/'; 1120 } 1121 1122 /* 1123 * copy the name part. this may be the whole path or the part after 1124 * the prefix 1125 */ 1126 strlcpy(hd->name, pt, sizeof(hd->name)); 1127 1128 /* 1129 * set the fields in the header that are type dependent 1130 */ 1131 switch(arcn->type) { 1132 case PAX_DIR: 1133 hd->typeflag = DIRTYPE; 1134 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1135 return size_err("DIRTYPE", arcn); 1136 break; 1137 case PAX_CHR: 1138 case PAX_BLK: 1139 if (arcn->type == PAX_CHR) 1140 hd->typeflag = CHRTYPE; 1141 else 1142 hd->typeflag = BLKTYPE; 1143 if (u32_oct((uintmax_t)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1144 sizeof(hd->devmajor), 3) || 1145 u32_oct((uintmax_t)MINOR(arcn->sb.st_rdev), hd->devminor, 1146 sizeof(hd->devminor), 3) || 1147 u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1148 return size_err("DEVTYPE", arcn); 1149 break; 1150 case PAX_FIF: 1151 hd->typeflag = FIFOTYPE; 1152 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1153 return size_err("FIFOTYPE", arcn); 1154 break; 1155 case PAX_GLL: 1156 case PAX_SLK: 1157 case PAX_HLK: 1158 case PAX_HRG: 1159 if (arcn->type == PAX_SLK) 1160 hd->typeflag = SYMTYPE; 1161 else if (arcn->type == PAX_GLL) 1162 hd->typeflag = LONGLINKTYPE; 1163 else 1164 hd->typeflag = LNKTYPE; 1165 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1166 if (u32_oct((uintmax_t)gnu_hack_len, hd->size, 1167 sizeof(hd->size), 3)) 1168 return size_err("LINKTYPE", arcn); 1169 break; 1170 case PAX_GLF: 1171 case PAX_REG: 1172 case PAX_CTG: 1173 default: 1174 /* 1175 * file data with this type, set the padding 1176 */ 1177 if (arcn->type == PAX_GLF) { 1178 hd->typeflag = LONGNAMETYPE; 1179 arcn->pad = TAR_PAD(gnu_hack_len); 1180 if (OFFT_OCT((uint32_t)gnu_hack_len, hd->size, 1181 sizeof(hd->size), 3)) { 1182 tty_warn(1,"File is too long for ustar %s", 1183 arcn->org_name); 1184 return 1; 1185 } 1186 } else { 1187 if (arcn->type == PAX_CTG) 1188 hd->typeflag = CONTTYPE; 1189 else 1190 hd->typeflag = REGTYPE; 1191 arcn->pad = TAR_PAD(arcn->sb.st_size); 1192 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1193 sizeof(hd->size), 3)) { 1194 tty_warn(1,"File is too long for ustar %s", 1195 arcn->org_name); 1196 return 1; 1197 } 1198 } 1199 break; 1200 } 1201 1202 strncpy(hd->magic, TMAGIC, TMAGLEN); 1203 if (is_gnutar) 1204 hd->magic[TMAGLEN - 1] = hd->version[0] = ' '; 1205 else 1206 strncpy(hd->version, TVERSION, TVERSLEN); 1207 1208 /* 1209 * set the remaining fields. Some versions want all 16 bits of mode 1210 * we better humor them (they really do not meet spec though).... 1211 */ 1212 if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) 1213 return size_err("MODE", arcn); 1214 if (u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) 1215 return size_err("UID", arcn); 1216 if (u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) 1217 return size_err("GID", arcn); 1218 mtime = tst.st_ino ? tst.st_mtime : arcn->sb.st_mtime; 1219 if (u32_oct(mtime, hd->mtime, sizeof(hd->mtime), 3)) 1220 return size_err("MTIME", arcn); 1221 user = user_from_uid(arcn->sb.st_uid, 1); 1222 group = group_from_gid(arcn->sb.st_gid, 1); 1223 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1224 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1225 1226 /* 1227 * calculate and store the checksum write the header to the archive 1228 * return 0 tells the caller to now write the file data, 1 says no data 1229 * needs to be written 1230 */ 1231 if (u32_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1232 sizeof(hd->chksum), 3)) 1233 return size_err("CHKSUM", arcn); 1234 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1235 return -1; 1236 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1237 return -1; 1238 if (gnu_hack_string) { 1239 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1240 int pad = gnu_hack_len; 1241 gnu_hack_string = NULL; 1242 gnu_hack_len = 0; 1243 if (res < 0) 1244 return -1; 1245 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1246 return -1; 1247 } 1248 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1249 return 0; 1250 return 1; 1251} 1252 1253/* 1254 * name_split() 1255 * see if the name has to be split for storage in a ustar header. We try 1256 * to fit the entire name in the name field without splitting if we can. 1257 * The split point is always at a / 1258 * Return 1259 * character pointer to split point (always the / that is to be removed 1260 * if the split is not needed, the points is set to the start of the file 1261 * name (it would violate the spec to split there). A NULL is returned if 1262 * the file name is too long 1263 */ 1264 1265static char * 1266name_split(char *name, int len) 1267{ 1268 char *start; 1269 1270 /* 1271 * check to see if the file name is small enough to fit in the name 1272 * field. if so just return a pointer to the name. 1273 */ 1274 if (len < TNMSZ) 1275 return name; 1276 /* 1277 * GNU tar does not honor the prefix+name mode if the magic 1278 * is not "ustar\0". So in GNU tar compatibility mode, we don't 1279 * split the filename into prefix+name because we are setting 1280 * the magic to "ustar " as GNU tar does. This of course will 1281 * end up creating a LongLink record in cases where it does not 1282 * really need do, but we are behaving like GNU tar after all. 1283 */ 1284 if (is_gnutar || len > (TPFSZ + TNMSZ)) 1285 return NULL; 1286 1287 /* 1288 * we start looking at the biggest sized piece that fits in the name 1289 * field. We walk forward looking for a slash to split at. The idea is 1290 * to find the biggest piece to fit in the name field (or the smallest 1291 * prefix we can find) (the -1 is correct the biggest piece would 1292 * include the slash between the two parts that gets thrown away) 1293 */ 1294 start = name + len - TNMSZ; 1295 while ((*start != '\0') && (*start != '/')) 1296 ++start; 1297 1298 /* 1299 * if we hit the end of the string, this name cannot be split, so we 1300 * cannot store this file. 1301 */ 1302 if (*start == '\0') 1303 return NULL; 1304 len = start - name; 1305 1306 /* 1307 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1308 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1309 * the file would then expand on extract to //str. The len == 0 below 1310 * makes this special case follow the spec to the letter. 1311 */ 1312 if ((len >= TPFSZ) || (len == 0)) 1313 return NULL; 1314 1315 /* 1316 * ok have a split point, return it to the caller 1317 */ 1318 return start; 1319} 1320 1321/* 1322 * convert a glob into a RE, and add it to the list. we convert to 1323 * four different RE's (because we're using BRE's and can't use | 1324 * alternation :-() with this padding: 1325 * .*\/ and $ 1326 * .*\/ and \/.* 1327 * ^ and $ 1328 * ^ and \/.* 1329 */ 1330static int 1331tar_gnutar_exclude_one(const char *line, size_t len) 1332{ 1333 /* 2 * buffer len + nul */ 1334 char sbuf[MAXPATHLEN * 2 + 1]; 1335 /* + / + // + .*""/\/ + \/.* */ 1336 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1337 size_t i; 1338 int j = 0; 1339 1340 if (line[len - 1] == '\n') 1341 len--; 1342 for (i = 0; i < len; i++) { 1343 /* 1344 * convert glob to regexp, escaping everything 1345 */ 1346 if (line[i] == '*') 1347 sbuf[j++] = '.'; 1348 else if (line[i] == '?') { 1349 sbuf[j++] = '.'; 1350 continue; 1351 } else if (!isalnum((unsigned char)line[i]) && 1352 !isblank((unsigned char)line[i])) 1353 sbuf[j++] = '\\'; 1354 sbuf[j++] = line[i]; 1355 } 1356 sbuf[j] = '\0'; 1357 /* don't need the .*\/ ones if we start with /, i guess */ 1358 if (line[0] != '/') { 1359 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1360 if (rep_add(rabuf) < 0) 1361 return (-1); 1362 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1363 if (rep_add(rabuf) < 0) 1364 return (-1); 1365 } 1366 1367 (void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1368 if (rep_add(rabuf) < 0) 1369 return (-1); 1370 (void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1371 if (rep_add(rabuf) < 0) 1372 return (-1); 1373 1374 return (0); 1375} 1376 1377/* 1378 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1379 * we go through each line of the file, building a string from the "glob" 1380 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1381 * to rep_add(), which will add a empty replacement (exclusion), for the 1382 * named files. 1383 */ 1384int 1385tar_gnutar_minus_minus_exclude(const char *path) 1386{ 1387 size_t len = strlen(path); 1388 1389 if (len > MAXPATHLEN) 1390 tty_warn(0, "pathname too long: %s", path); 1391 1392 return (tar_gnutar_exclude_one(path, len)); 1393} 1394 1395int 1396tar_gnutar_X_compat(const char *path) 1397{ 1398 char *line; 1399 FILE *fp; 1400 int lineno = 0; 1401 size_t len; 1402 1403 if (path[0] == '-' && path[1] == '\0') 1404 fp = stdin; 1405 else { 1406 fp = fopen(path, "r"); 1407 if (fp == NULL) { 1408 tty_warn(1, "cannot open %s: %s", path, 1409 strerror(errno)); 1410 return -1; 1411 } 1412 } 1413 1414 while ((line = fgetln(fp, &len))) { 1415 lineno++; 1416 if (len > MAXPATHLEN) { 1417 tty_warn(0, "pathname too long, line %d of %s", 1418 lineno, path); 1419 } 1420 if (tar_gnutar_exclude_one(line, len)) 1421 return -1; 1422 } 1423 if (fp != stdin) 1424 fclose(fp); 1425 return 0; 1426} 1427