/* tar.c revision 1.71 */
/*	$NetBSD: tar.c,v 1.71 2013/01/24 17:43:44 christos Exp $	*/

/*-
 * Copyright (c) 1992 Keith Muller.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Keith Muller of the University of California, San Diego.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.71 2013/01/24 17:43:44 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, size_t *, const char *, size_t); 72static void longlink(ARCHD *, int); 73static uint32_t tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int u32_oct(uintmax_t, char *, int, int); 76static int umax_oct(uintmax_t, char *, int, int); 77static int tar_gnutar_exclude_one(const char *, size_t); 78static int check_sum(char *, size_t, char *, size_t, int); 79 80/* 81 * Routines common to all versions of tar 82 */ 83 84static int tar_nodir; /* do not write dirs under old tar */ 85int is_gnutar; /* behave like gnu tar; enable gnu 86 * extensions and skip end-of-volume 87 * checks 88 */ 89static int seen_gnu_warning; /* Have we warned yet? 
*/ 90static char *gnu_hack_string; /* ././@LongLink hackery */ 91static int gnu_hack_len; /* len of gnu_hack_string */ 92char *gnu_name_string; /* ././@LongLink hackery name */ 93char *gnu_link_string; /* ././@LongLink hackery link */ 94size_t gnu_name_length; /* ././@LongLink hackery name */ 95size_t gnu_link_length; /* ././@LongLink hackery link */ 96static int gnu_short_trailer; /* gnu short trailer */ 97 98static const char LONG_LINK[] = "././@LongLink"; 99 100#ifdef _PAX_ 101char DEV_0[] = "/dev/rst0"; 102char DEV_1[] = "/dev/rst1"; 103char DEV_4[] = "/dev/rst4"; 104char DEV_5[] = "/dev/rst5"; 105char DEV_7[] = "/dev/rst7"; 106char DEV_8[] = "/dev/rst8"; 107#endif 108 109static int 110check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 111{ 112 uint32_t hdck, blck; 113 114 hdck = asc_u32(hd, hdlen, OCT); 115 blck = tar_chksm(bl, bllen); 116 117 if (hdck != blck) { 118 if (!quiet) 119 tty_warn(0, "Header checksum %o does not match %o", 120 hdck, blck); 121 return -1; 122 } 123 return 0; 124} 125 126 127/* 128 * tar_endwr() 129 * add the tar trailer of two null blocks 130 * Return: 131 * 0 if ok, -1 otherwise (what wr_skip returns) 132 */ 133 134int 135tar_endwr(void) 136{ 137 return wr_skip((off_t)(NULLCNT * BLKMULT)); 138} 139 140/* 141 * tar_endrd() 142 * no cleanup needed here, just return size of trailer (for append) 143 * Return: 144 * size of trailer BLKMULT 145 */ 146 147off_t 148tar_endrd(void) 149{ 150 return (off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT); 151} 152 153/* 154 * tar_trail() 155 * Called to determine if a header block is a valid trailer. We are passed 156 * the block, the in_sync flag (which tells us we are in resync mode; 157 * looking for a valid header), and cnt (which starts at zero) which is 158 * used to count the number of empty blocks we have seen so far. 159 * Return: 160 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 161 * could never contain a header. 
162 */ 163 164int 165tar_trail(char *buf, int in_resync, int *cnt) 166{ 167 int i; 168 169 gnu_short_trailer = 0; 170 /* 171 * look for all zero, trailer is two consecutive blocks of zero 172 */ 173 for (i = 0; i < BLKMULT; ++i) { 174 if (buf[i] != '\0') 175 break; 176 } 177 178 /* 179 * if not all zero it is not a trailer, but MIGHT be a header. 180 */ 181 if (i != BLKMULT) 182 return -1; 183 184 /* 185 * When given a zero block, we must be careful! 186 * If we are not in resync mode, check for the trailer. Have to watch 187 * out that we do not mis-identify file data as the trailer, so we do 188 * NOT try to id a trailer during resync mode. During resync mode we 189 * might as well throw this block out since a valid header can NEVER be 190 * a block of all 0 (we must have a valid file name). 191 */ 192 if (!in_resync) { 193 ++*cnt; 194 /* 195 * old GNU tar (up through 1.13) only writes one block of 196 * trailers, so we pretend we got another 197 */ 198 if (is_gnutar) { 199 gnu_short_trailer = 1; 200 ++*cnt; 201 } 202 if (*cnt >= NULLCNT) 203 return 0; 204 } 205 return 1; 206} 207 208/* 209 * u32_oct() 210 * convert an uintmax_t to an octal string. many oddball field 211 * termination characters are used by the various versions of tar in the 212 * different fields. term selects which kind to use. str is '0' padded 213 * at the front to len. we are unable to use only one format as many old 214 * tar readers are very cranky about this. 
215 * Return: 216 * 0 if the number fit into the string, -1 otherwise 217 */ 218 219static int 220u32_oct(uintmax_t val, char *str, int len, int term) 221{ 222 char *pt; 223 uint64_t p; 224 225 p = val & TOP_HALF; 226 if (p && p != TOP_HALF) 227 return -1; 228 229 val &= BOTTOM_HALF; 230 231 /* 232 * term selects the appropriate character(s) for the end of the string 233 */ 234 pt = str + len - 1; 235 switch(term) { 236 case 3: 237 *pt-- = '\0'; 238 break; 239 case 2: 240 *pt-- = ' '; 241 *pt-- = '\0'; 242 break; 243 case 1: 244 *pt-- = ' '; 245 break; 246 case 0: 247 default: 248 *pt-- = '\0'; 249 *pt-- = ' '; 250 break; 251 } 252 253 /* 254 * convert and blank pad if there is space 255 */ 256 while (pt >= str) { 257 *pt-- = '0' + (char)(val & 0x7); 258 if ((val = val >> 3) == 0) 259 break; 260 } 261 262 while (pt >= str) 263 *pt-- = '0'; 264 if (val != 0) 265 return -1; 266 return 0; 267} 268 269/* 270 * umax_oct() 271 * convert an unsigned long long to an octal string. one of many oddball 272 * field termination characters are used by the various versions of tar 273 * in the different fields. term selects which kind to use. str is '0' 274 * padded at the front to len. we are unable to use only one format as 275 * many old tar readers are very cranky about this. 
276 * Return: 277 * 0 if the number fit into the string, -1 otherwise 278 */ 279 280static int 281umax_oct(uintmax_t val, char *str, int len, int term) 282{ 283 char *pt; 284 285 /* 286 * term selects the appropriate character(s) for the end of the string 287 */ 288 pt = str + len - 1; 289 switch(term) { 290 case 3: 291 *pt-- = '\0'; 292 break; 293 case 2: 294 *pt-- = ' '; 295 *pt-- = '\0'; 296 break; 297 case 1: 298 *pt-- = ' '; 299 break; 300 case 0: 301 default: 302 *pt-- = '\0'; 303 *pt-- = ' '; 304 break; 305 } 306 307 /* 308 * convert and blank pad if there is space 309 */ 310 while (pt >= str) { 311 *pt-- = '0' + (char)(val & 0x7); 312 if ((val = val >> 3) == 0) 313 break; 314 } 315 316 while (pt >= str) 317 *pt-- = '0'; 318 if (val != 0) 319 return -1; 320 return 0; 321} 322 323/* 324 * tar_chksm() 325 * calculate the checksum for a tar block counting the checksum field as 326 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 327 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 328 * pad headers with 0. 329 * Return: 330 * unsigned long checksum 331 */ 332 333static uint32_t 334tar_chksm(char *blk, int len) 335{ 336 char *stop; 337 char *pt; 338 uint32_t chksm = BLNKSUM; /* initial value is checksum field sum */ 339 340 /* 341 * add the part of the block before the checksum field 342 */ 343 pt = blk; 344 stop = blk + CHK_OFFSET; 345 while (pt < stop) 346 chksm += (uint32_t)(*pt++ & 0xff); 347 /* 348 * move past the checksum field and keep going, spec counts the 349 * checksum field as the sum of 8 blanks (which is pre-computed as 350 * BLNKSUM). 351 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 352 * starts, no point in summing zero's) 353 */ 354 pt += CHK_LEN; 355 stop = blk + len; 356 while (pt < stop) 357 chksm += (uint32_t)(*pt++ & 0xff); 358 return chksm; 359} 360 361/* 362 * Routines for old BSD style tar (also made portable to sysV tar) 363 */ 364 365/* 366 * tar_id() 367 * determine if a block given to us is a valid tar header (and not a USTAR 368 * header). We have to be on the lookout for those pesky blocks of all 369 * zero's. 370 * Return: 371 * 0 if a tar header, -1 otherwise 372 */ 373 374int 375tar_id(char *blk, int size) 376{ 377 HD_TAR *hd; 378 HD_USTAR *uhd; 379 static int is_ustar = -1; 380 381 if (size < BLKMULT) 382 return -1; 383 hd = (HD_TAR *)blk; 384 uhd = (HD_USTAR *)blk; 385 386 /* 387 * check for block of zero's first, a simple and fast test, then make 388 * sure this is not a ustar header by looking for the ustar magic 389 * cookie. We should use TMAGLEN, but some USTAR archive programs are 390 * wrong and create archives missing the \0. Last we check the 391 * checksum. If this is ok we have to assume it is a valid header. 
392 */ 393 if (hd->name[0] == '\0') 394 return -1; 395 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) { 396 if (is_ustar == -1) { 397 is_ustar = 1; 398 return -1; 399 } else 400 tty_warn(0, 401 "Busted tar archive: has both ustar and old tar " 402 "records"); 403 } else 404 is_ustar = 0; 405 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 406} 407 408/* 409 * tar_opt() 410 * handle tar format specific -o options 411 * Return: 412 * 0 if ok -1 otherwise 413 */ 414 415int 416tar_opt(void) 417{ 418 OPLIST *opt; 419 420 while ((opt = opt_next()) != NULL) { 421 if (strcmp(opt->name, TAR_OPTION) || 422 strcmp(opt->value, TAR_NODIR)) { 423 tty_warn(1, 424 "Unknown tar format -o option/value pair %s=%s", 425 opt->name, opt->value); 426 tty_warn(1, 427 "%s=%s is the only supported tar format option", 428 TAR_OPTION, TAR_NODIR); 429 return -1; 430 } 431 432 /* 433 * we only support one option, and only when writing 434 */ 435 if ((act != APPND) && (act != ARCHIVE)) { 436 tty_warn(1, "%s=%s is only supported when writing.", 437 opt->name, opt->value); 438 return -1; 439 } 440 tar_nodir = 1; 441 } 442 return 0; 443} 444 445 446/* 447 * tar_rd() 448 * extract the values out of block already determined to be a tar header. 449 * store the values in the ARCHD parameter. 
450 * Return: 451 * 0 452 */ 453 454int 455tar_rd(ARCHD *arcn, char *buf) 456{ 457 HD_TAR *hd; 458 char *pt; 459 460 /* 461 * we only get proper sized buffers passed to us 462 */ 463 if (tar_id(buf, BLKMULT) < 0) 464 return -1; 465 memset(arcn, 0, sizeof(*arcn)); 466 arcn->org_name = arcn->name; 467 arcn->pat = NULL; 468 arcn->sb.st_nlink = 1; 469 470 /* 471 * copy out the name and values in the stat buffer 472 */ 473 hd = (HD_TAR *)buf; 474 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 475 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 476 &gnu_name_string, &gnu_name_length, hd->name, 477 sizeof(hd->name)); 478 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 479 &gnu_link_string, &gnu_link_length, hd->linkname, 480 sizeof(hd->linkname)); 481 } 482 arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode,sizeof(hd->mode),OCT) & 483 0xfff); 484 arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); 485 arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); 486 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 487 arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); 488 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 489 490 /* 491 * have to look at the last character, it may be a '/' and that is used 492 * to encode this as a directory 493 */ 494 pt = &(arcn->name[arcn->nlen - 1]); 495 arcn->pad = 0; 496 arcn->skip = 0; 497 switch(hd->linkflag) { 498 case SYMTYPE: 499 /* 500 * symbolic link, need to get the link name and set the type in 501 * the st_mode so -v printing will look correct. 502 */ 503 arcn->type = PAX_SLK; 504 arcn->sb.st_mode |= S_IFLNK; 505 break; 506 case LNKTYPE: 507 /* 508 * hard link, need to get the link name, set the type in the 509 * st_mode and st_nlink so -v printing will look better. 
510 */ 511 arcn->type = PAX_HLK; 512 arcn->sb.st_nlink = 2; 513 514 /* 515 * no idea of what type this thing really points at, but 516 * we set something for printing only. 517 */ 518 arcn->sb.st_mode |= S_IFREG; 519 break; 520 case LONGLINKTYPE: 521 case LONGNAMETYPE: 522 /* 523 * GNU long link/file; we tag these here and let the 524 * pax internals deal with it -- too ugly otherwise. 525 */ 526 if (hd->linkflag != LONGLINKTYPE) 527 arcn->type = PAX_GLF; 528 else 529 arcn->type = PAX_GLL; 530 arcn->pad = TAR_PAD(arcn->sb.st_size); 531 arcn->skip = arcn->sb.st_size; 532 break; 533 case AREGTYPE: 534 case REGTYPE: 535 case DIRTYPE: /* see below */ 536 default: 537 /* 538 * If we have a trailing / this is a directory and NOT a file. 539 * Note: V7 tar doesn't actually have DIRTYPE, but it was 540 * reported that V7 archives using USTAR directories do exist. 541 */ 542 if (*pt == '/' || hd->linkflag == DIRTYPE) { 543 /* 544 * it is a directory, set the mode for -v printing 545 */ 546 arcn->type = PAX_DIR; 547 arcn->sb.st_mode |= S_IFDIR; 548 arcn->sb.st_nlink = 2; 549 } else { 550 /* 551 * have a file that will be followed by data. Set the 552 * skip value to the size field and calculate the size 553 * of the padding. 554 */ 555 arcn->type = PAX_REG; 556 arcn->sb.st_mode |= S_IFREG; 557 arcn->pad = TAR_PAD(arcn->sb.st_size); 558 arcn->skip = arcn->sb.st_size; 559 } 560 break; 561 } 562 563 /* 564 * strip off any trailing slash. 565 */ 566 if (*pt == '/') { 567 *pt = '\0'; 568 --arcn->nlen; 569 } 570 return 0; 571} 572 573/* 574 * tar_wr() 575 * write a tar header for the file specified in the ARCHD to the archive. 576 * Have to check for file types that cannot be stored and file names that 577 * are too long. Be careful of the term (last arg) to u32_oct, each field 578 * of tar has it own spec for the termination character(s). 
579 * ASSUMED: space after header in header block is zero filled 580 * Return: 581 * 0 if file has data to be written after the header, 1 if file has NO 582 * data to write after the header, -1 if archive write failed 583 */ 584 585int 586tar_wr(ARCHD *arcn) 587{ 588 HD_TAR *hd; 589 int len; 590 char hdblk[sizeof(HD_TAR)]; 591 592 /* 593 * check for those file system types which tar cannot store 594 */ 595 switch(arcn->type) { 596 case PAX_DIR: 597 /* 598 * user asked that dirs not be written to the archive 599 */ 600 if (tar_nodir) 601 return 1; 602 break; 603 case PAX_CHR: 604 tty_warn(1, "Tar cannot archive a character device %s", 605 arcn->org_name); 606 return 1; 607 case PAX_BLK: 608 tty_warn(1, 609 "Tar cannot archive a block device %s", arcn->org_name); 610 return 1; 611 case PAX_SCK: 612 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 613 return 1; 614 case PAX_FIF: 615 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 616 return 1; 617 case PAX_SLK: 618 case PAX_HLK: 619 case PAX_HRG: 620 if (arcn->ln_nlen > (int)sizeof(hd->linkname)) { 621 tty_warn(1,"Link name too long for tar %s", 622 arcn->ln_name); 623 return 1; 624 } 625 break; 626 case PAX_REG: 627 case PAX_CTG: 628 default: 629 break; 630 } 631 632 /* 633 * check file name len, remember extra char for dirs (the / at the end) 634 */ 635 len = arcn->nlen; 636 if (arcn->type == PAX_DIR) 637 ++len; 638 if (len >= (int)sizeof(hd->name)) { 639 tty_warn(1, "File name too long for tar %s", arcn->name); 640 return 1; 641 } 642 643 /* 644 * copy the data out of the ARCHD into the tar header based on the type 645 * of the file. Remember many tar readers want the unused fields to be 646 * padded with zero. 
We set the linkflag field (type), the linkname 647 * (or zero if not used),the size, and set the padding (if any) to be 648 * added after the file data (0 for all other types, as they only have 649 * a header) 650 */ 651 memset(hdblk, 0, sizeof(hdblk)); 652 hd = (HD_TAR *)hdblk; 653 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 654 arcn->pad = 0; 655 656 if (arcn->type == PAX_DIR) { 657 /* 658 * directories are the same as files, except have a filename 659 * that ends with a /, we add the slash here. No data follows, 660 * dirs, so no pad. 661 */ 662 hd->linkflag = AREGTYPE; 663 hd->name[len-1] = '/'; 664 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 665 goto out; 666 } else if (arcn->type == PAX_SLK) { 667 /* 668 * no data follows this file, so no pad 669 */ 670 hd->linkflag = SYMTYPE; 671 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 672 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 673 goto out; 674 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 675 /* 676 * no data follows this file, so no pad 677 */ 678 hd->linkflag = LNKTYPE; 679 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 680 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 1)) 681 goto out; 682 } else { 683 /* 684 * data follows this file, so set the pad 685 */ 686 hd->linkflag = AREGTYPE; 687 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 688 tty_warn(1,"File is too large for tar %s", 689 arcn->org_name); 690 return 1; 691 } 692 arcn->pad = TAR_PAD(arcn->sb.st_size); 693 } 694 695 /* 696 * copy those fields that are independent of the type 697 */ 698 if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 699 u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 700 u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 701 u32_oct((uintmax_t)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 702 goto out; 703 704 /* 705 * calculate and add the checksum, then 
write the header. A return of 706 * 0 tells the caller to now write the file data, 1 says no data needs 707 * to be written 708 */ 709 if (u32_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 710 sizeof(hd->chksum), 3)) 711 goto out; /* XXX Something's wrong here 712 * because a zero-byte file can 713 * cause this to be done and 714 * yet the resulting warning 715 * seems incorrect */ 716 717 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 718 return -1; 719 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 720 return -1; 721 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 722 return 0; 723 return 1; 724 725 out: 726 /* 727 * header field is out of range 728 */ 729 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 730 return 1; 731} 732 733/* 734 * Routines for POSIX ustar 735 */ 736 737/* 738 * ustar_strd() 739 * initialization for ustar read 740 * Return: 741 * 0 if ok, -1 otherwise 742 */ 743 744int 745ustar_strd(void) 746{ 747 return 0; 748} 749 750/* 751 * ustar_stwr() 752 * initialization for ustar write 753 * Return: 754 * 0 if ok, -1 otherwise 755 */ 756 757int 758ustar_stwr(void) 759{ 760 return 0; 761} 762 763/* 764 * ustar_id() 765 * determine if a block given to us is a valid ustar header. We have to 766 * be on the lookout for those pesky blocks of all zero's 767 * Return: 768 * 0 if a ustar header, -1 otherwise 769 */ 770 771int 772ustar_id(char *blk, int size) 773{ 774 HD_USTAR *hd; 775 776 if (size < BLKMULT) 777 return -1; 778 hd = (HD_USTAR *)blk; 779 780 /* 781 * check for block of zero's first, a simple and fast test then check 782 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 783 * programs are fouled up and create archives missing the \0. Last we 784 * check the checksum. If ok we have to assume it is a valid header. 
785 */ 786 if (hd->name[0] == '\0') 787 return -1; 788 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 789 return -1; 790 /* This is GNU tar */ 791 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 792 !seen_gnu_warning) { 793 seen_gnu_warning = 1; 794 tty_warn(0, 795 "Trying to read GNU tar archive with GNU extensions and end-of-volume checks off"); 796 } 797 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 798} 799 800/* 801 * ustar_rd() 802 * extract the values out of block already determined to be a ustar header. 803 * store the values in the ARCHD parameter. 804 * Return: 805 * 0 806 */ 807 808int 809ustar_rd(ARCHD *arcn, char *buf) 810{ 811 HD_USTAR *hd; 812 char *dest; 813 int cnt; 814 dev_t devmajor; 815 dev_t devminor; 816 817 /* 818 * we only get proper sized buffers 819 */ 820 if (ustar_id(buf, BLKMULT) < 0) 821 return -1; 822 823 memset(arcn, 0, sizeof(*arcn)); 824 arcn->org_name = arcn->name; 825 arcn->pat = NULL; 826 arcn->sb.st_nlink = 1; 827 hd = (HD_USTAR *)buf; 828 829 /* 830 * see if the filename is split into two parts. if, so joint the parts. 831 * we copy the prefix first and add a / between the prefix and name. 832 */ 833 dest = arcn->name; 834 if (*(hd->prefix) != '\0') { 835 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 836 dest += cnt; 837 *dest++ = '/'; 838 cnt++; 839 } else { 840 cnt = 0; 841 } 842 843 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 844 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 845 &gnu_name_string, &gnu_name_length, hd->name, 846 sizeof(hd->name)) + cnt; 847 arcn->ln_nlen = expandname(arcn->ln_name, 848 sizeof(arcn->ln_name), &gnu_link_string, &gnu_link_length, 849 hd->linkname, sizeof(hd->linkname)); 850 } 851 852 /* 853 * follow the spec to the letter. we should only have mode bits, strip 854 * off all other crud we may be passed. 
855 */ 856 arcn->sb.st_mode = (mode_t)(asc_u32(hd->mode, sizeof(hd->mode), OCT) & 857 0xfff); 858 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 859 arcn->sb.st_mtime = (time_t)(int32_t)asc_u32(hd->mtime, sizeof(hd->mtime), OCT); 860 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 861 862 /* 863 * If we can find the ascii names for gname and uname in the password 864 * and group files we will use the uid's and gid they bind. Otherwise 865 * we use the uid and gid values stored in the header. (This is what 866 * the posix spec wants). 867 */ 868 hd->gname[sizeof(hd->gname) - 1] = '\0'; 869 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 870 arcn->sb.st_gid = (gid_t)asc_u32(hd->gid, sizeof(hd->gid), OCT); 871 hd->uname[sizeof(hd->uname) - 1] = '\0'; 872 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 873 arcn->sb.st_uid = (uid_t)asc_u32(hd->uid, sizeof(hd->uid), OCT); 874 875 /* 876 * set the defaults, these may be changed depending on the file type 877 */ 878 arcn->pad = 0; 879 arcn->skip = 0; 880 arcn->sb.st_rdev = (dev_t)0; 881 882 /* 883 * set the mode and PAX type according to the typeflag in the header 884 */ 885 switch(hd->typeflag) { 886 case FIFOTYPE: 887 arcn->type = PAX_FIF; 888 arcn->sb.st_mode |= S_IFIFO; 889 break; 890 case DIRTYPE: 891 arcn->type = PAX_DIR; 892 arcn->sb.st_mode |= S_IFDIR; 893 arcn->sb.st_nlink = 2; 894 895 /* 896 * Some programs that create ustar archives append a '/' 897 * to the pathname for directories. This clearly violates 898 * ustar specs, but we will silently strip it off anyway. 899 */ 900 if (arcn->name[arcn->nlen - 1] == '/') 901 arcn->name[--arcn->nlen] = '\0'; 902 break; 903 case BLKTYPE: 904 case CHRTYPE: 905 /* 906 * this type requires the rdev field to be set. 
907 */ 908 if (hd->typeflag == BLKTYPE) { 909 arcn->type = PAX_BLK; 910 arcn->sb.st_mode |= S_IFBLK; 911 } else { 912 arcn->type = PAX_CHR; 913 arcn->sb.st_mode |= S_IFCHR; 914 } 915 devmajor = (dev_t)asc_u32(hd->devmajor,sizeof(hd->devmajor),OCT); 916 devminor = (dev_t)asc_u32(hd->devminor,sizeof(hd->devminor),OCT); 917 arcn->sb.st_rdev = TODEV(devmajor, devminor); 918 break; 919 case SYMTYPE: 920 case LNKTYPE: 921 if (hd->typeflag == SYMTYPE) { 922 arcn->type = PAX_SLK; 923 arcn->sb.st_mode |= S_IFLNK; 924 } else { 925 arcn->type = PAX_HLK; 926 /* 927 * so printing looks better 928 */ 929 arcn->sb.st_mode |= S_IFREG; 930 arcn->sb.st_nlink = 2; 931 } 932 break; 933 case LONGLINKTYPE: 934 case LONGNAMETYPE: 935 if (is_gnutar) { 936 /* 937 * GNU long link/file; we tag these here and let the 938 * pax internals deal with it -- too ugly otherwise. 939 */ 940 if (hd->typeflag != LONGLINKTYPE) 941 arcn->type = PAX_GLF; 942 else 943 arcn->type = PAX_GLL; 944 arcn->pad = TAR_PAD(arcn->sb.st_size); 945 arcn->skip = arcn->sb.st_size; 946 } else { 947 tty_warn(1, "GNU Long %s found in posix ustar archive.", 948 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 949 } 950 break; 951 case FILEXTYPE: 952 case GLOBXTYPE: 953 tty_warn(0, "%s extended headers posix ustar archive." 954 " Extracting as plain files. Following files might be" 955 " in the wrong directory or have wrong attributes.", 956 hd->typeflag == FILEXTYPE ? "File" : "Global"); 957 /*FALLTHROUGH*/ 958 case CONTTYPE: 959 case AREGTYPE: 960 case REGTYPE: 961 default: 962 /* 963 * these types have file data that follows. Set the skip and 964 * pad fields. 
965 */ 966 arcn->type = PAX_REG; 967 arcn->pad = TAR_PAD(arcn->sb.st_size); 968 arcn->skip = arcn->sb.st_size; 969 arcn->sb.st_mode |= S_IFREG; 970 break; 971 } 972 return 0; 973} 974 975static int 976expandname(char *buf, size_t len, char **gnu_name, size_t *gnu_length, 977 const char *name, size_t nlen) 978{ 979 if (*gnu_name) { 980 len = strlcpy(buf, *gnu_name, len); 981 free(*gnu_name); 982 *gnu_name = NULL; 983 *gnu_length = 0; 984 } else { 985 if (len > ++nlen) 986 len = nlen; 987 len = strlcpy(buf, name, len); 988 } 989 return len; 990} 991 992static void 993longlink(ARCHD *arcn, int type) 994{ 995 ARCHD larc; 996 997 (void)memset(&larc, 0, sizeof(larc)); 998 999 larc.type = type; 1000 larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name)); 1001 1002 switch (type) { 1003 case PAX_GLL: 1004 gnu_hack_string = arcn->ln_name; 1005 gnu_hack_len = arcn->ln_nlen + 1; 1006 break; 1007 case PAX_GLF: 1008 gnu_hack_string = arcn->name; 1009 gnu_hack_len = arcn->nlen + 1; 1010 break; 1011 default: 1012 errx(1, "Invalid type in GNU longlink %d\n", type); 1013 } 1014 1015 /* 1016 * We need a longlink now. 1017 */ 1018 ustar_wr(&larc); 1019} 1020 1021/* 1022 * ustar_wr() 1023 * write a ustar header for the file specified in the ARCHD to the archive 1024 * Have to check for file types that cannot be stored and file names that 1025 * are too long. 
Be careful of the term (last arg) to u32_oct, we only use 1026 * '\0' for the termination character (this is different than picky tar) 1027 * ASSUMED: space after header in header block is zero filled 1028 * Return: 1029 * 0 if file has data to be written after the header, 1 if file has NO 1030 * data to write after the header, -1 if archive write failed 1031 */ 1032 1033static int 1034size_err(const char *what, ARCHD *arcn) 1035{ 1036 /* 1037 * header field is out of range 1038 */ 1039 tty_warn(1, "Ustar %s header field is too small for %s", 1040 what, arcn->org_name); 1041 return 1; 1042} 1043 1044int 1045ustar_wr(ARCHD *arcn) 1046{ 1047 HD_USTAR *hd; 1048 char *pt; 1049 char hdblk[sizeof(HD_USTAR)]; 1050 const char *user, *group; 1051 1052 switch (arcn->type) { 1053 case PAX_SCK: 1054 /* 1055 * check for those file system types ustar cannot store 1056 */ 1057 if (!is_gnutar) 1058 tty_warn(1, "Ustar cannot archive a socket %s", 1059 arcn->org_name); 1060 return 1; 1061 1062 case PAX_SLK: 1063 case PAX_HLK: 1064 case PAX_HRG: 1065 /* 1066 * check the length of the linkname 1067 */ 1068 if (arcn->ln_nlen >= (int)sizeof(hd->linkname)) { 1069 if (is_gnutar) { 1070 longlink(arcn, PAX_GLL); 1071 } else { 1072 tty_warn(1, "Link name too long for ustar %s", 1073 arcn->ln_name); 1074 return 1; 1075 } 1076 } 1077 break; 1078 default: 1079 break; 1080 } 1081 1082 /* 1083 * split the path name into prefix and name fields (if needed). 
if 1084 * pt != arcn->name, the name has to be split 1085 */ 1086 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1087 if (is_gnutar) { 1088 longlink(arcn, PAX_GLF); 1089 pt = arcn->name; 1090 } else { 1091 tty_warn(1, "File name too long for ustar %s", 1092 arcn->name); 1093 return 1; 1094 } 1095 } 1096 1097 /* 1098 * zero out the header so we don't have to worry about zero fill below 1099 */ 1100 memset(hdblk, 0, sizeof(hdblk)); 1101 hd = (HD_USTAR *)hdblk; 1102 arcn->pad = 0L; 1103 1104 /* 1105 * split the name, or zero out the prefix 1106 */ 1107 if (pt != arcn->name) { 1108 /* 1109 * name was split, pt points at the / where the split is to 1110 * occur, we remove the / and copy the first part to the prefix 1111 */ 1112 *pt = '\0'; 1113 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1114 *pt++ = '/'; 1115 } 1116 1117 /* 1118 * copy the name part. this may be the whole path or the part after 1119 * the prefix 1120 */ 1121 strlcpy(hd->name, pt, sizeof(hd->name)); 1122 1123 /* 1124 * set the fields in the header that are type dependent 1125 */ 1126 switch(arcn->type) { 1127 case PAX_DIR: 1128 hd->typeflag = DIRTYPE; 1129 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1130 return size_err("DIRTYPE", arcn); 1131 break; 1132 case PAX_CHR: 1133 case PAX_BLK: 1134 if (arcn->type == PAX_CHR) 1135 hd->typeflag = CHRTYPE; 1136 else 1137 hd->typeflag = BLKTYPE; 1138 if (u32_oct((uintmax_t)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1139 sizeof(hd->devmajor), 3) || 1140 u32_oct((uintmax_t)MINOR(arcn->sb.st_rdev), hd->devminor, 1141 sizeof(hd->devminor), 3) || 1142 u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1143 return size_err("DEVTYPE", arcn); 1144 break; 1145 case PAX_FIF: 1146 hd->typeflag = FIFOTYPE; 1147 if (u32_oct((uintmax_t)0L, hd->size, sizeof(hd->size), 3)) 1148 return size_err("FIFOTYPE", arcn); 1149 break; 1150 case PAX_GLL: 1151 case PAX_SLK: 1152 case PAX_HLK: 1153 case PAX_HRG: 1154 if (arcn->type == PAX_SLK) 1155 
hd->typeflag = SYMTYPE; 1156 else if (arcn->type == PAX_GLL) 1157 hd->typeflag = LONGLINKTYPE; 1158 else 1159 hd->typeflag = LNKTYPE; 1160 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1161 if (u32_oct((uintmax_t)gnu_hack_len, hd->size, 1162 sizeof(hd->size), 3)) 1163 return size_err("LINKTYPE", arcn); 1164 break; 1165 case PAX_GLF: 1166 case PAX_REG: 1167 case PAX_CTG: 1168 default: 1169 /* 1170 * file data with this type, set the padding 1171 */ 1172 if (arcn->type == PAX_GLF) { 1173 hd->typeflag = LONGNAMETYPE; 1174 arcn->pad = TAR_PAD(gnu_hack_len); 1175 if (OFFT_OCT((uint32_t)gnu_hack_len, hd->size, 1176 sizeof(hd->size), 3)) { 1177 tty_warn(1,"File is too long for ustar %s", 1178 arcn->org_name); 1179 return 1; 1180 } 1181 } else { 1182 if (arcn->type == PAX_CTG) 1183 hd->typeflag = CONTTYPE; 1184 else 1185 hd->typeflag = REGTYPE; 1186 arcn->pad = TAR_PAD(arcn->sb.st_size); 1187 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1188 sizeof(hd->size), 3)) { 1189 tty_warn(1,"File is too long for ustar %s", 1190 arcn->org_name); 1191 return 1; 1192 } 1193 } 1194 break; 1195 } 1196 1197 strncpy(hd->magic, TMAGIC, TMAGLEN); 1198 if (is_gnutar) 1199 hd->magic[TMAGLEN - 1] = hd->version[0] = ' '; 1200 else 1201 strncpy(hd->version, TVERSION, TVERSLEN); 1202 1203 /* 1204 * set the remaining fields. Some versions want all 16 bits of mode 1205 * we better humor them (they really do not meet spec though).... 
1206 */ 1207 if (u32_oct((uintmax_t)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) 1208 return size_err("MODE", arcn); 1209 if (u32_oct((uintmax_t)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) 1210 return size_err("UID", arcn); 1211 if (u32_oct((uintmax_t)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) 1212 return size_err("GID", arcn); 1213 if (u32_oct((uintmax_t)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1214 return size_err("MTIME", arcn); 1215 user = user_from_uid(arcn->sb.st_uid, 1); 1216 group = group_from_gid(arcn->sb.st_gid, 1); 1217 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1218 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1219 1220 /* 1221 * calculate and store the checksum write the header to the archive 1222 * return 0 tells the caller to now write the file data, 1 says no data 1223 * needs to be written 1224 */ 1225 if (u32_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1226 sizeof(hd->chksum), 3)) 1227 return size_err("CHKSUM", arcn); 1228 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1229 return -1; 1230 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1231 return -1; 1232 if (gnu_hack_string) { 1233 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1234 int pad = gnu_hack_len; 1235 gnu_hack_string = NULL; 1236 gnu_hack_len = 0; 1237 if (res < 0) 1238 return -1; 1239 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1240 return -1; 1241 } 1242 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1243 return 0; 1244 return 1; 1245} 1246 1247/* 1248 * name_split() 1249 * see if the name has to be split for storage in a ustar header. We try 1250 * to fit the entire name in the name field without splitting if we can. 1251 * The split point is always at a / 1252 * Return 1253 * character pointer to split point (always the / that is to be removed 1254 * if the split is not needed, the points is set to the start of the file 1255 * name (it would violate the spec to split there). 
A NULL is returned if 1256 * the file name is too long 1257 */ 1258 1259static char * 1260name_split(char *name, int len) 1261{ 1262 char *start; 1263 1264 /* 1265 * check to see if the file name is small enough to fit in the name 1266 * field. if so just return a pointer to the name. 1267 */ 1268 if (len < TNMSZ) 1269 return name; 1270 /* 1271 * GNU tar does not honor the prefix+name mode if the magic 1272 * is not "ustar\0". So in GNU tar compatibility mode, we don't 1273 * split the filename into prefix+name because we are setting 1274 * the magic to "ustar " as GNU tar does. This of course will 1275 * end up creating a LongLink record in cases where it does not 1276 * really need do, but we are behaving like GNU tar after all. 1277 */ 1278 if (is_gnutar || len > (TPFSZ + TNMSZ)) 1279 return NULL; 1280 1281 /* 1282 * we start looking at the biggest sized piece that fits in the name 1283 * field. We walk forward looking for a slash to split at. The idea is 1284 * to find the biggest piece to fit in the name field (or the smallest 1285 * prefix we can find) (the -1 is correct the biggest piece would 1286 * include the slash between the two parts that gets thrown away) 1287 */ 1288 start = name + len - TNMSZ; 1289 while ((*start != '\0') && (*start != '/')) 1290 ++start; 1291 1292 /* 1293 * if we hit the end of the string, this name cannot be split, so we 1294 * cannot store this file. 1295 */ 1296 if (*start == '\0') 1297 return NULL; 1298 len = start - name; 1299 1300 /* 1301 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1302 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1303 * the file would then expand on extract to //str. The len == 0 below 1304 * makes this special case follow the spec to the letter. 
1305 */ 1306 if ((len >= TPFSZ) || (len == 0)) 1307 return NULL; 1308 1309 /* 1310 * ok have a split point, return it to the caller 1311 */ 1312 return start; 1313} 1314 1315/* 1316 * convert a glob into a RE, and add it to the list. we convert to 1317 * four different RE's (because we're using BRE's and can't use | 1318 * alternation :-() with this padding: 1319 * .*\/ and $ 1320 * .*\/ and \/.* 1321 * ^ and $ 1322 * ^ and \/.* 1323 */ 1324static int 1325tar_gnutar_exclude_one(const char *line, size_t len) 1326{ 1327 /* 2 * buffer len + nul */ 1328 char sbuf[MAXPATHLEN * 2 + 1]; 1329 /* + / + // + .*""/\/ + \/.* */ 1330 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1331 size_t i; 1332 int j = 0; 1333 1334 if (line[len - 1] == '\n') 1335 len--; 1336 for (i = 0; i < len; i++) { 1337 /* 1338 * convert glob to regexp, escaping everything 1339 */ 1340 if (line[i] == '*') 1341 sbuf[j++] = '.'; 1342 else if (line[i] == '?') { 1343 sbuf[j++] = '.'; 1344 continue; 1345 } else if (!isalnum((unsigned char)line[i]) && 1346 !isblank((unsigned char)line[i])) 1347 sbuf[j++] = '\\'; 1348 sbuf[j++] = line[i]; 1349 } 1350 sbuf[j] = '\0'; 1351 /* don't need the .*\/ ones if we start with /, i guess */ 1352 if (line[0] != '/') { 1353 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1354 if (rep_add(rabuf) < 0) 1355 return (-1); 1356 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1357 if (rep_add(rabuf) < 0) 1358 return (-1); 1359 } 1360 1361 (void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1362 if (rep_add(rabuf) < 0) 1363 return (-1); 1364 (void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1365 if (rep_add(rabuf) < 0) 1366 return (-1); 1367 1368 return (0); 1369} 1370 1371/* 1372 * deal with GNU tar -X/--exclude-from & --exclude switchs. 
basically, 1373 * we go through each line of the file, building a string from the "glob" 1374 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1375 * to rep_add(), which will add a empty replacement (exclusion), for the 1376 * named files. 1377 */ 1378int 1379tar_gnutar_minus_minus_exclude(const char *path) 1380{ 1381 size_t len = strlen(path); 1382 1383 if (len > MAXPATHLEN) 1384 tty_warn(0, "pathname too long: %s", path); 1385 1386 return (tar_gnutar_exclude_one(path, len)); 1387} 1388 1389int 1390tar_gnutar_X_compat(const char *path) 1391{ 1392 char *line; 1393 FILE *fp; 1394 int lineno = 0; 1395 size_t len; 1396 1397 if (path[0] == '-' && path[1] == '\0') 1398 fp = stdin; 1399 else { 1400 fp = fopen(path, "r"); 1401 if (fp == NULL) { 1402 tty_warn(1, "cannot open %s: %s", path, 1403 strerror(errno)); 1404 return -1; 1405 } 1406 } 1407 1408 while ((line = fgetln(fp, &len))) { 1409 lineno++; 1410 if (len > MAXPATHLEN) { 1411 tty_warn(0, "pathname too long, line %d of %s", 1412 lineno, path); 1413 } 1414 if (tar_gnutar_exclude_one(line, len)) 1415 return -1; 1416 } 1417 if (fp != stdin) 1418 fclose(fp); 1419 return 0; 1420} 1421