tar.c revision 1.53
1/* $NetBSD: tar.c,v 1.53 2004/05/11 17:12:26 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.53 2004/05/11 17:12:26 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96static int gnu_short_trailer; /* gnu short trailer */ 97 98#ifdef _PAX_ 99char DEV_0[] = "/dev/rst0"; 100char DEV_1[] = "/dev/rst1"; 101char DEV_4[] = "/dev/rst4"; 102char DEV_5[] = "/dev/rst5"; 103char DEV_7[] = "/dev/rst7"; 104char DEV_8[] = "/dev/rst8"; 105#endif 106 107static int 108check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 109{ 110 u_long hdck, blck; 111 112 hdck = asc_ul(hd, hdlen, OCT); 113 blck = tar_chksm(bl, bllen); 114 115 if (hdck != blck) { 116 if (!quiet) 117 tty_warn(0, "Header checksum %lo does not match %lo", 118 hdck, blck); 119 return(-1); 120 } 121 return(0); 122} 123 124 125/* 126 * tar_endwr() 127 * add the tar trailer of two null blocks 128 * Return: 129 * 0 if ok, -1 otherwise (what wr_skip returns) 130 */ 131 132int 133tar_endwr(void) 134{ 135 return(wr_skip((off_t)(NULLCNT * BLKMULT))); 136} 137 138/* 139 * tar_endrd() 140 * no cleanup needed here, just return size of trailer (for append) 141 * Return: 142 * size of trailer BLKMULT 143 */ 144 145off_t 146tar_endrd(void) 147{ 148 return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT)); 149} 150 151/* 152 * tar_trail() 153 * Called to determine if a header block is a valid trailer. We are passed 154 * the block, the in_sync flag (which tells us we are in resync mode; 155 * looking for a valid header), and cnt (which starts at zero) which is 156 * used to count the number of empty blocks we have seen so far. 157 * Return: 158 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 159 * could never contain a header. 
160 */ 161 162int 163tar_trail(char *buf, int in_resync, int *cnt) 164{ 165 int i; 166 167 gnu_short_trailer = 0; 168 /* 169 * look for all zero, trailer is two consecutive blocks of zero 170 */ 171 for (i = 0; i < BLKMULT; ++i) { 172 if (buf[i] != '\0') 173 break; 174 } 175 176 /* 177 * if not all zero it is not a trailer, but MIGHT be a header. 178 */ 179 if (i != BLKMULT) 180 return(-1); 181 182 /* 183 * When given a zero block, we must be careful! 184 * If we are not in resync mode, check for the trailer. Have to watch 185 * out that we do not mis-identify file data as the trailer, so we do 186 * NOT try to id a trailer during resync mode. During resync mode we 187 * might as well throw this block out since a valid header can NEVER be 188 * a block of all 0 (we must have a valid file name). 189 */ 190 if (!in_resync) { 191 ++*cnt; 192 /* 193 * old GNU tar (up through 1.13) only writes one block of 194 * trailers, so we pretend we got another 195 */ 196 if (is_gnutar) { 197 gnu_short_trailer = 1; 198 ++*cnt; 199 } 200 if (*cnt >= NULLCNT) 201 return(0); 202 } 203 return(1); 204} 205 206/* 207 * ul_oct() 208 * convert an unsigned long to an octal string. many oddball field 209 * termination characters are used by the various versions of tar in the 210 * different fields. term selects which kind to use. str is '0' padded 211 * at the front to len. we are unable to use only one format as many old 212 * tar readers are very cranky about this. 
213 * Return: 214 * 0 if the number fit into the string, -1 otherwise 215 */ 216 217static int 218ul_oct(u_long val, char *str, int len, int term) 219{ 220 char *pt; 221 222 /* 223 * term selects the appropriate character(s) for the end of the string 224 */ 225 pt = str + len - 1; 226 switch(term) { 227 case 3: 228 *pt-- = '\0'; 229 break; 230 case 2: 231 *pt-- = ' '; 232 *pt-- = '\0'; 233 break; 234 case 1: 235 *pt-- = ' '; 236 break; 237 case 0: 238 default: 239 *pt-- = '\0'; 240 *pt-- = ' '; 241 break; 242 } 243 244 /* 245 * convert and blank pad if there is space 246 */ 247 while (pt >= str) { 248 *pt-- = '0' + (char)(val & 0x7); 249 if ((val = val >> 3) == (u_long)0) 250 break; 251 } 252 253 while (pt >= str) 254 *pt-- = '0'; 255 if (val != (u_long)0) 256 return(-1); 257 return(0); 258} 259 260#if !defined(NET2_STAT) && !defined(_LP64) 261/* 262 * ull_oct() 263 * convert an unsigned long long to an octal string. one of many oddball 264 * field termination characters are used by the various versions of tar 265 * in the different fields. term selects which kind to use. str is '0' 266 * padded at the front to len. we are unable to use only one format as 267 * many old tar readers are very cranky about this. 
268 * Return: 269 * 0 if the number fit into the string, -1 otherwise 270 */ 271 272static int 273ull_oct(unsigned long long val, char *str, int len, int term) 274{ 275 char *pt; 276 277 /* 278 * term selects the appropriate character(s) for the end of the string 279 */ 280 pt = str + len - 1; 281 switch(term) { 282 case 3: 283 *pt-- = '\0'; 284 break; 285 case 2: 286 *pt-- = ' '; 287 *pt-- = '\0'; 288 break; 289 case 1: 290 *pt-- = ' '; 291 break; 292 case 0: 293 default: 294 *pt-- = '\0'; 295 *pt-- = ' '; 296 break; 297 } 298 299 /* 300 * convert and blank pad if there is space 301 */ 302 while (pt >= str) { 303 *pt-- = '0' + (char)(val & 0x7); 304 if ((val = val >> 3) == 0) 305 break; 306 } 307 308 while (pt >= str) 309 *pt-- = '0'; 310 if (val != (unsigned long long)0) 311 return(-1); 312 return(0); 313} 314#endif 315 316/* 317 * tar_chksm() 318 * calculate the checksum for a tar block counting the checksum field as 319 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 320 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 321 * pad headers with 0. 322 * Return: 323 * unsigned long checksum 324 */ 325 326static u_long 327tar_chksm(char *blk, int len) 328{ 329 char *stop; 330 char *pt; 331 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 332 333 /* 334 * add the part of the block before the checksum field 335 */ 336 pt = blk; 337 stop = blk + CHK_OFFSET; 338 while (pt < stop) 339 chksm += (u_long)(*pt++ & 0xff); 340 /* 341 * move past the checksum field and keep going, spec counts the 342 * checksum field as the sum of 8 blanks (which is pre-computed as 343 * BLNKSUM). 344 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 345 * starts, no point in summing zero's) 346 */ 347 pt += CHK_LEN; 348 stop = blk + len; 349 while (pt < stop) 350 chksm += (u_long)(*pt++ & 0xff); 351 return(chksm); 352} 353 354/* 355 * Routines for old BSD style tar (also made portable to sysV tar) 356 */ 357 358/* 359 * tar_id() 360 * determine if a block given to us is a valid tar header (and not a USTAR 361 * header). We have to be on the lookout for those pesky blocks of all 362 * zero's. 363 * Return: 364 * 0 if a tar header, -1 otherwise 365 */ 366 367int 368tar_id(char *blk, int size) 369{ 370 HD_TAR *hd; 371 HD_USTAR *uhd; 372 373 if (size < BLKMULT) 374 return(-1); 375 hd = (HD_TAR *)blk; 376 uhd = (HD_USTAR *)blk; 377 378 /* 379 * check for block of zero's first, a simple and fast test, then make 380 * sure this is not a ustar header by looking for the ustar magic 381 * cookie. We should use TMAGLEN, but some USTAR archive programs are 382 * wrong and create archives missing the \0. Last we check the 383 * checksum. If this is ok we have to assume it is a valid header. 
384 */ 385 if (hd->name[0] == '\0') 386 return(-1); 387 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 388 return(-1); 389 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 390} 391 392/* 393 * tar_opt() 394 * handle tar format specific -o options 395 * Return: 396 * 0 if ok -1 otherwise 397 */ 398 399int 400tar_opt(void) 401{ 402 OPLIST *opt; 403 404 while ((opt = opt_next()) != NULL) { 405 if (strcmp(opt->name, TAR_OPTION) || 406 strcmp(opt->value, TAR_NODIR)) { 407 tty_warn(1, 408 "Unknown tar format -o option/value pair %s=%s", 409 opt->name, opt->value); 410 tty_warn(1, 411 "%s=%s is the only supported tar format option", 412 TAR_OPTION, TAR_NODIR); 413 return(-1); 414 } 415 416 /* 417 * we only support one option, and only when writing 418 */ 419 if ((act != APPND) && (act != ARCHIVE)) { 420 tty_warn(1, "%s=%s is only supported when writing.", 421 opt->name, opt->value); 422 return(-1); 423 } 424 tar_nodir = 1; 425 } 426 return(0); 427} 428 429 430/* 431 * tar_rd() 432 * extract the values out of block already determined to be a tar header. 433 * store the values in the ARCHD parameter. 
434 * Return: 435 * 0 436 */ 437 438int 439tar_rd(ARCHD *arcn, char *buf) 440{ 441 HD_TAR *hd; 442 char *pt; 443 444 /* 445 * we only get proper sized buffers passed to us 446 */ 447 if (tar_id(buf, BLKMULT) < 0) 448 return(-1); 449 memset(arcn, 0, sizeof(*arcn)); 450 arcn->org_name = arcn->name; 451 arcn->pat = NULL; 452 arcn->sb.st_nlink = 1; 453 454 /* 455 * copy out the name and values in the stat buffer 456 */ 457 hd = (HD_TAR *)buf; 458 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 459 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 460 &gnu_name_string, hd->name, sizeof(hd->name)); 461 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 462 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 463 } 464 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 465 0xfff); 466 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 467 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 468 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 469 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 470 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 471 472 /* 473 * have to look at the last character, it may be a '/' and that is used 474 * to encode this as a directory 475 */ 476 pt = &(arcn->name[arcn->nlen - 1]); 477 arcn->pad = 0; 478 arcn->skip = 0; 479 switch(hd->linkflag) { 480 case SYMTYPE: 481 /* 482 * symbolic link, need to get the link name and set the type in 483 * the st_mode so -v printing will look correct. 484 */ 485 arcn->type = PAX_SLK; 486 arcn->sb.st_mode |= S_IFLNK; 487 break; 488 case LNKTYPE: 489 /* 490 * hard link, need to get the link name, set the type in the 491 * st_mode and st_nlink so -v printing will look better. 492 */ 493 arcn->type = PAX_HLK; 494 arcn->sb.st_nlink = 2; 495 496 /* 497 * no idea of what type this thing really points at, but 498 * we set something for printing only. 
499 */ 500 arcn->sb.st_mode |= S_IFREG; 501 break; 502 case LONGLINKTYPE: 503 arcn->type = PAX_GLL; 504 /* FALLTHROUGH */ 505 case LONGNAMETYPE: 506 /* 507 * GNU long link/file; we tag these here and let the 508 * pax internals deal with it -- too ugly otherwise. 509 */ 510 if (hd->linkflag != LONGLINKTYPE) 511 arcn->type = PAX_GLF; 512 arcn->pad = TAR_PAD(arcn->sb.st_size); 513 arcn->skip = arcn->sb.st_size; 514 break; 515 case AREGTYPE: 516 case REGTYPE: 517 case DIRTYPE: /* see below */ 518 default: 519 /* 520 * If we have a trailing / this is a directory and NOT a file. 521 * Note: V7 tar doesn't actually have DIRTYPE, but it was 522 * reported that V7 archives using USTAR directories do exist. 523 */ 524 if (*pt == '/' || hd->linkflag == DIRTYPE) { 525 /* 526 * it is a directory, set the mode for -v printing 527 */ 528 arcn->type = PAX_DIR; 529 arcn->sb.st_mode |= S_IFDIR; 530 arcn->sb.st_nlink = 2; 531 } else { 532 /* 533 * have a file that will be followed by data. Set the 534 * skip value to the size field and calculate the size 535 * of the padding. 536 */ 537 arcn->type = PAX_REG; 538 arcn->sb.st_mode |= S_IFREG; 539 arcn->pad = TAR_PAD(arcn->sb.st_size); 540 arcn->skip = arcn->sb.st_size; 541 } 542 break; 543 } 544 545 /* 546 * strip off any trailing slash. 547 */ 548 if (*pt == '/') { 549 *pt = '\0'; 550 --arcn->nlen; 551 } 552 return(0); 553} 554 555/* 556 * tar_wr() 557 * write a tar header for the file specified in the ARCHD to the archive. 558 * Have to check for file types that cannot be stored and file names that 559 * are too long. Be careful of the term (last arg) to ul_oct, each field 560 * of tar has it own spec for the termination character(s). 
561 * ASSUMED: space after header in header block is zero filled 562 * Return: 563 * 0 if file has data to be written after the header, 1 if file has NO 564 * data to write after the header, -1 if archive write failed 565 */ 566 567int 568tar_wr(ARCHD *arcn) 569{ 570 HD_TAR *hd; 571 int len; 572 char hdblk[sizeof(HD_TAR)]; 573 574 /* 575 * check for those file system types which tar cannot store 576 */ 577 switch(arcn->type) { 578 case PAX_DIR: 579 /* 580 * user asked that dirs not be written to the archive 581 */ 582 if (tar_nodir) 583 return(1); 584 break; 585 case PAX_CHR: 586 tty_warn(1, "Tar cannot archive a character device %s", 587 arcn->org_name); 588 return(1); 589 case PAX_BLK: 590 tty_warn(1, 591 "Tar cannot archive a block device %s", arcn->org_name); 592 return(1); 593 case PAX_SCK: 594 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 595 return(1); 596 case PAX_FIF: 597 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 598 return(1); 599 case PAX_SLK: 600 case PAX_HLK: 601 case PAX_HRG: 602 if (arcn->ln_nlen > sizeof(hd->linkname)) { 603 tty_warn(1,"Link name too long for tar %s", 604 arcn->ln_name); 605 return(1); 606 } 607 break; 608 case PAX_REG: 609 case PAX_CTG: 610 default: 611 break; 612 } 613 614 /* 615 * check file name len, remember extra char for dirs (the / at the end) 616 */ 617 len = arcn->nlen; 618 if (arcn->type == PAX_DIR) 619 ++len; 620 if (len >= sizeof(hd->name)) { 621 tty_warn(1, "File name too long for tar %s", arcn->name); 622 return(1); 623 } 624 625 /* 626 * copy the data out of the ARCHD into the tar header based on the type 627 * of the file. Remember many tar readers want the unused fields to be 628 * padded with zero. 
We set the linkflag field (type), the linkname 629 * (or zero if not used),the size, and set the padding (if any) to be 630 * added after the file data (0 for all other types, as they only have 631 * a header) 632 */ 633 memset(hdblk, 0, sizeof(hdblk)); 634 hd = (HD_TAR *)hdblk; 635 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 636 arcn->pad = 0; 637 638 if (arcn->type == PAX_DIR) { 639 /* 640 * directories are the same as files, except have a filename 641 * that ends with a /, we add the slash here. No data follows, 642 * dirs, so no pad. 643 */ 644 hd->linkflag = AREGTYPE; 645 hd->name[len-1] = '/'; 646 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 647 goto out; 648 } else if (arcn->type == PAX_SLK) { 649 /* 650 * no data follows this file, so no pad 651 */ 652 hd->linkflag = SYMTYPE; 653 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 654 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 655 goto out; 656 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 657 /* 658 * no data follows this file, so no pad 659 */ 660 hd->linkflag = LNKTYPE; 661 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 662 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 663 goto out; 664 } else { 665 /* 666 * data follows this file, so set the pad 667 */ 668 hd->linkflag = AREGTYPE; 669 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 670 tty_warn(1,"File is too large for tar %s", 671 arcn->org_name); 672 return(1); 673 } 674 arcn->pad = TAR_PAD(arcn->sb.st_size); 675 } 676 677 /* 678 * copy those fields that are independent of the type 679 */ 680 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 681 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 682 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 683 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 684 goto out; 685 686 /* 687 * calculate and add the checksum, then write the header. 
A return of 688 * 0 tells the caller to now write the file data, 1 says no data needs 689 * to be written 690 */ 691 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 692 sizeof(hd->chksum), 3)) 693 goto out; /* XXX Something's wrong here 694 * because a zero-byte file can 695 * cause this to be done and 696 * yet the resulting warning 697 * seems incorrect */ 698 699 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 700 return(-1); 701 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 702 return(-1); 703 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 704 return(0); 705 return(1); 706 707 out: 708 /* 709 * header field is out of range 710 */ 711 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 712 return(1); 713} 714 715/* 716 * Routines for POSIX ustar 717 */ 718 719/* 720 * ustar_strd() 721 * initialization for ustar read 722 * Return: 723 * 0 if ok, -1 otherwise 724 */ 725 726int 727ustar_strd(void) 728{ 729 return(0); 730} 731 732/* 733 * ustar_stwr() 734 * initialization for ustar write 735 * Return: 736 * 0 if ok, -1 otherwise 737 */ 738 739int 740ustar_stwr(void) 741{ 742 return(0); 743} 744 745/* 746 * ustar_id() 747 * determine if a block given to us is a valid ustar header. We have to 748 * be on the lookout for those pesky blocks of all zero's 749 * Return: 750 * 0 if a ustar header, -1 otherwise 751 */ 752 753int 754ustar_id(char *blk, int size) 755{ 756 HD_USTAR *hd; 757 758 if (size < BLKMULT) 759 return(-1); 760 hd = (HD_USTAR *)blk; 761 762 /* 763 * check for block of zero's first, a simple and fast test then check 764 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 765 * programs are fouled up and create archives missing the \0. Last we 766 * check the checksum. If ok we have to assume it is a valid header. 
767 */ 768 if (hd->name[0] == '\0') 769 return(-1); 770 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 771 return(-1); 772 /* This is GNU tar */ 773 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 774 !seen_gnu_warning) { 775 seen_gnu_warning = 1; 776 tty_warn(0, 777 "Trying to read GNU tar archive with extensions off"); 778 } 779 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 780} 781 782/* 783 * ustar_rd() 784 * extract the values out of block already determined to be a ustar header. 785 * store the values in the ARCHD parameter. 786 * Return: 787 * 0 788 */ 789 790int 791ustar_rd(ARCHD *arcn, char *buf) 792{ 793 HD_USTAR *hd; 794 char *dest; 795 int cnt; 796 dev_t devmajor; 797 dev_t devminor; 798 799 /* 800 * we only get proper sized buffers 801 */ 802 if (ustar_id(buf, BLKMULT) < 0) 803 return(-1); 804 805 memset(arcn, 0, sizeof(*arcn)); 806 arcn->org_name = arcn->name; 807 arcn->pat = NULL; 808 arcn->sb.st_nlink = 1; 809 hd = (HD_USTAR *)buf; 810 811 /* 812 * see if the filename is split into two parts. if, so joint the parts. 813 * we copy the prefix first and add a / between the prefix and name. 814 */ 815 dest = arcn->name; 816 if (*(hd->prefix) != '\0') { 817 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 818 dest += cnt; 819 *dest++ = '/'; 820 cnt++; 821 } else { 822 cnt = 0; 823 } 824 825 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 826 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 827 &gnu_name_string, hd->name, sizeof(hd->name)) + cnt; 828 arcn->ln_nlen = expandname(arcn->ln_name, 829 sizeof(arcn->ln_name), &gnu_link_string, hd->linkname, 830 sizeof(hd->linkname)); 831 } 832 833 /* 834 * follow the spec to the letter. we should only have mode bits, strip 835 * off all other crud we may be passed. 
836 */ 837 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 838 0xfff); 839 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 840 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 841 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 842 843 /* 844 * If we can find the ascii names for gname and uname in the password 845 * and group files we will use the uid's and gid they bind. Otherwise 846 * we use the uid and gid values stored in the header. (This is what 847 * the posix spec wants). 848 */ 849 hd->gname[sizeof(hd->gname) - 1] = '\0'; 850 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 851 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 852 hd->uname[sizeof(hd->uname) - 1] = '\0'; 853 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 854 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 855 856 /* 857 * set the defaults, these may be changed depending on the file type 858 */ 859 arcn->pad = 0; 860 arcn->skip = 0; 861 arcn->sb.st_rdev = (dev_t)0; 862 863 /* 864 * set the mode and PAX type according to the typeflag in the header 865 */ 866 switch(hd->typeflag) { 867 case FIFOTYPE: 868 arcn->type = PAX_FIF; 869 arcn->sb.st_mode |= S_IFIFO; 870 break; 871 case DIRTYPE: 872 arcn->type = PAX_DIR; 873 arcn->sb.st_mode |= S_IFDIR; 874 arcn->sb.st_nlink = 2; 875 876 /* 877 * Some programs that create ustar archives append a '/' 878 * to the pathname for directories. This clearly violates 879 * ustar specs, but we will silently strip it off anyway. 880 */ 881 if (arcn->name[arcn->nlen - 1] == '/') 882 arcn->name[--arcn->nlen] = '\0'; 883 break; 884 case BLKTYPE: 885 case CHRTYPE: 886 /* 887 * this type requires the rdev field to be set. 
888 */ 889 if (hd->typeflag == BLKTYPE) { 890 arcn->type = PAX_BLK; 891 arcn->sb.st_mode |= S_IFBLK; 892 } else { 893 arcn->type = PAX_CHR; 894 arcn->sb.st_mode |= S_IFCHR; 895 } 896 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 897 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 898 arcn->sb.st_rdev = TODEV(devmajor, devminor); 899 break; 900 case SYMTYPE: 901 case LNKTYPE: 902 if (hd->typeflag == SYMTYPE) { 903 arcn->type = PAX_SLK; 904 arcn->sb.st_mode |= S_IFLNK; 905 } else { 906 arcn->type = PAX_HLK; 907 /* 908 * so printing looks better 909 */ 910 arcn->sb.st_mode |= S_IFREG; 911 arcn->sb.st_nlink = 2; 912 } 913 break; 914 case LONGLINKTYPE: 915 if (is_gnutar) 916 arcn->type = PAX_GLL; 917 /* FALLTHROUGH */ 918 case LONGNAMETYPE: 919 if (is_gnutar) { 920 /* 921 * GNU long link/file; we tag these here and let the 922 * pax internals deal with it -- too ugly otherwise. 923 */ 924 if (hd->typeflag != LONGLINKTYPE) 925 arcn->type = PAX_GLF; 926 arcn->pad = TAR_PAD(arcn->sb.st_size); 927 arcn->skip = arcn->sb.st_size; 928 } else { 929 tty_warn(1, "GNU Long %s found in posix ustar archive.", 930 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 931 } 932 break; 933 case CONTTYPE: 934 case AREGTYPE: 935 case REGTYPE: 936 default: 937 /* 938 * these types have file data that follows. Set the skip and 939 * pad fields. 
940 */ 941 arcn->type = PAX_REG; 942 arcn->pad = TAR_PAD(arcn->sb.st_size); 943 arcn->skip = arcn->sb.st_size; 944 arcn->sb.st_mode |= S_IFREG; 945 break; 946 } 947 return(0); 948} 949 950static int 951expandname(char *buf, size_t len, char **gnu_name, const char *name, 952 size_t nlen) 953{ 954 if (*gnu_name) { 955 len = strlcpy(buf, *gnu_name, len); 956 free(*gnu_name); 957 *gnu_name = NULL; 958 } else { 959 if (len > ++nlen) 960 len = nlen; 961 len = strlcpy(buf, name, len); 962 } 963 return len; 964} 965 966static void 967longlink(ARCHD *arcn) 968{ 969 ARCHD larc; 970 971 memset(&larc, 0, sizeof(larc)); 972 973 switch (arcn->type) { 974 case PAX_SLK: 975 case PAX_HRG: 976 case PAX_HLK: 977 larc.type = PAX_GLL; 978 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 979 sizeof(larc.ln_name)); 980 gnu_hack_string = arcn->ln_name; 981 gnu_hack_len = arcn->ln_nlen + 1; 982 break; 983 default: 984 larc.nlen = strlcpy(larc.name, "././@LongLink", 985 sizeof(larc.name)); 986 gnu_hack_string = arcn->name; 987 gnu_hack_len = arcn->nlen + 1; 988 larc.type = PAX_GLF; 989 } 990 /* 991 * We need a longlink now. 992 */ 993 ustar_wr(&larc); 994} 995 996/* 997 * ustar_wr() 998 * write a ustar header for the file specified in the ARCHD to the archive 999 * Have to check for file types that cannot be stored and file names that 1000 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 1001 * '\0' for the termination character (this is different than picky tar) 1002 * ASSUMED: space after header in header block is zero filled 1003 * Return: 1004 * 0 if file has data to be written after the header, 1 if file has NO 1005 * data to write after the header, -1 if archive write failed 1006 */ 1007 1008int 1009ustar_wr(ARCHD *arcn) 1010{ 1011 HD_USTAR *hd; 1012 char *pt; 1013 char hdblk[sizeof(HD_USTAR)]; 1014 const char *user, *group; 1015 1016 /* 1017 * check for those file system types ustar cannot store 1018 */ 1019 if (arcn->type == PAX_SCK) { 1020 if (!is_gnutar) 1021 tty_warn(1, "Ustar cannot archive a socket %s", 1022 arcn->org_name); 1023 return(1); 1024 } 1025 1026 /* 1027 * check the length of the linkname 1028 */ 1029 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1030 (arcn->type == PAX_HRG)) && 1031 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1032 if (is_gnutar) { 1033 longlink(arcn); 1034 } else { 1035 tty_warn(1, "Link name too long for ustar %s", 1036 arcn->ln_name); 1037 return(1); 1038 } 1039 } 1040 1041 /* 1042 * split the path name into prefix and name fields (if needed). 
if 1043 * pt != arcn->name, the name has to be split 1044 */ 1045 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1046 if (is_gnutar) { 1047 longlink(arcn); 1048 pt = arcn->name; 1049 } else { 1050 tty_warn(1, "File name too long for ustar %s", 1051 arcn->name); 1052 return(1); 1053 } 1054 } 1055 1056 /* 1057 * zero out the header so we don't have to worry about zero fill below 1058 */ 1059 memset(hdblk, 0, sizeof(hdblk)); 1060 hd = (HD_USTAR *)hdblk; 1061 arcn->pad = 0L; 1062 1063 /* 1064 * split the name, or zero out the prefix 1065 */ 1066 if (pt != arcn->name) { 1067 /* 1068 * name was split, pt points at the / where the split is to 1069 * occur, we remove the / and copy the first part to the prefix 1070 */ 1071 *pt = '\0'; 1072 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1073 *pt++ = '/'; 1074 } 1075 1076 /* 1077 * copy the name part. this may be the whole path or the part after 1078 * the prefix 1079 */ 1080 strlcpy(hd->name, pt, sizeof(hd->name)); 1081 1082 /* 1083 * set the fields in the header that are type dependent 1084 */ 1085 switch(arcn->type) { 1086 case PAX_DIR: 1087 hd->typeflag = DIRTYPE; 1088 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1089 goto out; 1090 break; 1091 case PAX_CHR: 1092 case PAX_BLK: 1093 if (arcn->type == PAX_CHR) 1094 hd->typeflag = CHRTYPE; 1095 else 1096 hd->typeflag = BLKTYPE; 1097 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1098 sizeof(hd->devmajor), 3) || 1099 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1100 sizeof(hd->devminor), 3) || 1101 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1102 goto out; 1103 break; 1104 case PAX_FIF: 1105 hd->typeflag = FIFOTYPE; 1106 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1107 goto out; 1108 break; 1109 case PAX_GLL: 1110 case PAX_SLK: 1111 case PAX_HLK: 1112 case PAX_HRG: 1113 if (arcn->type == PAX_SLK) 1114 hd->typeflag = SYMTYPE; 1115 else if (arcn->type == PAX_GLL) 1116 hd->typeflag = LONGLINKTYPE; 1117 else 1118 
hd->typeflag = LNKTYPE; 1119 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1120 if (ul_oct((u_long)gnu_hack_len, hd->size, 1121 sizeof(hd->size), 3)) 1122 goto out; 1123 break; 1124 case PAX_GLF: 1125 case PAX_REG: 1126 case PAX_CTG: 1127 default: 1128 /* 1129 * file data with this type, set the padding 1130 */ 1131 if (arcn->type == PAX_GLF) { 1132 hd->typeflag = LONGNAMETYPE; 1133 arcn->pad = TAR_PAD(gnu_hack_len); 1134 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1135 sizeof(hd->size), 3)) { 1136 tty_warn(1,"File is too long for ustar %s", 1137 arcn->org_name); 1138 return(1); 1139 } 1140 } else { 1141 if (arcn->type == PAX_CTG) 1142 hd->typeflag = CONTTYPE; 1143 else 1144 hd->typeflag = REGTYPE; 1145 arcn->pad = TAR_PAD(arcn->sb.st_size); 1146 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1147 sizeof(hd->size), 3)) { 1148 tty_warn(1,"File is too long for ustar %s", 1149 arcn->org_name); 1150 return(1); 1151 } 1152 } 1153 break; 1154 } 1155 1156 strncpy(hd->magic, TMAGIC, TMAGLEN); 1157 if (is_gnutar) 1158 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1159 else 1160 strncpy(hd->version, TVERSION, TVERSLEN); 1161 1162 /* 1163 * set the remaining fields. Some versions want all 16 bits of mode 1164 * we better humor them (they really do not meet spec though).... 1165 */ 1166 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1167 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1168 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1169 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1170 goto out; 1171 user = user_from_uid(arcn->sb.st_uid, 1); 1172 group = group_from_gid(arcn->sb.st_gid, 1); 1173 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1174 strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); 1175 1176 /* 1177 * calculate and store the checksum write the header to the archive 1178 * return 0 tells the caller to now write the file data, 1 says no data 1179 * needs to be written 1180 */ 1181 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1182 sizeof(hd->chksum), 3)) 1183 goto out; 1184 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1185 return(-1); 1186 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1187 return(-1); 1188 if (gnu_hack_string) { 1189 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1190 int pad = gnu_hack_len; 1191 gnu_hack_string = NULL; 1192 gnu_hack_len = 0; 1193 if (res < 0) 1194 return(-1); 1195 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1196 return(-1); 1197 } 1198 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1199 return(0); 1200 return(1); 1201 1202 out: 1203 /* 1204 * header field is out of range 1205 */ 1206 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1207 return(1); 1208} 1209 1210/* 1211 * name_split() 1212 * see if the name has to be split for storage in a ustar header. We try 1213 * to fit the entire name in the name field without splitting if we can. 1214 * The split point is always at a / 1215 * Return 1216 * character pointer to split point (always the / that is to be removed 1217 * if the split is not needed, the points is set to the start of the file 1218 * name (it would violate the spec to split there). A NULL is returned if 1219 * the file name is too long 1220 */ 1221 1222static char * 1223name_split(char *name, int len) 1224{ 1225 char *start; 1226 1227 /* 1228 * check to see if the file name is small enough to fit in the name 1229 * field. if so just return a pointer to the name. 1230 */ 1231 if (len < TNMSZ) 1232 return(name); 1233 if (len > (TPFSZ + TNMSZ)) 1234 return(NULL); 1235 1236 /* 1237 * we start looking at the biggest sized piece that fits in the name 1238 * field. 
We walk forward looking for a slash to split at. The idea is 1239 * to find the biggest piece to fit in the name field (or the smallest 1240 * prefix we can find) (the -1 is correct the biggest piece would 1241 * include the slash between the two parts that gets thrown away) 1242 */ 1243 start = name + len - TNMSZ; 1244 while ((*start != '\0') && (*start != '/')) 1245 ++start; 1246 1247 /* 1248 * if we hit the end of the string, this name cannot be split, so we 1249 * cannot store this file. 1250 */ 1251 if (*start == '\0') 1252 return(NULL); 1253 len = start - name; 1254 1255 /* 1256 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1257 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1258 * the file would then expand on extract to //str. The len == 0 below 1259 * makes this special case follow the spec to the letter. 1260 */ 1261 if ((len >= TPFSZ) || (len == 0)) 1262 return(NULL); 1263 1264 /* 1265 * ok have a split point, return it to the caller 1266 */ 1267 return(start); 1268} 1269 1270/* 1271 * convert a glob into a RE, and add it to the list. 
/*
 * we convert to four different RE's (because we're using BRE's and can't
 * use | alternation :-() with this padding:
 *	.*\/ and $
 *	.*\/ and \/.*
 *	^ and $
 *	^ and \/.*
 *
 * tar_gnutar_exclude_one()
 *	line is the glob and len its length. line does not have to be NUL
 *	terminated and may end in a newline, which is dropped. Every RE is
 *	handed to rep_add() with an empty replacement.
 *	Empty globs are ignored (with nothing between the anchors the
 *	"^ and \/.*" form would match every absolute path), and so are
 *	globs longer than MAXPATHLEN, which do not fit in sbuf once every
 *	character has been escaped.
 * Return
 *	0 if the glob was added or ignored, -1 if rep_add() failed
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/* worst case every character is escaped: 2 * buffer len + nul */
	char sbuf[MAXPATHLEN * 2 + 1];
	/* sbuf + leading '/' + trailing "//" + ".*\/" + "\/.*" + nul */
	char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4];
	size_t i, j;

	/* len may be 0, so look before indexing line[len - 1] */
	if (len > 0 && line[len - 1] == '\n')
		len--;
	if (len == 0 || len > MAXPATHLEN)
		return (0);

	/*
	 * convert glob to regexp, escaping everything. sbuf holds only the
	 * converted glob; the anchors and the .*\/ padding are supplied by
	 * the formats below.
	 */
	j = 0;
	for (i = 0; i < len; i++) {
		/* <ctype.h> needs a value representable as unsigned char */
		unsigned char c = (unsigned char)line[i];

		if (c == '*')
			sbuf[j++] = '.';	/* glob * becomes .* */
		else if (c == '?') {
			sbuf[j++] = '.';	/* glob ? becomes . */
			continue;
		} else if (!isalnum(c) && !isblank(c))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}
	/* the formats below read sbuf with %s, so it must be terminated */
	sbuf[j] = '\0';

	/* don't need the .*\/ ones if we start with /, i guess */
	if (line[0] != '/') {
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
	}

	(void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);
	(void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);

	return (0);
}

/*
 * deal with GNU tar -X/--exclude-from & --exclude switches. basically,
 * we go through each line of the file, building a string from the "glob"
 * lines in the file into RE lines, of the form `/^RE$//', which we pass
 * to rep_add(), which will add an empty replacement (exclusion), for the
 * named files.
 */

/*
 * tar_gnutar_minus_minus_exclude()
 *	add the single glob in path (a NUL terminated string) to the
 *	exclusion list. A glob longer than MAXPATHLEN is reported and
 *	ignored instead of being converted.
 * Return
 *	0 if the glob was added or ignored, -1 if it could not be added
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (0);
	}

	return (tar_gnutar_exclude_one(path, len));
}

/*
 * tar_gnutar_X_compat()
 *	read the file named by path and add every line of it, taken as a
 *	glob, to the exclusion list. Lines longer than MAXPATHLEN are
 *	reported and skipped. The file is closed again on every path.
 * Return
 *	0 if the whole file was processed, -1 if the file could not be
 *	opened or read, or if a glob could not be added
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return (-1);
	}

	/* fgetln() lines are not NUL terminated; len counts the newline */
	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/* fgetln() returns NULL for a read error as well as for EOF */
	if (rval == 0 && ferror(fp)) {
		tty_warn(1, "cannot read %s: %s", path, strerror(errno));
		rval = -1;
	}

	(void)fclose(fp);
	return (rval);
}