/* tar.c revision 1.47.2.7 */
1/* $NetBSD: tar.c,v 1.47.2.7 2004/11/12 04:59:17 jmc Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.47.2.7 2004/11/12 04:59:17 jmc Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *, int); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96static int gnu_short_trailer; /* gnu short trailer */ 97 98static const char LONG_LINK[] = "././@LongLink"; 99 100static int 101check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 102{ 103 u_long hdck, blck; 104 105 hdck = asc_ul(hd, hdlen, OCT); 106 blck = tar_chksm(bl, bllen); 107 108 if (hdck != blck) { 109 if (!quiet) 110 tty_warn(0, "Header checksum %lo does not match %lo", 111 hdck, blck); 112 return(-1); 113 } 114 return(0); 115} 116 117 118/* 119 * tar_endwr() 120 * add the tar trailer of two null blocks 121 * Return: 122 * 0 if ok, -1 otherwise (what wr_skip returns) 123 */ 124 125int 126tar_endwr(void) 127{ 128 return(wr_skip((off_t)(NULLCNT * BLKMULT))); 129} 130 131/* 132 * tar_endrd() 133 * no cleanup needed here, just return size of trailer (for append) 134 * Return: 135 * size of trailer BLKMULT 136 */ 137 138off_t 139tar_endrd(void) 140{ 141 return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT)); 142} 143 144/* 145 * tar_trail() 146 * Called to determine if a header block is a valid trailer. We are passed 147 * the block, the in_sync flag (which tells us we are in resync mode; 148 * looking for a valid header), and cnt (which starts at zero) which is 149 * used to count the number of empty blocks we have seen so far. 150 * Return: 151 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 152 * could never contain a header. 153 */ 154 155int 156tar_trail(char *buf, int in_resync, int *cnt) 157{ 158 int i; 159 160 gnu_short_trailer = 0; 161 /* 162 * look for all zero, trailer is two consecutive blocks of zero 163 */ 164 for (i = 0; i < BLKMULT; ++i) { 165 if (buf[i] != '\0') 166 break; 167 } 168 169 /* 170 * if not all zero it is not a trailer, but MIGHT be a header. 
171 */ 172 if (i != BLKMULT) 173 return(-1); 174 175 /* 176 * When given a zero block, we must be careful! 177 * If we are not in resync mode, check for the trailer. Have to watch 178 * out that we do not mis-identify file data as the trailer, so we do 179 * NOT try to id a trailer during resync mode. During resync mode we 180 * might as well throw this block out since a valid header can NEVER be 181 * a block of all 0 (we must have a valid file name). 182 */ 183 if (!in_resync) { 184 ++*cnt; 185 /* 186 * old GNU tar (up through 1.13) only writes one block of 187 * trailers, so we pretend we got another 188 */ 189 if (is_gnutar) { 190 gnu_short_trailer = 1; 191 ++*cnt; 192 } 193 if (*cnt >= NULLCNT) 194 return(0); 195 } 196 return(1); 197} 198 199/* 200 * ul_oct() 201 * convert an unsigned long to an octal string. many oddball field 202 * termination characters are used by the various versions of tar in the 203 * different fields. term selects which kind to use. str is '0' padded 204 * at the front to len. we are unable to use only one format as many old 205 * tar readers are very cranky about this. 
206 * Return: 207 * 0 if the number fit into the string, -1 otherwise 208 */ 209 210static int 211ul_oct(u_long val, char *str, int len, int term) 212{ 213 char *pt; 214 215 /* 216 * term selects the appropriate character(s) for the end of the string 217 */ 218 pt = str + len - 1; 219 switch(term) { 220 case 3: 221 *pt-- = '\0'; 222 break; 223 case 2: 224 *pt-- = ' '; 225 *pt-- = '\0'; 226 break; 227 case 1: 228 *pt-- = ' '; 229 break; 230 case 0: 231 default: 232 *pt-- = '\0'; 233 *pt-- = ' '; 234 break; 235 } 236 237 /* 238 * convert and blank pad if there is space 239 */ 240 while (pt >= str) { 241 *pt-- = '0' + (char)(val & 0x7); 242 if ((val = val >> 3) == (u_long)0) 243 break; 244 } 245 246 while (pt >= str) 247 *pt-- = '0'; 248 if (val != (u_long)0) 249 return(-1); 250 return(0); 251} 252 253#if !defined(NET2_STAT) && !defined(_LP64) 254/* 255 * ull_oct() 256 * convert an unsigned long long to an octal string. one of many oddball 257 * field termination characters are used by the various versions of tar 258 * in the different fields. term selects which kind to use. str is '0' 259 * padded at the front to len. we are unable to use only one format as 260 * many old tar readers are very cranky about this. 
261 * Return: 262 * 0 if the number fit into the string, -1 otherwise 263 */ 264 265static int 266ull_oct(unsigned long long val, char *str, int len, int term) 267{ 268 char *pt; 269 270 /* 271 * term selects the appropriate character(s) for the end of the string 272 */ 273 pt = str + len - 1; 274 switch(term) { 275 case 3: 276 *pt-- = '\0'; 277 break; 278 case 2: 279 *pt-- = ' '; 280 *pt-- = '\0'; 281 break; 282 case 1: 283 *pt-- = ' '; 284 break; 285 case 0: 286 default: 287 *pt-- = '\0'; 288 *pt-- = ' '; 289 break; 290 } 291 292 /* 293 * convert and blank pad if there is space 294 */ 295 while (pt >= str) { 296 *pt-- = '0' + (char)(val & 0x7); 297 if ((val = val >> 3) == 0) 298 break; 299 } 300 301 while (pt >= str) 302 *pt-- = '0'; 303 if (val != (unsigned long long)0) 304 return(-1); 305 return(0); 306} 307#endif 308 309/* 310 * tar_chksm() 311 * calculate the checksum for a tar block counting the checksum field as 312 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 313 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 314 * pad headers with 0. 315 * Return: 316 * unsigned long checksum 317 */ 318 319static u_long 320tar_chksm(char *blk, int len) 321{ 322 char *stop; 323 char *pt; 324 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 325 326 /* 327 * add the part of the block before the checksum field 328 */ 329 pt = blk; 330 stop = blk + CHK_OFFSET; 331 while (pt < stop) 332 chksm += (u_long)(*pt++ & 0xff); 333 /* 334 * move past the checksum field and keep going, spec counts the 335 * checksum field as the sum of 8 blanks (which is pre-computed as 336 * BLNKSUM). 337 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 338 * starts, no point in summing zero's) 339 */ 340 pt += CHK_LEN; 341 stop = blk + len; 342 while (pt < stop) 343 chksm += (u_long)(*pt++ & 0xff); 344 return(chksm); 345} 346 347/* 348 * Routines for old BSD style tar (also made portable to sysV tar) 349 */ 350 351/* 352 * tar_id() 353 * determine if a block given to us is a valid tar header (and not a USTAR 354 * header). We have to be on the lookout for those pesky blocks of all 355 * zero's. 356 * Return: 357 * 0 if a tar header, -1 otherwise 358 */ 359 360int 361tar_id(char *blk, int size) 362{ 363 HD_TAR *hd; 364 HD_USTAR *uhd; 365 366 if (size < BLKMULT) 367 return(-1); 368 hd = (HD_TAR *)blk; 369 uhd = (HD_USTAR *)blk; 370 371 /* 372 * check for block of zero's first, a simple and fast test, then make 373 * sure this is not a ustar header by looking for the ustar magic 374 * cookie. We should use TMAGLEN, but some USTAR archive programs are 375 * wrong and create archives missing the \0. Last we check the 376 * checksum. If this is ok we have to assume it is a valid header. 
377 */ 378 if (hd->name[0] == '\0') 379 return(-1); 380 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 381 return(-1); 382 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 383} 384 385/* 386 * tar_opt() 387 * handle tar format specific -o options 388 * Return: 389 * 0 if ok -1 otherwise 390 */ 391 392int 393tar_opt(void) 394{ 395 OPLIST *opt; 396 397 while ((opt = opt_next()) != NULL) { 398 if (strcmp(opt->name, TAR_OPTION) || 399 strcmp(opt->value, TAR_NODIR)) { 400 tty_warn(1, 401 "Unknown tar format -o option/value pair %s=%s", 402 opt->name, opt->value); 403 tty_warn(1, 404 "%s=%s is the only supported tar format option", 405 TAR_OPTION, TAR_NODIR); 406 return(-1); 407 } 408 409 /* 410 * we only support one option, and only when writing 411 */ 412 if ((act != APPND) && (act != ARCHIVE)) { 413 tty_warn(1, "%s=%s is only supported when writing.", 414 opt->name, opt->value); 415 return(-1); 416 } 417 tar_nodir = 1; 418 } 419 return(0); 420} 421 422 423/* 424 * tar_rd() 425 * extract the values out of block already determined to be a tar header. 426 * store the values in the ARCHD parameter. 
427 * Return: 428 * 0 429 */ 430 431int 432tar_rd(ARCHD *arcn, char *buf) 433{ 434 HD_TAR *hd; 435 char *pt; 436 437 /* 438 * we only get proper sized buffers passed to us 439 */ 440 if (tar_id(buf, BLKMULT) < 0) 441 return(-1); 442 memset(arcn, 0, sizeof(*arcn)); 443 arcn->org_name = arcn->name; 444 arcn->pat = NULL; 445 arcn->sb.st_nlink = 1; 446 447 /* 448 * copy out the name and values in the stat buffer 449 */ 450 hd = (HD_TAR *)buf; 451 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 452 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 453 &gnu_name_string, hd->name, sizeof(hd->name)); 454 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 455 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 456 } 457 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 458 0xfff); 459 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 460 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 461 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 462 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 463 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 464 465 /* 466 * have to look at the last character, it may be a '/' and that is used 467 * to encode this as a directory 468 */ 469 pt = &(arcn->name[arcn->nlen - 1]); 470 arcn->pad = 0; 471 arcn->skip = 0; 472 switch(hd->linkflag) { 473 case SYMTYPE: 474 /* 475 * symbolic link, need to get the link name and set the type in 476 * the st_mode so -v printing will look correct. 477 */ 478 arcn->type = PAX_SLK; 479 arcn->sb.st_mode |= S_IFLNK; 480 break; 481 case LNKTYPE: 482 /* 483 * hard link, need to get the link name, set the type in the 484 * st_mode and st_nlink so -v printing will look better. 485 */ 486 arcn->type = PAX_HLK; 487 arcn->sb.st_nlink = 2; 488 489 /* 490 * no idea of what type this thing really points at, but 491 * we set something for printing only. 
492 */ 493 arcn->sb.st_mode |= S_IFREG; 494 break; 495 case LONGLINKTYPE: 496 case LONGNAMETYPE: 497 /* 498 * GNU long link/file; we tag these here and let the 499 * pax internals deal with it -- too ugly otherwise. 500 */ 501 if (hd->linkflag != LONGLINKTYPE) 502 arcn->type = PAX_GLF; 503 else 504 arcn->type = PAX_GLL; 505 arcn->pad = TAR_PAD(arcn->sb.st_size); 506 arcn->skip = arcn->sb.st_size; 507 break; 508 case AREGTYPE: 509 case REGTYPE: 510 case DIRTYPE: /* see below */ 511 default: 512 /* 513 * If we have a trailing / this is a directory and NOT a file. 514 * Note: V7 tar doesn't actually have DIRTYPE, but it was 515 * reported that V7 archives using USTAR directories do exist. 516 */ 517 if (*pt == '/' || hd->linkflag == DIRTYPE) { 518 /* 519 * it is a directory, set the mode for -v printing 520 */ 521 arcn->type = PAX_DIR; 522 arcn->sb.st_mode |= S_IFDIR; 523 arcn->sb.st_nlink = 2; 524 } else { 525 /* 526 * have a file that will be followed by data. Set the 527 * skip value to the size field and calculate the size 528 * of the padding. 529 */ 530 arcn->type = PAX_REG; 531 arcn->sb.st_mode |= S_IFREG; 532 arcn->pad = TAR_PAD(arcn->sb.st_size); 533 arcn->skip = arcn->sb.st_size; 534 } 535 break; 536 } 537 538 /* 539 * strip off any trailing slash. 540 */ 541 if (*pt == '/') { 542 *pt = '\0'; 543 --arcn->nlen; 544 } 545 return(0); 546} 547 548/* 549 * tar_wr() 550 * write a tar header for the file specified in the ARCHD to the archive. 551 * Have to check for file types that cannot be stored and file names that 552 * are too long. Be careful of the term (last arg) to ul_oct, each field 553 * of tar has it own spec for the termination character(s). 
554 * ASSUMED: space after header in header block is zero filled 555 * Return: 556 * 0 if file has data to be written after the header, 1 if file has NO 557 * data to write after the header, -1 if archive write failed 558 */ 559 560int 561tar_wr(ARCHD *arcn) 562{ 563 HD_TAR *hd; 564 int len; 565 char hdblk[sizeof(HD_TAR)]; 566 567 /* 568 * check for those file system types which tar cannot store 569 */ 570 switch(arcn->type) { 571 case PAX_DIR: 572 /* 573 * user asked that dirs not be written to the archive 574 */ 575 if (tar_nodir) 576 return(1); 577 break; 578 case PAX_CHR: 579 tty_warn(1, "Tar cannot archive a character device %s", 580 arcn->org_name); 581 return(1); 582 case PAX_BLK: 583 tty_warn(1, 584 "Tar cannot archive a block device %s", arcn->org_name); 585 return(1); 586 case PAX_SCK: 587 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 588 return(1); 589 case PAX_FIF: 590 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 591 return(1); 592 case PAX_SLK: 593 case PAX_HLK: 594 case PAX_HRG: 595 if (arcn->ln_nlen > sizeof(hd->linkname)) { 596 tty_warn(1,"Link name too long for tar %s", 597 arcn->ln_name); 598 return(1); 599 } 600 break; 601 case PAX_REG: 602 case PAX_CTG: 603 default: 604 break; 605 } 606 607 /* 608 * check file name len, remember extra char for dirs (the / at the end) 609 */ 610 len = arcn->nlen; 611 if (arcn->type == PAX_DIR) 612 ++len; 613 if (len >= sizeof(hd->name)) { 614 tty_warn(1, "File name too long for tar %s", arcn->name); 615 return(1); 616 } 617 618 /* 619 * copy the data out of the ARCHD into the tar header based on the type 620 * of the file. Remember many tar readers want the unused fields to be 621 * padded with zero. 
We set the linkflag field (type), the linkname 622 * (or zero if not used),the size, and set the padding (if any) to be 623 * added after the file data (0 for all other types, as they only have 624 * a header) 625 */ 626 memset(hdblk, 0, sizeof(hdblk)); 627 hd = (HD_TAR *)hdblk; 628 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 629 arcn->pad = 0; 630 631 if (arcn->type == PAX_DIR) { 632 /* 633 * directories are the same as files, except have a filename 634 * that ends with a /, we add the slash here. No data follows, 635 * dirs, so no pad. 636 */ 637 hd->linkflag = AREGTYPE; 638 hd->name[len-1] = '/'; 639 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 640 goto out; 641 } else if (arcn->type == PAX_SLK) { 642 /* 643 * no data follows this file, so no pad 644 */ 645 hd->linkflag = SYMTYPE; 646 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 647 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 648 goto out; 649 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 650 /* 651 * no data follows this file, so no pad 652 */ 653 hd->linkflag = LNKTYPE; 654 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 655 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 656 goto out; 657 } else { 658 /* 659 * data follows this file, so set the pad 660 */ 661 hd->linkflag = AREGTYPE; 662 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 663 tty_warn(1,"File is too large for tar %s", 664 arcn->org_name); 665 return(1); 666 } 667 arcn->pad = TAR_PAD(arcn->sb.st_size); 668 } 669 670 /* 671 * copy those fields that are independent of the type 672 */ 673 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 674 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 675 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 676 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 677 goto out; 678 679 /* 680 * calculate and add the checksum, then write the header. 
A return of 681 * 0 tells the caller to now write the file data, 1 says no data needs 682 * to be written 683 */ 684 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 685 sizeof(hd->chksum), 3)) 686 goto out; /* XXX Something's wrong here 687 * because a zero-byte file can 688 * cause this to be done and 689 * yet the resulting warning 690 * seems incorrect */ 691 692 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 693 return(-1); 694 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 695 return(-1); 696 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 697 return(0); 698 return(1); 699 700 out: 701 /* 702 * header field is out of range 703 */ 704 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 705 return(1); 706} 707 708/* 709 * Routines for POSIX ustar 710 */ 711 712/* 713 * ustar_strd() 714 * initialization for ustar read 715 * Return: 716 * 0 if ok, -1 otherwise 717 */ 718 719int 720ustar_strd(void) 721{ 722 return(0); 723} 724 725/* 726 * ustar_stwr() 727 * initialization for ustar write 728 * Return: 729 * 0 if ok, -1 otherwise 730 */ 731 732int 733ustar_stwr(void) 734{ 735 return(0); 736} 737 738/* 739 * ustar_id() 740 * determine if a block given to us is a valid ustar header. We have to 741 * be on the lookout for those pesky blocks of all zero's 742 * Return: 743 * 0 if a ustar header, -1 otherwise 744 */ 745 746int 747ustar_id(char *blk, int size) 748{ 749 HD_USTAR *hd; 750 751 if (size < BLKMULT) 752 return(-1); 753 hd = (HD_USTAR *)blk; 754 755 /* 756 * check for block of zero's first, a simple and fast test then check 757 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 758 * programs are fouled up and create archives missing the \0. Last we 759 * check the checksum. If ok we have to assume it is a valid header. 
760 */ 761 if (hd->name[0] == '\0') 762 return(-1); 763 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 764 return(-1); 765 /* This is GNU tar */ 766 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 767 !seen_gnu_warning) { 768 seen_gnu_warning = 1; 769 tty_warn(0, 770 "Trying to read GNU tar archive with extensions off"); 771 } 772 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 773} 774 775/* 776 * ustar_rd() 777 * extract the values out of block already determined to be a ustar header. 778 * store the values in the ARCHD parameter. 779 * Return: 780 * 0 781 */ 782 783int 784ustar_rd(ARCHD *arcn, char *buf) 785{ 786 HD_USTAR *hd; 787 char *dest; 788 int cnt; 789 dev_t devmajor; 790 dev_t devminor; 791 792 /* 793 * we only get proper sized buffers 794 */ 795 if (ustar_id(buf, BLKMULT) < 0) 796 return(-1); 797 798 memset(arcn, 0, sizeof(*arcn)); 799 arcn->org_name = arcn->name; 800 arcn->pat = NULL; 801 arcn->sb.st_nlink = 1; 802 hd = (HD_USTAR *)buf; 803 804 /* 805 * see if the filename is split into two parts. if, so joint the parts. 806 * we copy the prefix first and add a / between the prefix and name. 807 */ 808 dest = arcn->name; 809 if (*(hd->prefix) != '\0') { 810 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 811 dest += cnt; 812 *dest++ = '/'; 813 cnt++; 814 } else { 815 cnt = 0; 816 } 817 818 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 819 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 820 &gnu_name_string, hd->name, sizeof(hd->name)) + cnt; 821 arcn->ln_nlen = expandname(arcn->ln_name, 822 sizeof(arcn->ln_name), &gnu_link_string, hd->linkname, 823 sizeof(hd->linkname)); 824 } 825 826 /* 827 * follow the spec to the letter. we should only have mode bits, strip 828 * off all other crud we may be passed. 
829 */ 830 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 831 0xfff); 832 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 833 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 834 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 835 836 /* 837 * If we can find the ascii names for gname and uname in the password 838 * and group files we will use the uid's and gid they bind. Otherwise 839 * we use the uid and gid values stored in the header. (This is what 840 * the posix spec wants). 841 */ 842 hd->gname[sizeof(hd->gname) - 1] = '\0'; 843 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 844 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 845 hd->uname[sizeof(hd->uname) - 1] = '\0'; 846 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 847 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 848 849 /* 850 * set the defaults, these may be changed depending on the file type 851 */ 852 arcn->pad = 0; 853 arcn->skip = 0; 854 arcn->sb.st_rdev = (dev_t)0; 855 856 /* 857 * set the mode and PAX type according to the typeflag in the header 858 */ 859 switch(hd->typeflag) { 860 case FIFOTYPE: 861 arcn->type = PAX_FIF; 862 arcn->sb.st_mode |= S_IFIFO; 863 break; 864 case DIRTYPE: 865 arcn->type = PAX_DIR; 866 arcn->sb.st_mode |= S_IFDIR; 867 arcn->sb.st_nlink = 2; 868 869 /* 870 * Some programs that create ustar archives append a '/' 871 * to the pathname for directories. This clearly violates 872 * ustar specs, but we will silently strip it off anyway. 873 */ 874 if (arcn->name[arcn->nlen - 1] == '/') 875 arcn->name[--arcn->nlen] = '\0'; 876 break; 877 case BLKTYPE: 878 case CHRTYPE: 879 /* 880 * this type requires the rdev field to be set. 
881 */ 882 if (hd->typeflag == BLKTYPE) { 883 arcn->type = PAX_BLK; 884 arcn->sb.st_mode |= S_IFBLK; 885 } else { 886 arcn->type = PAX_CHR; 887 arcn->sb.st_mode |= S_IFCHR; 888 } 889 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 890 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 891 arcn->sb.st_rdev = TODEV(devmajor, devminor); 892 break; 893 case SYMTYPE: 894 case LNKTYPE: 895 if (hd->typeflag == SYMTYPE) { 896 arcn->type = PAX_SLK; 897 arcn->sb.st_mode |= S_IFLNK; 898 } else { 899 arcn->type = PAX_HLK; 900 /* 901 * so printing looks better 902 */ 903 arcn->sb.st_mode |= S_IFREG; 904 arcn->sb.st_nlink = 2; 905 } 906 break; 907 case LONGLINKTYPE: 908 case LONGNAMETYPE: 909 if (is_gnutar) { 910 /* 911 * GNU long link/file; we tag these here and let the 912 * pax internals deal with it -- too ugly otherwise. 913 */ 914 if (hd->typeflag != LONGLINKTYPE) 915 arcn->type = PAX_GLF; 916 else 917 arcn->type = PAX_GLL; 918 arcn->pad = TAR_PAD(arcn->sb.st_size); 919 arcn->skip = arcn->sb.st_size; 920 } else { 921 tty_warn(1, "GNU Long %s found in posix ustar archive.", 922 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 923 } 924 break; 925 case CONTTYPE: 926 case AREGTYPE: 927 case REGTYPE: 928 default: 929 /* 930 * these types have file data that follows. Set the skip and 931 * pad fields. 
932 */ 933 arcn->type = PAX_REG; 934 arcn->pad = TAR_PAD(arcn->sb.st_size); 935 arcn->skip = arcn->sb.st_size; 936 arcn->sb.st_mode |= S_IFREG; 937 break; 938 } 939 return(0); 940} 941 942static int 943expandname(char *buf, size_t len, char **gnu_name, const char *name, 944 size_t nlen) 945{ 946 if (*gnu_name) { 947 len = strlcpy(buf, *gnu_name, len); 948 free(*gnu_name); 949 *gnu_name = NULL; 950 } else { 951 if (len > ++nlen) 952 len = nlen; 953 len = strlcpy(buf, name, len); 954 } 955 return len; 956} 957 958static void 959longlink(ARCHD *arcn, int type) 960{ 961 ARCHD larc; 962 963 (void)memset(&larc, 0, sizeof(larc)); 964 965 larc.type = type; 966 larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name)); 967 968 switch (type) { 969 case PAX_GLL: 970 gnu_hack_string = arcn->ln_name; 971 gnu_hack_len = arcn->ln_nlen + 1; 972 break; 973 case PAX_GLF: 974 gnu_hack_string = arcn->name; 975 gnu_hack_len = arcn->nlen + 1; 976 break; 977 default: 978 errx(1, "Invalid type in GNU longlink %d\n", type); 979 } 980 981 /* 982 * We need a longlink now. 983 */ 984 ustar_wr(&larc); 985} 986 987/* 988 * ustar_wr() 989 * write a ustar header for the file specified in the ARCHD to the archive 990 * Have to check for file types that cannot be stored and file names that 991 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 992 * '\0' for the termination character (this is different than picky tar) 993 * ASSUMED: space after header in header block is zero filled 994 * Return: 995 * 0 if file has data to be written after the header, 1 if file has NO 996 * data to write after the header, -1 if archive write failed 997 */ 998 999int 1000ustar_wr(ARCHD *arcn) 1001{ 1002 HD_USTAR *hd; 1003 char *pt; 1004 char hdblk[sizeof(HD_USTAR)]; 1005 const char *user, *group; 1006 1007 switch (arcn->type) { 1008 case PAX_SCK: 1009 /* 1010 * check for those file system types ustar cannot store 1011 */ 1012 if (!is_gnutar) 1013 tty_warn(1, "Ustar cannot archive a socket %s", 1014 arcn->org_name); 1015 return(1); 1016 1017 case PAX_SLK: 1018 case PAX_HLK: 1019 case PAX_HRG: 1020 /* 1021 * check the length of the linkname 1022 */ 1023 if (arcn->ln_nlen >= sizeof(hd->linkname)) { 1024 if (is_gnutar) { 1025 longlink(arcn, PAX_GLL); 1026 } else { 1027 tty_warn(1, "Link name too long for ustar %s", 1028 arcn->ln_name); 1029 return(1); 1030 } 1031 } 1032 break; 1033 default: 1034 break; 1035 } 1036 1037 /* 1038 * split the path name into prefix and name fields (if needed). 
if 1039 * pt != arcn->name, the name has to be split 1040 */ 1041 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1042 if (is_gnutar) { 1043 longlink(arcn, PAX_GLF); 1044 pt = arcn->name; 1045 } else { 1046 tty_warn(1, "File name too long for ustar %s", 1047 arcn->name); 1048 return(1); 1049 } 1050 } 1051 1052 /* 1053 * zero out the header so we don't have to worry about zero fill below 1054 */ 1055 memset(hdblk, 0, sizeof(hdblk)); 1056 hd = (HD_USTAR *)hdblk; 1057 arcn->pad = 0L; 1058 1059 /* 1060 * split the name, or zero out the prefix 1061 */ 1062 if (pt != arcn->name) { 1063 /* 1064 * name was split, pt points at the / where the split is to 1065 * occur, we remove the / and copy the first part to the prefix 1066 */ 1067 *pt = '\0'; 1068 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1069 *pt++ = '/'; 1070 } 1071 1072 /* 1073 * copy the name part. this may be the whole path or the part after 1074 * the prefix 1075 */ 1076 strlcpy(hd->name, pt, sizeof(hd->name)); 1077 1078 /* 1079 * set the fields in the header that are type dependent 1080 */ 1081 switch(arcn->type) { 1082 case PAX_DIR: 1083 hd->typeflag = DIRTYPE; 1084 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1085 goto out; 1086 break; 1087 case PAX_CHR: 1088 case PAX_BLK: 1089 if (arcn->type == PAX_CHR) 1090 hd->typeflag = CHRTYPE; 1091 else 1092 hd->typeflag = BLKTYPE; 1093 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1094 sizeof(hd->devmajor), 3) || 1095 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1096 sizeof(hd->devminor), 3) || 1097 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1098 goto out; 1099 break; 1100 case PAX_FIF: 1101 hd->typeflag = FIFOTYPE; 1102 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1103 goto out; 1104 break; 1105 case PAX_GLL: 1106 case PAX_SLK: 1107 case PAX_HLK: 1108 case PAX_HRG: 1109 if (arcn->type == PAX_SLK) 1110 hd->typeflag = SYMTYPE; 1111 else if (arcn->type == PAX_GLL) 1112 hd->typeflag = LONGLINKTYPE; 1113 
else 1114 hd->typeflag = LNKTYPE; 1115 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1116 if (ul_oct((u_long)gnu_hack_len, hd->size, 1117 sizeof(hd->size), 3)) 1118 goto out; 1119 break; 1120 case PAX_GLF: 1121 case PAX_REG: 1122 case PAX_CTG: 1123 default: 1124 /* 1125 * file data with this type, set the padding 1126 */ 1127 if (arcn->type == PAX_GLF) { 1128 hd->typeflag = LONGNAMETYPE; 1129 arcn->pad = TAR_PAD(gnu_hack_len); 1130 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1131 sizeof(hd->size), 3)) { 1132 tty_warn(1,"File is too long for ustar %s", 1133 arcn->org_name); 1134 return(1); 1135 } 1136 } else { 1137 if (arcn->type == PAX_CTG) 1138 hd->typeflag = CONTTYPE; 1139 else 1140 hd->typeflag = REGTYPE; 1141 arcn->pad = TAR_PAD(arcn->sb.st_size); 1142 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1143 sizeof(hd->size), 3)) { 1144 tty_warn(1,"File is too long for ustar %s", 1145 arcn->org_name); 1146 return(1); 1147 } 1148 } 1149 break; 1150 } 1151 1152 strncpy(hd->magic, TMAGIC, TMAGLEN); 1153 if (is_gnutar) 1154 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1155 else 1156 strncpy(hd->version, TVERSION, TVERSLEN); 1157 1158 /* 1159 * set the remaining fields. Some versions want all 16 bits of mode 1160 * we better humor them (they really do not meet spec though).... 1161 */ 1162 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1163 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1164 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1165 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1166 goto out; 1167 user = user_from_uid(arcn->sb.st_uid, 1); 1168 group = group_from_gid(arcn->sb.st_gid, 1); 1169 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1170 strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); 1171 1172 /* 1173 * calculate and store the checksum write the header to the archive 1174 * return 0 tells the caller to now write the file data, 1 says no data 1175 * needs to be written 1176 */ 1177 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1178 sizeof(hd->chksum), 3)) 1179 goto out; 1180 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1181 return(-1); 1182 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1183 return(-1); 1184 if (gnu_hack_string) { 1185 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1186 int pad = gnu_hack_len; 1187 gnu_hack_string = NULL; 1188 gnu_hack_len = 0; 1189 if (res < 0) 1190 return(-1); 1191 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1192 return(-1); 1193 } 1194 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1195 return(0); 1196 return(1); 1197 1198 out: 1199 /* 1200 * header field is out of range 1201 */ 1202 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1203 return(1); 1204} 1205 1206/* 1207 * name_split() 1208 * see if the name has to be split for storage in a ustar header. We try 1209 * to fit the entire name in the name field without splitting if we can. 1210 * The split point is always at a / 1211 * Return 1212 * character pointer to split point (always the / that is to be removed 1213 * if the split is not needed, the points is set to the start of the file 1214 * name (it would violate the spec to split there). A NULL is returned if 1215 * the file name is too long 1216 */ 1217 1218static char * 1219name_split(char *name, int len) 1220{ 1221 char *start; 1222 1223 /* 1224 * check to see if the file name is small enough to fit in the name 1225 * field. if so just return a pointer to the name. 1226 */ 1227 if (len < TNMSZ) 1228 return(name); 1229 /* 1230 * GNU tar does not honor the prefix+name mode if the magic 1231 * is not "ustar\0". 
So in GNU tar compatibility mode, we don't 1232 * split the filename into prefix+name because we are setting 1233 * the magic to "ustar " as GNU tar does. This of course will 1234 * end up creating a LongLink record in cases where it does not 1235 * really need do, but we are behaving like GNU tar after all. 1236 */ 1237 if (is_gnutar || len > (TPFSZ + TNMSZ)) 1238 return(NULL); 1239 1240 /* 1241 * we start looking at the biggest sized piece that fits in the name 1242 * field. We walk forward looking for a slash to split at. The idea is 1243 * to find the biggest piece to fit in the name field (or the smallest 1244 * prefix we can find) (the -1 is correct the biggest piece would 1245 * include the slash between the two parts that gets thrown away) 1246 */ 1247 start = name + len - TNMSZ; 1248 while ((*start != '\0') && (*start != '/')) 1249 ++start; 1250 1251 /* 1252 * if we hit the end of the string, this name cannot be split, so we 1253 * cannot store this file. 1254 */ 1255 if (*start == '\0') 1256 return(NULL); 1257 len = start - name; 1258 1259 /* 1260 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1261 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1262 * the file would then expand on extract to //str. The len == 0 below 1263 * makes this special case follow the spec to the letter. 1264 */ 1265 if ((len >= TPFSZ) || (len == 0)) 1266 return(NULL); 1267 1268 /* 1269 * ok have a split point, return it to the caller 1270 */ 1271 return(start); 1272} 1273 1274/* 1275 * convert a glob into a RE, and add it to the list. 
we convert to 1276 * four different RE's (because we're using BRE's and can't use | 1277 * alternation :-() with this padding: 1278 * .*\/ and $ 1279 * .*\/ and \/.* 1280 * ^ and $ 1281 * ^ and \/.* 1282 */ 1283static int 1284tar_gnutar_exclude_one(const char *line, size_t len) 1285{ 1286 /* 2 * buffer len + nul */ 1287 char sbuf[MAXPATHLEN * 2 + 1]; 1288 /* + / + // + .*""/\/ + \/.* */ 1289 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1290 int i, j; 1291 1292 if (line[len - 1] == '\n') 1293 len--; 1294 strncpy(sbuf, ".*" "\\/", j = 4); 1295 for (i = 0; i < len; i++) { 1296 /* 1297 * convert glob to regexp, escaping everything 1298 */ 1299 if (line[i] == '*') 1300 sbuf[j++] = '.'; 1301 else if (line[i] == '?') { 1302 sbuf[j++] = '.'; 1303 continue; 1304 } else if (!isalnum((unsigned char)line[i]) && 1305 !isblank((unsigned char)line[i])) 1306 sbuf[j++] = '\\'; 1307 sbuf[j++] = line[i]; 1308 } 1309 sbuf[j] = '\0'; 1310 /* don't need the .*\/ ones if we start with /, i guess */ 1311 if (line[0] != '/') { 1312 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1313 if (rep_add(rabuf) < 0) 1314 return (-1); 1315 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1316 if (rep_add(rabuf) < 0) 1317 return (-1); 1318 } 1319 1320 (void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1321 if (rep_add(rabuf) < 0) 1322 return (-1); 1323 (void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1324 if (rep_add(rabuf) < 0) 1325 return (-1); 1326 1327 return (0); 1328} 1329 1330/* 1331 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1332 * we go through each line of the file, building a string from the "glob" 1333 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1334 * to rep_add(), which will add a empty replacement (exclusion), for the 1335 * named files. 
1336 */ 1337int 1338tar_gnutar_minus_minus_exclude(path) 1339 const char *path; 1340{ 1341 size_t len = strlen(path); 1342 1343 if (len > MAXPATHLEN) 1344 tty_warn(0, "pathname too long: %s", path); 1345 1346 return (tar_gnutar_exclude_one(path, len)); 1347} 1348 1349int 1350tar_gnutar_X_compat(path) 1351 const char *path; 1352{ 1353 char *line; 1354 FILE *fp; 1355 int lineno = 0; 1356 size_t len; 1357 1358 fp = fopen(path, "r"); 1359 if (fp == NULL) { 1360 tty_warn(1, "cannot open %s: %s", path, 1361 strerror(errno)); 1362 return(-1); 1363 } 1364 1365 while ((line = fgetln(fp, &len))) { 1366 lineno++; 1367 if (len > MAXPATHLEN) { 1368 tty_warn(0, "pathname too long, line %d of %s", 1369 lineno, path); 1370 } 1371 if (tar_gnutar_exclude_one(line, len)) 1372 return (-1); 1373 } 1374 return (0); 1375} 1376