tar.c revision 1.50
1/* $NetBSD: tar.c,v 1.50 2004/04/16 22:45:56 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.50 2004/04/16 22:45:56 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? */ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96 97static int 98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 99{ 100 u_long hdck, blck; 101 102 hdck = asc_ul(hd, hdlen, OCT); 103 blck = tar_chksm(bl, bllen); 104 105 if (hdck != blck) { 106 if (!quiet) 107 tty_warn(0, "Header checksum %lo does not match %lo", 108 hdck, blck); 109 return(-1); 110 } 111 return(0); 112} 113 114 115/* 116 * tar_endwr() 117 * add the tar trailer of two null blocks 118 * Return: 119 * 0 if ok, -1 otherwise (what wr_skip returns) 120 */ 121 122int 123tar_endwr(void) 124{ 125 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 126} 127 128/* 129 * tar_endrd() 130 * no cleanup needed here, just return size of trailer (for append) 131 * Return: 132 * size of trailer BLKMULT 133 */ 134 135off_t 136tar_endrd(void) 137{ 138 return((off_t)(NULLCNT*BLKMULT)); 139} 140 141/* 142 * tar_trail() 143 * Called to determine if a header block is a valid trailer. We are passed 144 * the block, the in_sync flag (which tells us we are in resync mode; 145 * looking for a valid header), and cnt (which starts at zero) which is 146 * used to count the number of empty blocks we have seen so far. 147 * Return: 148 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 149 * could never contain a header. 150 */ 151 152int 153tar_trail(char *buf, int in_resync, int *cnt) 154{ 155 int i; 156 157 /* 158 * look for all zero, trailer is two consecutive blocks of zero 159 */ 160 for (i = 0; i < BLKMULT; ++i) { 161 if (buf[i] != '\0') 162 break; 163 } 164 165 /* 166 * if not all zero it is not a trailer, but MIGHT be a header. 167 */ 168 if (i != BLKMULT) 169 return(-1); 170 171 /* 172 * When given a zero block, we must be careful! 173 * If we are not in resync mode, check for the trailer. Have to watch 174 * out that we do not mis-identify file data as the trailer, so we do 175 * NOT try to id a trailer during resync mode. During resync mode we 176 * might as well throw this block out since a valid header can NEVER be 177 * a block of all 0 (we must have a valid file name). 178 */ 179 if (!in_resync) { 180 ++*cnt; 181#if 0 182 /* 183 * old GNU tar (up through 1.13) only writes one block of 184 * trailers, so we pretend we got another 185 */ 186 if (is_gnutar) 187 ++*cnt; 188#endif 189 if (*cnt >= NULLCNT) 190 return(0); 191 } 192 return(1); 193} 194 195/* 196 * ul_oct() 197 * convert an unsigned long to an octal string. many oddball field 198 * termination characters are used by the various versions of tar in the 199 * different fields. term selects which kind to use. str is '0' padded 200 * at the front to len. we are unable to use only one format as many old 201 * tar readers are very cranky about this. 202 * Return: 203 * 0 if the number fit into the string, -1 otherwise 204 */ 205 206static int 207ul_oct(u_long val, char *str, int len, int term) 208{ 209 char *pt; 210 211 /* 212 * term selects the appropriate character(s) for the end of the string 213 */ 214 pt = str + len - 1; 215 switch(term) { 216 case 3: 217 *pt-- = '\0'; 218 break; 219 case 2: 220 *pt-- = ' '; 221 *pt-- = '\0'; 222 break; 223 case 1: 224 *pt-- = ' '; 225 break; 226 case 0: 227 default: 228 *pt-- = '\0'; 229 *pt-- = ' '; 230 break; 231 } 232 233 /* 234 * convert and blank pad if there is space 235 */ 236 while (pt >= str) { 237 *pt-- = '0' + (char)(val & 0x7); 238 if ((val = val >> 3) == (u_long)0) 239 break; 240 } 241 242 while (pt >= str) 243 *pt-- = '0'; 244 if (val != (u_long)0) 245 return(-1); 246 return(0); 247} 248 249#if !defined(NET2_STAT) && !defined(_LP64) 250/* 251 * ull_oct() 252 * convert an unsigned long long to an octal string. one of many oddball 253 * field termination characters are used by the various versions of tar 254 * in the different fields. term selects which kind to use. str is '0' 255 * padded at the front to len. we are unable to use only one format as 256 * many old tar readers are very cranky about this. 257 * Return: 258 * 0 if the number fit into the string, -1 otherwise 259 */ 260 261static int 262ull_oct(unsigned long long val, char *str, int len, int term) 263{ 264 char *pt; 265 266 /* 267 * term selects the appropriate character(s) for the end of the string 268 */ 269 pt = str + len - 1; 270 switch(term) { 271 case 3: 272 *pt-- = '\0'; 273 break; 274 case 2: 275 *pt-- = ' '; 276 *pt-- = '\0'; 277 break; 278 case 1: 279 *pt-- = ' '; 280 break; 281 case 0: 282 default: 283 *pt-- = '\0'; 284 *pt-- = ' '; 285 break; 286 } 287 288 /* 289 * convert and blank pad if there is space 290 */ 291 while (pt >= str) { 292 *pt-- = '0' + (char)(val & 0x7); 293 if ((val = val >> 3) == 0) 294 break; 295 } 296 297 while (pt >= str) 298 *pt-- = '0'; 299 if (val != (unsigned long long)0) 300 return(-1); 301 return(0); 302} 303#endif 304 305/* 306 * tar_chksm() 307 * calculate the checksum for a tar block counting the checksum field as 308 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 309 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 310 * pad headers with 0. 311 * Return: 312 * unsigned long checksum 313 */ 314 315static u_long 316tar_chksm(char *blk, int len) 317{ 318 char *stop; 319 char *pt; 320 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 321 322 /* 323 * add the part of the block before the checksum field 324 */ 325 pt = blk; 326 stop = blk + CHK_OFFSET; 327 while (pt < stop) 328 chksm += (u_long)(*pt++ & 0xff); 329 /* 330 * move past the checksum field and keep going, spec counts the 331 * checksum field as the sum of 8 blanks (which is pre-computed as 332 * BLNKSUM). 333 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding 334 * starts, no point in summing zero's) 335 */ 336 pt += CHK_LEN; 337 stop = blk + len; 338 while (pt < stop) 339 chksm += (u_long)(*pt++ & 0xff); 340 return(chksm); 341} 342 343/* 344 * Routines for old BSD style tar (also made portable to sysV tar) 345 */ 346 347/* 348 * tar_id() 349 * determine if a block given to us is a valid tar header (and not a USTAR 350 * header). We have to be on the lookout for those pesky blocks of all 351 * zero's. 352 * Return: 353 * 0 if a tar header, -1 otherwise 354 */ 355 356int 357tar_id(char *blk, int size) 358{ 359 HD_TAR *hd; 360 HD_USTAR *uhd; 361 362 if (size < BLKMULT) 363 return(-1); 364 hd = (HD_TAR *)blk; 365 uhd = (HD_USTAR *)blk; 366 367 /* 368 * check for block of zero's first, a simple and fast test, then make 369 * sure this is not a ustar header by looking for the ustar magic 370 * cookie. We should use TMAGLEN, but some USTAR archive programs are 371 * wrong and create archives missing the \0. Last we check the 372 * checksum. If this is ok we have to assume it is a valid header. 373 */ 374 if (hd->name[0] == '\0') 375 return(-1); 376 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 377 return(-1); 378 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 379} 380 381/* 382 * tar_opt() 383 * handle tar format specific -o options 384 * Return: 385 * 0 if ok -1 otherwise 386 */ 387 388int 389tar_opt(void) 390{ 391 OPLIST *opt; 392 393 while ((opt = opt_next()) != NULL) { 394 if (strcmp(opt->name, TAR_OPTION) || 395 strcmp(opt->value, TAR_NODIR)) { 396 tty_warn(1, 397 "Unknown tar format -o option/value pair %s=%s", 398 opt->name, opt->value); 399 tty_warn(1, 400 "%s=%s is the only supported tar format option", 401 TAR_OPTION, TAR_NODIR); 402 return(-1); 403 } 404 405 /* 406 * we only support one option, and only when writing 407 */ 408 if ((act != APPND) && (act != ARCHIVE)) { 409 tty_warn(1, "%s=%s is only supported when writing.", 410 opt->name, opt->value); 411 return(-1); 412 } 413 tar_nodir = 1; 414 } 415 return(0); 416} 417 418 419/* 420 * tar_rd() 421 * extract the values out of block already determined to be a tar header. 422 * store the values in the ARCHD parameter. 423 * Return: 424 * 0 425 */ 426 427int 428tar_rd(ARCHD *arcn, char *buf) 429{ 430 HD_TAR *hd; 431 char *pt; 432 433 /* 434 * we only get proper sized buffers passed to us 435 */ 436 if (tar_id(buf, BLKMULT) < 0) 437 return(-1); 438 memset(arcn, 0, sizeof(*arcn)); 439 arcn->org_name = arcn->name; 440 arcn->pat = NULL; 441 arcn->sb.st_nlink = 1; 442 443 /* 444 * copy out the name and values in the stat buffer 445 */ 446 hd = (HD_TAR *)buf; 447 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 448 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 449 &gnu_name_string, hd->name, sizeof(hd->name)); 450 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 451 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 452 } 453 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 454 0xfff); 455 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 456 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 457 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 458 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 459 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 460 461 /* 462 * have to look at the last character, it may be a '/' and that is used 463 * to encode this as a directory 464 */ 465 pt = &(arcn->name[arcn->nlen - 1]); 466 arcn->pad = 0; 467 arcn->skip = 0; 468 switch(hd->linkflag) { 469 case SYMTYPE: 470 /* 471 * symbolic link, need to get the link name and set the type in 472 * the st_mode so -v printing will look correct. 473 */ 474 arcn->type = PAX_SLK; 475 arcn->sb.st_mode |= S_IFLNK; 476 break; 477 case LNKTYPE: 478 /* 479 * hard link, need to get the link name, set the type in the 480 * st_mode and st_nlink so -v printing will look better. 481 */ 482 arcn->type = PAX_HLK; 483 arcn->sb.st_nlink = 2; 484 485 /* 486 * no idea of what type this thing really points at, but 487 * we set something for printing only. 488 */ 489 arcn->sb.st_mode |= S_IFREG; 490 break; 491 case LONGLINKTYPE: 492 arcn->type = PAX_GLL; 493 /* FALLTHROUGH */ 494 case LONGNAMETYPE: 495 /* 496 * GNU long link/file; we tag these here and let the 497 * pax internals deal with it -- too ugly otherwise. 498 */ 499 if (hd->linkflag != LONGLINKTYPE) 500 arcn->type = PAX_GLF; 501 arcn->pad = TAR_PAD(arcn->sb.st_size); 502 arcn->skip = arcn->sb.st_size; 503 break; 504 case AREGTYPE: 505 case REGTYPE: 506 case DIRTYPE: /* see below */ 507 default: 508 /* 509 * If we have a trailing / this is a directory and NOT a file. 510 * Note: V7 tar doesn't actually have DIRTYPE, but it was 511 * reported that V7 archives using USTAR directories do exist. 512 */ 513 if (*pt == '/' || hd->linkflag == DIRTYPE) { 514 /* 515 * it is a directory, set the mode for -v printing 516 */ 517 arcn->type = PAX_DIR; 518 arcn->sb.st_mode |= S_IFDIR; 519 arcn->sb.st_nlink = 2; 520 } else { 521 /* 522 * have a file that will be followed by data. Set the 523 * skip value to the size field and calculate the size 524 * of the padding. 525 */ 526 arcn->type = PAX_REG; 527 arcn->sb.st_mode |= S_IFREG; 528 arcn->pad = TAR_PAD(arcn->sb.st_size); 529 arcn->skip = arcn->sb.st_size; 530 } 531 break; 532 } 533 534 /* 535 * strip off any trailing slash. 536 */ 537 if (*pt == '/') { 538 *pt = '\0'; 539 --arcn->nlen; 540 } 541 return(0); 542} 543 544/* 545 * tar_wr() 546 * write a tar header for the file specified in the ARCHD to the archive. 547 * Have to check for file types that cannot be stored and file names that 548 * are too long. Be careful of the term (last arg) to ul_oct, each field 549 * of tar has it own spec for the termination character(s). 550 * ASSUMED: space after header in header block is zero filled 551 * Return: 552 * 0 if file has data to be written after the header, 1 if file has NO 553 * data to write after the header, -1 if archive write failed 554 */ 555 556int 557tar_wr(ARCHD *arcn) 558{ 559 HD_TAR *hd; 560 int len; 561 char hdblk[sizeof(HD_TAR)]; 562 563 /* 564 * check for those file system types which tar cannot store 565 */ 566 switch(arcn->type) { 567 case PAX_DIR: 568 /* 569 * user asked that dirs not be written to the archive 570 */ 571 if (tar_nodir) 572 return(1); 573 break; 574 case PAX_CHR: 575 tty_warn(1, "Tar cannot archive a character device %s", 576 arcn->org_name); 577 return(1); 578 case PAX_BLK: 579 tty_warn(1, 580 "Tar cannot archive a block device %s", arcn->org_name); 581 return(1); 582 case PAX_SCK: 583 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 584 return(1); 585 case PAX_FIF: 586 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 587 return(1); 588 case PAX_SLK: 589 case PAX_HLK: 590 case PAX_HRG: 591 if (arcn->ln_nlen > sizeof(hd->linkname)) { 592 tty_warn(1,"Link name too long for tar %s", 593 arcn->ln_name); 594 return(1); 595 } 596 break; 597 case PAX_REG: 598 case PAX_CTG: 599 default: 600 break; 601 } 602 603 /* 604 * check file name len, remember extra char for dirs (the / at the end) 605 */ 606 len = arcn->nlen; 607 if (arcn->type == PAX_DIR) 608 ++len; 609 if (len >= sizeof(hd->name)) { 610 tty_warn(1, "File name too long for tar %s", arcn->name); 611 return(1); 612 } 613 614 /* 615 * copy the data out of the ARCHD into the tar header based on the type 616 * of the file. Remember many tar readers want the unused fields to be 617 * padded with zero. We set the linkflag field (type), the linkname 618 * (or zero if not used),the size, and set the padding (if any) to be 619 * added after the file data (0 for all other types, as they only have 620 * a header) 621 */ 622 memset(hdblk, 0, sizeof(hdblk)); 623 hd = (HD_TAR *)hdblk; 624 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 625 arcn->pad = 0; 626 627 if (arcn->type == PAX_DIR) { 628 /* 629 * directories are the same as files, except have a filename 630 * that ends with a /, we add the slash here. No data follows, 631 * dirs, so no pad. 632 */ 633 hd->linkflag = AREGTYPE; 634 hd->name[len-1] = '/'; 635 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 636 goto out; 637 } else if (arcn->type == PAX_SLK) { 638 /* 639 * no data follows this file, so no pad 640 */ 641 hd->linkflag = SYMTYPE; 642 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 643 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 644 goto out; 645 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 646 /* 647 * no data follows this file, so no pad 648 */ 649 hd->linkflag = LNKTYPE; 650 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 651 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 652 goto out; 653 } else { 654 /* 655 * data follows this file, so set the pad 656 */ 657 hd->linkflag = AREGTYPE; 658 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 659 tty_warn(1,"File is too large for tar %s", 660 arcn->org_name); 661 return(1); 662 } 663 arcn->pad = TAR_PAD(arcn->sb.st_size); 664 } 665 666 /* 667 * copy those fields that are independent of the type 668 */ 669 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 670 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 671 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 672 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 673 goto out; 674 675 /* 676 * calculate and add the checksum, then write the header. A return of 677 * 0 tells the caller to now write the file data, 1 says no data needs 678 * to be written 679 */ 680 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 681 sizeof(hd->chksum), 3)) 682 goto out; /* XXX Something's wrong here 683 * because a zero-byte file can 684 * cause this to be done and 685 * yet the resulting warning 686 * seems incorrect */ 687 688 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 689 return(-1); 690 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 691 return(-1); 692 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 693 return(0); 694 return(1); 695 696 out: 697 /* 698 * header field is out of range 699 */ 700 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 701 return(1); 702} 703 704/* 705 * Routines for POSIX ustar 706 */ 707 708/* 709 * ustar_strd() 710 * initialization for ustar read 711 * Return: 712 * 0 if ok, -1 otherwise 713 */ 714 715int 716ustar_strd(void) 717{ 718 return(0); 719} 720 721/* 722 * ustar_stwr() 723 * initialization for ustar write 724 * Return: 725 * 0 if ok, -1 otherwise 726 */ 727 728int 729ustar_stwr(void) 730{ 731 return(0); 732} 733 734/* 735 * ustar_id() 736 * determine if a block given to us is a valid ustar header. We have to 737 * be on the lookout for those pesky blocks of all zero's 738 * Return: 739 * 0 if a ustar header, -1 otherwise 740 */ 741 742int 743ustar_id(char *blk, int size) 744{ 745 HD_USTAR *hd; 746 747 if (size < BLKMULT) 748 return(-1); 749 hd = (HD_USTAR *)blk; 750 751 /* 752 * check for block of zero's first, a simple and fast test then check 753 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 754 * programs are fouled up and create archives missing the \0. Last we 755 * check the checksum. If ok we have to assume it is a valid header. 756 */ 757 if (hd->name[0] == '\0') 758 return(-1); 759 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 760 return(-1); 761 /* This is GNU tar */ 762 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 763 !seen_gnu_warning) { 764 seen_gnu_warning = 1; 765 tty_warn(0, 766 "Trying to read GNU tar archive with extensions off"); 767 } 768 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 769} 770 771/* 772 * ustar_rd() 773 * extract the values out of block already determined to be a ustar header. 774 * store the values in the ARCHD parameter. 775 * Return: 776 * 0 777 */ 778 779int 780ustar_rd(ARCHD *arcn, char *buf) 781{ 782 HD_USTAR *hd; 783 char *dest; 784 int cnt; 785 dev_t devmajor; 786 dev_t devminor; 787 788 /* 789 * we only get proper sized buffers 790 */ 791 if (ustar_id(buf, BLKMULT) < 0) 792 return(-1); 793 794 memset(arcn, 0, sizeof(*arcn)); 795 arcn->org_name = arcn->name; 796 arcn->pat = NULL; 797 arcn->sb.st_nlink = 1; 798 hd = (HD_USTAR *)buf; 799 800 /* 801 * see if the filename is split into two parts. if, so joint the parts. 802 * we copy the prefix first and add a / between the prefix and name. 803 */ 804 dest = arcn->name; 805 if (*(hd->prefix) != '\0') { 806 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 807 dest += cnt; 808 *dest++ = '/'; 809 cnt++; 810 } else { 811 cnt = 0; 812 } 813 814 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 815 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 816 &gnu_name_string, hd->name, sizeof(hd->name)); 817 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 818 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 819 } 820 821 /* 822 * follow the spec to the letter. we should only have mode bits, strip 823 * off all other crud we may be passed. 824 */ 825 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 826 0xfff); 827 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 828 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 829 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 830 831 /* 832 * If we can find the ascii names for gname and uname in the password 833 * and group files we will use the uid's and gid they bind. Otherwise 834 * we use the uid and gid values stored in the header. (This is what 835 * the posix spec wants). 836 */ 837 hd->gname[sizeof(hd->gname) - 1] = '\0'; 838 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 839 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 840 hd->uname[sizeof(hd->uname) - 1] = '\0'; 841 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 842 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 843 844 /* 845 * set the defaults, these may be changed depending on the file type 846 */ 847 arcn->pad = 0; 848 arcn->skip = 0; 849 arcn->sb.st_rdev = (dev_t)0; 850 851 /* 852 * set the mode and PAX type according to the typeflag in the header 853 */ 854 switch(hd->typeflag) { 855 case FIFOTYPE: 856 arcn->type = PAX_FIF; 857 arcn->sb.st_mode |= S_IFIFO; 858 break; 859 case DIRTYPE: 860 arcn->type = PAX_DIR; 861 arcn->sb.st_mode |= S_IFDIR; 862 arcn->sb.st_nlink = 2; 863 864 /* 865 * Some programs that create ustar archives append a '/' 866 * to the pathname for directories. This clearly violates 867 * ustar specs, but we will silently strip it off anyway. 868 */ 869 if (arcn->name[arcn->nlen - 1] == '/') 870 arcn->name[--arcn->nlen] = '\0'; 871 break; 872 case BLKTYPE: 873 case CHRTYPE: 874 /* 875 * this type requires the rdev field to be set. 876 */ 877 if (hd->typeflag == BLKTYPE) { 878 arcn->type = PAX_BLK; 879 arcn->sb.st_mode |= S_IFBLK; 880 } else { 881 arcn->type = PAX_CHR; 882 arcn->sb.st_mode |= S_IFCHR; 883 } 884 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 885 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 886 arcn->sb.st_rdev = TODEV(devmajor, devminor); 887 break; 888 case SYMTYPE: 889 case LNKTYPE: 890 if (hd->typeflag == SYMTYPE) { 891 arcn->type = PAX_SLK; 892 arcn->sb.st_mode |= S_IFLNK; 893 } else { 894 arcn->type = PAX_HLK; 895 /* 896 * so printing looks better 897 */ 898 arcn->sb.st_mode |= S_IFREG; 899 arcn->sb.st_nlink = 2; 900 } 901 break; 902 case LONGLINKTYPE: 903 if (is_gnutar) 904 arcn->type = PAX_GLL; 905 /* FALLTHROUGH */ 906 case LONGNAMETYPE: 907 if (is_gnutar) { 908 /* 909 * GNU long link/file; we tag these here and let the 910 * pax internals deal with it -- too ugly otherwise. 911 */ 912 if (hd->typeflag != LONGLINKTYPE) 913 arcn->type = PAX_GLF; 914 arcn->pad = TAR_PAD(arcn->sb.st_size); 915 arcn->skip = arcn->sb.st_size; 916 } else { 917 tty_warn(1, "GNU Long %s found in posix ustar archive.", 918 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 919 } 920 break; 921 case CONTTYPE: 922 case AREGTYPE: 923 case REGTYPE: 924 default: 925 /* 926 * these types have file data that follows. Set the skip and 927 * pad fields. 928 */ 929 arcn->type = PAX_REG; 930 arcn->pad = TAR_PAD(arcn->sb.st_size); 931 arcn->skip = arcn->sb.st_size; 932 arcn->sb.st_mode |= S_IFREG; 933 break; 934 } 935 return(0); 936} 937 938static int 939expandname(char *buf, size_t len, char **gnu_name, const char *name, 940 size_t nlen) 941{ 942 if (*gnu_name) { 943 len = strlcpy(buf, *gnu_name, len); 944 free(*gnu_name); 945 *gnu_name = NULL; 946 } else { 947 if (len > ++nlen) 948 len = nlen; 949 len = strlcpy(buf, name, len); 950 } 951 return len; 952} 953 954static void 955longlink(ARCHD *arcn) 956{ 957 ARCHD larc; 958 959 memset(&larc, 0, sizeof(larc)); 960 961 switch (arcn->type) { 962 case PAX_SLK: 963 case PAX_HRG: 964 case PAX_HLK: 965 larc.type = PAX_GLL; 966 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 967 sizeof(larc.ln_name)); 968 gnu_hack_string = arcn->ln_name; 969 gnu_hack_len = arcn->ln_nlen + 1; 970 break; 971 default: 972 larc.nlen = strlcpy(larc.name, "././@LongLink", 973 sizeof(larc.name)); 974 gnu_hack_string = arcn->name; 975 gnu_hack_len = arcn->nlen + 1; 976 larc.type = PAX_GLF; 977 } 978 /* 979 * We need a longlink now. 980 */ 981 ustar_wr(&larc); 982} 983 984/* 985 * ustar_wr() 986 * write a ustar header for the file specified in the ARCHD to the archive 987 * Have to check for file types that cannot be stored and file names that 988 * are too long. Be careful of the term (last arg) to ul_oct, we only use 989 * '\0' for the termination character (this is different than picky tar) 990 * ASSUMED: space after header in header block is zero filled 991 * Return: 992 * 0 if file has data to be written after the header, 1 if file has NO 993 * data to write after the header, -1 if archive write failed 994 */ 995 996int 997ustar_wr(ARCHD *arcn) 998{ 999 HD_USTAR *hd; 1000 char *pt; 1001 char hdblk[sizeof(HD_USTAR)]; 1002 const char *user, *group; 1003 1004 /* 1005 * check for those file system types ustar cannot store 1006 */ 1007 if (arcn->type == PAX_SCK) { 1008 if (!is_gnutar) 1009 tty_warn(1, "Ustar cannot archive a socket %s", 1010 arcn->org_name); 1011 return(1); 1012 } 1013 1014 /* 1015 * check the length of the linkname 1016 */ 1017 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1018 (arcn->type == PAX_HRG)) && 1019 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1020 if (is_gnutar) { 1021 longlink(arcn); 1022 } else { 1023 tty_warn(1, "Link name too long for ustar %s", 1024 arcn->ln_name); 1025 return(1); 1026 } 1027 } 1028 1029 /* 1030 * split the path name into prefix and name fields (if needed). if 1031 * pt != arcn->name, the name has to be split 1032 */ 1033 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1034 if (is_gnutar) { 1035 longlink(arcn); 1036 pt = arcn->name; 1037 } else { 1038 tty_warn(1, "File name too long for ustar %s", 1039 arcn->name); 1040 return(1); 1041 } 1042 } 1043 1044 /* 1045 * zero out the header so we don't have to worry about zero fill below 1046 */ 1047 memset(hdblk, 0, sizeof(hdblk)); 1048 hd = (HD_USTAR *)hdblk; 1049 arcn->pad = 0L; 1050 1051 /* 1052 * split the name, or zero out the prefix 1053 */ 1054 if (pt != arcn->name) { 1055 /* 1056 * name was split, pt points at the / where the split is to 1057 * occur, we remove the / and copy the first part to the prefix 1058 */ 1059 *pt = '\0'; 1060 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1061 *pt++ = '/'; 1062 } 1063 1064 /* 1065 * copy the name part. this may be the whole path or the part after 1066 * the prefix 1067 */ 1068 strlcpy(hd->name, pt, sizeof(hd->name)); 1069 1070 /* 1071 * set the fields in the header that are type dependent 1072 */ 1073 switch(arcn->type) { 1074 case PAX_DIR: 1075 hd->typeflag = DIRTYPE; 1076 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1077 goto out; 1078 break; 1079 case PAX_CHR: 1080 case PAX_BLK: 1081 if (arcn->type == PAX_CHR) 1082 hd->typeflag = CHRTYPE; 1083 else 1084 hd->typeflag = BLKTYPE; 1085 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1086 sizeof(hd->devmajor), 3) || 1087 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1088 sizeof(hd->devminor), 3) || 1089 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1090 goto out; 1091 break; 1092 case PAX_FIF: 1093 hd->typeflag = FIFOTYPE; 1094 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1095 goto out; 1096 break; 1097 case PAX_GLL: 1098 case PAX_SLK: 1099 case PAX_HLK: 1100 case PAX_HRG: 1101 if (arcn->type == PAX_SLK) 1102 hd->typeflag = SYMTYPE; 1103 else if (arcn->type == PAX_GLL) 1104 hd->typeflag = LONGLINKTYPE; 1105 else 1106 hd->typeflag = LNKTYPE; 1107 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1108 if (ul_oct((u_long)gnu_hack_len, hd->size, 1109 sizeof(hd->size), 3)) 1110 goto out; 1111 break; 1112 case PAX_GLF: 1113 case PAX_REG: 1114 case PAX_CTG: 1115 default: 1116 /* 1117 * file data with this type, set the padding 1118 */ 1119 if (arcn->type == PAX_GLF) { 1120 hd->typeflag = LONGNAMETYPE; 1121 arcn->pad = TAR_PAD(gnu_hack_len); 1122 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1123 sizeof(hd->size), 3)) { 1124 tty_warn(1,"File is too long for ustar %s", 1125 arcn->org_name); 1126 return(1); 1127 } 1128 } else { 1129 if (arcn->type == PAX_CTG) 1130 hd->typeflag = CONTTYPE; 1131 else 1132 hd->typeflag = REGTYPE; 1133 arcn->pad = TAR_PAD(arcn->sb.st_size); 1134 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1135 sizeof(hd->size), 3)) { 1136 tty_warn(1,"File is too long for ustar %s", 1137 arcn->org_name); 1138 return(1); 1139 } 1140 } 1141 break; 1142 } 1143 1144 strncpy(hd->magic, TMAGIC, TMAGLEN); 1145 if (is_gnutar) 1146 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1147 else 1148 strncpy(hd->version, TVERSION, TVERSLEN); 1149 1150 /* 1151 * set the remaining fields. Some versions want all 16 bits of mode 1152 * we better humor them (they really do not meet spec though).... 1153 */ 1154 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1155 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1156 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1157 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1158 goto out; 1159 user = user_from_uid(arcn->sb.st_uid, 1); 1160 group = group_from_gid(arcn->sb.st_gid, 1); 1161 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1162 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1163 1164 /* 1165 * calculate and store the checksum write the header to the archive 1166 * return 0 tells the caller to now write the file data, 1 says no data 1167 * needs to be written 1168 */ 1169 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1170 sizeof(hd->chksum), 3)) 1171 goto out; 1172 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1173 return(-1); 1174 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1175 return(-1); 1176 if (gnu_hack_string) { 1177 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1178 int pad = gnu_hack_len; 1179 gnu_hack_string = NULL; 1180 gnu_hack_len = 0; 1181 if (res < 0) 1182 return(-1); 1183 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1184 return(-1); 1185 } 1186 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1187 return(0); 1188 return(1); 1189 1190 out: 1191 /* 1192 * header field is out of range 1193 */ 1194 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1195 return(1); 1196} 1197 1198/* 1199 * name_split() 1200 * see if the name has to be split for storage in a ustar header. We try 1201 * to fit the entire name in the name field without splitting if we can. 1202 * The split point is always at a / 1203 * Return 1204 * character pointer to split point (always the / that is to be removed 1205 * if the split is not needed, the points is set to the start of the file 1206 * name (it would violate the spec to split there). A NULL is returned if 1207 * the file name is too long 1208 */ 1209 1210static char * 1211name_split(char *name, int len) 1212{ 1213 char *start; 1214 1215 /* 1216 * check to see if the file name is small enough to fit in the name 1217 * field. if so just return a pointer to the name. 1218 */ 1219 if (len < TNMSZ) 1220 return(name); 1221 if (len > (TPFSZ + TNMSZ)) 1222 return(NULL); 1223 1224 /* 1225 * we start looking at the biggest sized piece that fits in the name 1226 * field. We walk forward looking for a slash to split at. The idea is 1227 * to find the biggest piece to fit in the name field (or the smallest 1228 * prefix we can find) (the -1 is correct the biggest piece would 1229 * include the slash between the two parts that gets thrown away) 1230 */ 1231 start = name + len - TNMSZ; 1232 while ((*start != '\0') && (*start != '/')) 1233 ++start; 1234 1235 /* 1236 * if we hit the end of the string, this name cannot be split, so we 1237 * cannot store this file. 1238 */ 1239 if (*start == '\0') 1240 return(NULL); 1241 len = start - name; 1242 1243 /* 1244 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1245 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1246 * the file would then expand on extract to //str. The len == 0 below 1247 * makes this special case follow the spec to the letter. 1248 */ 1249 if ((len >= TPFSZ) || (len == 0)) 1250 return(NULL); 1251 1252 /* 1253 * ok have a split point, return it to the caller 1254 */ 1255 return(start); 1256} 1257 1258/* 1259 * convert a glob into a RE, and add it to the list. we convert to 1260 * four different RE's (because we're using BRE's and can't use | 1261 * alternation :-() with this padding: 1262 * .*\/ and $ 1263 * .*\/ and \/.* 1264 * ^ and $ 1265 * ^ and \/.* 1266 */ 1267static int 1268tar_gnutar_exclude_one(const char *line, size_t len) 1269{ 1270 /* 2 * buffer len + nul */ 1271 char sbuf[MAXPATHLEN * 2 + 1]; 1272 /* + / + // + .*""/\/ + \/.* */ 1273 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1274 int i, j; 1275 1276 if (line[len - 1] == '\n') 1277 len--; 1278 strncpy(sbuf, ".*" "\\/", j = 4); 1279 for (i = 0; i < len; i++) { 1280 /* 1281 * convert glob to regexp, escaping everything 1282 */ 1283 if (line[i] == '*') 1284 sbuf[j++] = '.'; 1285 else if (line[i] == '?') { 1286 sbuf[j++] = '.'; 1287 continue; 1288 } else if (!isalnum(line[i]) && !isblank(line[i])) 1289 sbuf[j++] = '\\'; 1290 sbuf[j++] = line[i]; 1291 } 1292 /* don't need the .*\/ ones if we start with /, i guess */ 1293 if (line[0] != '/') { 1294 snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1295 if (rep_add(rabuf) < 0) 1296 return (-1); 1297 snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1298 if (rep_add(rabuf) < 0) 1299 return (-1); 1300 } 1301 1302 snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1303 if (rep_add(rabuf) < 0) 1304 return (-1); 1305 snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1306 if (rep_add(rabuf) < 0) 1307 return (-1); 1308 1309 return (0); 1310} 1311 1312/* 1313 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1314 * we go through each line of the file, building a string from the "glob" 1315 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1316 * to rep_add(), which will add a empty replacement (exclusion), for the 1317 * named files. 1318 */ 1319int 1320tar_gnutar_minus_minus_exclude(path) 1321 const char *path; 1322{ 1323 size_t len = strlen(path); 1324 1325 if (len > MAXPATHLEN) 1326 tty_warn(0, "pathname too long: %s", path); 1327 1328 return (tar_gnutar_exclude_one(path, len)); 1329} 1330 1331int 1332tar_gnutar_X_compat(path) 1333 const char *path; 1334{ 1335 char *line; 1336 FILE *fp; 1337 int lineno = 0; 1338 size_t len; 1339 1340 fp = fopen(path, "r"); 1341 if (fp == NULL) { 1342 tty_warn(1, "cannot open %s: %s", path, 1343 strerror(errno)); 1344 return(-1); 1345 } 1346 1347 while ((line = fgetln(fp, &len))) { 1348 lineno++; 1349 if (len > MAXPATHLEN) { 1350 tty_warn(0, "pathname too long, line %d of %s", 1351 lineno, path); 1352 } 1353 if (tar_gnutar_exclude_one(line, len)) 1354 return (-1); 1355 } 1356 return (0); 1357} 1358