/* tar.c revision 1.49 */
1/* $NetBSD: tar.c,v 1.49 2004/04/12 14:41:09 mrg Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.49 2004/04/12 14:41:09 mrg Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96 97static int 98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 99{ 100 u_long hdck, blck; 101 102 hdck = asc_ul(hd, hdlen, OCT); 103 blck = tar_chksm(bl, bllen); 104 105 if (hdck != blck) { 106 if (!quiet) 107 tty_warn(0, "Header checksum %lo does not match %lo", 108 hdck, blck); 109 return(-1); 110 } 111 return(0); 112} 113 114 115/* 116 * tar_endwr() 117 * add the tar trailer of two null blocks 118 * Return: 119 * 0 if ok, -1 otherwise (what wr_skip returns) 120 */ 121 122int 123tar_endwr(void) 124{ 125 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 126} 127 128/* 129 * tar_endrd() 130 * no cleanup needed here, just return size of trailer (for append) 131 * Return: 132 * size of trailer BLKMULT 133 */ 134 135off_t 136tar_endrd(void) 137{ 138 return((off_t)BLKMULT); 139} 140 141/* 142 * tar_trail() 143 * Called to determine if a header block is a valid trailer. We are passed 144 * the block, the in_sync flag (which tells us we are in resync mode; 145 * looking for a valid header), and cnt (which starts at zero) which is 146 * used to count the number of empty blocks we have seen so far. 147 * Return: 148 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 149 * could never contain a header. 150 */ 151 152int 153tar_trail(char *buf, int in_resync, int *cnt) 154{ 155 int i; 156 157 /* 158 * look for all zero, trailer is two consecutive blocks of zero 159 */ 160 for (i = 0; i < BLKMULT; ++i) { 161 if (buf[i] != '\0') 162 break; 163 } 164 165 /* 166 * if not all zero it is not a trailer, but MIGHT be a header. 167 */ 168 if (i != BLKMULT) 169 return(-1); 170 171 /* 172 * When given a zero block, we must be careful! 173 * If we are not in resync mode, check for the trailer. 
Have to watch 174 * out that we do not mis-identify file data as the trailer, so we do 175 * NOT try to id a trailer during resync mode. During resync mode we 176 * might as well throw this block out since a valid header can NEVER be 177 * a block of all 0 (we must have a valid file name). 178 */ 179 if (!in_resync) { 180 ++*cnt; 181 /* 182 * old GNU tar (up through 1.13) only writes one block of 183 * trailers, so we pretend we got another 184 */ 185 if (is_gnutar) 186 ++*cnt; 187 if (*cnt >= NULLCNT) 188 return(0); 189 } 190 return(1); 191} 192 193/* 194 * ul_oct() 195 * convert an unsigned long to an octal string. many oddball field 196 * termination characters are used by the various versions of tar in the 197 * different fields. term selects which kind to use. str is '0' padded 198 * at the front to len. we are unable to use only one format as many old 199 * tar readers are very cranky about this. 200 * Return: 201 * 0 if the number fit into the string, -1 otherwise 202 */ 203 204static int 205ul_oct(u_long val, char *str, int len, int term) 206{ 207 char *pt; 208 209 /* 210 * term selects the appropriate character(s) for the end of the string 211 */ 212 pt = str + len - 1; 213 switch(term) { 214 case 3: 215 *pt-- = '\0'; 216 break; 217 case 2: 218 *pt-- = ' '; 219 *pt-- = '\0'; 220 break; 221 case 1: 222 *pt-- = ' '; 223 break; 224 case 0: 225 default: 226 *pt-- = '\0'; 227 *pt-- = ' '; 228 break; 229 } 230 231 /* 232 * convert and blank pad if there is space 233 */ 234 while (pt >= str) { 235 *pt-- = '0' + (char)(val & 0x7); 236 if ((val = val >> 3) == (u_long)0) 237 break; 238 } 239 240 while (pt >= str) 241 *pt-- = '0'; 242 if (val != (u_long)0) 243 return(-1); 244 return(0); 245} 246 247#if !defined(NET2_STAT) && !defined(_LP64) 248/* 249 * ull_oct() 250 * convert an unsigned long long to an octal string. one of many oddball 251 * field termination characters are used by the various versions of tar 252 * in the different fields. 
term selects which kind to use. str is '0' 253 * padded at the front to len. we are unable to use only one format as 254 * many old tar readers are very cranky about this. 255 * Return: 256 * 0 if the number fit into the string, -1 otherwise 257 */ 258 259static int 260ull_oct(unsigned long long val, char *str, int len, int term) 261{ 262 char *pt; 263 264 /* 265 * term selects the appropriate character(s) for the end of the string 266 */ 267 pt = str + len - 1; 268 switch(term) { 269 case 3: 270 *pt-- = '\0'; 271 break; 272 case 2: 273 *pt-- = ' '; 274 *pt-- = '\0'; 275 break; 276 case 1: 277 *pt-- = ' '; 278 break; 279 case 0: 280 default: 281 *pt-- = '\0'; 282 *pt-- = ' '; 283 break; 284 } 285 286 /* 287 * convert and blank pad if there is space 288 */ 289 while (pt >= str) { 290 *pt-- = '0' + (char)(val & 0x7); 291 if ((val = val >> 3) == 0) 292 break; 293 } 294 295 while (pt >= str) 296 *pt-- = '0'; 297 if (val != (unsigned long long)0) 298 return(-1); 299 return(0); 300} 301#endif 302 303/* 304 * tar_chksm() 305 * calculate the checksum for a tar block counting the checksum field as 306 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 307 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 308 * pad headers with 0. 309 * Return: 310 * unsigned long checksum 311 */ 312 313static u_long 314tar_chksm(char *blk, int len) 315{ 316 char *stop; 317 char *pt; 318 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 319 320 /* 321 * add the part of the block before the checksum field 322 */ 323 pt = blk; 324 stop = blk + CHK_OFFSET; 325 while (pt < stop) 326 chksm += (u_long)(*pt++ & 0xff); 327 /* 328 * move past the checksum field and keep going, spec counts the 329 * checksum field as the sum of 8 blanks (which is pre-computed as 330 * BLNKSUM). 331 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 332 * starts, no point in summing zero's) 333 */ 334 pt += CHK_LEN; 335 stop = blk + len; 336 while (pt < stop) 337 chksm += (u_long)(*pt++ & 0xff); 338 return(chksm); 339} 340 341/* 342 * Routines for old BSD style tar (also made portable to sysV tar) 343 */ 344 345/* 346 * tar_id() 347 * determine if a block given to us is a valid tar header (and not a USTAR 348 * header). We have to be on the lookout for those pesky blocks of all 349 * zero's. 350 * Return: 351 * 0 if a tar header, -1 otherwise 352 */ 353 354int 355tar_id(char *blk, int size) 356{ 357 HD_TAR *hd; 358 HD_USTAR *uhd; 359 360 if (size < BLKMULT) 361 return(-1); 362 hd = (HD_TAR *)blk; 363 uhd = (HD_USTAR *)blk; 364 365 /* 366 * check for block of zero's first, a simple and fast test, then make 367 * sure this is not a ustar header by looking for the ustar magic 368 * cookie. We should use TMAGLEN, but some USTAR archive programs are 369 * wrong and create archives missing the \0. Last we check the 370 * checksum. If this is ok we have to assume it is a valid header. 
371 */ 372 if (hd->name[0] == '\0') 373 return(-1); 374 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 375 return(-1); 376 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 377} 378 379/* 380 * tar_opt() 381 * handle tar format specific -o options 382 * Return: 383 * 0 if ok -1 otherwise 384 */ 385 386int 387tar_opt(void) 388{ 389 OPLIST *opt; 390 391 while ((opt = opt_next()) != NULL) { 392 if (strcmp(opt->name, TAR_OPTION) || 393 strcmp(opt->value, TAR_NODIR)) { 394 tty_warn(1, 395 "Unknown tar format -o option/value pair %s=%s", 396 opt->name, opt->value); 397 tty_warn(1, 398 "%s=%s is the only supported tar format option", 399 TAR_OPTION, TAR_NODIR); 400 return(-1); 401 } 402 403 /* 404 * we only support one option, and only when writing 405 */ 406 if ((act != APPND) && (act != ARCHIVE)) { 407 tty_warn(1, "%s=%s is only supported when writing.", 408 opt->name, opt->value); 409 return(-1); 410 } 411 tar_nodir = 1; 412 } 413 return(0); 414} 415 416 417/* 418 * tar_rd() 419 * extract the values out of block already determined to be a tar header. 420 * store the values in the ARCHD parameter. 
421 * Return: 422 * 0 423 */ 424 425int 426tar_rd(ARCHD *arcn, char *buf) 427{ 428 HD_TAR *hd; 429 char *pt; 430 431 /* 432 * we only get proper sized buffers passed to us 433 */ 434 if (tar_id(buf, BLKMULT) < 0) 435 return(-1); 436 memset(arcn, 0, sizeof(*arcn)); 437 arcn->org_name = arcn->name; 438 arcn->pat = NULL; 439 arcn->sb.st_nlink = 1; 440 441 /* 442 * copy out the name and values in the stat buffer 443 */ 444 hd = (HD_TAR *)buf; 445 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 446 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 447 &gnu_name_string, hd->name, sizeof(hd->name)); 448 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 449 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 450 } 451 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 452 0xfff); 453 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 454 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 455 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 456 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 457 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 458 459 /* 460 * have to look at the last character, it may be a '/' and that is used 461 * to encode this as a directory 462 */ 463 pt = &(arcn->name[arcn->nlen - 1]); 464 arcn->pad = 0; 465 arcn->skip = 0; 466 switch(hd->linkflag) { 467 case SYMTYPE: 468 /* 469 * symbolic link, need to get the link name and set the type in 470 * the st_mode so -v printing will look correct. 471 */ 472 arcn->type = PAX_SLK; 473 arcn->sb.st_mode |= S_IFLNK; 474 break; 475 case LNKTYPE: 476 /* 477 * hard link, need to get the link name, set the type in the 478 * st_mode and st_nlink so -v printing will look better. 479 */ 480 arcn->type = PAX_HLK; 481 arcn->sb.st_nlink = 2; 482 483 /* 484 * no idea of what type this thing really points at, but 485 * we set something for printing only. 
486 */ 487 arcn->sb.st_mode |= S_IFREG; 488 break; 489 case LONGLINKTYPE: 490 arcn->type = PAX_GLL; 491 /* FALLTHROUGH */ 492 case LONGNAMETYPE: 493 /* 494 * GNU long link/file; we tag these here and let the 495 * pax internals deal with it -- too ugly otherwise. 496 */ 497 if (hd->linkflag != LONGLINKTYPE) 498 arcn->type = PAX_GLF; 499 arcn->pad = TAR_PAD(arcn->sb.st_size); 500 arcn->skip = arcn->sb.st_size; 501 break; 502 case AREGTYPE: 503 case REGTYPE: 504 case DIRTYPE: /* see below */ 505 default: 506 /* 507 * If we have a trailing / this is a directory and NOT a file. 508 * Note: V7 tar doesn't actually have DIRTYPE, but it was 509 * reported that V7 archives using USTAR directories do exist. 510 */ 511 if (*pt == '/' || hd->linkflag == DIRTYPE) { 512 /* 513 * it is a directory, set the mode for -v printing 514 */ 515 arcn->type = PAX_DIR; 516 arcn->sb.st_mode |= S_IFDIR; 517 arcn->sb.st_nlink = 2; 518 } else { 519 /* 520 * have a file that will be followed by data. Set the 521 * skip value to the size field and calculate the size 522 * of the padding. 523 */ 524 arcn->type = PAX_REG; 525 arcn->sb.st_mode |= S_IFREG; 526 arcn->pad = TAR_PAD(arcn->sb.st_size); 527 arcn->skip = arcn->sb.st_size; 528 } 529 break; 530 } 531 532 /* 533 * strip off any trailing slash. 534 */ 535 if (*pt == '/') { 536 *pt = '\0'; 537 --arcn->nlen; 538 } 539 return(0); 540} 541 542/* 543 * tar_wr() 544 * write a tar header for the file specified in the ARCHD to the archive. 545 * Have to check for file types that cannot be stored and file names that 546 * are too long. Be careful of the term (last arg) to ul_oct, each field 547 * of tar has it own spec for the termination character(s). 
548 * ASSUMED: space after header in header block is zero filled 549 * Return: 550 * 0 if file has data to be written after the header, 1 if file has NO 551 * data to write after the header, -1 if archive write failed 552 */ 553 554int 555tar_wr(ARCHD *arcn) 556{ 557 HD_TAR *hd; 558 int len; 559 char hdblk[sizeof(HD_TAR)]; 560 561 /* 562 * check for those file system types which tar cannot store 563 */ 564 switch(arcn->type) { 565 case PAX_DIR: 566 /* 567 * user asked that dirs not be written to the archive 568 */ 569 if (tar_nodir) 570 return(1); 571 break; 572 case PAX_CHR: 573 tty_warn(1, "Tar cannot archive a character device %s", 574 arcn->org_name); 575 return(1); 576 case PAX_BLK: 577 tty_warn(1, 578 "Tar cannot archive a block device %s", arcn->org_name); 579 return(1); 580 case PAX_SCK: 581 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 582 return(1); 583 case PAX_FIF: 584 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 585 return(1); 586 case PAX_SLK: 587 case PAX_HLK: 588 case PAX_HRG: 589 if (arcn->ln_nlen > sizeof(hd->linkname)) { 590 tty_warn(1,"Link name too long for tar %s", 591 arcn->ln_name); 592 return(1); 593 } 594 break; 595 case PAX_REG: 596 case PAX_CTG: 597 default: 598 break; 599 } 600 601 /* 602 * check file name len, remember extra char for dirs (the / at the end) 603 */ 604 len = arcn->nlen; 605 if (arcn->type == PAX_DIR) 606 ++len; 607 if (len >= sizeof(hd->name)) { 608 tty_warn(1, "File name too long for tar %s", arcn->name); 609 return(1); 610 } 611 612 /* 613 * copy the data out of the ARCHD into the tar header based on the type 614 * of the file. Remember many tar readers want the unused fields to be 615 * padded with zero. 
We set the linkflag field (type), the linkname 616 * (or zero if not used),the size, and set the padding (if any) to be 617 * added after the file data (0 for all other types, as they only have 618 * a header) 619 */ 620 memset(hdblk, 0, sizeof(hdblk)); 621 hd = (HD_TAR *)hdblk; 622 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 623 arcn->pad = 0; 624 625 if (arcn->type == PAX_DIR) { 626 /* 627 * directories are the same as files, except have a filename 628 * that ends with a /, we add the slash here. No data follows, 629 * dirs, so no pad. 630 */ 631 hd->linkflag = AREGTYPE; 632 hd->name[len-1] = '/'; 633 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 634 goto out; 635 } else if (arcn->type == PAX_SLK) { 636 /* 637 * no data follows this file, so no pad 638 */ 639 hd->linkflag = SYMTYPE; 640 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 641 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 642 goto out; 643 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 644 /* 645 * no data follows this file, so no pad 646 */ 647 hd->linkflag = LNKTYPE; 648 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 649 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 650 goto out; 651 } else { 652 /* 653 * data follows this file, so set the pad 654 */ 655 hd->linkflag = AREGTYPE; 656 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 657 tty_warn(1,"File is too large for tar %s", 658 arcn->org_name); 659 return(1); 660 } 661 arcn->pad = TAR_PAD(arcn->sb.st_size); 662 } 663 664 /* 665 * copy those fields that are independent of the type 666 */ 667 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 668 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 669 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 670 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 671 goto out; 672 673 /* 674 * calculate and add the checksum, then write the header. 
A return of 675 * 0 tells the caller to now write the file data, 1 says no data needs 676 * to be written 677 */ 678 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 679 sizeof(hd->chksum), 3)) 680 goto out; /* XXX Something's wrong here 681 * because a zero-byte file can 682 * cause this to be done and 683 * yet the resulting warning 684 * seems incorrect */ 685 686 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 687 return(-1); 688 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 689 return(-1); 690 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 691 return(0); 692 return(1); 693 694 out: 695 /* 696 * header field is out of range 697 */ 698 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 699 return(1); 700} 701 702/* 703 * Routines for POSIX ustar 704 */ 705 706/* 707 * ustar_strd() 708 * initialization for ustar read 709 * Return: 710 * 0 if ok, -1 otherwise 711 */ 712 713int 714ustar_strd(void) 715{ 716 return(0); 717} 718 719/* 720 * ustar_stwr() 721 * initialization for ustar write 722 * Return: 723 * 0 if ok, -1 otherwise 724 */ 725 726int 727ustar_stwr(void) 728{ 729 return(0); 730} 731 732/* 733 * ustar_id() 734 * determine if a block given to us is a valid ustar header. We have to 735 * be on the lookout for those pesky blocks of all zero's 736 * Return: 737 * 0 if a ustar header, -1 otherwise 738 */ 739 740int 741ustar_id(char *blk, int size) 742{ 743 HD_USTAR *hd; 744 745 if (size < BLKMULT) 746 return(-1); 747 hd = (HD_USTAR *)blk; 748 749 /* 750 * check for block of zero's first, a simple and fast test then check 751 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 752 * programs are fouled up and create archives missing the \0. Last we 753 * check the checksum. If ok we have to assume it is a valid header. 
754 */ 755 if (hd->name[0] == '\0') 756 return(-1); 757 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 758 return(-1); 759 /* This is GNU tar */ 760 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 761 !seen_gnu_warning) { 762 seen_gnu_warning = 1; 763 tty_warn(0, 764 "Trying to read GNU tar archive with extensions off"); 765 } 766 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 767} 768 769/* 770 * ustar_rd() 771 * extract the values out of block already determined to be a ustar header. 772 * store the values in the ARCHD parameter. 773 * Return: 774 * 0 775 */ 776 777int 778ustar_rd(ARCHD *arcn, char *buf) 779{ 780 HD_USTAR *hd; 781 char *dest; 782 int cnt; 783 dev_t devmajor; 784 dev_t devminor; 785 786 /* 787 * we only get proper sized buffers 788 */ 789 if (ustar_id(buf, BLKMULT) < 0) 790 return(-1); 791 792 memset(arcn, 0, sizeof(*arcn)); 793 arcn->org_name = arcn->name; 794 arcn->pat = NULL; 795 arcn->sb.st_nlink = 1; 796 hd = (HD_USTAR *)buf; 797 798 /* 799 * see if the filename is split into two parts. if, so joint the parts. 800 * we copy the prefix first and add a / between the prefix and name. 801 */ 802 dest = arcn->name; 803 if (*(hd->prefix) != '\0') { 804 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 805 dest += cnt; 806 *dest++ = '/'; 807 cnt++; 808 } else { 809 cnt = 0; 810 } 811 812 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 813 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 814 &gnu_name_string, hd->name, sizeof(hd->name)); 815 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 816 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 817 } 818 819 /* 820 * follow the spec to the letter. we should only have mode bits, strip 821 * off all other crud we may be passed. 
822 */ 823 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 824 0xfff); 825 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 826 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 827 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 828 829 /* 830 * If we can find the ascii names for gname and uname in the password 831 * and group files we will use the uid's and gid they bind. Otherwise 832 * we use the uid and gid values stored in the header. (This is what 833 * the posix spec wants). 834 */ 835 hd->gname[sizeof(hd->gname) - 1] = '\0'; 836 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 837 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 838 hd->uname[sizeof(hd->uname) - 1] = '\0'; 839 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 840 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 841 842 /* 843 * set the defaults, these may be changed depending on the file type 844 */ 845 arcn->pad = 0; 846 arcn->skip = 0; 847 arcn->sb.st_rdev = (dev_t)0; 848 849 /* 850 * set the mode and PAX type according to the typeflag in the header 851 */ 852 switch(hd->typeflag) { 853 case FIFOTYPE: 854 arcn->type = PAX_FIF; 855 arcn->sb.st_mode |= S_IFIFO; 856 break; 857 case DIRTYPE: 858 arcn->type = PAX_DIR; 859 arcn->sb.st_mode |= S_IFDIR; 860 arcn->sb.st_nlink = 2; 861 862 /* 863 * Some programs that create ustar archives append a '/' 864 * to the pathname for directories. This clearly violates 865 * ustar specs, but we will silently strip it off anyway. 866 */ 867 if (arcn->name[arcn->nlen - 1] == '/') 868 arcn->name[--arcn->nlen] = '\0'; 869 break; 870 case BLKTYPE: 871 case CHRTYPE: 872 /* 873 * this type requires the rdev field to be set. 
874 */ 875 if (hd->typeflag == BLKTYPE) { 876 arcn->type = PAX_BLK; 877 arcn->sb.st_mode |= S_IFBLK; 878 } else { 879 arcn->type = PAX_CHR; 880 arcn->sb.st_mode |= S_IFCHR; 881 } 882 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 883 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 884 arcn->sb.st_rdev = TODEV(devmajor, devminor); 885 break; 886 case SYMTYPE: 887 case LNKTYPE: 888 if (hd->typeflag == SYMTYPE) { 889 arcn->type = PAX_SLK; 890 arcn->sb.st_mode |= S_IFLNK; 891 } else { 892 arcn->type = PAX_HLK; 893 /* 894 * so printing looks better 895 */ 896 arcn->sb.st_mode |= S_IFREG; 897 arcn->sb.st_nlink = 2; 898 } 899 break; 900 case LONGLINKTYPE: 901 if (is_gnutar) 902 arcn->type = PAX_GLL; 903 /* FALLTHROUGH */ 904 case LONGNAMETYPE: 905 if (is_gnutar) { 906 /* 907 * GNU long link/file; we tag these here and let the 908 * pax internals deal with it -- too ugly otherwise. 909 */ 910 if (hd->typeflag != LONGLINKTYPE) 911 arcn->type = PAX_GLF; 912 arcn->pad = TAR_PAD(arcn->sb.st_size); 913 arcn->skip = arcn->sb.st_size; 914 } else { 915 tty_warn(1, "GNU Long %s found in posix ustar archive.", 916 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 917 } 918 break; 919 case CONTTYPE: 920 case AREGTYPE: 921 case REGTYPE: 922 default: 923 /* 924 * these types have file data that follows. Set the skip and 925 * pad fields. 
926 */ 927 arcn->type = PAX_REG; 928 arcn->pad = TAR_PAD(arcn->sb.st_size); 929 arcn->skip = arcn->sb.st_size; 930 arcn->sb.st_mode |= S_IFREG; 931 break; 932 } 933 return(0); 934} 935 936static int 937expandname(char *buf, size_t len, char **gnu_name, const char *name, 938 size_t nlen) 939{ 940 if (*gnu_name) { 941 len = strlcpy(buf, *gnu_name, len); 942 free(*gnu_name); 943 *gnu_name = NULL; 944 } else { 945 if (len > ++nlen) 946 len = nlen; 947 len = strlcpy(buf, name, len); 948 } 949 return len; 950} 951 952static void 953longlink(ARCHD *arcn) 954{ 955 ARCHD larc; 956 957 memset(&larc, 0, sizeof(larc)); 958 959 switch (arcn->type) { 960 case PAX_SLK: 961 case PAX_HRG: 962 case PAX_HLK: 963 larc.type = PAX_GLL; 964 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 965 sizeof(larc.ln_name)); 966 gnu_hack_string = arcn->ln_name; 967 gnu_hack_len = arcn->ln_nlen + 1; 968 break; 969 default: 970 larc.nlen = strlcpy(larc.name, "././@LongLink", 971 sizeof(larc.name)); 972 gnu_hack_string = arcn->name; 973 gnu_hack_len = arcn->nlen + 1; 974 larc.type = PAX_GLF; 975 } 976 /* 977 * We need a longlink now. 978 */ 979 ustar_wr(&larc); 980} 981 982/* 983 * ustar_wr() 984 * write a ustar header for the file specified in the ARCHD to the archive 985 * Have to check for file types that cannot be stored and file names that 986 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 987 * '\0' for the termination character (this is different than picky tar) 988 * ASSUMED: space after header in header block is zero filled 989 * Return: 990 * 0 if file has data to be written after the header, 1 if file has NO 991 * data to write after the header, -1 if archive write failed 992 */ 993 994int 995ustar_wr(ARCHD *arcn) 996{ 997 HD_USTAR *hd; 998 char *pt; 999 char hdblk[sizeof(HD_USTAR)]; 1000 const char *user, *group; 1001 1002 /* 1003 * check for those file system types ustar cannot store 1004 */ 1005 if (arcn->type == PAX_SCK) { 1006 if (!is_gnutar) 1007 tty_warn(1, "Ustar cannot archive a socket %s", 1008 arcn->org_name); 1009 return(1); 1010 } 1011 1012 /* 1013 * check the length of the linkname 1014 */ 1015 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1016 (arcn->type == PAX_HRG)) && 1017 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1018 if (is_gnutar) { 1019 longlink(arcn); 1020 } else { 1021 tty_warn(1, "Link name too long for ustar %s", 1022 arcn->ln_name); 1023 return(1); 1024 } 1025 } 1026 1027 /* 1028 * split the path name into prefix and name fields (if needed). 
if 1029 * pt != arcn->name, the name has to be split 1030 */ 1031 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1032 if (is_gnutar) { 1033 longlink(arcn); 1034 pt = arcn->name; 1035 } else { 1036 tty_warn(1, "File name too long for ustar %s", 1037 arcn->name); 1038 return(1); 1039 } 1040 } 1041 1042 /* 1043 * zero out the header so we don't have to worry about zero fill below 1044 */ 1045 memset(hdblk, 0, sizeof(hdblk)); 1046 hd = (HD_USTAR *)hdblk; 1047 arcn->pad = 0L; 1048 1049 /* 1050 * split the name, or zero out the prefix 1051 */ 1052 if (pt != arcn->name) { 1053 /* 1054 * name was split, pt points at the / where the split is to 1055 * occur, we remove the / and copy the first part to the prefix 1056 */ 1057 *pt = '\0'; 1058 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1059 *pt++ = '/'; 1060 } 1061 1062 /* 1063 * copy the name part. this may be the whole path or the part after 1064 * the prefix 1065 */ 1066 strlcpy(hd->name, pt, sizeof(hd->name)); 1067 1068 /* 1069 * set the fields in the header that are type dependent 1070 */ 1071 switch(arcn->type) { 1072 case PAX_DIR: 1073 hd->typeflag = DIRTYPE; 1074 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1075 goto out; 1076 break; 1077 case PAX_CHR: 1078 case PAX_BLK: 1079 if (arcn->type == PAX_CHR) 1080 hd->typeflag = CHRTYPE; 1081 else 1082 hd->typeflag = BLKTYPE; 1083 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1084 sizeof(hd->devmajor), 3) || 1085 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1086 sizeof(hd->devminor), 3) || 1087 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1088 goto out; 1089 break; 1090 case PAX_FIF: 1091 hd->typeflag = FIFOTYPE; 1092 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1093 goto out; 1094 break; 1095 case PAX_GLL: 1096 case PAX_SLK: 1097 case PAX_HLK: 1098 case PAX_HRG: 1099 if (arcn->type == PAX_SLK) 1100 hd->typeflag = SYMTYPE; 1101 else if (arcn->type == PAX_GLL) 1102 hd->typeflag = LONGLINKTYPE; 1103 else 1104 
hd->typeflag = LNKTYPE; 1105 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1106 if (ul_oct((u_long)gnu_hack_len, hd->size, 1107 sizeof(hd->size), 3)) 1108 goto out; 1109 break; 1110 case PAX_GLF: 1111 case PAX_REG: 1112 case PAX_CTG: 1113 default: 1114 /* 1115 * file data with this type, set the padding 1116 */ 1117 if (arcn->type == PAX_GLF) { 1118 hd->typeflag = LONGNAMETYPE; 1119 arcn->pad = TAR_PAD(gnu_hack_len); 1120 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1121 sizeof(hd->size), 3)) { 1122 tty_warn(1,"File is too long for ustar %s", 1123 arcn->org_name); 1124 return(1); 1125 } 1126 } else { 1127 if (arcn->type == PAX_CTG) 1128 hd->typeflag = CONTTYPE; 1129 else 1130 hd->typeflag = REGTYPE; 1131 arcn->pad = TAR_PAD(arcn->sb.st_size); 1132 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1133 sizeof(hd->size), 3)) { 1134 tty_warn(1,"File is too long for ustar %s", 1135 arcn->org_name); 1136 return(1); 1137 } 1138 } 1139 break; 1140 } 1141 1142 strncpy(hd->magic, TMAGIC, TMAGLEN); 1143 if (is_gnutar) 1144 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1145 else 1146 strncpy(hd->version, TVERSION, TVERSLEN); 1147 1148 /* 1149 * set the remaining fields. Some versions want all 16 bits of mode 1150 * we better humor them (they really do not meet spec though).... 1151 */ 1152 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1153 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1154 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1155 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1156 goto out; 1157 user = user_from_uid(arcn->sb.st_uid, 1); 1158 group = group_from_gid(arcn->sb.st_gid, 1); 1159 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1160 strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); 1161 1162 /* 1163 * calculate and store the checksum write the header to the archive 1164 * return 0 tells the caller to now write the file data, 1 says no data 1165 * needs to be written 1166 */ 1167 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1168 sizeof(hd->chksum), 3)) 1169 goto out; 1170 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1171 return(-1); 1172 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1173 return(-1); 1174 if (gnu_hack_string) { 1175 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1176 int pad = gnu_hack_len; 1177 gnu_hack_string = NULL; 1178 gnu_hack_len = 0; 1179 if (res < 0) 1180 return(-1); 1181 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1182 return(-1); 1183 } 1184 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1185 return(0); 1186 return(1); 1187 1188 out: 1189 /* 1190 * header field is out of range 1191 */ 1192 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1193 return(1); 1194} 1195 1196/* 1197 * name_split() 1198 * see if the name has to be split for storage in a ustar header. We try 1199 * to fit the entire name in the name field without splitting if we can. 1200 * The split point is always at a / 1201 * Return 1202 * character pointer to split point (always the / that is to be removed 1203 * if the split is not needed, the points is set to the start of the file 1204 * name (it would violate the spec to split there). A NULL is returned if 1205 * the file name is too long 1206 */ 1207 1208static char * 1209name_split(char *name, int len) 1210{ 1211 char *start; 1212 1213 /* 1214 * check to see if the file name is small enough to fit in the name 1215 * field. if so just return a pointer to the name. 1216 */ 1217 if (len < TNMSZ) 1218 return(name); 1219 if (len > (TPFSZ + TNMSZ)) 1220 return(NULL); 1221 1222 /* 1223 * we start looking at the biggest sized piece that fits in the name 1224 * field. 
We walk forward looking for a slash to split at. The idea is 1225 * to find the biggest piece to fit in the name field (or the smallest 1226 * prefix we can find) (the -1 is correct the biggest piece would 1227 * include the slash between the two parts that gets thrown away) 1228 */ 1229 start = name + len - TNMSZ; 1230 while ((*start != '\0') && (*start != '/')) 1231 ++start; 1232 1233 /* 1234 * if we hit the end of the string, this name cannot be split, so we 1235 * cannot store this file. 1236 */ 1237 if (*start == '\0') 1238 return(NULL); 1239 len = start - name; 1240 1241 /* 1242 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1243 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1244 * the file would then expand on extract to //str. The len == 0 below 1245 * makes this special case follow the spec to the letter. 1246 */ 1247 if ((len >= TPFSZ) || (len == 0)) 1248 return(NULL); 1249 1250 /* 1251 * ok have a split point, return it to the caller 1252 */ 1253 return(start); 1254} 1255 1256/* 1257 * convert a glob into a RE, and add it to the list. 
we convert to 1258 * four different RE's (because we're using BRE's and can't use | 1259 * alternation :-() with this padding: 1260 * .*\/ and $ 1261 * .*\/ and \/.* 1262 * ^ and $ 1263 * ^ and \/.* 1264 */ 1265static int 1266tar_gnutar_exclude_one(const char *line, size_t len) 1267{ 1268 /* 2 * buffer len + nul */ 1269 char sbuf[MAXPATHLEN * 2 + 1]; 1270 /* + / + // + .*""/\/ + \/.* */ 1271 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1272 int i, j; 1273 1274 if (line[len - 1] == '\n') 1275 len--; 1276 strncpy(sbuf, ".*" "\\/", j = 4); 1277 for (i = 0; i < len; i++) { 1278 /* 1279 * convert glob to regexp, escaping everything 1280 */ 1281 if (line[i] == '*') 1282 sbuf[j++] = '.'; 1283 else if (line[i] == '?') { 1284 sbuf[j++] = '.'; 1285 continue; 1286 } else if (!isalnum(line[i]) && !isblank(line[i])) 1287 sbuf[j++] = '\\'; 1288 sbuf[j++] = line[i]; 1289 } 1290 /* don't need the .*\/ ones if we start with /, i guess */ 1291 if (line[0] != '/') { 1292 snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1293 if (rep_add(rabuf) < 0) 1294 return (-1); 1295 snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1296 if (rep_add(rabuf) < 0) 1297 return (-1); 1298 } 1299 1300 snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1301 if (rep_add(rabuf) < 0) 1302 return (-1); 1303 snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1304 if (rep_add(rabuf) < 0) 1305 return (-1); 1306 1307 return (0); 1308} 1309 1310/* 1311 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1312 * we go through each line of the file, building a string from the "glob" 1313 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1314 * to rep_add(), which will add a empty replacement (exclusion), for the 1315 * named files. 
1316 */ 1317int 1318tar_gnutar_minus_minus_exclude(path) 1319 const char *path; 1320{ 1321 size_t len = strlen(path); 1322 1323 if (len > MAXPATHLEN) 1324 tty_warn(0, "pathname too long: %s", path); 1325 1326 return (tar_gnutar_exclude_one(path, len)); 1327} 1328 1329int 1330tar_gnutar_X_compat(path) 1331 const char *path; 1332{ 1333 char *line; 1334 FILE *fp; 1335 int lineno = 0; 1336 size_t len; 1337 1338 fp = fopen(path, "r"); 1339 if (fp == NULL) { 1340 tty_warn(1, "cannot open %s: %s", path, 1341 strerror(errno)); 1342 return(-1); 1343 } 1344 1345 while ((line = fgetln(fp, &len))) { 1346 lineno++; 1347 if (len > MAXPATHLEN) { 1348 tty_warn(0, "pathname too long, line %d of %s", 1349 lineno, path); 1350 } 1351 if (tar_gnutar_exclude_one(line, len)) 1352 return (-1); 1353 } 1354 return (0); 1355} 1356