tar.c revision 1.51
1/* $NetBSD: tar.c,v 1.51 2004/04/20 19:59:54 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.51 2004/04/20 19:59:54 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96 97static int 98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 99{ 100 u_long hdck, blck; 101 102 hdck = asc_ul(hd, hdlen, OCT); 103 blck = tar_chksm(bl, bllen); 104 105 if (hdck != blck) { 106 if (!quiet) 107 tty_warn(0, "Header checksum %lo does not match %lo", 108 hdck, blck); 109 return(-1); 110 } 111 return(0); 112} 113 114 115/* 116 * tar_endwr() 117 * add the tar trailer of two null blocks 118 * Return: 119 * 0 if ok, -1 otherwise (what wr_skip returns) 120 */ 121 122int 123tar_endwr(void) 124{ 125 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 126} 127 128/* 129 * tar_endrd() 130 * no cleanup needed here, just return size of trailer (for append) 131 * Return: 132 * size of trailer BLKMULT 133 */ 134 135off_t 136tar_endrd(void) 137{ 138 return((off_t)(NULLCNT*BLKMULT)); 139} 140 141/* 142 * tar_trail() 143 * Called to determine if a header block is a valid trailer. We are passed 144 * the block, the in_sync flag (which tells us we are in resync mode; 145 * looking for a valid header), and cnt (which starts at zero) which is 146 * used to count the number of empty blocks we have seen so far. 147 * Return: 148 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 149 * could never contain a header. 150 */ 151 152int 153tar_trail(char *buf, int in_resync, int *cnt) 154{ 155 int i; 156 157 /* 158 * look for all zero, trailer is two consecutive blocks of zero 159 */ 160 for (i = 0; i < BLKMULT; ++i) { 161 if (buf[i] != '\0') 162 break; 163 } 164 165 /* 166 * if not all zero it is not a trailer, but MIGHT be a header. 167 */ 168 if (i != BLKMULT) 169 return(-1); 170 171 /* 172 * When given a zero block, we must be careful! 173 * If we are not in resync mode, check for the trailer. 
Have to watch 174 * out that we do not mis-identify file data as the trailer, so we do 175 * NOT try to id a trailer during resync mode. During resync mode we 176 * might as well throw this block out since a valid header can NEVER be 177 * a block of all 0 (we must have a valid file name). 178 */ 179 if (!in_resync) { 180 ++*cnt; 181#if 0 182 /* 183 * old GNU tar (up through 1.13) only writes one block of 184 * trailers, so we pretend we got another 185 */ 186 if (is_gnutar) 187 ++*cnt; 188#endif 189 if (*cnt >= NULLCNT) 190 return(0); 191 } 192 return(1); 193} 194 195/* 196 * ul_oct() 197 * convert an unsigned long to an octal string. many oddball field 198 * termination characters are used by the various versions of tar in the 199 * different fields. term selects which kind to use. str is '0' padded 200 * at the front to len. we are unable to use only one format as many old 201 * tar readers are very cranky about this. 202 * Return: 203 * 0 if the number fit into the string, -1 otherwise 204 */ 205 206static int 207ul_oct(u_long val, char *str, int len, int term) 208{ 209 char *pt; 210 211 /* 212 * term selects the appropriate character(s) for the end of the string 213 */ 214 pt = str + len - 1; 215 switch(term) { 216 case 3: 217 *pt-- = '\0'; 218 break; 219 case 2: 220 *pt-- = ' '; 221 *pt-- = '\0'; 222 break; 223 case 1: 224 *pt-- = ' '; 225 break; 226 case 0: 227 default: 228 *pt-- = '\0'; 229 *pt-- = ' '; 230 break; 231 } 232 233 /* 234 * convert and blank pad if there is space 235 */ 236 while (pt >= str) { 237 *pt-- = '0' + (char)(val & 0x7); 238 if ((val = val >> 3) == (u_long)0) 239 break; 240 } 241 242 while (pt >= str) 243 *pt-- = '0'; 244 if (val != (u_long)0) 245 return(-1); 246 return(0); 247} 248 249#if !defined(NET2_STAT) && !defined(_LP64) 250/* 251 * ull_oct() 252 * convert an unsigned long long to an octal string. one of many oddball 253 * field termination characters are used by the various versions of tar 254 * in the different fields. 
term selects which kind to use. str is '0' 255 * padded at the front to len. we are unable to use only one format as 256 * many old tar readers are very cranky about this. 257 * Return: 258 * 0 if the number fit into the string, -1 otherwise 259 */ 260 261static int 262ull_oct(unsigned long long val, char *str, int len, int term) 263{ 264 char *pt; 265 266 /* 267 * term selects the appropriate character(s) for the end of the string 268 */ 269 pt = str + len - 1; 270 switch(term) { 271 case 3: 272 *pt-- = '\0'; 273 break; 274 case 2: 275 *pt-- = ' '; 276 *pt-- = '\0'; 277 break; 278 case 1: 279 *pt-- = ' '; 280 break; 281 case 0: 282 default: 283 *pt-- = '\0'; 284 *pt-- = ' '; 285 break; 286 } 287 288 /* 289 * convert and blank pad if there is space 290 */ 291 while (pt >= str) { 292 *pt-- = '0' + (char)(val & 0x7); 293 if ((val = val >> 3) == 0) 294 break; 295 } 296 297 while (pt >= str) 298 *pt-- = '0'; 299 if (val != (unsigned long long)0) 300 return(-1); 301 return(0); 302} 303#endif 304 305/* 306 * tar_chksm() 307 * calculate the checksum for a tar block counting the checksum field as 308 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 309 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 310 * pad headers with 0. 311 * Return: 312 * unsigned long checksum 313 */ 314 315static u_long 316tar_chksm(char *blk, int len) 317{ 318 char *stop; 319 char *pt; 320 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 321 322 /* 323 * add the part of the block before the checksum field 324 */ 325 pt = blk; 326 stop = blk + CHK_OFFSET; 327 while (pt < stop) 328 chksm += (u_long)(*pt++ & 0xff); 329 /* 330 * move past the checksum field and keep going, spec counts the 331 * checksum field as the sum of 8 blanks (which is pre-computed as 332 * BLNKSUM). 333 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 334 * starts, no point in summing zero's) 335 */ 336 pt += CHK_LEN; 337 stop = blk + len; 338 while (pt < stop) 339 chksm += (u_long)(*pt++ & 0xff); 340 return(chksm); 341} 342 343/* 344 * Routines for old BSD style tar (also made portable to sysV tar) 345 */ 346 347/* 348 * tar_id() 349 * determine if a block given to us is a valid tar header (and not a USTAR 350 * header). We have to be on the lookout for those pesky blocks of all 351 * zero's. 352 * Return: 353 * 0 if a tar header, -1 otherwise 354 */ 355 356int 357tar_id(char *blk, int size) 358{ 359 HD_TAR *hd; 360 HD_USTAR *uhd; 361 362 if (size < BLKMULT) 363 return(-1); 364 hd = (HD_TAR *)blk; 365 uhd = (HD_USTAR *)blk; 366 367 /* 368 * check for block of zero's first, a simple and fast test, then make 369 * sure this is not a ustar header by looking for the ustar magic 370 * cookie. We should use TMAGLEN, but some USTAR archive programs are 371 * wrong and create archives missing the \0. Last we check the 372 * checksum. If this is ok we have to assume it is a valid header. 
373 */ 374 if (hd->name[0] == '\0') 375 return(-1); 376 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 377 return(-1); 378 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 379} 380 381/* 382 * tar_opt() 383 * handle tar format specific -o options 384 * Return: 385 * 0 if ok -1 otherwise 386 */ 387 388int 389tar_opt(void) 390{ 391 OPLIST *opt; 392 393 while ((opt = opt_next()) != NULL) { 394 if (strcmp(opt->name, TAR_OPTION) || 395 strcmp(opt->value, TAR_NODIR)) { 396 tty_warn(1, 397 "Unknown tar format -o option/value pair %s=%s", 398 opt->name, opt->value); 399 tty_warn(1, 400 "%s=%s is the only supported tar format option", 401 TAR_OPTION, TAR_NODIR); 402 return(-1); 403 } 404 405 /* 406 * we only support one option, and only when writing 407 */ 408 if ((act != APPND) && (act != ARCHIVE)) { 409 tty_warn(1, "%s=%s is only supported when writing.", 410 opt->name, opt->value); 411 return(-1); 412 } 413 tar_nodir = 1; 414 } 415 return(0); 416} 417 418 419/* 420 * tar_rd() 421 * extract the values out of block already determined to be a tar header. 422 * store the values in the ARCHD parameter. 
423 * Return: 424 * 0 425 */ 426 427int 428tar_rd(ARCHD *arcn, char *buf) 429{ 430 HD_TAR *hd; 431 char *pt; 432 433 /* 434 * we only get proper sized buffers passed to us 435 */ 436 if (tar_id(buf, BLKMULT) < 0) 437 return(-1); 438 memset(arcn, 0, sizeof(*arcn)); 439 arcn->org_name = arcn->name; 440 arcn->pat = NULL; 441 arcn->sb.st_nlink = 1; 442 443 /* 444 * copy out the name and values in the stat buffer 445 */ 446 hd = (HD_TAR *)buf; 447 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 448 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 449 &gnu_name_string, hd->name, sizeof(hd->name)); 450 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 451 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 452 } 453 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 454 0xfff); 455 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 456 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 457 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 458 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 459 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 460 461 /* 462 * have to look at the last character, it may be a '/' and that is used 463 * to encode this as a directory 464 */ 465 pt = &(arcn->name[arcn->nlen - 1]); 466 arcn->pad = 0; 467 arcn->skip = 0; 468 switch(hd->linkflag) { 469 case SYMTYPE: 470 /* 471 * symbolic link, need to get the link name and set the type in 472 * the st_mode so -v printing will look correct. 473 */ 474 arcn->type = PAX_SLK; 475 arcn->sb.st_mode |= S_IFLNK; 476 break; 477 case LNKTYPE: 478 /* 479 * hard link, need to get the link name, set the type in the 480 * st_mode and st_nlink so -v printing will look better. 481 */ 482 arcn->type = PAX_HLK; 483 arcn->sb.st_nlink = 2; 484 485 /* 486 * no idea of what type this thing really points at, but 487 * we set something for printing only. 
488 */ 489 arcn->sb.st_mode |= S_IFREG; 490 break; 491 case LONGLINKTYPE: 492 arcn->type = PAX_GLL; 493 /* FALLTHROUGH */ 494 case LONGNAMETYPE: 495 /* 496 * GNU long link/file; we tag these here and let the 497 * pax internals deal with it -- too ugly otherwise. 498 */ 499 if (hd->linkflag != LONGLINKTYPE) 500 arcn->type = PAX_GLF; 501 arcn->pad = TAR_PAD(arcn->sb.st_size); 502 arcn->skip = arcn->sb.st_size; 503 break; 504 case AREGTYPE: 505 case REGTYPE: 506 case DIRTYPE: /* see below */ 507 default: 508 /* 509 * If we have a trailing / this is a directory and NOT a file. 510 * Note: V7 tar doesn't actually have DIRTYPE, but it was 511 * reported that V7 archives using USTAR directories do exist. 512 */ 513 if (*pt == '/' || hd->linkflag == DIRTYPE) { 514 /* 515 * it is a directory, set the mode for -v printing 516 */ 517 arcn->type = PAX_DIR; 518 arcn->sb.st_mode |= S_IFDIR; 519 arcn->sb.st_nlink = 2; 520 } else { 521 /* 522 * have a file that will be followed by data. Set the 523 * skip value to the size field and calculate the size 524 * of the padding. 525 */ 526 arcn->type = PAX_REG; 527 arcn->sb.st_mode |= S_IFREG; 528 arcn->pad = TAR_PAD(arcn->sb.st_size); 529 arcn->skip = arcn->sb.st_size; 530 } 531 break; 532 } 533 534 /* 535 * strip off any trailing slash. 536 */ 537 if (*pt == '/') { 538 *pt = '\0'; 539 --arcn->nlen; 540 } 541 return(0); 542} 543 544/* 545 * tar_wr() 546 * write a tar header for the file specified in the ARCHD to the archive. 547 * Have to check for file types that cannot be stored and file names that 548 * are too long. Be careful of the term (last arg) to ul_oct, each field 549 * of tar has it own spec for the termination character(s). 
550 * ASSUMED: space after header in header block is zero filled 551 * Return: 552 * 0 if file has data to be written after the header, 1 if file has NO 553 * data to write after the header, -1 if archive write failed 554 */ 555 556int 557tar_wr(ARCHD *arcn) 558{ 559 HD_TAR *hd; 560 int len; 561 char hdblk[sizeof(HD_TAR)]; 562 563 /* 564 * check for those file system types which tar cannot store 565 */ 566 switch(arcn->type) { 567 case PAX_DIR: 568 /* 569 * user asked that dirs not be written to the archive 570 */ 571 if (tar_nodir) 572 return(1); 573 break; 574 case PAX_CHR: 575 tty_warn(1, "Tar cannot archive a character device %s", 576 arcn->org_name); 577 return(1); 578 case PAX_BLK: 579 tty_warn(1, 580 "Tar cannot archive a block device %s", arcn->org_name); 581 return(1); 582 case PAX_SCK: 583 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 584 return(1); 585 case PAX_FIF: 586 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 587 return(1); 588 case PAX_SLK: 589 case PAX_HLK: 590 case PAX_HRG: 591 if (arcn->ln_nlen > sizeof(hd->linkname)) { 592 tty_warn(1,"Link name too long for tar %s", 593 arcn->ln_name); 594 return(1); 595 } 596 break; 597 case PAX_REG: 598 case PAX_CTG: 599 default: 600 break; 601 } 602 603 /* 604 * check file name len, remember extra char for dirs (the / at the end) 605 */ 606 len = arcn->nlen; 607 if (arcn->type == PAX_DIR) 608 ++len; 609 if (len >= sizeof(hd->name)) { 610 tty_warn(1, "File name too long for tar %s", arcn->name); 611 return(1); 612 } 613 614 /* 615 * copy the data out of the ARCHD into the tar header based on the type 616 * of the file. Remember many tar readers want the unused fields to be 617 * padded with zero. 
We set the linkflag field (type), the linkname 618 * (or zero if not used),the size, and set the padding (if any) to be 619 * added after the file data (0 for all other types, as they only have 620 * a header) 621 */ 622 memset(hdblk, 0, sizeof(hdblk)); 623 hd = (HD_TAR *)hdblk; 624 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 625 arcn->pad = 0; 626 627 if (arcn->type == PAX_DIR) { 628 /* 629 * directories are the same as files, except have a filename 630 * that ends with a /, we add the slash here. No data follows, 631 * dirs, so no pad. 632 */ 633 hd->linkflag = AREGTYPE; 634 hd->name[len-1] = '/'; 635 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 636 goto out; 637 } else if (arcn->type == PAX_SLK) { 638 /* 639 * no data follows this file, so no pad 640 */ 641 hd->linkflag = SYMTYPE; 642 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 643 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 644 goto out; 645 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 646 /* 647 * no data follows this file, so no pad 648 */ 649 hd->linkflag = LNKTYPE; 650 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 651 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 652 goto out; 653 } else { 654 /* 655 * data follows this file, so set the pad 656 */ 657 hd->linkflag = AREGTYPE; 658 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 659 tty_warn(1,"File is too large for tar %s", 660 arcn->org_name); 661 return(1); 662 } 663 arcn->pad = TAR_PAD(arcn->sb.st_size); 664 } 665 666 /* 667 * copy those fields that are independent of the type 668 */ 669 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 670 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 671 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 672 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 673 goto out; 674 675 /* 676 * calculate and add the checksum, then write the header. 
A return of 677 * 0 tells the caller to now write the file data, 1 says no data needs 678 * to be written 679 */ 680 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 681 sizeof(hd->chksum), 3)) 682 goto out; /* XXX Something's wrong here 683 * because a zero-byte file can 684 * cause this to be done and 685 * yet the resulting warning 686 * seems incorrect */ 687 688 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 689 return(-1); 690 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 691 return(-1); 692 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 693 return(0); 694 return(1); 695 696 out: 697 /* 698 * header field is out of range 699 */ 700 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 701 return(1); 702} 703 704/* 705 * Routines for POSIX ustar 706 */ 707 708/* 709 * ustar_strd() 710 * initialization for ustar read 711 * Return: 712 * 0 if ok, -1 otherwise 713 */ 714 715int 716ustar_strd(void) 717{ 718 return(0); 719} 720 721/* 722 * ustar_stwr() 723 * initialization for ustar write 724 * Return: 725 * 0 if ok, -1 otherwise 726 */ 727 728int 729ustar_stwr(void) 730{ 731 return(0); 732} 733 734/* 735 * ustar_id() 736 * determine if a block given to us is a valid ustar header. We have to 737 * be on the lookout for those pesky blocks of all zero's 738 * Return: 739 * 0 if a ustar header, -1 otherwise 740 */ 741 742int 743ustar_id(char *blk, int size) 744{ 745 HD_USTAR *hd; 746 747 if (size < BLKMULT) 748 return(-1); 749 hd = (HD_USTAR *)blk; 750 751 /* 752 * check for block of zero's first, a simple and fast test then check 753 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 754 * programs are fouled up and create archives missing the \0. Last we 755 * check the checksum. If ok we have to assume it is a valid header. 
756 */ 757 if (hd->name[0] == '\0') 758 return(-1); 759 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 760 return(-1); 761 /* This is GNU tar */ 762 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 763 !seen_gnu_warning) { 764 seen_gnu_warning = 1; 765 tty_warn(0, 766 "Trying to read GNU tar archive with extensions off"); 767 } 768 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 769} 770 771/* 772 * ustar_rd() 773 * extract the values out of block already determined to be a ustar header. 774 * store the values in the ARCHD parameter. 775 * Return: 776 * 0 777 */ 778 779int 780ustar_rd(ARCHD *arcn, char *buf) 781{ 782 HD_USTAR *hd; 783 char *dest; 784 int cnt; 785 dev_t devmajor; 786 dev_t devminor; 787 788 /* 789 * we only get proper sized buffers 790 */ 791 if (ustar_id(buf, BLKMULT) < 0) 792 return(-1); 793 794 memset(arcn, 0, sizeof(*arcn)); 795 arcn->org_name = arcn->name; 796 arcn->pat = NULL; 797 arcn->sb.st_nlink = 1; 798 hd = (HD_USTAR *)buf; 799 800 /* 801 * see if the filename is split into two parts. if, so joint the parts. 802 * we copy the prefix first and add a / between the prefix and name. 803 */ 804 dest = arcn->name; 805 if (*(hd->prefix) != '\0') { 806 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 807 dest += cnt; 808 *dest++ = '/'; 809 cnt++; 810 } else { 811 cnt = 0; 812 } 813 814 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 815 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 816 &gnu_name_string, hd->name, sizeof(hd->name)) + cnt; 817 arcn->ln_nlen = expandname(arcn->ln_name, 818 sizeof(arcn->ln_name), &gnu_link_string, hd->linkname, 819 sizeof(hd->linkname)); 820 } 821 822 /* 823 * follow the spec to the letter. we should only have mode bits, strip 824 * off all other crud we may be passed. 
825 */ 826 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 827 0xfff); 828 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 829 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 830 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 831 832 /* 833 * If we can find the ascii names for gname and uname in the password 834 * and group files we will use the uid's and gid they bind. Otherwise 835 * we use the uid and gid values stored in the header. (This is what 836 * the posix spec wants). 837 */ 838 hd->gname[sizeof(hd->gname) - 1] = '\0'; 839 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 840 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 841 hd->uname[sizeof(hd->uname) - 1] = '\0'; 842 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 843 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 844 845 /* 846 * set the defaults, these may be changed depending on the file type 847 */ 848 arcn->pad = 0; 849 arcn->skip = 0; 850 arcn->sb.st_rdev = (dev_t)0; 851 852 /* 853 * set the mode and PAX type according to the typeflag in the header 854 */ 855 switch(hd->typeflag) { 856 case FIFOTYPE: 857 arcn->type = PAX_FIF; 858 arcn->sb.st_mode |= S_IFIFO; 859 break; 860 case DIRTYPE: 861 arcn->type = PAX_DIR; 862 arcn->sb.st_mode |= S_IFDIR; 863 arcn->sb.st_nlink = 2; 864 865 /* 866 * Some programs that create ustar archives append a '/' 867 * to the pathname for directories. This clearly violates 868 * ustar specs, but we will silently strip it off anyway. 869 */ 870 if (arcn->name[arcn->nlen - 1] == '/') 871 arcn->name[--arcn->nlen] = '\0'; 872 break; 873 case BLKTYPE: 874 case CHRTYPE: 875 /* 876 * this type requires the rdev field to be set. 
877 */ 878 if (hd->typeflag == BLKTYPE) { 879 arcn->type = PAX_BLK; 880 arcn->sb.st_mode |= S_IFBLK; 881 } else { 882 arcn->type = PAX_CHR; 883 arcn->sb.st_mode |= S_IFCHR; 884 } 885 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 886 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 887 arcn->sb.st_rdev = TODEV(devmajor, devminor); 888 break; 889 case SYMTYPE: 890 case LNKTYPE: 891 if (hd->typeflag == SYMTYPE) { 892 arcn->type = PAX_SLK; 893 arcn->sb.st_mode |= S_IFLNK; 894 } else { 895 arcn->type = PAX_HLK; 896 /* 897 * so printing looks better 898 */ 899 arcn->sb.st_mode |= S_IFREG; 900 arcn->sb.st_nlink = 2; 901 } 902 break; 903 case LONGLINKTYPE: 904 if (is_gnutar) 905 arcn->type = PAX_GLL; 906 /* FALLTHROUGH */ 907 case LONGNAMETYPE: 908 if (is_gnutar) { 909 /* 910 * GNU long link/file; we tag these here and let the 911 * pax internals deal with it -- too ugly otherwise. 912 */ 913 if (hd->typeflag != LONGLINKTYPE) 914 arcn->type = PAX_GLF; 915 arcn->pad = TAR_PAD(arcn->sb.st_size); 916 arcn->skip = arcn->sb.st_size; 917 } else { 918 tty_warn(1, "GNU Long %s found in posix ustar archive.", 919 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 920 } 921 break; 922 case CONTTYPE: 923 case AREGTYPE: 924 case REGTYPE: 925 default: 926 /* 927 * these types have file data that follows. Set the skip and 928 * pad fields. 
929 */ 930 arcn->type = PAX_REG; 931 arcn->pad = TAR_PAD(arcn->sb.st_size); 932 arcn->skip = arcn->sb.st_size; 933 arcn->sb.st_mode |= S_IFREG; 934 break; 935 } 936 return(0); 937} 938 939static int 940expandname(char *buf, size_t len, char **gnu_name, const char *name, 941 size_t nlen) 942{ 943 if (*gnu_name) { 944 len = strlcpy(buf, *gnu_name, len); 945 free(*gnu_name); 946 *gnu_name = NULL; 947 } else { 948 if (len > ++nlen) 949 len = nlen; 950 len = strlcpy(buf, name, len); 951 } 952 return len; 953} 954 955static void 956longlink(ARCHD *arcn) 957{ 958 ARCHD larc; 959 960 memset(&larc, 0, sizeof(larc)); 961 962 switch (arcn->type) { 963 case PAX_SLK: 964 case PAX_HRG: 965 case PAX_HLK: 966 larc.type = PAX_GLL; 967 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 968 sizeof(larc.ln_name)); 969 gnu_hack_string = arcn->ln_name; 970 gnu_hack_len = arcn->ln_nlen + 1; 971 break; 972 default: 973 larc.nlen = strlcpy(larc.name, "././@LongLink", 974 sizeof(larc.name)); 975 gnu_hack_string = arcn->name; 976 gnu_hack_len = arcn->nlen + 1; 977 larc.type = PAX_GLF; 978 } 979 /* 980 * We need a longlink now. 981 */ 982 ustar_wr(&larc); 983} 984 985/* 986 * ustar_wr() 987 * write a ustar header for the file specified in the ARCHD to the archive 988 * Have to check for file types that cannot be stored and file names that 989 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 990 * '\0' for the termination character (this is different than picky tar) 991 * ASSUMED: space after header in header block is zero filled 992 * Return: 993 * 0 if file has data to be written after the header, 1 if file has NO 994 * data to write after the header, -1 if archive write failed 995 */ 996 997int 998ustar_wr(ARCHD *arcn) 999{ 1000 HD_USTAR *hd; 1001 char *pt; 1002 char hdblk[sizeof(HD_USTAR)]; 1003 const char *user, *group; 1004 1005 /* 1006 * check for those file system types ustar cannot store 1007 */ 1008 if (arcn->type == PAX_SCK) { 1009 if (!is_gnutar) 1010 tty_warn(1, "Ustar cannot archive a socket %s", 1011 arcn->org_name); 1012 return(1); 1013 } 1014 1015 /* 1016 * check the length of the linkname 1017 */ 1018 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1019 (arcn->type == PAX_HRG)) && 1020 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1021 if (is_gnutar) { 1022 longlink(arcn); 1023 } else { 1024 tty_warn(1, "Link name too long for ustar %s", 1025 arcn->ln_name); 1026 return(1); 1027 } 1028 } 1029 1030 /* 1031 * split the path name into prefix and name fields (if needed). 
if 1032 * pt != arcn->name, the name has to be split 1033 */ 1034 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1035 if (is_gnutar) { 1036 longlink(arcn); 1037 pt = arcn->name; 1038 } else { 1039 tty_warn(1, "File name too long for ustar %s", 1040 arcn->name); 1041 return(1); 1042 } 1043 } 1044 1045 /* 1046 * zero out the header so we don't have to worry about zero fill below 1047 */ 1048 memset(hdblk, 0, sizeof(hdblk)); 1049 hd = (HD_USTAR *)hdblk; 1050 arcn->pad = 0L; 1051 1052 /* 1053 * split the name, or zero out the prefix 1054 */ 1055 if (pt != arcn->name) { 1056 /* 1057 * name was split, pt points at the / where the split is to 1058 * occur, we remove the / and copy the first part to the prefix 1059 */ 1060 *pt = '\0'; 1061 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1062 *pt++ = '/'; 1063 } 1064 1065 /* 1066 * copy the name part. this may be the whole path or the part after 1067 * the prefix 1068 */ 1069 strlcpy(hd->name, pt, sizeof(hd->name)); 1070 1071 /* 1072 * set the fields in the header that are type dependent 1073 */ 1074 switch(arcn->type) { 1075 case PAX_DIR: 1076 hd->typeflag = DIRTYPE; 1077 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1078 goto out; 1079 break; 1080 case PAX_CHR: 1081 case PAX_BLK: 1082 if (arcn->type == PAX_CHR) 1083 hd->typeflag = CHRTYPE; 1084 else 1085 hd->typeflag = BLKTYPE; 1086 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1087 sizeof(hd->devmajor), 3) || 1088 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1089 sizeof(hd->devminor), 3) || 1090 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1091 goto out; 1092 break; 1093 case PAX_FIF: 1094 hd->typeflag = FIFOTYPE; 1095 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1096 goto out; 1097 break; 1098 case PAX_GLL: 1099 case PAX_SLK: 1100 case PAX_HLK: 1101 case PAX_HRG: 1102 if (arcn->type == PAX_SLK) 1103 hd->typeflag = SYMTYPE; 1104 else if (arcn->type == PAX_GLL) 1105 hd->typeflag = LONGLINKTYPE; 1106 else 1107 
hd->typeflag = LNKTYPE; 1108 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1109 if (ul_oct((u_long)gnu_hack_len, hd->size, 1110 sizeof(hd->size), 3)) 1111 goto out; 1112 break; 1113 case PAX_GLF: 1114 case PAX_REG: 1115 case PAX_CTG: 1116 default: 1117 /* 1118 * file data with this type, set the padding 1119 */ 1120 if (arcn->type == PAX_GLF) { 1121 hd->typeflag = LONGNAMETYPE; 1122 arcn->pad = TAR_PAD(gnu_hack_len); 1123 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1124 sizeof(hd->size), 3)) { 1125 tty_warn(1,"File is too long for ustar %s", 1126 arcn->org_name); 1127 return(1); 1128 } 1129 } else { 1130 if (arcn->type == PAX_CTG) 1131 hd->typeflag = CONTTYPE; 1132 else 1133 hd->typeflag = REGTYPE; 1134 arcn->pad = TAR_PAD(arcn->sb.st_size); 1135 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1136 sizeof(hd->size), 3)) { 1137 tty_warn(1,"File is too long for ustar %s", 1138 arcn->org_name); 1139 return(1); 1140 } 1141 } 1142 break; 1143 } 1144 1145 strncpy(hd->magic, TMAGIC, TMAGLEN); 1146 if (is_gnutar) 1147 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1148 else 1149 strncpy(hd->version, TVERSION, TVERSLEN); 1150 1151 /* 1152 * set the remaining fields. Some versions want all 16 bits of mode 1153 * we better humor them (they really do not meet spec though).... 1154 */ 1155 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1156 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1157 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1158 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1159 goto out; 1160 user = user_from_uid(arcn->sb.st_uid, 1); 1161 group = group_from_gid(arcn->sb.st_gid, 1); 1162 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1163 strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); 1164 1165 /* 1166 * calculate and store the checksum write the header to the archive 1167 * return 0 tells the caller to now write the file data, 1 says no data 1168 * needs to be written 1169 */ 1170 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1171 sizeof(hd->chksum), 3)) 1172 goto out; 1173 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1174 return(-1); 1175 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1176 return(-1); 1177 if (gnu_hack_string) { 1178 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1179 int pad = gnu_hack_len; 1180 gnu_hack_string = NULL; 1181 gnu_hack_len = 0; 1182 if (res < 0) 1183 return(-1); 1184 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1185 return(-1); 1186 } 1187 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1188 return(0); 1189 return(1); 1190 1191 out: 1192 /* 1193 * header field is out of range 1194 */ 1195 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1196 return(1); 1197} 1198 1199/* 1200 * name_split() 1201 * see if the name has to be split for storage in a ustar header. We try 1202 * to fit the entire name in the name field without splitting if we can. 1203 * The split point is always at a / 1204 * Return 1205 * character pointer to split point (always the / that is to be removed 1206 * if the split is not needed, the points is set to the start of the file 1207 * name (it would violate the spec to split there). A NULL is returned if 1208 * the file name is too long 1209 */ 1210 1211static char * 1212name_split(char *name, int len) 1213{ 1214 char *start; 1215 1216 /* 1217 * check to see if the file name is small enough to fit in the name 1218 * field. if so just return a pointer to the name. 1219 */ 1220 if (len < TNMSZ) 1221 return(name); 1222 if (len > (TPFSZ + TNMSZ)) 1223 return(NULL); 1224 1225 /* 1226 * we start looking at the biggest sized piece that fits in the name 1227 * field. 
We walk forward looking for a slash to split at. The idea is 1228 * to find the biggest piece to fit in the name field (or the smallest 1229 * prefix we can find) (the -1 is correct the biggest piece would 1230 * include the slash between the two parts that gets thrown away) 1231 */ 1232 start = name + len - TNMSZ; 1233 while ((*start != '\0') && (*start != '/')) 1234 ++start; 1235 1236 /* 1237 * if we hit the end of the string, this name cannot be split, so we 1238 * cannot store this file. 1239 */ 1240 if (*start == '\0') 1241 return(NULL); 1242 len = start - name; 1243 1244 /* 1245 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1246 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1247 * the file would then expand on extract to //str. The len == 0 below 1248 * makes this special case follow the spec to the letter. 1249 */ 1250 if ((len >= TPFSZ) || (len == 0)) 1251 return(NULL); 1252 1253 /* 1254 * ok have a split point, return it to the caller 1255 */ 1256 return(start); 1257} 1258 1259/* 1260 * convert a glob into a RE, and add it to the list. 
/*
 * tar_gnutar_exclude_one()
 *	convert a glob into a RE, and add it to the list.  we convert to
 *	four different RE's (because we're using BRE's and can't use |
 *	alternation) with this padding around the converted glob:
 *		.*\/	and	$
 *		.*\/	and	\/.*
 *		^	and	$
 *		^	and	\/.*
 *	The first two are not generated when the glob starts with a /.
 *	Each RE is handed to rep_add() as "/RE//", i.e. with an empty
 *	replacement.
 *
 *	line need not be nul terminated; only the first len bytes are used
 *	and a single trailing newline is ignored.
 * Return:
 *	0 if all RE's were added (or the pattern was empty and was skipped),
 *	-1 if the pattern is longer than MAXPATHLEN or rep_add() failed.
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/* every input byte expands to at most two bytes, plus the nul */
	char sbuf[MAXPATHLEN * 2 + 1];
	/* converted glob + the longest padding (11 bytes) + nul */
	char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4];
	size_t i, j;
	unsigned char c;

	/* never look at line[len - 1] when there is nothing to look at */
	if (len > 0 && line[len - 1] == '\n')
		len--;

	/*
	 * an empty glob names no file; turning it into RE's would produce
	 * patterns such as ^\/.* that exclude far more than intended
	 */
	if (len == 0)
		return (0);

	/* sbuf and rabuf are sized for at most MAXPATHLEN input bytes */
	if (len > MAXPATHLEN)
		return (-1);

	/*
	 * convert glob to regexp, escaping everything.  The padding is
	 * supplied by the format strings below, so sbuf holds only the
	 * converted glob itself.
	 */
	j = 0;
	for (i = 0; i < len; i++) {
		/* <ctype.h> routines require an unsigned char value */
		c = (unsigned char)line[i];

		if (c == '?') {
			/* ? matches exactly one character */
			sbuf[j++] = '.';
			continue;
		}
		if (c == '*')
			sbuf[j++] = '.';
		else if (!isalnum(c) && !isblank(c))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}
	sbuf[j] = '\0';

	/* don't need the .*\/ ones if we start with /, i guess */
	if (line[0] != '/') {
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf);
		if (rep_add(rabuf) < 0)
			return (-1);
	}

	(void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);
	(void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf);
	if (rep_add(rabuf) < 0)
		return (-1);

	return (0);
}

/*
 * deal with GNU tar -X/--exclude-from & --exclude switches.  basically,
 * we go through each line of the file, building a string from the "glob"
 * lines in the file into RE lines, of the form `/^RE$//', which we pass
 * to rep_add(), which will add an empty replacement (exclusion), for the
 * named files.
 */
/*
 * tar_gnutar_minus_minus_exclude()
 *	handle a single GNU tar --exclude argument: the glob in path is
 *	passed to tar_gnutar_exclude_one() to be registered as an exclusion.
 *	A pattern longer than MAXPATHLEN is warned about and ignored, as the
 *	conversion buffers cannot hold it.
 * Return:
 *	0 if the pattern was added or ignored, otherwise the (non-zero)
 *	result of tar_gnutar_exclude_one().
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (0);
	}

	return (tar_gnutar_exclude_one(path, len));
}

/*
 * tar_gnutar_X_compat()
 *	handle GNU tar -X/--exclude-from: read the file named by path and
 *	pass every line to tar_gnutar_exclude_one().  Lines longer than
 *	MAXPATHLEN are warned about and skipped.  The file is closed again
 *	on every path out of this routine.
 * Return:
 *	0 if every usable line was added, -1 if the file could not be opened
 *	or read, or if a line could not be added.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return (-1);
	}

	/* fgetln() lines are not nul terminated; len includes the newline */
	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/* fgetln() also returns NULL on a read error, not only at EOF */
	if (rval == 0 && ferror(fp)) {
		tty_warn(1, "cannot read %s: %s", path, strerror(errno));
		rval = -1;
	}

	(void)fclose(fp);
	return (rval);
}