tar.c revision 1.44
1/* $NetBSD: tar.c,v 1.44 2003/10/13 07:41:22 agc Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 36#include <sys/cdefs.h> 37#if defined(__RCSID) && !defined(lint) 38#if 0 39static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 40#else 41__RCSID("$NetBSD: tar.c,v 1.44 2003/10/13 07:41:22 agc Exp $"); 42#endif 43#endif /* not lint */ 44 45#include <sys/types.h> 46#include <sys/time.h> 47#include <sys/stat.h> 48#include <sys/param.h> 49 50#include <ctype.h> 51#include <errno.h> 52#include <grp.h> 53#include <pwd.h> 54#include <stdio.h> 55#include <stdlib.h> 56#include <string.h> 57#include <unistd.h> 58 59#include "pax.h" 60#include "extern.h" 61#include "tar.h" 62 63/* 64 * Routines for reading, writing and header identify of various versions of tar 65 */ 66 67static int expandname(char *, size_t, char **, const char *, size_t); 68static void longlink(ARCHD *); 69static u_long tar_chksm(char *, int); 70static char *name_split(char *, int); 71static int ul_oct(u_long, char *, int, int); 72#if !defined(NET2_STAT) && !defined(_LP64) 73static int ull_oct(unsigned long long, char *, int, int); 74#endif 75static int tar_gnutar_exclude_one(const char *, size_t); 76static int check_sum(char *, size_t, char *, size_t); 77 78/* 79 * Routines common to all versions of tar 80 */ 81 82static int tar_nodir; /* do not write dirs under old tar */ 83int is_gnutar; /* behave like gnu tar; enable gnu 84 * extensions and skip end-ofvolume 85 * checks 86 */ 87static int seen_gnu_warning; /* Have we warned yet? */ 88static char *gnu_hack_string; /* ././@LongLink hackery */ 89static int gnu_hack_len; /* len of gnu_hack_string */ 90char *gnu_name_string; /* ././@LongLink hackery name */ 91char *gnu_link_string; /* ././@LongLink hackery link */ 92 93static int 94check_sum(char *hd, size_t hdlen, char *bl, size_t bllen) 95{ 96 u_long hdck, blck; 97 98 hdck = asc_ul(hd, hdlen, OCT); 99 blck = tar_chksm(bl, bllen); 100 101 if (hdck != blck) { 102 tty_warn(0, "Header checksum %lo does not match %lo", 103 hdck, blck); 104 return(-1); 105 } 106 return(0); 107} 108 109 110/* 111 * tar_endwr() 112 * add the tar trailer of two null blocks 113 * Return: 114 * 0 if ok, -1 otherwise (what wr_skip returns) 115 */ 116 117int 118tar_endwr(void) 119{ 120 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 121} 122 123/* 124 * tar_endrd() 125 * no cleanup needed here, just return size of trailer (for append) 126 * Return: 127 * size of trailer (2 * BLKMULT) 128 */ 129 130off_t 131tar_endrd(void) 132{ 133 return((off_t)(NULLCNT*BLKMULT)); 134} 135 136/* 137 * tar_trail() 138 * Called to determine if a header block is a valid trailer. We are passed 139 * the block, the in_sync flag (which tells us we are in resync mode; 140 * looking for a valid header), and cnt (which starts at zero) which is 141 * used to count the number of empty blocks we have seen so far. 142 * Return: 143 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 144 * could never contain a header. 145 */ 146 147int 148tar_trail(char *buf, int in_resync, int *cnt) 149{ 150 int i; 151 152 /* 153 * look for all zero, trailer is two consecutive blocks of zero 154 */ 155 for (i = 0; i < BLKMULT; ++i) { 156 if (buf[i] != '\0') 157 break; 158 } 159 160 /* 161 * if not all zero it is not a trailer, but MIGHT be a header. 162 */ 163 if (i != BLKMULT) 164 return(-1); 165 166 /* 167 * When given a zero block, we must be careful! 168 * If we are not in resync mode, check for the trailer. Have to watch 169 * out that we do not mis-identify file data as the trailer, so we do 170 * NOT try to id a trailer during resync mode. During resync mode we 171 * might as well throw this block out since a valid header can NEVER be 172 * a block of all 0 (we must have a valid file name). 173 */ 174 if (!in_resync) { 175 ++*cnt; 176 /* 177 * old GNU tar (up through 1.13) only writes one block of 178 * trailers, so we pretend we got another 179 */ 180 if (is_gnutar) 181 ++*cnt; 182 if (*cnt >= NULLCNT) 183 return(0); 184 } 185 return(1); 186} 187 188/* 189 * ul_oct() 190 * convert an unsigned long to an octal string. many oddball field 191 * termination characters are used by the various versions of tar in the 192 * different fields. term selects which kind to use. str is '0' padded 193 * at the front to len. we are unable to use only one format as many old 194 * tar readers are very cranky about this. 195 * Return: 196 * 0 if the number fit into the string, -1 otherwise 197 */ 198 199static int 200ul_oct(u_long val, char *str, int len, int term) 201{ 202 char *pt; 203 204 /* 205 * term selects the appropriate character(s) for the end of the string 206 */ 207 pt = str + len - 1; 208 switch(term) { 209 case 3: 210 *pt-- = '\0'; 211 break; 212 case 2: 213 *pt-- = ' '; 214 *pt-- = '\0'; 215 break; 216 case 1: 217 *pt-- = ' '; 218 break; 219 case 0: 220 default: 221 *pt-- = '\0'; 222 *pt-- = ' '; 223 break; 224 } 225 226 /* 227 * convert and blank pad if there is space 228 */ 229 while (pt >= str) { 230 *pt-- = '0' + (char)(val & 0x7); 231 if ((val = val >> 3) == (u_long)0) 232 break; 233 } 234 235 while (pt >= str) 236 *pt-- = '0'; 237 if (val != (u_long)0) 238 return(-1); 239 return(0); 240} 241 242#if !defined(NET2_STAT) && !defined(_LP64) 243/* 244 * ull_oct() 245 * convert an unsigned long long to an octal string. one of many oddball 246 * field termination characters are used by the various versions of tar 247 * in the different fields. term selects which kind to use. str is '0' 248 * padded at the front to len. we are unable to use only one format as 249 * many old tar readers are very cranky about this. 250 * Return: 251 * 0 if the number fit into the string, -1 otherwise 252 */ 253 254static int 255ull_oct(unsigned long long val, char *str, int len, int term) 256{ 257 char *pt; 258 259 /* 260 * term selects the appropriate character(s) for the end of the string 261 */ 262 pt = str + len - 1; 263 switch(term) { 264 case 3: 265 *pt-- = '\0'; 266 break; 267 case 2: 268 *pt-- = ' '; 269 *pt-- = '\0'; 270 break; 271 case 1: 272 *pt-- = ' '; 273 break; 274 case 0: 275 default: 276 *pt-- = '\0'; 277 *pt-- = ' '; 278 break; 279 } 280 281 /* 282 * convert and blank pad if there is space 283 */ 284 while (pt >= str) { 285 *pt-- = '0' + (char)(val & 0x7); 286 if ((val = val >> 3) == 0) 287 break; 288 } 289 290 while (pt >= str) 291 *pt-- = '0'; 292 if (val != (unsigned long long)0) 293 return(-1); 294 return(0); 295} 296#endif 297 298/* 299 * tar_chksm() 300 * calculate the checksum for a tar block counting the checksum field as 301 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 302 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 303 * pad headers with 0. 304 * Return: 305 * unsigned long checksum 306 */ 307 308static u_long 309tar_chksm(char *blk, int len) 310{ 311 char *stop; 312 char *pt; 313 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 314 315 /* 316 * add the part of the block before the checksum field 317 */ 318 pt = blk; 319 stop = blk + CHK_OFFSET; 320 while (pt < stop) 321 chksm += (u_long)(*pt++ & 0xff); 322 /* 323 * move past the checksum field and keep going, spec counts the 324 * checksum field as the sum of 8 blanks (which is pre-computed as 325 * BLNKSUM). 326 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding 327 * starts, no point in summing zero's) 328 */ 329 pt += CHK_LEN; 330 stop = blk + len; 331 while (pt < stop) 332 chksm += (u_long)(*pt++ & 0xff); 333 return(chksm); 334} 335 336/* 337 * Routines for old BSD style tar (also made portable to sysV tar) 338 */ 339 340/* 341 * tar_id() 342 * determine if a block given to us is a valid tar header (and not a USTAR 343 * header). We have to be on the lookout for those pesky blocks of all 344 * zero's. 345 * Return: 346 * 0 if a tar header, -1 otherwise 347 */ 348 349int 350tar_id(char *blk, int size) 351{ 352 HD_TAR *hd; 353 HD_USTAR *uhd; 354 355 if (size < BLKMULT) 356 return(-1); 357 hd = (HD_TAR *)blk; 358 uhd = (HD_USTAR *)blk; 359 360 /* 361 * check for block of zero's first, a simple and fast test, then make 362 * sure this is not a ustar header by looking for the ustar magic 363 * cookie. We should use TMAGLEN, but some USTAR archive programs are 364 * wrong and create archives missing the \0. Last we check the 365 * checksum. If this is ok we have to assume it is a valid header. 366 */ 367 if (hd->name[0] == '\0') 368 return(-1); 369 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 370 return(-1); 371 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 372} 373 374/* 375 * tar_opt() 376 * handle tar format specific -o options 377 * Return: 378 * 0 if ok -1 otherwise 379 */ 380 381int 382tar_opt(void) 383{ 384 OPLIST *opt; 385 386 while ((opt = opt_next()) != NULL) { 387 if (strcmp(opt->name, TAR_OPTION) || 388 strcmp(opt->value, TAR_NODIR)) { 389 tty_warn(1, 390 "Unknown tar format -o option/value pair %s=%s", 391 opt->name, opt->value); 392 tty_warn(1, 393 "%s=%s is the only supported tar format option", 394 TAR_OPTION, TAR_NODIR); 395 return(-1); 396 } 397 398 /* 399 * we only support one option, and only when writing 400 */ 401 if ((act != APPND) && (act != ARCHIVE)) { 402 tty_warn(1, "%s=%s is only supported when writing.", 403 opt->name, opt->value); 404 return(-1); 405 } 406 tar_nodir = 1; 407 } 408 return(0); 409} 410 411 412/* 413 * tar_rd() 414 * extract the values out of block already determined to be a tar header. 415 * store the values in the ARCHD parameter. 416 * Return: 417 * 0 418 */ 419 420int 421tar_rd(ARCHD *arcn, char *buf) 422{ 423 HD_TAR *hd; 424 char *pt; 425 426 /* 427 * we only get proper sized buffers passed to us 428 */ 429 if (tar_id(buf, BLKMULT) < 0) 430 return(-1); 431 memset(arcn, 0, sizeof(*arcn)); 432 arcn->org_name = arcn->name; 433 arcn->pat = NULL; 434 arcn->sb.st_nlink = 1; 435 436 /* 437 * copy out the name and values in the stat buffer 438 */ 439 hd = (HD_TAR *)buf; 440 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 441 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 442 &gnu_name_string, hd->name, sizeof(hd->name)); 443 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 444 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 445 } 446 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 447 0xfff); 448 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 449 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 450 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 451 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 452 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 453 454 /* 455 * have to look at the last character, it may be a '/' and that is used 456 * to encode this as a directory 457 */ 458 pt = &(arcn->name[arcn->nlen - 1]); 459 arcn->pad = 0; 460 arcn->skip = 0; 461 switch(hd->linkflag) { 462 case SYMTYPE: 463 /* 464 * symbolic link, need to get the link name and set the type in 465 * the st_mode so -v printing will look correct. 466 */ 467 arcn->type = PAX_SLK; 468 arcn->sb.st_mode |= S_IFLNK; 469 break; 470 case LNKTYPE: 471 /* 472 * hard link, need to get the link name, set the type in the 473 * st_mode and st_nlink so -v printing will look better. 474 */ 475 arcn->type = PAX_HLK; 476 arcn->sb.st_nlink = 2; 477 478 /* 479 * no idea of what type this thing really points at, but 480 * we set something for printing only. 481 */ 482 arcn->sb.st_mode |= S_IFREG; 483 break; 484 case LONGLINKTYPE: 485 arcn->type = PAX_GLL; 486 /* FALLTHROUGH */ 487 case LONGNAMETYPE: 488 /* 489 * GNU long link/file; we tag these here and let the 490 * pax internals deal with it -- too ugly otherwise. 491 */ 492 if (hd->linkflag != LONGLINKTYPE) 493 arcn->type = PAX_GLF; 494 arcn->pad = TAR_PAD(arcn->sb.st_size); 495 arcn->skip = arcn->sb.st_size; 496 break; 497 case AREGTYPE: 498 case REGTYPE: 499 case DIRTYPE: /* see below */ 500 default: 501 /* 502 * If we have a trailing / this is a directory and NOT a file. 503 * Note: V7 tar doesn't actually have DIRTYPE, but it was 504 * reported that V7 archives using USTAR directories do exist. 505 */ 506 if (*pt == '/' || hd->linkflag == DIRTYPE) { 507 /* 508 * it is a directory, set the mode for -v printing 509 */ 510 arcn->type = PAX_DIR; 511 arcn->sb.st_mode |= S_IFDIR; 512 arcn->sb.st_nlink = 2; 513 } else { 514 /* 515 * have a file that will be followed by data. Set the 516 * skip value to the size field and calculate the size 517 * of the padding. 518 */ 519 arcn->type = PAX_REG; 520 arcn->sb.st_mode |= S_IFREG; 521 arcn->pad = TAR_PAD(arcn->sb.st_size); 522 arcn->skip = arcn->sb.st_size; 523 } 524 break; 525 } 526 527 /* 528 * strip off any trailing slash. 529 */ 530 if (*pt == '/') { 531 *pt = '\0'; 532 --arcn->nlen; 533 } 534 return(0); 535} 536 537/* 538 * tar_wr() 539 * write a tar header for the file specified in the ARCHD to the archive. 540 * Have to check for file types that cannot be stored and file names that 541 * are too long. Be careful of the term (last arg) to ul_oct, each field 542 * of tar has it own spec for the termination character(s). 543 * ASSUMED: space after header in header block is zero filled 544 * Return: 545 * 0 if file has data to be written after the header, 1 if file has NO 546 * data to write after the header, -1 if archive write failed 547 */ 548 549int 550tar_wr(ARCHD *arcn) 551{ 552 HD_TAR *hd; 553 int len; 554 char hdblk[sizeof(HD_TAR)]; 555 556 /* 557 * check for those file system types which tar cannot store 558 */ 559 switch(arcn->type) { 560 case PAX_DIR: 561 /* 562 * user asked that dirs not be written to the archive 563 */ 564 if (tar_nodir) 565 return(1); 566 break; 567 case PAX_CHR: 568 tty_warn(1, "Tar cannot archive a character device %s", 569 arcn->org_name); 570 return(1); 571 case PAX_BLK: 572 tty_warn(1, 573 "Tar cannot archive a block device %s", arcn->org_name); 574 return(1); 575 case PAX_SCK: 576 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 577 return(1); 578 case PAX_FIF: 579 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 580 return(1); 581 case PAX_SLK: 582 case PAX_HLK: 583 case PAX_HRG: 584 if (arcn->ln_nlen > sizeof(hd->linkname)) { 585 tty_warn(1,"Link name too long for tar %s", 586 arcn->ln_name); 587 return(1); 588 } 589 break; 590 case PAX_REG: 591 case PAX_CTG: 592 default: 593 break; 594 } 595 596 /* 597 * check file name len, remember extra char for dirs (the / at the end) 598 */ 599 len = arcn->nlen; 600 if (arcn->type == PAX_DIR) 601 ++len; 602 if (len >= sizeof(hd->name)) { 603 tty_warn(1, "File name too long for tar %s", arcn->name); 604 return(1); 605 } 606 607 /* 608 * copy the data out of the ARCHD into the tar header based on the type 609 * of the file. Remember many tar readers want the unused fields to be 610 * padded with zero. We set the linkflag field (type), the linkname 611 * (or zero if not used),the size, and set the padding (if any) to be 612 * added after the file data (0 for all other types, as they only have 613 * a header) 614 */ 615 memset(hdblk, 0, sizeof(hdblk)); 616 hd = (HD_TAR *)hdblk; 617 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 618 arcn->pad = 0; 619 620 if (arcn->type == PAX_DIR) { 621 /* 622 * directories are the same as files, except have a filename 623 * that ends with a /, we add the slash here. No data follows, 624 * dirs, so no pad. 625 */ 626 hd->linkflag = AREGTYPE; 627 hd->name[len-1] = '/'; 628 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 629 goto out; 630 } else if (arcn->type == PAX_SLK) { 631 /* 632 * no data follows this file, so no pad 633 */ 634 hd->linkflag = SYMTYPE; 635 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 636 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 637 goto out; 638 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 639 /* 640 * no data follows this file, so no pad 641 */ 642 hd->linkflag = LNKTYPE; 643 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 644 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 645 goto out; 646 } else { 647 /* 648 * data follows this file, so set the pad 649 */ 650 hd->linkflag = AREGTYPE; 651 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 652 tty_warn(1,"File is too large for tar %s", 653 arcn->org_name); 654 return(1); 655 } 656 arcn->pad = TAR_PAD(arcn->sb.st_size); 657 } 658 659 /* 660 * copy those fields that are independent of the type 661 */ 662 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 663 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 664 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 665 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 666 goto out; 667 668 /* 669 * calculate and add the checksum, then write the header. A return of 670 * 0 tells the caller to now write the file data, 1 says no data needs 671 * to be written 672 */ 673 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 674 sizeof(hd->chksum), 3)) 675 goto out; /* XXX Something's wrong here 676 * because a zero-byte file can 677 * cause this to be done and 678 * yet the resulting warning 679 * seems incorrect */ 680 681 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 682 return(-1); 683 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 684 return(-1); 685 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 686 return(0); 687 return(1); 688 689 out: 690 /* 691 * header field is out of range 692 */ 693 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 694 return(1); 695} 696 697/* 698 * Routines for POSIX ustar 699 */ 700 701/* 702 * ustar_strd() 703 * initialization for ustar read 704 * Return: 705 * 0 if ok, -1 otherwise 706 */ 707 708int 709ustar_strd(void) 710{ 711 return(0); 712} 713 714/* 715 * ustar_stwr() 716 * initialization for ustar write 717 * Return: 718 * 0 if ok, -1 otherwise 719 */ 720 721int 722ustar_stwr(void) 723{ 724 return(0); 725} 726 727/* 728 * ustar_id() 729 * determine if a block given to us is a valid ustar header. We have to 730 * be on the lookout for those pesky blocks of all zero's 731 * Return: 732 * 0 if a ustar header, -1 otherwise 733 */ 734 735int 736ustar_id(char *blk, int size) 737{ 738 HD_USTAR *hd; 739 740 if (size < BLKMULT) 741 return(-1); 742 hd = (HD_USTAR *)blk; 743 744 /* 745 * check for block of zero's first, a simple and fast test then check 746 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 747 * programs are fouled up and create archives missing the \0. Last we 748 * check the checksum. If ok we have to assume it is a valid header. 749 */ 750 if (hd->name[0] == '\0') 751 return(-1); 752 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 753 return(-1); 754 /* This is GNU tar */ 755 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 756 !seen_gnu_warning) { 757 seen_gnu_warning = 1; 758 tty_warn(0, 759 "Trying to read GNU tar archive with extensions off"); 760 } 761 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 762} 763 764/* 765 * ustar_rd() 766 * extract the values out of block already determined to be a ustar header. 767 * store the values in the ARCHD parameter. 768 * Return: 769 * 0 770 */ 771 772int 773ustar_rd(ARCHD *arcn, char *buf) 774{ 775 HD_USTAR *hd; 776 char *dest; 777 int cnt; 778 dev_t devmajor; 779 dev_t devminor; 780 781 /* 782 * we only get proper sized buffers 783 */ 784 if (ustar_id(buf, BLKMULT) < 0) 785 return(-1); 786 787 memset(arcn, 0, sizeof(*arcn)); 788 arcn->org_name = arcn->name; 789 arcn->pat = NULL; 790 arcn->sb.st_nlink = 1; 791 hd = (HD_USTAR *)buf; 792 793 /* 794 * see if the filename is split into two parts. if, so joint the parts. 795 * we copy the prefix first and add a / between the prefix and name. 796 */ 797 dest = arcn->name; 798 if (*(hd->prefix) != '\0') { 799 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 800 dest += cnt; 801 *dest++ = '/'; 802 cnt++; 803 } else { 804 cnt = 0; 805 } 806 807 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 808 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 809 &gnu_name_string, hd->name, sizeof(hd->name)); 810 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 811 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 812 } 813 814 /* 815 * follow the spec to the letter. we should only have mode bits, strip 816 * off all other crud we may be passed. 817 */ 818 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 819 0xfff); 820 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 821 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 822 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 823 824 /* 825 * If we can find the ascii names for gname and uname in the password 826 * and group files we will use the uid's and gid they bind. Otherwise 827 * we use the uid and gid values stored in the header. (This is what 828 * the posix spec wants). 829 */ 830 hd->gname[sizeof(hd->gname) - 1] = '\0'; 831 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 832 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 833 hd->uname[sizeof(hd->uname) - 1] = '\0'; 834 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 835 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 836 837 /* 838 * set the defaults, these may be changed depending on the file type 839 */ 840 arcn->pad = 0; 841 arcn->skip = 0; 842 arcn->sb.st_rdev = (dev_t)0; 843 844 /* 845 * set the mode and PAX type according to the typeflag in the header 846 */ 847 switch(hd->typeflag) { 848 case FIFOTYPE: 849 arcn->type = PAX_FIF; 850 arcn->sb.st_mode |= S_IFIFO; 851 break; 852 case DIRTYPE: 853 arcn->type = PAX_DIR; 854 arcn->sb.st_mode |= S_IFDIR; 855 arcn->sb.st_nlink = 2; 856 857 /* 858 * Some programs that create ustar archives append a '/' 859 * to the pathname for directories. This clearly violates 860 * ustar specs, but we will silently strip it off anyway. 861 */ 862 if (arcn->name[arcn->nlen - 1] == '/') 863 arcn->name[--arcn->nlen] = '\0'; 864 break; 865 case BLKTYPE: 866 case CHRTYPE: 867 /* 868 * this type requires the rdev field to be set. 869 */ 870 if (hd->typeflag == BLKTYPE) { 871 arcn->type = PAX_BLK; 872 arcn->sb.st_mode |= S_IFBLK; 873 } else { 874 arcn->type = PAX_CHR; 875 arcn->sb.st_mode |= S_IFCHR; 876 } 877 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 878 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 879 arcn->sb.st_rdev = TODEV(devmajor, devminor); 880 break; 881 case SYMTYPE: 882 case LNKTYPE: 883 if (hd->typeflag == SYMTYPE) { 884 arcn->type = PAX_SLK; 885 arcn->sb.st_mode |= S_IFLNK; 886 } else { 887 arcn->type = PAX_HLK; 888 /* 889 * so printing looks better 890 */ 891 arcn->sb.st_mode |= S_IFREG; 892 arcn->sb.st_nlink = 2; 893 } 894 break; 895 case LONGLINKTYPE: 896 if (is_gnutar) 897 arcn->type = PAX_GLL; 898 /* FALLTHROUGH */ 899 case LONGNAMETYPE: 900 if (is_gnutar) { 901 /* 902 * GNU long link/file; we tag these here and let the 903 * pax internals deal with it -- too ugly otherwise. 904 */ 905 if (hd->typeflag != LONGLINKTYPE) 906 arcn->type = PAX_GLF; 907 arcn->pad = TAR_PAD(arcn->sb.st_size); 908 arcn->skip = arcn->sb.st_size; 909 } else { 910 tty_warn(1, "GNU Long %s found in posix ustar archive.", 911 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 912 } 913 break; 914 case CONTTYPE: 915 case AREGTYPE: 916 case REGTYPE: 917 default: 918 /* 919 * these types have file data that follows. Set the skip and 920 * pad fields. 921 */ 922 arcn->type = PAX_REG; 923 arcn->pad = TAR_PAD(arcn->sb.st_size); 924 arcn->skip = arcn->sb.st_size; 925 arcn->sb.st_mode |= S_IFREG; 926 break; 927 } 928 return(0); 929} 930 931static int 932expandname(char *buf, size_t len, char **gnu_name, const char *name, 933 size_t nlen) 934{ 935 if (*gnu_name) { 936 len = strlcpy(buf, *gnu_name, len); 937 free(*gnu_name); 938 *gnu_name = NULL; 939 } else { 940 if (len > ++nlen) 941 len = nlen; 942 len = strlcpy(buf, name, len); 943 } 944 return len; 945} 946 947static void 948longlink(ARCHD *arcn) 949{ 950 ARCHD larc; 951 952 memset(&larc, 0, sizeof(larc)); 953 954 switch (arcn->type) { 955 case PAX_SLK: 956 case PAX_HRG: 957 case PAX_HLK: 958 larc.type = PAX_GLL; 959 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 960 sizeof(larc.ln_name)); 961 gnu_hack_string = arcn->ln_name; 962 gnu_hack_len = arcn->ln_nlen + 1; 963 break; 964 default: 965 larc.nlen = strlcpy(larc.name, "././@LongLink", 966 sizeof(larc.name)); 967 gnu_hack_string = arcn->name; 968 gnu_hack_len = arcn->nlen + 1; 969 larc.type = PAX_GLF; 970 } 971 /* 972 * We need a longlink now. 973 */ 974 ustar_wr(&larc); 975} 976 977/* 978 * ustar_wr() 979 * write a ustar header for the file specified in the ARCHD to the archive 980 * Have to check for file types that cannot be stored and file names that 981 * are too long. Be careful of the term (last arg) to ul_oct, we only use 982 * '\0' for the termination character (this is different than picky tar) 983 * ASSUMED: space after header in header block is zero filled 984 * Return: 985 * 0 if file has data to be written after the header, 1 if file has NO 986 * data to write after the header, -1 if archive write failed 987 */ 988 989int 990ustar_wr(ARCHD *arcn) 991{ 992 HD_USTAR *hd; 993 char *pt; 994 char hdblk[sizeof(HD_USTAR)]; 995 const char *user, *group; 996 997 /* 998 * check for those file system types ustar cannot store 999 */ 1000 if (arcn->type == PAX_SCK) { 1001 if (!is_gnutar) 1002 tty_warn(1, "Ustar cannot archive a socket %s", 1003 arcn->org_name); 1004 return(1); 1005 } 1006 1007 /* 1008 * check the length of the linkname 1009 */ 1010 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1011 (arcn->type == PAX_HRG)) && 1012 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1013 if (is_gnutar) { 1014 longlink(arcn); 1015 } else { 1016 tty_warn(1, "Link name too long for ustar %s", 1017 arcn->ln_name); 1018 return(1); 1019 } 1020 } 1021 1022 /* 1023 * split the path name into prefix and name fields (if needed). if 1024 * pt != arcn->name, the name has to be split 1025 */ 1026 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1027 if (is_gnutar) { 1028 longlink(arcn); 1029 pt = arcn->name; 1030 } else { 1031 tty_warn(1, "File name too long for ustar %s", 1032 arcn->name); 1033 return(1); 1034 } 1035 } 1036 1037 /* 1038 * zero out the header so we don't have to worry about zero fill below 1039 */ 1040 memset(hdblk, 0, sizeof(hdblk)); 1041 hd = (HD_USTAR *)hdblk; 1042 arcn->pad = 0L; 1043 1044 /* 1045 * split the name, or zero out the prefix 1046 */ 1047 if (pt != arcn->name) { 1048 /* 1049 * name was split, pt points at the / where the split is to 1050 * occur, we remove the / and copy the first part to the prefix 1051 */ 1052 *pt = '\0'; 1053 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1054 *pt++ = '/'; 1055 } 1056 1057 /* 1058 * copy the name part. this may be the whole path or the part after 1059 * the prefix 1060 */ 1061 strlcpy(hd->name, pt, sizeof(hd->name)); 1062 1063 /* 1064 * set the fields in the header that are type dependent 1065 */ 1066 switch(arcn->type) { 1067 case PAX_DIR: 1068 hd->typeflag = DIRTYPE; 1069 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1070 goto out; 1071 break; 1072 case PAX_CHR: 1073 case PAX_BLK: 1074 if (arcn->type == PAX_CHR) 1075 hd->typeflag = CHRTYPE; 1076 else 1077 hd->typeflag = BLKTYPE; 1078 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1079 sizeof(hd->devmajor), 3) || 1080 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1081 sizeof(hd->devminor), 3) || 1082 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1083 goto out; 1084 break; 1085 case PAX_FIF: 1086 hd->typeflag = FIFOTYPE; 1087 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1088 goto out; 1089 break; 1090 case PAX_GLL: 1091 case PAX_SLK: 1092 case PAX_HLK: 1093 case PAX_HRG: 1094 if (arcn->type == PAX_SLK) 1095 hd->typeflag = SYMTYPE; 1096 else if (arcn->type == PAX_GLL) 1097 hd->typeflag = LONGLINKTYPE; 1098 else 1099 hd->typeflag = LNKTYPE; 1100 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1101 if (ul_oct((u_long)gnu_hack_len, hd->size, 1102 sizeof(hd->size), 3)) 1103 goto out; 1104 break; 1105 case PAX_GLF: 1106 case PAX_REG: 1107 case PAX_CTG: 1108 default: 1109 /* 1110 * file data with this type, set the padding 1111 */ 1112 if (arcn->type == PAX_GLF) { 1113 hd->typeflag = LONGNAMETYPE; 1114 arcn->pad = TAR_PAD(gnu_hack_len); 1115 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1116 sizeof(hd->size), 3)) { 1117 tty_warn(1,"File is too long for ustar %s", 1118 arcn->org_name); 1119 return(1); 1120 } 1121 } else { 1122 if (arcn->type == PAX_CTG) 1123 hd->typeflag = CONTTYPE; 1124 else 1125 hd->typeflag = REGTYPE; 1126 arcn->pad = TAR_PAD(arcn->sb.st_size); 1127 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1128 sizeof(hd->size), 3)) { 1129 tty_warn(1,"File is too long for ustar %s", 1130 arcn->org_name); 1131 return(1); 1132 } 1133 } 1134 break; 1135 } 1136 1137 strncpy(hd->magic, TMAGIC, TMAGLEN); 1138 if (is_gnutar) 1139 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1140 else 1141 strncpy(hd->version, TVERSION, TVERSLEN); 1142 1143 /* 1144 * set the remaining fields. Some versions want all 16 bits of mode 1145 * we better humor them (they really do not meet spec though).... 1146 */ 1147 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1148 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1149 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1150 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1151 goto out; 1152 user = user_from_uid(arcn->sb.st_uid, 1); 1153 group = group_from_gid(arcn->sb.st_gid, 1); 1154 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1155 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1156 1157 /* 1158 * calculate and store the checksum write the header to the archive 1159 * return 0 tells the caller to now write the file data, 1 says no data 1160 * needs to be written 1161 */ 1162 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1163 sizeof(hd->chksum), 3)) 1164 goto out; 1165 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1166 return(-1); 1167 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1168 return(-1); 1169 if (gnu_hack_string) { 1170 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1171 int pad = gnu_hack_len; 1172 gnu_hack_string = NULL; 1173 gnu_hack_len = 0; 1174 if (res < 0) 1175 return(-1); 1176 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1177 return(-1); 1178 } 1179 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1180 return(0); 1181 return(1); 1182 1183 out: 1184 /* 1185 * header field is out of range 1186 */ 1187 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1188 return(1); 1189} 1190 1191/* 1192 * name_split() 1193 * see if the name has to be split for storage in a ustar header. We try 1194 * to fit the entire name in the name field without splitting if we can. 1195 * The split point is always at a / 1196 * Return 1197 * character pointer to split point (always the / that is to be removed 1198 * if the split is not needed, the points is set to the start of the file 1199 * name (it would violate the spec to split there). A NULL is returned if 1200 * the file name is too long 1201 */ 1202 1203static char * 1204name_split(char *name, int len) 1205{ 1206 char *start; 1207 1208 /* 1209 * check to see if the file name is small enough to fit in the name 1210 * field. if so just return a pointer to the name. 1211 */ 1212 if (len < TNMSZ) 1213 return(name); 1214 if (len > (TPFSZ + TNMSZ)) 1215 return(NULL); 1216 1217 /* 1218 * we start looking at the biggest sized piece that fits in the name 1219 * field. We walk forward looking for a slash to split at. The idea is 1220 * to find the biggest piece to fit in the name field (or the smallest 1221 * prefix we can find) (the -1 is correct the biggest piece would 1222 * include the slash between the two parts that gets thrown away) 1223 */ 1224 start = name + len - TNMSZ; 1225 while ((*start != '\0') && (*start != '/')) 1226 ++start; 1227 1228 /* 1229 * if we hit the end of the string, this name cannot be split, so we 1230 * cannot store this file. 1231 */ 1232 if (*start == '\0') 1233 return(NULL); 1234 len = start - name; 1235 1236 /* 1237 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1238 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1239 * the file would then expand on extract to //str. The len == 0 below 1240 * makes this special case follow the spec to the letter. 1241 */ 1242 if ((len >= TPFSZ) || (len == 0)) 1243 return(NULL); 1244 1245 /* 1246 * ok have a split point, return it to the caller 1247 */ 1248 return(start); 1249} 1250 1251/* convert a glob into a RE, and add it to the list */ 1252static int 1253tar_gnutar_exclude_one(const char *line, size_t len) 1254{ 1255 char sbuf[MAXPATHLEN * 2 + 1 + 5]; 1256 int i, j; 1257 1258 if (line[len - 1] == '\n') 1259 len--; 1260 for (i = 0, j = 2; i < len; i++) { 1261 /* 1262 * convert glob to regexp, escaping everything 1263 */ 1264 if (line[i] == '*') 1265 sbuf[j++] = '.'; 1266 else if (line[i] == '?') { 1267 sbuf[j++] = '.'; 1268 continue; 1269 } else if (!isalnum(line[i]) && !isblank(line[i])) 1270 sbuf[j++] = '\\'; 1271 sbuf[j++] = line[i]; 1272 } 1273 sbuf[0] = sbuf[j + 1] = sbuf[j + 2] = '/'; 1274 sbuf[1] = '^'; 1275 sbuf[j] = '$'; 1276 sbuf[j + 3] = '\0'; 1277 if (rep_add(sbuf) < 0) 1278 return (-1); 1279 1280 return (0); 1281} 1282 1283/* 1284 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1285 * we go through each line of the file, building a string from the "glob" 1286 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1287 * to rep_add(), which will add a empty replacement (exclusion), for the 1288 * named files. 1289 */ 1290int 1291tar_gnutar_minus_minus_exclude(path) 1292 const char *path; 1293{ 1294 size_t len = strlen(path); 1295 1296 if (len > MAXPATHLEN) 1297 tty_warn(0, "pathname too long: %s", path); 1298 1299 return (tar_gnutar_exclude_one(path, len)); 1300} 1301 1302int 1303tar_gnutar_X_compat(path) 1304 const char *path; 1305{ 1306 char *line; 1307 FILE *fp; 1308 int lineno = 0; 1309 size_t len; 1310 1311 fp = fopen(path, "r"); 1312 if (fp == NULL) { 1313 tty_warn(1, "cannot open %s: %s", path, 1314 strerror(errno)); 1315 return(-1); 1316 } 1317 1318 while ((line = fgetln(fp, &len))) { 1319 lineno++; 1320 if (len > MAXPATHLEN) { 1321 tty_warn(0, "pathname too long, line %d of %s", 1322 lineno, path); 1323 } 1324 if (tar_gnutar_exclude_one(line, len)) 1325 return (-1); 1326 } 1327 return (0); 1328} 1329