/* tar.c revision 1.36 */
1/* $NetBSD: tar.c,v 1.36 2003/01/09 18:24:08 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40#include <sys/cdefs.h> 41#if defined(__RCSID) && !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.36 2003/01/09 18:24:08 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96 97static int 98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen) 99{ 100 u_long hdck, blck; 101 102 hdck = asc_ul(hd, hdlen, OCT); 103 blck = tar_chksm(bl, bllen); 104 105 if (hdck != blck) { 106 tty_warn(0, "Header checksum %lo does not match %lo", 107 hdck, blck); 108 return(-1); 109 } 110 return(0); 111} 112 113 114/* 115 * tar_endwr() 116 * add the tar trailer of two null blocks 117 * Return: 118 * 0 if ok, -1 otherwise (what wr_skip returns) 119 */ 120 121int 122tar_endwr(void) 123{ 124 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 125} 126 127/* 128 * tar_endrd() 129 * no cleanup needed here, just return size of trailer (for append) 130 * Return: 131 * size of trailer (2 * BLKMULT) 132 */ 133 134off_t 135tar_endrd(void) 136{ 137 return((off_t)(NULLCNT*BLKMULT)); 138} 139 140/* 141 * tar_trail() 142 * Called to determine if a header block is a valid trailer. We are passed 143 * the block, the in_sync flag (which tells us we are in resync mode; 144 * looking for a valid header), and cnt (which starts at zero) which is 145 * used to count the number of empty blocks we have seen so far. 146 * Return: 147 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 148 * could never contain a header. 149 */ 150 151int 152tar_trail(char *buf, int in_resync, int *cnt) 153{ 154 int i; 155 156 /* 157 * look for all zero, trailer is two consecutive blocks of zero 158 */ 159 for (i = 0; i < BLKMULT; ++i) { 160 if (buf[i] != '\0') 161 break; 162 } 163 164 /* 165 * if not all zero it is not a trailer, but MIGHT be a header. 166 */ 167 if (i != BLKMULT) 168 return(-1); 169 170 /* 171 * When given a zero block, we must be careful! 172 * If we are not in resync mode, check for the trailer. 
Have to watch 173 * out that we do not mis-identify file data as the trailer, so we do 174 * NOT try to id a trailer during resync mode. During resync mode we 175 * might as well throw this block out since a valid header can NEVER be 176 * a block of all 0 (we must have a valid file name). 177 */ 178 if (!in_resync && (++*cnt >= NULLCNT)) 179 return(0); 180 return(1); 181} 182 183/* 184 * ul_oct() 185 * convert an unsigned long to an octal string. many oddball field 186 * termination characters are used by the various versions of tar in the 187 * different fields. term selects which kind to use. str is '0' padded 188 * at the front to len. we are unable to use only one format as many old 189 * tar readers are very cranky about this. 190 * Return: 191 * 0 if the number fit into the string, -1 otherwise 192 */ 193 194static int 195ul_oct(u_long val, char *str, int len, int term) 196{ 197 char *pt; 198 199 /* 200 * term selects the appropriate character(s) for the end of the string 201 */ 202 pt = str + len - 1; 203 switch(term) { 204 case 3: 205 *pt-- = '\0'; 206 break; 207 case 2: 208 *pt-- = ' '; 209 *pt-- = '\0'; 210 break; 211 case 1: 212 *pt-- = ' '; 213 break; 214 case 0: 215 default: 216 *pt-- = '\0'; 217 *pt-- = ' '; 218 break; 219 } 220 221 /* 222 * convert and blank pad if there is space 223 */ 224 while (pt >= str) { 225 *pt-- = '0' + (char)(val & 0x7); 226 if ((val = val >> 3) == (u_long)0) 227 break; 228 } 229 230 while (pt >= str) 231 *pt-- = '0'; 232 if (val != (u_long)0) 233 return(-1); 234 return(0); 235} 236 237#if !defined(NET2_STAT) && !defined(_LP64) 238/* 239 * ull_oct() 240 * convert an unsigned long long to an octal string. one of many oddball 241 * field termination characters are used by the various versions of tar 242 * in the different fields. term selects which kind to use. str is '0' 243 * padded at the front to len. we are unable to use only one format as 244 * many old tar readers are very cranky about this. 
245 * Return: 246 * 0 if the number fit into the string, -1 otherwise 247 */ 248 249static int 250ull_oct(unsigned long long val, char *str, int len, int term) 251{ 252 char *pt; 253 254 /* 255 * term selects the appropriate character(s) for the end of the string 256 */ 257 pt = str + len - 1; 258 switch(term) { 259 case 3: 260 *pt-- = '\0'; 261 break; 262 case 2: 263 *pt-- = ' '; 264 *pt-- = '\0'; 265 break; 266 case 1: 267 *pt-- = ' '; 268 break; 269 case 0: 270 default: 271 *pt-- = '\0'; 272 *pt-- = ' '; 273 break; 274 } 275 276 /* 277 * convert and blank pad if there is space 278 */ 279 while (pt >= str) { 280 *pt-- = '0' + (char)(val & 0x7); 281 if ((val = val >> 3) == 0) 282 break; 283 } 284 285 while (pt >= str) 286 *pt-- = '0'; 287 if (val != (unsigned long long)0) 288 return(-1); 289 return(0); 290} 291#endif 292 293/* 294 * tar_chksm() 295 * calculate the checksum for a tar block counting the checksum field as 296 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 297 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 298 * pad headers with 0. 299 * Return: 300 * unsigned long checksum 301 */ 302 303static u_long 304tar_chksm(char *blk, int len) 305{ 306 char *stop; 307 char *pt; 308 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 309 310 /* 311 * add the part of the block before the checksum field 312 */ 313 pt = blk; 314 stop = blk + CHK_OFFSET; 315 while (pt < stop) 316 chksm += (u_long)(*pt++ & 0xff); 317 /* 318 * move past the checksum field and keep going, spec counts the 319 * checksum field as the sum of 8 blanks (which is pre-computed as 320 * BLNKSUM). 321 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 322 * starts, no point in summing zero's) 323 */ 324 pt += CHK_LEN; 325 stop = blk + len; 326 while (pt < stop) 327 chksm += (u_long)(*pt++ & 0xff); 328 return(chksm); 329} 330 331/* 332 * Routines for old BSD style tar (also made portable to sysV tar) 333 */ 334 335/* 336 * tar_id() 337 * determine if a block given to us is a valid tar header (and not a USTAR 338 * header). We have to be on the lookout for those pesky blocks of all 339 * zero's. 340 * Return: 341 * 0 if a tar header, -1 otherwise 342 */ 343 344int 345tar_id(char *blk, int size) 346{ 347 HD_TAR *hd; 348 HD_USTAR *uhd; 349 350 if (size < BLKMULT) 351 return(-1); 352 hd = (HD_TAR *)blk; 353 uhd = (HD_USTAR *)blk; 354 355 /* 356 * check for block of zero's first, a simple and fast test, then make 357 * sure this is not a ustar header by looking for the ustar magic 358 * cookie. We should use TMAGLEN, but some USTAR archive programs are 359 * wrong and create archives missing the \0. Last we check the 360 * checksum. If this is ok we have to assume it is a valid header. 
361 */ 362 if (hd->name[0] == '\0') 363 return(-1); 364 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 365 return(-1); 366 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 367} 368 369/* 370 * tar_opt() 371 * handle tar format specific -o options 372 * Return: 373 * 0 if ok -1 otherwise 374 */ 375 376int 377tar_opt(void) 378{ 379 OPLIST *opt; 380 381 while ((opt = opt_next()) != NULL) { 382 if (strcmp(opt->name, TAR_OPTION) || 383 strcmp(opt->value, TAR_NODIR)) { 384 tty_warn(1, 385 "Unknown tar format -o option/value pair %s=%s", 386 opt->name, opt->value); 387 tty_warn(1, 388 "%s=%s is the only supported tar format option", 389 TAR_OPTION, TAR_NODIR); 390 return(-1); 391 } 392 393 /* 394 * we only support one option, and only when writing 395 */ 396 if ((act != APPND) && (act != ARCHIVE)) { 397 tty_warn(1, "%s=%s is only supported when writing.", 398 opt->name, opt->value); 399 return(-1); 400 } 401 tar_nodir = 1; 402 } 403 return(0); 404} 405 406 407/* 408 * tar_rd() 409 * extract the values out of block already determined to be a tar header. 410 * store the values in the ARCHD parameter. 
411 * Return: 412 * 0 413 */ 414 415int 416tar_rd(ARCHD *arcn, char *buf) 417{ 418 HD_TAR *hd; 419 char *pt; 420 421 /* 422 * we only get proper sized buffers passed to us 423 */ 424 if (tar_id(buf, BLKMULT) < 0) 425 return(-1); 426 memset(arcn, 0, sizeof(*arcn)); 427 arcn->org_name = arcn->name; 428 arcn->pat = NULL; 429 arcn->sb.st_nlink = 1; 430 431 /* 432 * copy out the name and values in the stat buffer 433 */ 434 hd = (HD_TAR *)buf; 435 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 436 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 437 &gnu_name_string, hd->name); 438 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 439 &gnu_link_string, hd->linkname); 440 } 441 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 442 0xfff); 443 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 444 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 445 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 446 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 447 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 448 449 /* 450 * have to look at the last character, it may be a '/' and that is used 451 * to encode this as a directory 452 */ 453 pt = &(arcn->name[arcn->nlen - 1]); 454 arcn->pad = 0; 455 arcn->skip = 0; 456 switch(hd->linkflag) { 457 case SYMTYPE: 458 /* 459 * symbolic link, need to get the link name and set the type in 460 * the st_mode so -v printing will look correct. 461 */ 462 arcn->type = PAX_SLK; 463 arcn->sb.st_mode |= S_IFLNK; 464 break; 465 case LNKTYPE: 466 /* 467 * hard link, need to get the link name, set the type in the 468 * st_mode and st_nlink so -v printing will look better. 469 */ 470 arcn->type = PAX_HLK; 471 arcn->sb.st_nlink = 2; 472 473 /* 474 * no idea of what type this thing really points at, but 475 * we set something for printing only. 
476 */ 477 arcn->sb.st_mode |= S_IFREG; 478 break; 479 case LONGLINKTYPE: 480 arcn->type = PAX_GLL; 481 /* FALLTHROUGH */ 482 case LONGNAMETYPE: 483 /* 484 * GNU long link/file; we tag these here and let the 485 * pax internals deal with it -- too ugly otherwise. 486 */ 487 if (hd->linkflag != LONGLINKTYPE) 488 arcn->type = PAX_GLF; 489 arcn->pad = TAR_PAD(arcn->sb.st_size); 490 arcn->skip = arcn->sb.st_size; 491 break; 492 case AREGTYPE: 493 case REGTYPE: 494 case DIRTYPE: /* see below */ 495 default: 496 /* 497 * If we have a trailing / this is a directory and NOT a file. 498 * Note: V7 tar doesn't actually have DIRTYPE, but it was 499 * reported that V7 archives using USTAR directories do exist. 500 */ 501 if (*pt == '/' || hd->linkflag == DIRTYPE) { 502 /* 503 * it is a directory, set the mode for -v printing 504 */ 505 arcn->type = PAX_DIR; 506 arcn->sb.st_mode |= S_IFDIR; 507 arcn->sb.st_nlink = 2; 508 } else { 509 /* 510 * have a file that will be followed by data. Set the 511 * skip value to the size field and calculate the size 512 * of the padding. 513 */ 514 arcn->type = PAX_REG; 515 arcn->sb.st_mode |= S_IFREG; 516 arcn->pad = TAR_PAD(arcn->sb.st_size); 517 arcn->skip = arcn->sb.st_size; 518 } 519 break; 520 } 521 522 /* 523 * strip off any trailing slash. 524 */ 525 if (*pt == '/') { 526 *pt = '\0'; 527 --arcn->nlen; 528 } 529 return(0); 530} 531 532/* 533 * tar_wr() 534 * write a tar header for the file specified in the ARCHD to the archive. 535 * Have to check for file types that cannot be stored and file names that 536 * are too long. Be careful of the term (last arg) to ul_oct, each field 537 * of tar has it own spec for the termination character(s). 
538 * ASSUMED: space after header in header block is zero filled 539 * Return: 540 * 0 if file has data to be written after the header, 1 if file has NO 541 * data to write after the header, -1 if archive write failed 542 */ 543 544int 545tar_wr(ARCHD *arcn) 546{ 547 HD_TAR *hd; 548 int len; 549 char hdblk[sizeof(HD_TAR)]; 550 551 /* 552 * check for those file system types which tar cannot store 553 */ 554 switch(arcn->type) { 555 case PAX_DIR: 556 /* 557 * user asked that dirs not be written to the archive 558 */ 559 if (tar_nodir) 560 return(1); 561 break; 562 case PAX_CHR: 563 tty_warn(1, "Tar cannot archive a character device %s", 564 arcn->org_name); 565 return(1); 566 case PAX_BLK: 567 tty_warn(1, 568 "Tar cannot archive a block device %s", arcn->org_name); 569 return(1); 570 case PAX_SCK: 571 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 572 return(1); 573 case PAX_FIF: 574 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 575 return(1); 576 case PAX_SLK: 577 case PAX_HLK: 578 case PAX_HRG: 579 if (arcn->ln_nlen > sizeof(hd->linkname)) { 580 tty_warn(1,"Link name too long for tar %s", 581 arcn->ln_name); 582 return(1); 583 } 584 break; 585 case PAX_REG: 586 case PAX_CTG: 587 default: 588 break; 589 } 590 591 /* 592 * check file name len, remember extra char for dirs (the / at the end) 593 */ 594 len = arcn->nlen; 595 if (arcn->type == PAX_DIR) 596 ++len; 597 if (len >= sizeof(hd->name)) { 598 tty_warn(1, "File name too long for tar %s", arcn->name); 599 return(1); 600 } 601 602 /* 603 * copy the data out of the ARCHD into the tar header based on the type 604 * of the file. Remember many tar readers want the unused fields to be 605 * padded with zero. 
We set the linkflag field (type), the linkname 606 * (or zero if not used),the size, and set the padding (if any) to be 607 * added after the file data (0 for all other types, as they only have 608 * a header) 609 */ 610 memset(hdblk, 0, sizeof(hdblk)); 611 hd = (HD_TAR *)hdblk; 612 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 613 arcn->pad = 0; 614 615 if (arcn->type == PAX_DIR) { 616 /* 617 * directories are the same as files, except have a filename 618 * that ends with a /, we add the slash here. No data follows, 619 * dirs, so no pad. 620 */ 621 hd->linkflag = AREGTYPE; 622 hd->name[len-1] = '/'; 623 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 624 goto out; 625 } else if (arcn->type == PAX_SLK) { 626 /* 627 * no data follows this file, so no pad 628 */ 629 hd->linkflag = SYMTYPE; 630 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 631 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 632 goto out; 633 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 634 /* 635 * no data follows this file, so no pad 636 */ 637 hd->linkflag = LNKTYPE; 638 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 639 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 640 goto out; 641 } else { 642 /* 643 * data follows this file, so set the pad 644 */ 645 hd->linkflag = AREGTYPE; 646 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 647 tty_warn(1,"File is too large for tar %s", 648 arcn->org_name); 649 return(1); 650 } 651 arcn->pad = TAR_PAD(arcn->sb.st_size); 652 } 653 654 /* 655 * copy those fields that are independent of the type 656 */ 657 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 658 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 659 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 660 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 661 goto out; 662 663 /* 664 * calculate and add the checksum, then write the header. 
A return of 665 * 0 tells the caller to now write the file data, 1 says no data needs 666 * to be written 667 */ 668 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 669 sizeof(hd->chksum), 3)) 670 goto out; /* XXX Something's wrong here 671 * because a zero-byte file can 672 * cause this to be done and 673 * yet the resulting warning 674 * seems incorrect */ 675 676 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 677 return(-1); 678 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 679 return(-1); 680 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 681 return(0); 682 return(1); 683 684 out: 685 /* 686 * header field is out of range 687 */ 688 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 689 return(1); 690} 691 692/* 693 * Routines for POSIX ustar 694 */ 695 696/* 697 * ustar_strd() 698 * initialization for ustar read 699 * Return: 700 * 0 if ok, -1 otherwise 701 */ 702 703int 704ustar_strd(void) 705{ 706 return(0); 707} 708 709/* 710 * ustar_stwr() 711 * initialization for ustar write 712 * Return: 713 * 0 if ok, -1 otherwise 714 */ 715 716int 717ustar_stwr(void) 718{ 719 return(0); 720} 721 722/* 723 * ustar_id() 724 * determine if a block given to us is a valid ustar header. We have to 725 * be on the lookout for those pesky blocks of all zero's 726 * Return: 727 * 0 if a ustar header, -1 otherwise 728 */ 729 730int 731ustar_id(char *blk, int size) 732{ 733 HD_USTAR *hd; 734 735 if (size < BLKMULT) 736 return(-1); 737 hd = (HD_USTAR *)blk; 738 739 /* 740 * check for block of zero's first, a simple and fast test then check 741 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 742 * programs are fouled up and create archives missing the \0. Last we 743 * check the checksum. If ok we have to assume it is a valid header. 
744 */ 745 if (hd->name[0] == '\0') 746 return(-1); 747 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 748 return(-1); 749 /* This is GNU tar */ 750 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 751 !seen_gnu_warning) { 752 seen_gnu_warning = 1; 753 tty_warn(0, 754 "Trying to read GNU tar archive with extensions off"); 755 } 756 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 757} 758 759/* 760 * ustar_rd() 761 * extract the values out of block already determined to be a ustar header. 762 * store the values in the ARCHD parameter. 763 * Return: 764 * 0 765 */ 766 767int 768ustar_rd(ARCHD *arcn, char *buf) 769{ 770 HD_USTAR *hd; 771 char *dest; 772 int cnt; 773 dev_t devmajor; 774 dev_t devminor; 775 776 /* 777 * we only get proper sized buffers 778 */ 779 if (ustar_id(buf, BLKMULT) < 0) 780 return(-1); 781 782 memset(arcn, 0, sizeof(*arcn)); 783 arcn->org_name = arcn->name; 784 arcn->pat = NULL; 785 arcn->sb.st_nlink = 1; 786 hd = (HD_USTAR *)buf; 787 788 /* 789 * see if the filename is split into two parts. if, so joint the parts. 790 * we copy the prefix first and add a / between the prefix and name. 791 */ 792 dest = arcn->name; 793 if (*(hd->prefix) != '\0') { 794 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 795 dest += cnt; 796 *dest++ = '/'; 797 cnt++; 798 } else { 799 cnt = 0; 800 } 801 802 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 803 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 804 &gnu_name_string, hd->name); 805 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 806 &gnu_link_string, hd->linkname); 807 } 808 809 /* 810 * follow the spec to the letter. we should only have mode bits, strip 811 * off all other crud we may be passed. 
812 */ 813 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 814 0xfff); 815 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 816 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 817 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 818 819 /* 820 * If we can find the ascii names for gname and uname in the password 821 * and group files we will use the uid's and gid they bind. Otherwise 822 * we use the uid and gid values stored in the header. (This is what 823 * the posix spec wants). 824 */ 825 hd->gname[sizeof(hd->gname) - 1] = '\0'; 826 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 827 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 828 hd->uname[sizeof(hd->uname) - 1] = '\0'; 829 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 830 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 831 832 /* 833 * set the defaults, these may be changed depending on the file type 834 */ 835 arcn->pad = 0; 836 arcn->skip = 0; 837 arcn->sb.st_rdev = (dev_t)0; 838 839 /* 840 * set the mode and PAX type according to the typeflag in the header 841 */ 842 switch(hd->typeflag) { 843 case FIFOTYPE: 844 arcn->type = PAX_FIF; 845 arcn->sb.st_mode |= S_IFIFO; 846 break; 847 case DIRTYPE: 848 arcn->type = PAX_DIR; 849 arcn->sb.st_mode |= S_IFDIR; 850 arcn->sb.st_nlink = 2; 851 852 /* 853 * Some programs that create ustar archives append a '/' 854 * to the pathname for directories. This clearly violates 855 * ustar specs, but we will silently strip it off anyway. 856 */ 857 if (arcn->name[arcn->nlen - 1] == '/') 858 arcn->name[--arcn->nlen] = '\0'; 859 break; 860 case BLKTYPE: 861 case CHRTYPE: 862 /* 863 * this type requires the rdev field to be set. 
864 */ 865 if (hd->typeflag == BLKTYPE) { 866 arcn->type = PAX_BLK; 867 arcn->sb.st_mode |= S_IFBLK; 868 } else { 869 arcn->type = PAX_CHR; 870 arcn->sb.st_mode |= S_IFCHR; 871 } 872 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 873 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 874 arcn->sb.st_rdev = TODEV(devmajor, devminor); 875 break; 876 case SYMTYPE: 877 case LNKTYPE: 878 if (hd->typeflag == SYMTYPE) { 879 arcn->type = PAX_SLK; 880 arcn->sb.st_mode |= S_IFLNK; 881 } else { 882 arcn->type = PAX_HLK; 883 /* 884 * so printing looks better 885 */ 886 arcn->sb.st_mode |= S_IFREG; 887 arcn->sb.st_nlink = 2; 888 } 889 break; 890 case LONGLINKTYPE: 891 if (is_gnutar) 892 arcn->type = PAX_GLL; 893 /* FALLTHROUGH */ 894 case LONGNAMETYPE: 895 if (is_gnutar) { 896 /* 897 * GNU long link/file; we tag these here and let the 898 * pax internals deal with it -- too ugly otherwise. 899 */ 900 if (hd->typeflag != LONGLINKTYPE) 901 arcn->type = PAX_GLF; 902 arcn->pad = TAR_PAD(arcn->sb.st_size); 903 arcn->skip = arcn->sb.st_size; 904 } else { 905 tty_warn(1, "GNU Long %s found in posix ustar archive.", 906 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 907 } 908 break; 909 case CONTTYPE: 910 case AREGTYPE: 911 case REGTYPE: 912 default: 913 /* 914 * these types have file data that follows. Set the skip and 915 * pad fields. 
916 */ 917 arcn->type = PAX_REG; 918 arcn->pad = TAR_PAD(arcn->sb.st_size); 919 arcn->skip = arcn->sb.st_size; 920 arcn->sb.st_mode |= S_IFREG; 921 break; 922 } 923 return(0); 924} 925 926static int 927expandname(char *buf, size_t len, char **gnu_name, const char *name) 928{ 929 if (*gnu_name) { 930 len = strlcpy(buf, *gnu_name, len); 931 free(*gnu_name); 932 *gnu_name = NULL; 933 } else { 934 len = strlcpy(buf, name, len); 935 } 936 return len; 937} 938 939static void 940longlink(ARCHD *arcn) 941{ 942 ARCHD larc; 943 944 memset(&larc, 0, sizeof(larc)); 945 946 switch (arcn->type) { 947 case PAX_SLK: 948 case PAX_HRG: 949 case PAX_HLK: 950 larc.type = PAX_GLL; 951 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 952 sizeof(larc.ln_name)); 953 gnu_hack_string = arcn->ln_name; 954 gnu_hack_len = arcn->ln_nlen + 1; 955 break; 956 default: 957 larc.nlen = strlcpy(larc.name, "././@LongLink", 958 sizeof(larc.name)); 959 gnu_hack_string = arcn->name; 960 gnu_hack_len = arcn->nlen + 1; 961 larc.type = PAX_GLF; 962 } 963 /* 964 * We need a longlink now. 965 */ 966 ustar_wr(&larc); 967} 968 969/* 970 * ustar_wr() 971 * write a ustar header for the file specified in the ARCHD to the archive 972 * Have to check for file types that cannot be stored and file names that 973 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 974 * '\0' for the termination character (this is different than picky tar) 975 * ASSUMED: space after header in header block is zero filled 976 * Return: 977 * 0 if file has data to be written after the header, 1 if file has NO 978 * data to write after the header, -1 if archive write failed 979 */ 980 981int 982ustar_wr(ARCHD *arcn) 983{ 984 HD_USTAR *hd; 985 char *pt; 986 char hdblk[sizeof(HD_USTAR)]; 987 const char *user, *group; 988 989 /* 990 * check for those file system types ustar cannot store 991 */ 992 if (arcn->type == PAX_SCK) { 993 tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name); 994 return(1); 995 } 996 997 /* 998 * check the length of the linkname 999 */ 1000 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1001 (arcn->type == PAX_HRG)) && 1002 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1003 if (is_gnutar) { 1004 longlink(arcn); 1005 } else { 1006 tty_warn(1, "Link name too long for ustar %s", 1007 arcn->ln_name); 1008 return(1); 1009 } 1010 } 1011 1012 /* 1013 * split the path name into prefix and name fields (if needed). 
if 1014 * pt != arcn->name, the name has to be split 1015 */ 1016 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1017 if (is_gnutar) { 1018 longlink(arcn); 1019 pt = arcn->name; 1020 } else { 1021 tty_warn(1, "File name too long for ustar %s", 1022 arcn->name); 1023 return(1); 1024 } 1025 } 1026 1027 /* 1028 * zero out the header so we don't have to worry about zero fill below 1029 */ 1030 memset(hdblk, 0, sizeof(hdblk)); 1031 hd = (HD_USTAR *)hdblk; 1032 arcn->pad = 0L; 1033 1034 /* 1035 * split the name, or zero out the prefix 1036 */ 1037 if (pt != arcn->name) { 1038 /* 1039 * name was split, pt points at the / where the split is to 1040 * occur, we remove the / and copy the first part to the prefix 1041 */ 1042 *pt = '\0'; 1043 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1044 *pt++ = '/'; 1045 } 1046 1047 /* 1048 * copy the name part. this may be the whole path or the part after 1049 * the prefix 1050 */ 1051 strlcpy(hd->name, pt, sizeof(hd->name)); 1052 1053 /* 1054 * set the fields in the header that are type dependent 1055 */ 1056 switch(arcn->type) { 1057 case PAX_DIR: 1058 hd->typeflag = DIRTYPE; 1059 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1060 goto out; 1061 break; 1062 case PAX_CHR: 1063 case PAX_BLK: 1064 if (arcn->type == PAX_CHR) 1065 hd->typeflag = CHRTYPE; 1066 else 1067 hd->typeflag = BLKTYPE; 1068 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1069 sizeof(hd->devmajor), 3) || 1070 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1071 sizeof(hd->devminor), 3) || 1072 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1073 goto out; 1074 break; 1075 case PAX_FIF: 1076 hd->typeflag = FIFOTYPE; 1077 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1078 goto out; 1079 break; 1080 case PAX_GLL: 1081 case PAX_SLK: 1082 case PAX_HLK: 1083 case PAX_HRG: 1084 if (arcn->type == PAX_SLK) 1085 hd->typeflag = SYMTYPE; 1086 else if (arcn->type == PAX_GLL) 1087 hd->typeflag = LONGLINKTYPE; 1088 else 1089 
hd->typeflag = LNKTYPE; 1090 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1091 if (ul_oct((u_long)gnu_hack_len, hd->size, 1092 sizeof(hd->size), 3)) 1093 goto out; 1094 break; 1095 case PAX_GLF: 1096 case PAX_REG: 1097 case PAX_CTG: 1098 default: 1099 /* 1100 * file data with this type, set the padding 1101 */ 1102 if (arcn->type == PAX_GLF) { 1103 hd->typeflag = LONGNAMETYPE; 1104 arcn->pad = TAR_PAD(gnu_hack_len); 1105 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1106 sizeof(hd->size), 3)) { 1107 tty_warn(1,"File is too long for ustar %s", 1108 arcn->org_name); 1109 return(1); 1110 } 1111 } else { 1112 if (arcn->type == PAX_CTG) 1113 hd->typeflag = CONTTYPE; 1114 else 1115 hd->typeflag = REGTYPE; 1116 arcn->pad = TAR_PAD(arcn->sb.st_size); 1117 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1118 sizeof(hd->size), 3)) { 1119 tty_warn(1,"File is too long for ustar %s", 1120 arcn->org_name); 1121 return(1); 1122 } 1123 } 1124 break; 1125 } 1126 1127 strncpy(hd->magic, TMAGIC, TMAGLEN); 1128 if (is_gnutar) 1129 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1130 else 1131 strncpy(hd->version, TVERSION, TVERSLEN); 1132 1133 /* 1134 * set the remaining fields. Some versions want all 16 bits of mode 1135 * we better humor them (they really do not meet spec though).... 1136 */ 1137 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1138 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1139 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1140 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1141 goto out; 1142 user = user_from_uid(arcn->sb.st_uid, 1); 1143 group = group_from_gid(arcn->sb.st_gid, 1); 1144 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1145 strncpy(hd->gname, group ? 
group : "", sizeof(hd->gname)); 1146 1147 /* 1148 * calculate and store the checksum write the header to the archive 1149 * return 0 tells the caller to now write the file data, 1 says no data 1150 * needs to be written 1151 */ 1152 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1153 sizeof(hd->chksum), 3)) 1154 goto out; 1155 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1156 return(-1); 1157 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1158 return(-1); 1159 if (gnu_hack_string) { 1160 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1161 int pad = gnu_hack_len; 1162 gnu_hack_string = NULL; 1163 gnu_hack_len = 0; 1164 if (res < 0) 1165 return(-1); 1166 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1167 return(-1); 1168 } 1169 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1170 return(0); 1171 return(1); 1172 1173 out: 1174 /* 1175 * header field is out of range 1176 */ 1177 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1178 return(1); 1179} 1180 1181/* 1182 * name_split() 1183 * see if the name has to be split for storage in a ustar header. We try 1184 * to fit the entire name in the name field without splitting if we can. 1185 * The split point is always at a / 1186 * Return 1187 * character pointer to split point (always the / that is to be removed 1188 * if the split is not needed, the points is set to the start of the file 1189 * name (it would violate the spec to split there). A NULL is returned if 1190 * the file name is too long 1191 */ 1192 1193static char * 1194name_split(char *name, int len) 1195{ 1196 char *start; 1197 1198 /* 1199 * check to see if the file name is small enough to fit in the name 1200 * field. if so just return a pointer to the name. 1201 */ 1202 if (len < TNMSZ) 1203 return(name); 1204 if (len > (TPFSZ + TNMSZ)) 1205 return(NULL); 1206 1207 /* 1208 * we start looking at the biggest sized piece that fits in the name 1209 * field. 
We walk forward looking for a slash to split at. The idea is 1210 * to find the biggest piece to fit in the name field (or the smallest 1211 * prefix we can find) (the -1 is correct the biggest piece would 1212 * include the slash between the two parts that gets thrown away) 1213 */ 1214 start = name + len - TNMSZ; 1215 while ((*start != '\0') && (*start != '/')) 1216 ++start; 1217 1218 /* 1219 * if we hit the end of the string, this name cannot be split, so we 1220 * cannot store this file. 1221 */ 1222 if (*start == '\0') 1223 return(NULL); 1224 len = start - name; 1225 1226 /* 1227 * NOTE: /str where the length of str == TNMSZ can not be stored under 1228 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1229 * the file would then expand on extract to //str. The len == 0 below 1230 * makes this special case follow the spec to the letter. 1231 */ 1232 if ((len >= TPFSZ) || (len == 0)) 1233 return(NULL); 1234 1235 /* 1236 * ok have a split point, return it to the caller 1237 */ 1238 return(start); 1239} 1240 1241/* convert a glob into a RE, and add it to the list */ 1242static int 1243tar_gnutar_exclude_one(const char *line, size_t len) 1244{ 1245 char sbuf[MAXPATHLEN * 2 + 1 + 5]; 1246 int i, j; 1247 1248 if (line[len - 1] == '\n') 1249 len--; 1250 for (i = 0, j = 2; i < len; i++) { 1251 /* 1252 * convert glob to regexp, escaping everything 1253 */ 1254 if (line[i] == '*') 1255 sbuf[j++] = '.'; 1256 else if (line[i] == '?') { 1257 sbuf[j++] = '.'; 1258 continue; 1259 } else if (!isalnum(line[i]) && !isblank(line[i])) 1260 sbuf[j++] = '\\'; 1261 sbuf[j++] = line[i]; 1262 } 1263 sbuf[0] = sbuf[j + 1] = sbuf[j + 2] = '/'; 1264 sbuf[1] = '^'; 1265 sbuf[j] = '$'; 1266 sbuf[j + 3] = '\0'; 1267 if (rep_add(sbuf) < 0) 1268 return (-1); 1269 1270 return (0); 1271} 1272 1273/* 1274 * deal with GNU tar -X/--exclude-from & --exclude switchs. 
basically, 1275 * we go through each line of the file, building a string from the "glob" 1276 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1277 * to rep_add(), which will add a empty replacement (exclusion), for the 1278 * named files. 1279 */ 1280int 1281tar_gnutar_minus_minus_exclude(path) 1282 const char *path; 1283{ 1284 size_t len = strlen(path); 1285 1286 if (len > MAXPATHLEN) 1287 tty_warn(0, "pathname too long: %s", path); 1288 1289 return (tar_gnutar_exclude_one(path, len)); 1290} 1291 1292int 1293tar_gnutar_X_compat(path) 1294 const char *path; 1295{ 1296 char *line; 1297 FILE *fp; 1298 int lineno = 0; 1299 size_t len; 1300 1301 fp = fopen(path, "r"); 1302 if (fp == NULL) { 1303 tty_warn(1, "can not open %s: %s", path, 1304 strerror(errno)); 1305 return(-1); 1306 } 1307 1308 while ((line = fgetln(fp, &len))) { 1309 lineno++; 1310 if (len > MAXPATHLEN) { 1311 tty_warn(0, "pathname too long, line %d of %s", 1312 lineno, path); 1313 } 1314 if (tar_gnutar_exclude_one(line, len)) 1315 return (-1); 1316 } 1317 return (0); 1318} 1319