tar.c revision 1.39
1/* $NetBSD: tar.c,v 1.39 2003/04/20 21:41:52 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the University of 22 * California, Berkeley and its contributors. 23 * 4. Neither the name of the University nor the names of its contributors 24 * may be used to endorse or promote products derived from this software 25 * without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 37 * SUCH DAMAGE. 38 */ 39 40#include <sys/cdefs.h> 41#if defined(__RCSID) && !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.39 2003/04/20 21:41:52 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, const char *, size_t); 72static void longlink(ARCHD *); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? */ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96 97static int 98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen) 99{ 100 u_long hdck, blck; 101 102 hdck = asc_ul(hd, hdlen, OCT); 103 blck = tar_chksm(bl, bllen); 104 105 if (hdck != blck) { 106 tty_warn(0, "Header checksum %lo does not match %lo", 107 hdck, blck); 108 return(-1); 109 } 110 return(0); 111} 112 113 114/* 115 * tar_endwr() 116 * add the tar trailer of two null blocks 117 * Return: 118 * 0 if ok, -1 otherwise (what wr_skip returns) 119 */ 120 121int 122tar_endwr(void) 123{ 124 return(wr_skip((off_t)(NULLCNT*BLKMULT))); 125} 126 127/* 128 * tar_endrd() 129 * no cleanup needed here, just return size of trailer (for append) 130 * Return: 131 * size of trailer (2 * BLKMULT) 132 */ 133 134off_t 135tar_endrd(void) 136{ 137 return((off_t)(NULLCNT*BLKMULT)); 138} 139 140/* 141 * tar_trail() 142 * Called to determine if a header block is a valid trailer. We are passed 143 * the block, the in_sync flag (which tells us we are in resync mode; 144 * looking for a valid header), and cnt (which starts at zero) which is 145 * used to count the number of empty blocks we have seen so far. 146 * Return: 147 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 148 * could never contain a header. 149 */ 150 151int 152tar_trail(char *buf, int in_resync, int *cnt) 153{ 154 int i; 155 156 /* 157 * look for all zero, trailer is two consecutive blocks of zero 158 */ 159 for (i = 0; i < BLKMULT; ++i) { 160 if (buf[i] != '\0') 161 break; 162 } 163 164 /* 165 * if not all zero it is not a trailer, but MIGHT be a header. 166 */ 167 if (i != BLKMULT) 168 return(-1); 169 170 /* 171 * When given a zero block, we must be careful! 172 * If we are not in resync mode, check for the trailer. Have to watch 173 * out that we do not mis-identify file data as the trailer, so we do 174 * NOT try to id a trailer during resync mode. During resync mode we 175 * might as well throw this block out since a valid header can NEVER be 176 * a block of all 0 (we must have a valid file name). 177 */ 178 if (!in_resync) { 179 ++*cnt; 180 /* 181 * old GNU tar (up through 1.13) only writes one block of 182 * trailers, so we pretend we got another 183 */ 184 if (is_gnutar) 185 ++*cnt; 186 if (*cnt >= NULLCNT) 187 return(0); 188 } 189 return(1); 190} 191 192/* 193 * ul_oct() 194 * convert an unsigned long to an octal string. many oddball field 195 * termination characters are used by the various versions of tar in the 196 * different fields. term selects which kind to use. str is '0' padded 197 * at the front to len. we are unable to use only one format as many old 198 * tar readers are very cranky about this. 199 * Return: 200 * 0 if the number fit into the string, -1 otherwise 201 */ 202 203static int 204ul_oct(u_long val, char *str, int len, int term) 205{ 206 char *pt; 207 208 /* 209 * term selects the appropriate character(s) for the end of the string 210 */ 211 pt = str + len - 1; 212 switch(term) { 213 case 3: 214 *pt-- = '\0'; 215 break; 216 case 2: 217 *pt-- = ' '; 218 *pt-- = '\0'; 219 break; 220 case 1: 221 *pt-- = ' '; 222 break; 223 case 0: 224 default: 225 *pt-- = '\0'; 226 *pt-- = ' '; 227 break; 228 } 229 230 /* 231 * convert and blank pad if there is space 232 */ 233 while (pt >= str) { 234 *pt-- = '0' + (char)(val & 0x7); 235 if ((val = val >> 3) == (u_long)0) 236 break; 237 } 238 239 while (pt >= str) 240 *pt-- = '0'; 241 if (val != (u_long)0) 242 return(-1); 243 return(0); 244} 245 246#if !defined(NET2_STAT) && !defined(_LP64) 247/* 248 * ull_oct() 249 * convert an unsigned long long to an octal string. one of many oddball 250 * field termination characters are used by the various versions of tar 251 * in the different fields. term selects which kind to use. str is '0' 252 * padded at the front to len. we are unable to use only one format as 253 * many old tar readers are very cranky about this. 254 * Return: 255 * 0 if the number fit into the string, -1 otherwise 256 */ 257 258static int 259ull_oct(unsigned long long val, char *str, int len, int term) 260{ 261 char *pt; 262 263 /* 264 * term selects the appropriate character(s) for the end of the string 265 */ 266 pt = str + len - 1; 267 switch(term) { 268 case 3: 269 *pt-- = '\0'; 270 break; 271 case 2: 272 *pt-- = ' '; 273 *pt-- = '\0'; 274 break; 275 case 1: 276 *pt-- = ' '; 277 break; 278 case 0: 279 default: 280 *pt-- = '\0'; 281 *pt-- = ' '; 282 break; 283 } 284 285 /* 286 * convert and blank pad if there is space 287 */ 288 while (pt >= str) { 289 *pt-- = '0' + (char)(val & 0x7); 290 if ((val = val >> 3) == 0) 291 break; 292 } 293 294 while (pt >= str) 295 *pt-- = '0'; 296 if (val != (unsigned long long)0) 297 return(-1); 298 return(0); 299} 300#endif 301 302/* 303 * tar_chksm() 304 * calculate the checksum for a tar block counting the checksum field as 305 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 306 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 307 * pad headers with 0. 308 * Return: 309 * unsigned long checksum 310 */ 311 312static u_long 313tar_chksm(char *blk, int len) 314{ 315 char *stop; 316 char *pt; 317 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 318 319 /* 320 * add the part of the block before the checksum field 321 */ 322 pt = blk; 323 stop = blk + CHK_OFFSET; 324 while (pt < stop) 325 chksm += (u_long)(*pt++ & 0xff); 326 /* 327 * move past the checksum field and keep going, spec counts the 328 * checksum field as the sum of 8 blanks (which is pre-computed as 329 * BLNKSUM). 330 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding 331 * starts, no point in summing zero's) 332 */ 333 pt += CHK_LEN; 334 stop = blk + len; 335 while (pt < stop) 336 chksm += (u_long)(*pt++ & 0xff); 337 return(chksm); 338} 339 340/* 341 * Routines for old BSD style tar (also made portable to sysV tar) 342 */ 343 344/* 345 * tar_id() 346 * determine if a block given to us is a valid tar header (and not a USTAR 347 * header). We have to be on the lookout for those pesky blocks of all 348 * zero's. 349 * Return: 350 * 0 if a tar header, -1 otherwise 351 */ 352 353int 354tar_id(char *blk, int size) 355{ 356 HD_TAR *hd; 357 HD_USTAR *uhd; 358 359 if (size < BLKMULT) 360 return(-1); 361 hd = (HD_TAR *)blk; 362 uhd = (HD_USTAR *)blk; 363 364 /* 365 * check for block of zero's first, a simple and fast test, then make 366 * sure this is not a ustar header by looking for the ustar magic 367 * cookie. We should use TMAGLEN, but some USTAR archive programs are 368 * wrong and create archives missing the \0. Last we check the 369 * checksum. If this is ok we have to assume it is a valid header. 370 */ 371 if (hd->name[0] == '\0') 372 return(-1); 373 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) 374 return(-1); 375 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 376} 377 378/* 379 * tar_opt() 380 * handle tar format specific -o options 381 * Return: 382 * 0 if ok -1 otherwise 383 */ 384 385int 386tar_opt(void) 387{ 388 OPLIST *opt; 389 390 while ((opt = opt_next()) != NULL) { 391 if (strcmp(opt->name, TAR_OPTION) || 392 strcmp(opt->value, TAR_NODIR)) { 393 tty_warn(1, 394 "Unknown tar format -o option/value pair %s=%s", 395 opt->name, opt->value); 396 tty_warn(1, 397 "%s=%s is the only supported tar format option", 398 TAR_OPTION, TAR_NODIR); 399 return(-1); 400 } 401 402 /* 403 * we only support one option, and only when writing 404 */ 405 if ((act != APPND) && (act != ARCHIVE)) { 406 tty_warn(1, "%s=%s is only supported when writing.", 407 opt->name, opt->value); 408 return(-1); 409 } 410 tar_nodir = 1; 411 } 412 return(0); 413} 414 415 416/* 417 * tar_rd() 418 * extract the values out of block already determined to be a tar header. 419 * store the values in the ARCHD parameter. 420 * Return: 421 * 0 422 */ 423 424int 425tar_rd(ARCHD *arcn, char *buf) 426{ 427 HD_TAR *hd; 428 char *pt; 429 430 /* 431 * we only get proper sized buffers passed to us 432 */ 433 if (tar_id(buf, BLKMULT) < 0) 434 return(-1); 435 memset(arcn, 0, sizeof(*arcn)); 436 arcn->org_name = arcn->name; 437 arcn->pat = NULL; 438 arcn->sb.st_nlink = 1; 439 440 /* 441 * copy out the name and values in the stat buffer 442 */ 443 hd = (HD_TAR *)buf; 444 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 445 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 446 &gnu_name_string, hd->name, sizeof(hd->name)); 447 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 448 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 449 } 450 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 451 0xfff); 452 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 453 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 454 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 455 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 456 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 457 458 /* 459 * have to look at the last character, it may be a '/' and that is used 460 * to encode this as a directory 461 */ 462 pt = &(arcn->name[arcn->nlen - 1]); 463 arcn->pad = 0; 464 arcn->skip = 0; 465 switch(hd->linkflag) { 466 case SYMTYPE: 467 /* 468 * symbolic link, need to get the link name and set the type in 469 * the st_mode so -v printing will look correct. 470 */ 471 arcn->type = PAX_SLK; 472 arcn->sb.st_mode |= S_IFLNK; 473 break; 474 case LNKTYPE: 475 /* 476 * hard link, need to get the link name, set the type in the 477 * st_mode and st_nlink so -v printing will look better. 478 */ 479 arcn->type = PAX_HLK; 480 arcn->sb.st_nlink = 2; 481 482 /* 483 * no idea of what type this thing really points at, but 484 * we set something for printing only. 485 */ 486 arcn->sb.st_mode |= S_IFREG; 487 break; 488 case LONGLINKTYPE: 489 arcn->type = PAX_GLL; 490 /* FALLTHROUGH */ 491 case LONGNAMETYPE: 492 /* 493 * GNU long link/file; we tag these here and let the 494 * pax internals deal with it -- too ugly otherwise. 495 */ 496 if (hd->linkflag != LONGLINKTYPE) 497 arcn->type = PAX_GLF; 498 arcn->pad = TAR_PAD(arcn->sb.st_size); 499 arcn->skip = arcn->sb.st_size; 500 break; 501 case AREGTYPE: 502 case REGTYPE: 503 case DIRTYPE: /* see below */ 504 default: 505 /* 506 * If we have a trailing / this is a directory and NOT a file. 507 * Note: V7 tar doesn't actually have DIRTYPE, but it was 508 * reported that V7 archives using USTAR directories do exist. 509 */ 510 if (*pt == '/' || hd->linkflag == DIRTYPE) { 511 /* 512 * it is a directory, set the mode for -v printing 513 */ 514 arcn->type = PAX_DIR; 515 arcn->sb.st_mode |= S_IFDIR; 516 arcn->sb.st_nlink = 2; 517 } else { 518 /* 519 * have a file that will be followed by data. Set the 520 * skip value to the size field and calculate the size 521 * of the padding. 522 */ 523 arcn->type = PAX_REG; 524 arcn->sb.st_mode |= S_IFREG; 525 arcn->pad = TAR_PAD(arcn->sb.st_size); 526 arcn->skip = arcn->sb.st_size; 527 } 528 break; 529 } 530 531 /* 532 * strip off any trailing slash. 533 */ 534 if (*pt == '/') { 535 *pt = '\0'; 536 --arcn->nlen; 537 } 538 return(0); 539} 540 541/* 542 * tar_wr() 543 * write a tar header for the file specified in the ARCHD to the archive. 544 * Have to check for file types that cannot be stored and file names that 545 * are too long. Be careful of the term (last arg) to ul_oct, each field 546 * of tar has it own spec for the termination character(s). 547 * ASSUMED: space after header in header block is zero filled 548 * Return: 549 * 0 if file has data to be written after the header, 1 if file has NO 550 * data to write after the header, -1 if archive write failed 551 */ 552 553int 554tar_wr(ARCHD *arcn) 555{ 556 HD_TAR *hd; 557 int len; 558 char hdblk[sizeof(HD_TAR)]; 559 560 /* 561 * check for those file system types which tar cannot store 562 */ 563 switch(arcn->type) { 564 case PAX_DIR: 565 /* 566 * user asked that dirs not be written to the archive 567 */ 568 if (tar_nodir) 569 return(1); 570 break; 571 case PAX_CHR: 572 tty_warn(1, "Tar cannot archive a character device %s", 573 arcn->org_name); 574 return(1); 575 case PAX_BLK: 576 tty_warn(1, 577 "Tar cannot archive a block device %s", arcn->org_name); 578 return(1); 579 case PAX_SCK: 580 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 581 return(1); 582 case PAX_FIF: 583 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 584 return(1); 585 case PAX_SLK: 586 case PAX_HLK: 587 case PAX_HRG: 588 if (arcn->ln_nlen > sizeof(hd->linkname)) { 589 tty_warn(1,"Link name too long for tar %s", 590 arcn->ln_name); 591 return(1); 592 } 593 break; 594 case PAX_REG: 595 case PAX_CTG: 596 default: 597 break; 598 } 599 600 /* 601 * check file name len, remember extra char for dirs (the / at the end) 602 */ 603 len = arcn->nlen; 604 if (arcn->type == PAX_DIR) 605 ++len; 606 if (len >= sizeof(hd->name)) { 607 tty_warn(1, "File name too long for tar %s", arcn->name); 608 return(1); 609 } 610 611 /* 612 * copy the data out of the ARCHD into the tar header based on the type 613 * of the file. Remember many tar readers want the unused fields to be 614 * padded with zero. We set the linkflag field (type), the linkname 615 * (or zero if not used),the size, and set the padding (if any) to be 616 * added after the file data (0 for all other types, as they only have 617 * a header) 618 */ 619 memset(hdblk, 0, sizeof(hdblk)); 620 hd = (HD_TAR *)hdblk; 621 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 622 arcn->pad = 0; 623 624 if (arcn->type == PAX_DIR) { 625 /* 626 * directories are the same as files, except have a filename 627 * that ends with a /, we add the slash here. No data follows, 628 * dirs, so no pad. 629 */ 630 hd->linkflag = AREGTYPE; 631 hd->name[len-1] = '/'; 632 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 633 goto out; 634 } else if (arcn->type == PAX_SLK) { 635 /* 636 * no data follows this file, so no pad 637 */ 638 hd->linkflag = SYMTYPE; 639 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 640 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 641 goto out; 642 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 643 /* 644 * no data follows this file, so no pad 645 */ 646 hd->linkflag = LNKTYPE; 647 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 648 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 649 goto out; 650 } else { 651 /* 652 * data follows this file, so set the pad 653 */ 654 hd->linkflag = AREGTYPE; 655 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 656 tty_warn(1,"File is too large for tar %s", 657 arcn->org_name); 658 return(1); 659 } 660 arcn->pad = TAR_PAD(arcn->sb.st_size); 661 } 662 663 /* 664 * copy those fields that are independent of the type 665 */ 666 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 667 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 668 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 669 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 670 goto out; 671 672 /* 673 * calculate and add the checksum, then write the header. A return of 674 * 0 tells the caller to now write the file data, 1 says no data needs 675 * to be written 676 */ 677 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 678 sizeof(hd->chksum), 3)) 679 goto out; /* XXX Something's wrong here 680 * because a zero-byte file can 681 * cause this to be done and 682 * yet the resulting warning 683 * seems incorrect */ 684 685 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 686 return(-1); 687 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 688 return(-1); 689 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 690 return(0); 691 return(1); 692 693 out: 694 /* 695 * header field is out of range 696 */ 697 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 698 return(1); 699} 700 701/* 702 * Routines for POSIX ustar 703 */ 704 705/* 706 * ustar_strd() 707 * initialization for ustar read 708 * Return: 709 * 0 if ok, -1 otherwise 710 */ 711 712int 713ustar_strd(void) 714{ 715 return(0); 716} 717 718/* 719 * ustar_stwr() 720 * initialization for ustar write 721 * Return: 722 * 0 if ok, -1 otherwise 723 */ 724 725int 726ustar_stwr(void) 727{ 728 return(0); 729} 730 731/* 732 * ustar_id() 733 * determine if a block given to us is a valid ustar header. We have to 734 * be on the lookout for those pesky blocks of all zero's 735 * Return: 736 * 0 if a ustar header, -1 otherwise 737 */ 738 739int 740ustar_id(char *blk, int size) 741{ 742 HD_USTAR *hd; 743 744 if (size < BLKMULT) 745 return(-1); 746 hd = (HD_USTAR *)blk; 747 748 /* 749 * check for block of zero's first, a simple and fast test then check 750 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 751 * programs are fouled up and create archives missing the \0. Last we 752 * check the checksum. If ok we have to assume it is a valid header. 753 */ 754 if (hd->name[0] == '\0') 755 return(-1); 756 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 757 return(-1); 758 /* This is GNU tar */ 759 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 760 !seen_gnu_warning) { 761 seen_gnu_warning = 1; 762 tty_warn(0, 763 "Trying to read GNU tar archive with extensions off"); 764 } 765 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT); 766} 767 768/* 769 * ustar_rd() 770 * extract the values out of block already determined to be a ustar header. 771 * store the values in the ARCHD parameter. 772 * Return: 773 * 0 774 */ 775 776int 777ustar_rd(ARCHD *arcn, char *buf) 778{ 779 HD_USTAR *hd; 780 char *dest; 781 int cnt; 782 dev_t devmajor; 783 dev_t devminor; 784 785 /* 786 * we only get proper sized buffers 787 */ 788 if (ustar_id(buf, BLKMULT) < 0) 789 return(-1); 790 791 memset(arcn, 0, sizeof(*arcn)); 792 arcn->org_name = arcn->name; 793 arcn->pat = NULL; 794 arcn->sb.st_nlink = 1; 795 hd = (HD_USTAR *)buf; 796 797 /* 798 * see if the filename is split into two parts. if, so joint the parts. 799 * we copy the prefix first and add a / between the prefix and name. 800 */ 801 dest = arcn->name; 802 if (*(hd->prefix) != '\0') { 803 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 804 dest += cnt; 805 *dest++ = '/'; 806 cnt++; 807 } else { 808 cnt = 0; 809 } 810 811 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 812 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 813 &gnu_name_string, hd->name, sizeof(hd->name)); 814 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 815 &gnu_link_string, hd->linkname, sizeof(hd->linkname)); 816 } 817 818 /* 819 * follow the spec to the letter. we should only have mode bits, strip 820 * off all other crud we may be passed. 821 */ 822 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 823 0xfff); 824 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 825 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 826 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 827 828 /* 829 * If we can find the ascii names for gname and uname in the password 830 * and group files we will use the uid's and gid they bind. Otherwise 831 * we use the uid and gid values stored in the header. (This is what 832 * the posix spec wants). 833 */ 834 hd->gname[sizeof(hd->gname) - 1] = '\0'; 835 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 836 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 837 hd->uname[sizeof(hd->uname) - 1] = '\0'; 838 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 839 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 840 841 /* 842 * set the defaults, these may be changed depending on the file type 843 */ 844 arcn->pad = 0; 845 arcn->skip = 0; 846 arcn->sb.st_rdev = (dev_t)0; 847 848 /* 849 * set the mode and PAX type according to the typeflag in the header 850 */ 851 switch(hd->typeflag) { 852 case FIFOTYPE: 853 arcn->type = PAX_FIF; 854 arcn->sb.st_mode |= S_IFIFO; 855 break; 856 case DIRTYPE: 857 arcn->type = PAX_DIR; 858 arcn->sb.st_mode |= S_IFDIR; 859 arcn->sb.st_nlink = 2; 860 861 /* 862 * Some programs that create ustar archives append a '/' 863 * to the pathname for directories. This clearly violates 864 * ustar specs, but we will silently strip it off anyway. 865 */ 866 if (arcn->name[arcn->nlen - 1] == '/') 867 arcn->name[--arcn->nlen] = '\0'; 868 break; 869 case BLKTYPE: 870 case CHRTYPE: 871 /* 872 * this type requires the rdev field to be set. 873 */ 874 if (hd->typeflag == BLKTYPE) { 875 arcn->type = PAX_BLK; 876 arcn->sb.st_mode |= S_IFBLK; 877 } else { 878 arcn->type = PAX_CHR; 879 arcn->sb.st_mode |= S_IFCHR; 880 } 881 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 882 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 883 arcn->sb.st_rdev = TODEV(devmajor, devminor); 884 break; 885 case SYMTYPE: 886 case LNKTYPE: 887 if (hd->typeflag == SYMTYPE) { 888 arcn->type = PAX_SLK; 889 arcn->sb.st_mode |= S_IFLNK; 890 } else { 891 arcn->type = PAX_HLK; 892 /* 893 * so printing looks better 894 */ 895 arcn->sb.st_mode |= S_IFREG; 896 arcn->sb.st_nlink = 2; 897 } 898 break; 899 case LONGLINKTYPE: 900 if (is_gnutar) 901 arcn->type = PAX_GLL; 902 /* FALLTHROUGH */ 903 case LONGNAMETYPE: 904 if (is_gnutar) { 905 /* 906 * GNU long link/file; we tag these here and let the 907 * pax internals deal with it -- too ugly otherwise. 908 */ 909 if (hd->typeflag != LONGLINKTYPE) 910 arcn->type = PAX_GLF; 911 arcn->pad = TAR_PAD(arcn->sb.st_size); 912 arcn->skip = arcn->sb.st_size; 913 } else { 914 tty_warn(1, "GNU Long %s found in posix ustar archive.", 915 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 916 } 917 break; 918 case CONTTYPE: 919 case AREGTYPE: 920 case REGTYPE: 921 default: 922 /* 923 * these types have file data that follows. Set the skip and 924 * pad fields. 925 */ 926 arcn->type = PAX_REG; 927 arcn->pad = TAR_PAD(arcn->sb.st_size); 928 arcn->skip = arcn->sb.st_size; 929 arcn->sb.st_mode |= S_IFREG; 930 break; 931 } 932 return(0); 933} 934 935static int 936expandname(char *buf, size_t len, char **gnu_name, const char *name, 937 size_t nlen) 938{ 939 if (*gnu_name) { 940 len = strlcpy(buf, *gnu_name, len); 941 free(*gnu_name); 942 *gnu_name = NULL; 943 } else { 944 if (len > ++nlen) 945 len = nlen; 946 len = strlcpy(buf, name, len); 947 } 948 return len; 949} 950 951static void 952longlink(ARCHD *arcn) 953{ 954 ARCHD larc; 955 956 memset(&larc, 0, sizeof(larc)); 957 958 switch (arcn->type) { 959 case PAX_SLK: 960 case PAX_HRG: 961 case PAX_HLK: 962 larc.type = PAX_GLL; 963 larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink", 964 sizeof(larc.ln_name)); 965 gnu_hack_string = arcn->ln_name; 966 gnu_hack_len = arcn->ln_nlen + 1; 967 break; 968 default: 969 larc.nlen = strlcpy(larc.name, "././@LongLink", 970 sizeof(larc.name)); 971 gnu_hack_string = arcn->name; 972 gnu_hack_len = arcn->nlen + 1; 973 larc.type = PAX_GLF; 974 } 975 /* 976 * We need a longlink now. 977 */ 978 ustar_wr(&larc); 979} 980 981/* 982 * ustar_wr() 983 * write a ustar header for the file specified in the ARCHD to the archive 984 * Have to check for file types that cannot be stored and file names that 985 * are too long. Be careful of the term (last arg) to ul_oct, we only use 986 * '\0' for the termination character (this is different than picky tar) 987 * ASSUMED: space after header in header block is zero filled 988 * Return: 989 * 0 if file has data to be written after the header, 1 if file has NO 990 * data to write after the header, -1 if archive write failed 991 */ 992 993int 994ustar_wr(ARCHD *arcn) 995{ 996 HD_USTAR *hd; 997 char *pt; 998 char hdblk[sizeof(HD_USTAR)]; 999 const char *user, *group; 1000 1001 /* 1002 * check for those file system types ustar cannot store 1003 */ 1004 if (arcn->type == PAX_SCK) { 1005 if (!is_gnutar) 1006 tty_warn(1, "Ustar cannot archive a socket %s", 1007 arcn->org_name); 1008 return(1); 1009 } 1010 1011 /* 1012 * check the length of the linkname 1013 */ 1014 if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) || 1015 (arcn->type == PAX_HRG)) && 1016 (arcn->ln_nlen >= sizeof(hd->linkname))){ 1017 if (is_gnutar) { 1018 longlink(arcn); 1019 } else { 1020 tty_warn(1, "Link name too long for ustar %s", 1021 arcn->ln_name); 1022 return(1); 1023 } 1024 } 1025 1026 /* 1027 * split the path name into prefix and name fields (if needed). if 1028 * pt != arcn->name, the name has to be split 1029 */ 1030 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1031 if (is_gnutar) { 1032 longlink(arcn); 1033 pt = arcn->name; 1034 } else { 1035 tty_warn(1, "File name too long for ustar %s", 1036 arcn->name); 1037 return(1); 1038 } 1039 } 1040 1041 /* 1042 * zero out the header so we don't have to worry about zero fill below 1043 */ 1044 memset(hdblk, 0, sizeof(hdblk)); 1045 hd = (HD_USTAR *)hdblk; 1046 arcn->pad = 0L; 1047 1048 /* 1049 * split the name, or zero out the prefix 1050 */ 1051 if (pt != arcn->name) { 1052 /* 1053 * name was split, pt points at the / where the split is to 1054 * occur, we remove the / and copy the first part to the prefix 1055 */ 1056 *pt = '\0'; 1057 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1058 *pt++ = '/'; 1059 } 1060 1061 /* 1062 * copy the name part. this may be the whole path or the part after 1063 * the prefix 1064 */ 1065 strlcpy(hd->name, pt, sizeof(hd->name)); 1066 1067 /* 1068 * set the fields in the header that are type dependent 1069 */ 1070 switch(arcn->type) { 1071 case PAX_DIR: 1072 hd->typeflag = DIRTYPE; 1073 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1074 goto out; 1075 break; 1076 case PAX_CHR: 1077 case PAX_BLK: 1078 if (arcn->type == PAX_CHR) 1079 hd->typeflag = CHRTYPE; 1080 else 1081 hd->typeflag = BLKTYPE; 1082 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1083 sizeof(hd->devmajor), 3) || 1084 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1085 sizeof(hd->devminor), 3) || 1086 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1087 goto out; 1088 break; 1089 case PAX_FIF: 1090 hd->typeflag = FIFOTYPE; 1091 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1092 goto out; 1093 break; 1094 case PAX_GLL: 1095 case PAX_SLK: 1096 case PAX_HLK: 1097 case PAX_HRG: 1098 if (arcn->type == PAX_SLK) 1099 hd->typeflag = SYMTYPE; 1100 else if (arcn->type == PAX_GLL) 1101 hd->typeflag = LONGLINKTYPE; 1102 else 1103 hd->typeflag = LNKTYPE; 1104 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1105 if (ul_oct((u_long)gnu_hack_len, hd->size, 1106 sizeof(hd->size), 3)) 1107 goto out; 1108 break; 1109 case PAX_GLF: 1110 case PAX_REG: 1111 case PAX_CTG: 1112 default: 1113 /* 1114 * file data with this type, set the padding 1115 */ 1116 if (arcn->type == PAX_GLF) { 1117 hd->typeflag = LONGNAMETYPE; 1118 arcn->pad = TAR_PAD(gnu_hack_len); 1119 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1120 sizeof(hd->size), 3)) { 1121 tty_warn(1,"File is too long for ustar %s", 1122 arcn->org_name); 1123 return(1); 1124 } 1125 } else { 1126 if (arcn->type == PAX_CTG) 1127 hd->typeflag = CONTTYPE; 1128 else 1129 hd->typeflag = REGTYPE; 1130 arcn->pad = TAR_PAD(arcn->sb.st_size); 1131 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1132 sizeof(hd->size), 3)) { 1133 tty_warn(1,"File is too long for ustar %s", 1134 arcn->org_name); 1135 return(1); 1136 } 1137 } 1138 break; 1139 } 1140 1141 strncpy(hd->magic, TMAGIC, TMAGLEN); 1142 if (is_gnutar) 1143 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1144 else 1145 strncpy(hd->version, TVERSION, TVERSLEN); 1146 1147 /* 1148 * set the remaining fields. Some versions want all 16 bits of mode 1149 * we better humor them (they really do not meet spec though).... 1150 */ 1151 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) || 1152 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3) || 1153 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) || 1154 ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1155 goto out; 1156 user = user_from_uid(arcn->sb.st_uid, 1); 1157 group = group_from_gid(arcn->sb.st_gid, 1); 1158 strncpy(hd->uname, user ? user : "", sizeof(hd->uname)); 1159 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1160 1161 /* 1162 * calculate and store the checksum write the header to the archive 1163 * return 0 tells the caller to now write the file data, 1 says no data 1164 * needs to be written 1165 */ 1166 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1167 sizeof(hd->chksum), 3)) 1168 goto out; 1169 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1170 return(-1); 1171 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1172 return(-1); 1173 if (gnu_hack_string) { 1174 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1175 int pad = gnu_hack_len; 1176 gnu_hack_string = NULL; 1177 gnu_hack_len = 0; 1178 if (res < 0) 1179 return(-1); 1180 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1181 return(-1); 1182 } 1183 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1184 return(0); 1185 return(1); 1186 1187 out: 1188 /* 1189 * header field is out of range 1190 */ 1191 tty_warn(1, "Ustar header field is too small for %s", arcn->org_name); 1192 return(1); 1193} 1194 1195/* 1196 * name_split() 1197 * see if the name has to be split for storage in a ustar header. We try 1198 * to fit the entire name in the name field without splitting if we can. 1199 * The split point is always at a / 1200 * Return 1201 * character pointer to split point (always the / that is to be removed 1202 * if the split is not needed, the points is set to the start of the file 1203 * name (it would violate the spec to split there). A NULL is returned if 1204 * the file name is too long 1205 */ 1206 1207static char * 1208name_split(char *name, int len) 1209{ 1210 char *start; 1211 1212 /* 1213 * check to see if the file name is small enough to fit in the name 1214 * field. if so just return a pointer to the name. 1215 */ 1216 if (len < TNMSZ) 1217 return(name); 1218 if (len > (TPFSZ + TNMSZ)) 1219 return(NULL); 1220 1221 /* 1222 * we start looking at the biggest sized piece that fits in the name 1223 * field. We walk forward looking for a slash to split at. The idea is 1224 * to find the biggest piece to fit in the name field (or the smallest 1225 * prefix we can find) (the -1 is correct the biggest piece would 1226 * include the slash between the two parts that gets thrown away) 1227 */ 1228 start = name + len - TNMSZ; 1229 while ((*start != '\0') && (*start != '/')) 1230 ++start; 1231 1232 /* 1233 * if we hit the end of the string, this name cannot be split, so we 1234 * cannot store this file. 1235 */ 1236 if (*start == '\0') 1237 return(NULL); 1238 len = start - name; 1239 1240 /* 1241 * NOTE: /str where the length of str == TNMSZ can not be stored under 1242 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1243 * the file would then expand on extract to //str. The len == 0 below 1244 * makes this special case follow the spec to the letter. 1245 */ 1246 if ((len >= TPFSZ) || (len == 0)) 1247 return(NULL); 1248 1249 /* 1250 * ok have a split point, return it to the caller 1251 */ 1252 return(start); 1253} 1254 1255/* convert a glob into a RE, and add it to the list */ 1256static int 1257tar_gnutar_exclude_one(const char *line, size_t len) 1258{ 1259 char sbuf[MAXPATHLEN * 2 + 1 + 5]; 1260 int i, j; 1261 1262 if (line[len - 1] == '\n') 1263 len--; 1264 for (i = 0, j = 2; i < len; i++) { 1265 /* 1266 * convert glob to regexp, escaping everything 1267 */ 1268 if (line[i] == '*') 1269 sbuf[j++] = '.'; 1270 else if (line[i] == '?') { 1271 sbuf[j++] = '.'; 1272 continue; 1273 } else if (!isalnum(line[i]) && !isblank(line[i])) 1274 sbuf[j++] = '\\'; 1275 sbuf[j++] = line[i]; 1276 } 1277 sbuf[0] = sbuf[j + 1] = sbuf[j + 2] = '/'; 1278 sbuf[1] = '^'; 1279 sbuf[j] = '$'; 1280 sbuf[j + 3] = '\0'; 1281 if (rep_add(sbuf) < 0) 1282 return (-1); 1283 1284 return (0); 1285} 1286 1287/* 1288 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1289 * we go through each line of the file, building a string from the "glob" 1290 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1291 * to rep_add(), which will add a empty replacement (exclusion), for the 1292 * named files. 1293 */ 1294int 1295tar_gnutar_minus_minus_exclude(path) 1296 const char *path; 1297{ 1298 size_t len = strlen(path); 1299 1300 if (len > MAXPATHLEN) 1301 tty_warn(0, "pathname too long: %s", path); 1302 1303 return (tar_gnutar_exclude_one(path, len)); 1304} 1305 1306int 1307tar_gnutar_X_compat(path) 1308 const char *path; 1309{ 1310 char *line; 1311 FILE *fp; 1312 int lineno = 0; 1313 size_t len; 1314 1315 fp = fopen(path, "r"); 1316 if (fp == NULL) { 1317 tty_warn(1, "can not open %s: %s", path, 1318 strerror(errno)); 1319 return(-1); 1320 } 1321 1322 while ((line = fgetln(fp, &len))) { 1323 lineno++; 1324 if (len > MAXPATHLEN) { 1325 tty_warn(0, "pathname too long, line %d of %s", 1326 lineno, path); 1327 } 1328 if (tar_gnutar_exclude_one(line, len)) 1329 return (-1); 1330 } 1331 return (0); 1332} 1333