tar.c revision 1.61
1/* $NetBSD: tar.c,v 1.61 2005/02/20 07:35:47 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1992 Keith Muller. 5 * Copyright (c) 1992, 1993 6 * The Regents of the University of California. All rights reserved. 7 * 8 * This code is derived from software contributed to Berkeley by 9 * Keith Muller of the University of California, San Diego. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. Neither the name of the University nor the names of its contributors 20 * may be used to endorse or promote products derived from this software 21 * without specific prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
34 */ 35 36#if HAVE_NBTOOL_CONFIG_H 37#include "nbtool_config.h" 38#endif 39 40#include <sys/cdefs.h> 41#if !defined(lint) 42#if 0 43static char sccsid[] = "@(#)tar.c 8.2 (Berkeley) 4/18/94"; 44#else 45__RCSID("$NetBSD: tar.c,v 1.61 2005/02/20 07:35:47 christos Exp $"); 46#endif 47#endif /* not lint */ 48 49#include <sys/types.h> 50#include <sys/time.h> 51#include <sys/stat.h> 52#include <sys/param.h> 53 54#include <ctype.h> 55#include <errno.h> 56#include <grp.h> 57#include <pwd.h> 58#include <stdio.h> 59#include <stdlib.h> 60#include <string.h> 61#include <unistd.h> 62 63#include "pax.h" 64#include "extern.h" 65#include "tar.h" 66 67/* 68 * Routines for reading, writing and header identify of various versions of tar 69 */ 70 71static int expandname(char *, size_t, char **, size_t *, const char *, size_t); 72static void longlink(ARCHD *, int); 73static u_long tar_chksm(char *, int); 74static char *name_split(char *, int); 75static int ul_oct(u_long, char *, int, int); 76#if !defined(NET2_STAT) && !defined(_LP64) 77static int ull_oct(unsigned long long, char *, int, int); 78#endif 79static int tar_gnutar_exclude_one(const char *, size_t); 80static int check_sum(char *, size_t, char *, size_t, int); 81 82/* 83 * Routines common to all versions of tar 84 */ 85 86static int tar_nodir; /* do not write dirs under old tar */ 87int is_gnutar; /* behave like gnu tar; enable gnu 88 * extensions and skip end-ofvolume 89 * checks 90 */ 91static int seen_gnu_warning; /* Have we warned yet? 
*/ 92static char *gnu_hack_string; /* ././@LongLink hackery */ 93static int gnu_hack_len; /* len of gnu_hack_string */ 94char *gnu_name_string; /* ././@LongLink hackery name */ 95char *gnu_link_string; /* ././@LongLink hackery link */ 96size_t gnu_name_length; /* ././@LongLink hackery name */ 97size_t gnu_link_length; /* ././@LongLink hackery link */ 98static int gnu_short_trailer; /* gnu short trailer */ 99 100static const char LONG_LINK[] = "././@LongLink"; 101 102#ifdef _PAX_ 103char DEV_0[] = "/dev/rst0"; 104char DEV_1[] = "/dev/rst1"; 105char DEV_4[] = "/dev/rst4"; 106char DEV_5[] = "/dev/rst5"; 107char DEV_7[] = "/dev/rst7"; 108char DEV_8[] = "/dev/rst8"; 109#endif 110 111static int 112check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet) 113{ 114 u_long hdck, blck; 115 116 hdck = asc_ul(hd, hdlen, OCT); 117 blck = tar_chksm(bl, bllen); 118 119 if (hdck != blck) { 120 if (!quiet) 121 tty_warn(0, "Header checksum %lo does not match %lo", 122 hdck, blck); 123 return(-1); 124 } 125 return(0); 126} 127 128 129/* 130 * tar_endwr() 131 * add the tar trailer of two null blocks 132 * Return: 133 * 0 if ok, -1 otherwise (what wr_skip returns) 134 */ 135 136int 137tar_endwr(void) 138{ 139 return(wr_skip((off_t)(NULLCNT * BLKMULT))); 140} 141 142/* 143 * tar_endrd() 144 * no cleanup needed here, just return size of trailer (for append) 145 * Return: 146 * size of trailer BLKMULT 147 */ 148 149off_t 150tar_endrd(void) 151{ 152 return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT)); 153} 154 155/* 156 * tar_trail() 157 * Called to determine if a header block is a valid trailer. We are passed 158 * the block, the in_sync flag (which tells us we are in resync mode; 159 * looking for a valid header), and cnt (which starts at zero) which is 160 * used to count the number of empty blocks we have seen so far. 161 * Return: 162 * 0 if a valid trailer, -1 if not a valid trailer, or 1 if the block 163 * could never contain a header. 
164 */ 165 166int 167tar_trail(char *buf, int in_resync, int *cnt) 168{ 169 int i; 170 171 gnu_short_trailer = 0; 172 /* 173 * look for all zero, trailer is two consecutive blocks of zero 174 */ 175 for (i = 0; i < BLKMULT; ++i) { 176 if (buf[i] != '\0') 177 break; 178 } 179 180 /* 181 * if not all zero it is not a trailer, but MIGHT be a header. 182 */ 183 if (i != BLKMULT) 184 return(-1); 185 186 /* 187 * When given a zero block, we must be careful! 188 * If we are not in resync mode, check for the trailer. Have to watch 189 * out that we do not mis-identify file data as the trailer, so we do 190 * NOT try to id a trailer during resync mode. During resync mode we 191 * might as well throw this block out since a valid header can NEVER be 192 * a block of all 0 (we must have a valid file name). 193 */ 194 if (!in_resync) { 195 ++*cnt; 196 /* 197 * old GNU tar (up through 1.13) only writes one block of 198 * trailers, so we pretend we got another 199 */ 200 if (is_gnutar) { 201 gnu_short_trailer = 1; 202 ++*cnt; 203 } 204 if (*cnt >= NULLCNT) 205 return(0); 206 } 207 return(1); 208} 209 210/* 211 * ul_oct() 212 * convert an unsigned long to an octal string. many oddball field 213 * termination characters are used by the various versions of tar in the 214 * different fields. term selects which kind to use. str is '0' padded 215 * at the front to len. we are unable to use only one format as many old 216 * tar readers are very cranky about this. 
217 * Return: 218 * 0 if the number fit into the string, -1 otherwise 219 */ 220 221static int 222ul_oct(u_long val, char *str, int len, int term) 223{ 224 char *pt; 225 226 /* 227 * term selects the appropriate character(s) for the end of the string 228 */ 229 pt = str + len - 1; 230 switch(term) { 231 case 3: 232 *pt-- = '\0'; 233 break; 234 case 2: 235 *pt-- = ' '; 236 *pt-- = '\0'; 237 break; 238 case 1: 239 *pt-- = ' '; 240 break; 241 case 0: 242 default: 243 *pt-- = '\0'; 244 *pt-- = ' '; 245 break; 246 } 247 248 /* 249 * convert and blank pad if there is space 250 */ 251 while (pt >= str) { 252 *pt-- = '0' + (char)(val & 0x7); 253 if ((val = val >> 3) == (u_long)0) 254 break; 255 } 256 257 while (pt >= str) 258 *pt-- = '0'; 259 if (val != (u_long)0) 260 return(-1); 261 return(0); 262} 263 264#if !defined(NET2_STAT) && !defined(_LP64) 265/* 266 * ull_oct() 267 * convert an unsigned long long to an octal string. one of many oddball 268 * field termination characters are used by the various versions of tar 269 * in the different fields. term selects which kind to use. str is '0' 270 * padded at the front to len. we are unable to use only one format as 271 * many old tar readers are very cranky about this. 
272 * Return: 273 * 0 if the number fit into the string, -1 otherwise 274 */ 275 276static int 277ull_oct(unsigned long long val, char *str, int len, int term) 278{ 279 char *pt; 280 281 /* 282 * term selects the appropriate character(s) for the end of the string 283 */ 284 pt = str + len - 1; 285 switch(term) { 286 case 3: 287 *pt-- = '\0'; 288 break; 289 case 2: 290 *pt-- = ' '; 291 *pt-- = '\0'; 292 break; 293 case 1: 294 *pt-- = ' '; 295 break; 296 case 0: 297 default: 298 *pt-- = '\0'; 299 *pt-- = ' '; 300 break; 301 } 302 303 /* 304 * convert and blank pad if there is space 305 */ 306 while (pt >= str) { 307 *pt-- = '0' + (char)(val & 0x7); 308 if ((val = val >> 3) == 0) 309 break; 310 } 311 312 while (pt >= str) 313 *pt-- = '0'; 314 if (val != (unsigned long long)0) 315 return(-1); 316 return(0); 317} 318#endif 319 320/* 321 * tar_chksm() 322 * calculate the checksum for a tar block counting the checksum field as 323 * all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks). 324 * NOTE: we use len to short circuit summing 0's on write since we ALWAYS 325 * pad headers with 0. 326 * Return: 327 * unsigned long checksum 328 */ 329 330static u_long 331tar_chksm(char *blk, int len) 332{ 333 char *stop; 334 char *pt; 335 u_long chksm = BLNKSUM; /* initial value is checksum field sum */ 336 337 /* 338 * add the part of the block before the checksum field 339 */ 340 pt = blk; 341 stop = blk + CHK_OFFSET; 342 while (pt < stop) 343 chksm += (u_long)(*pt++ & 0xff); 344 /* 345 * move past the checksum field and keep going, spec counts the 346 * checksum field as the sum of 8 blanks (which is pre-computed as 347 * BLNKSUM). 348 * ASSUMED: len is greater than CHK_OFFSET. 
(len is where our 0 padding 349 * starts, no point in summing zero's) 350 */ 351 pt += CHK_LEN; 352 stop = blk + len; 353 while (pt < stop) 354 chksm += (u_long)(*pt++ & 0xff); 355 return(chksm); 356} 357 358/* 359 * Routines for old BSD style tar (also made portable to sysV tar) 360 */ 361 362/* 363 * tar_id() 364 * determine if a block given to us is a valid tar header (and not a USTAR 365 * header). We have to be on the lookout for those pesky blocks of all 366 * zero's. 367 * Return: 368 * 0 if a tar header, -1 otherwise 369 */ 370 371int 372tar_id(char *blk, int size) 373{ 374 HD_TAR *hd; 375 HD_USTAR *uhd; 376 static int is_ustar = -1; 377 378 if (size < BLKMULT) 379 return(-1); 380 hd = (HD_TAR *)blk; 381 uhd = (HD_USTAR *)blk; 382 383 /* 384 * check for block of zero's first, a simple and fast test, then make 385 * sure this is not a ustar header by looking for the ustar magic 386 * cookie. We should use TMAGLEN, but some USTAR archive programs are 387 * wrong and create archives missing the \0. Last we check the 388 * checksum. If this is ok we have to assume it is a valid header. 
389 */ 390 if (hd->name[0] == '\0') 391 return(-1); 392 if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0) { 393 if (is_ustar == -1) { 394 is_ustar = 1; 395 return(-1); 396 } else 397 tty_warn(0, 398 "Busted tar archive: has both ustar and old tar " 399 "records"); 400 } else 401 is_ustar = 0; 402 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1); 403} 404 405/* 406 * tar_opt() 407 * handle tar format specific -o options 408 * Return: 409 * 0 if ok -1 otherwise 410 */ 411 412int 413tar_opt(void) 414{ 415 OPLIST *opt; 416 417 while ((opt = opt_next()) != NULL) { 418 if (strcmp(opt->name, TAR_OPTION) || 419 strcmp(opt->value, TAR_NODIR)) { 420 tty_warn(1, 421 "Unknown tar format -o option/value pair %s=%s", 422 opt->name, opt->value); 423 tty_warn(1, 424 "%s=%s is the only supported tar format option", 425 TAR_OPTION, TAR_NODIR); 426 return(-1); 427 } 428 429 /* 430 * we only support one option, and only when writing 431 */ 432 if ((act != APPND) && (act != ARCHIVE)) { 433 tty_warn(1, "%s=%s is only supported when writing.", 434 opt->name, opt->value); 435 return(-1); 436 } 437 tar_nodir = 1; 438 } 439 return(0); 440} 441 442 443/* 444 * tar_rd() 445 * extract the values out of block already determined to be a tar header. 446 * store the values in the ARCHD parameter. 
447 * Return: 448 * 0 449 */ 450 451int 452tar_rd(ARCHD *arcn, char *buf) 453{ 454 HD_TAR *hd; 455 char *pt; 456 457 /* 458 * we only get proper sized buffers passed to us 459 */ 460 if (tar_id(buf, BLKMULT) < 0) 461 return(-1); 462 memset(arcn, 0, sizeof(*arcn)); 463 arcn->org_name = arcn->name; 464 arcn->pat = NULL; 465 arcn->sb.st_nlink = 1; 466 467 /* 468 * copy out the name and values in the stat buffer 469 */ 470 hd = (HD_TAR *)buf; 471 if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) { 472 arcn->nlen = expandname(arcn->name, sizeof(arcn->name), 473 &gnu_name_string, &gnu_name_length, hd->name, 474 sizeof(hd->name)); 475 arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name), 476 &gnu_link_string, &gnu_link_length, hd->linkname, 477 sizeof(hd->linkname)); 478 } 479 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) & 480 0xfff); 481 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 482 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 483 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 484 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 485 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 486 487 /* 488 * have to look at the last character, it may be a '/' and that is used 489 * to encode this as a directory 490 */ 491 pt = &(arcn->name[arcn->nlen - 1]); 492 arcn->pad = 0; 493 arcn->skip = 0; 494 switch(hd->linkflag) { 495 case SYMTYPE: 496 /* 497 * symbolic link, need to get the link name and set the type in 498 * the st_mode so -v printing will look correct. 499 */ 500 arcn->type = PAX_SLK; 501 arcn->sb.st_mode |= S_IFLNK; 502 break; 503 case LNKTYPE: 504 /* 505 * hard link, need to get the link name, set the type in the 506 * st_mode and st_nlink so -v printing will look better. 
507 */ 508 arcn->type = PAX_HLK; 509 arcn->sb.st_nlink = 2; 510 511 /* 512 * no idea of what type this thing really points at, but 513 * we set something for printing only. 514 */ 515 arcn->sb.st_mode |= S_IFREG; 516 break; 517 case LONGLINKTYPE: 518 case LONGNAMETYPE: 519 /* 520 * GNU long link/file; we tag these here and let the 521 * pax internals deal with it -- too ugly otherwise. 522 */ 523 if (hd->linkflag != LONGLINKTYPE) 524 arcn->type = PAX_GLF; 525 else 526 arcn->type = PAX_GLL; 527 arcn->pad = TAR_PAD(arcn->sb.st_size); 528 arcn->skip = arcn->sb.st_size; 529 break; 530 case AREGTYPE: 531 case REGTYPE: 532 case DIRTYPE: /* see below */ 533 default: 534 /* 535 * If we have a trailing / this is a directory and NOT a file. 536 * Note: V7 tar doesn't actually have DIRTYPE, but it was 537 * reported that V7 archives using USTAR directories do exist. 538 */ 539 if (*pt == '/' || hd->linkflag == DIRTYPE) { 540 /* 541 * it is a directory, set the mode for -v printing 542 */ 543 arcn->type = PAX_DIR; 544 arcn->sb.st_mode |= S_IFDIR; 545 arcn->sb.st_nlink = 2; 546 } else { 547 /* 548 * have a file that will be followed by data. Set the 549 * skip value to the size field and calculate the size 550 * of the padding. 551 */ 552 arcn->type = PAX_REG; 553 arcn->sb.st_mode |= S_IFREG; 554 arcn->pad = TAR_PAD(arcn->sb.st_size); 555 arcn->skip = arcn->sb.st_size; 556 } 557 break; 558 } 559 560 /* 561 * strip off any trailing slash. 562 */ 563 if (*pt == '/') { 564 *pt = '\0'; 565 --arcn->nlen; 566 } 567 return(0); 568} 569 570/* 571 * tar_wr() 572 * write a tar header for the file specified in the ARCHD to the archive. 573 * Have to check for file types that cannot be stored and file names that 574 * are too long. Be careful of the term (last arg) to ul_oct, each field 575 * of tar has it own spec for the termination character(s). 
576 * ASSUMED: space after header in header block is zero filled 577 * Return: 578 * 0 if file has data to be written after the header, 1 if file has NO 579 * data to write after the header, -1 if archive write failed 580 */ 581 582int 583tar_wr(ARCHD *arcn) 584{ 585 HD_TAR *hd; 586 int len; 587 char hdblk[sizeof(HD_TAR)]; 588 589 /* 590 * check for those file system types which tar cannot store 591 */ 592 switch(arcn->type) { 593 case PAX_DIR: 594 /* 595 * user asked that dirs not be written to the archive 596 */ 597 if (tar_nodir) 598 return(1); 599 break; 600 case PAX_CHR: 601 tty_warn(1, "Tar cannot archive a character device %s", 602 arcn->org_name); 603 return(1); 604 case PAX_BLK: 605 tty_warn(1, 606 "Tar cannot archive a block device %s", arcn->org_name); 607 return(1); 608 case PAX_SCK: 609 tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name); 610 return(1); 611 case PAX_FIF: 612 tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name); 613 return(1); 614 case PAX_SLK: 615 case PAX_HLK: 616 case PAX_HRG: 617 if (arcn->ln_nlen > sizeof(hd->linkname)) { 618 tty_warn(1,"Link name too long for tar %s", 619 arcn->ln_name); 620 return(1); 621 } 622 break; 623 case PAX_REG: 624 case PAX_CTG: 625 default: 626 break; 627 } 628 629 /* 630 * check file name len, remember extra char for dirs (the / at the end) 631 */ 632 len = arcn->nlen; 633 if (arcn->type == PAX_DIR) 634 ++len; 635 if (len >= sizeof(hd->name)) { 636 tty_warn(1, "File name too long for tar %s", arcn->name); 637 return(1); 638 } 639 640 /* 641 * copy the data out of the ARCHD into the tar header based on the type 642 * of the file. Remember many tar readers want the unused fields to be 643 * padded with zero. 
We set the linkflag field (type), the linkname 644 * (or zero if not used),the size, and set the padding (if any) to be 645 * added after the file data (0 for all other types, as they only have 646 * a header) 647 */ 648 memset(hdblk, 0, sizeof(hdblk)); 649 hd = (HD_TAR *)hdblk; 650 strlcpy(hd->name, arcn->name, sizeof(hd->name)); 651 arcn->pad = 0; 652 653 if (arcn->type == PAX_DIR) { 654 /* 655 * directories are the same as files, except have a filename 656 * that ends with a /, we add the slash here. No data follows, 657 * dirs, so no pad. 658 */ 659 hd->linkflag = AREGTYPE; 660 hd->name[len-1] = '/'; 661 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 662 goto out; 663 } else if (arcn->type == PAX_SLK) { 664 /* 665 * no data follows this file, so no pad 666 */ 667 hd->linkflag = SYMTYPE; 668 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 669 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 670 goto out; 671 } else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) { 672 /* 673 * no data follows this file, so no pad 674 */ 675 hd->linkflag = LNKTYPE; 676 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 677 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1)) 678 goto out; 679 } else { 680 /* 681 * data follows this file, so set the pad 682 */ 683 hd->linkflag = AREGTYPE; 684 if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) { 685 tty_warn(1,"File is too large for tar %s", 686 arcn->org_name); 687 return(1); 688 } 689 arcn->pad = TAR_PAD(arcn->sb.st_size); 690 } 691 692 /* 693 * copy those fields that are independent of the type 694 */ 695 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) || 696 ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) || 697 ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) || 698 ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1)) 699 goto out; 700 701 /* 702 * calculate and add the checksum, then write the header. 
A return of 703 * 0 tells the caller to now write the file data, 1 says no data needs 704 * to be written 705 */ 706 if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum, 707 sizeof(hd->chksum), 3)) 708 goto out; /* XXX Something's wrong here 709 * because a zero-byte file can 710 * cause this to be done and 711 * yet the resulting warning 712 * seems incorrect */ 713 714 if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0) 715 return(-1); 716 if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0) 717 return(-1); 718 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 719 return(0); 720 return(1); 721 722 out: 723 /* 724 * header field is out of range 725 */ 726 tty_warn(1, "Tar header field is too small for %s", arcn->org_name); 727 return(1); 728} 729 730/* 731 * Routines for POSIX ustar 732 */ 733 734/* 735 * ustar_strd() 736 * initialization for ustar read 737 * Return: 738 * 0 if ok, -1 otherwise 739 */ 740 741int 742ustar_strd(void) 743{ 744 return(0); 745} 746 747/* 748 * ustar_stwr() 749 * initialization for ustar write 750 * Return: 751 * 0 if ok, -1 otherwise 752 */ 753 754int 755ustar_stwr(void) 756{ 757 return(0); 758} 759 760/* 761 * ustar_id() 762 * determine if a block given to us is a valid ustar header. We have to 763 * be on the lookout for those pesky blocks of all zero's 764 * Return: 765 * 0 if a ustar header, -1 otherwise 766 */ 767 768int 769ustar_id(char *blk, int size) 770{ 771 HD_USTAR *hd; 772 773 if (size < BLKMULT) 774 return(-1); 775 hd = (HD_USTAR *)blk; 776 777 /* 778 * check for block of zero's first, a simple and fast test then check 779 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive 780 * programs are fouled up and create archives missing the \0. Last we 781 * check the checksum. If ok we have to assume it is a valid header. 
782 */ 783 if (hd->name[0] == '\0') 784 return(-1); 785 if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) 786 return(-1); 787 /* This is GNU tar */ 788 if (strncmp(hd->magic, "ustar ", 8) == 0 && !is_gnutar && 789 !seen_gnu_warning) { 790 seen_gnu_warning = 1; 791 tty_warn(0, 792 "Trying to read GNU tar archive with extensions off"); 793 } 794 return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0); 795} 796 797/* 798 * ustar_rd() 799 * extract the values out of block already determined to be a ustar header. 800 * store the values in the ARCHD parameter. 801 * Return: 802 * 0 803 */ 804 805int 806ustar_rd(ARCHD *arcn, char *buf) 807{ 808 HD_USTAR *hd; 809 char *dest; 810 int cnt; 811 dev_t devmajor; 812 dev_t devminor; 813 814 /* 815 * we only get proper sized buffers 816 */ 817 if (ustar_id(buf, BLKMULT) < 0) 818 return(-1); 819 820 memset(arcn, 0, sizeof(*arcn)); 821 arcn->org_name = arcn->name; 822 arcn->pat = NULL; 823 arcn->sb.st_nlink = 1; 824 hd = (HD_USTAR *)buf; 825 826 /* 827 * see if the filename is split into two parts. if, so joint the parts. 828 * we copy the prefix first and add a / between the prefix and name. 829 */ 830 dest = arcn->name; 831 if (*(hd->prefix) != '\0') { 832 cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name)); 833 dest += cnt; 834 *dest++ = '/'; 835 cnt++; 836 } else { 837 cnt = 0; 838 } 839 840 if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) { 841 arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt, 842 &gnu_name_string, &gnu_name_length, hd->name, 843 sizeof(hd->name)) + cnt; 844 arcn->ln_nlen = expandname(arcn->ln_name, 845 sizeof(arcn->ln_name), &gnu_link_string, &gnu_link_length, 846 hd->linkname, sizeof(hd->linkname)); 847 } 848 849 /* 850 * follow the spec to the letter. we should only have mode bits, strip 851 * off all other crud we may be passed. 
852 */ 853 arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) & 854 0xfff); 855 arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT); 856 arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT); 857 arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime; 858 859 /* 860 * If we can find the ascii names for gname and uname in the password 861 * and group files we will use the uid's and gid they bind. Otherwise 862 * we use the uid and gid values stored in the header. (This is what 863 * the posix spec wants). 864 */ 865 hd->gname[sizeof(hd->gname) - 1] = '\0'; 866 if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0) 867 arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT); 868 hd->uname[sizeof(hd->uname) - 1] = '\0'; 869 if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0) 870 arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT); 871 872 /* 873 * set the defaults, these may be changed depending on the file type 874 */ 875 arcn->pad = 0; 876 arcn->skip = 0; 877 arcn->sb.st_rdev = (dev_t)0; 878 879 /* 880 * set the mode and PAX type according to the typeflag in the header 881 */ 882 switch(hd->typeflag) { 883 case FIFOTYPE: 884 arcn->type = PAX_FIF; 885 arcn->sb.st_mode |= S_IFIFO; 886 break; 887 case DIRTYPE: 888 arcn->type = PAX_DIR; 889 arcn->sb.st_mode |= S_IFDIR; 890 arcn->sb.st_nlink = 2; 891 892 /* 893 * Some programs that create ustar archives append a '/' 894 * to the pathname for directories. This clearly violates 895 * ustar specs, but we will silently strip it off anyway. 896 */ 897 if (arcn->name[arcn->nlen - 1] == '/') 898 arcn->name[--arcn->nlen] = '\0'; 899 break; 900 case BLKTYPE: 901 case CHRTYPE: 902 /* 903 * this type requires the rdev field to be set. 
904 */ 905 if (hd->typeflag == BLKTYPE) { 906 arcn->type = PAX_BLK; 907 arcn->sb.st_mode |= S_IFBLK; 908 } else { 909 arcn->type = PAX_CHR; 910 arcn->sb.st_mode |= S_IFCHR; 911 } 912 devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT); 913 devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT); 914 arcn->sb.st_rdev = TODEV(devmajor, devminor); 915 break; 916 case SYMTYPE: 917 case LNKTYPE: 918 if (hd->typeflag == SYMTYPE) { 919 arcn->type = PAX_SLK; 920 arcn->sb.st_mode |= S_IFLNK; 921 } else { 922 arcn->type = PAX_HLK; 923 /* 924 * so printing looks better 925 */ 926 arcn->sb.st_mode |= S_IFREG; 927 arcn->sb.st_nlink = 2; 928 } 929 break; 930 case LONGLINKTYPE: 931 case LONGNAMETYPE: 932 if (is_gnutar) { 933 /* 934 * GNU long link/file; we tag these here and let the 935 * pax internals deal with it -- too ugly otherwise. 936 */ 937 if (hd->typeflag != LONGLINKTYPE) 938 arcn->type = PAX_GLF; 939 else 940 arcn->type = PAX_GLL; 941 arcn->pad = TAR_PAD(arcn->sb.st_size); 942 arcn->skip = arcn->sb.st_size; 943 } else { 944 tty_warn(1, "GNU Long %s found in posix ustar archive.", 945 hd->typeflag == LONGLINKTYPE ? "Link" : "File"); 946 } 947 break; 948 case CONTTYPE: 949 case AREGTYPE: 950 case REGTYPE: 951 default: 952 /* 953 * these types have file data that follows. Set the skip and 954 * pad fields. 
955 */ 956 arcn->type = PAX_REG; 957 arcn->pad = TAR_PAD(arcn->sb.st_size); 958 arcn->skip = arcn->sb.st_size; 959 arcn->sb.st_mode |= S_IFREG; 960 break; 961 } 962 return(0); 963} 964 965static int 966expandname(char *buf, size_t len, char **gnu_name, size_t *gnu_length, 967 const char *name, size_t nlen) 968{ 969 if (*gnu_name) { 970 len = strlcpy(buf, *gnu_name, len); 971 free(*gnu_name); 972 *gnu_name = NULL; 973 *gnu_length = 0; 974 } else { 975 if (len > ++nlen) 976 len = nlen; 977 len = strlcpy(buf, name, len); 978 } 979 return len; 980} 981 982static void 983longlink(ARCHD *arcn, int type) 984{ 985 ARCHD larc; 986 987 (void)memset(&larc, 0, sizeof(larc)); 988 989 larc.type = type; 990 larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name)); 991 992 switch (type) { 993 case PAX_GLL: 994 gnu_hack_string = arcn->ln_name; 995 gnu_hack_len = arcn->ln_nlen + 1; 996 break; 997 case PAX_GLF: 998 gnu_hack_string = arcn->name; 999 gnu_hack_len = arcn->nlen + 1; 1000 break; 1001 default: 1002 errx(1, "Invalid type in GNU longlink %d\n", type); 1003 } 1004 1005 /* 1006 * We need a longlink now. 1007 */ 1008 ustar_wr(&larc); 1009} 1010 1011/* 1012 * ustar_wr() 1013 * write a ustar header for the file specified in the ARCHD to the archive 1014 * Have to check for file types that cannot be stored and file names that 1015 * are too long. 
Be careful of the term (last arg) to ul_oct, we only use 1016 * '\0' for the termination character (this is different than picky tar) 1017 * ASSUMED: space after header in header block is zero filled 1018 * Return: 1019 * 0 if file has data to be written after the header, 1 if file has NO 1020 * data to write after the header, -1 if archive write failed 1021 */ 1022 1023static int 1024size_err(const char *what, ARCHD *arcn) 1025{ 1026 /* 1027 * header field is out of range 1028 */ 1029 tty_warn(1, "Ustar %s header field is too small for %s", 1030 what, arcn->org_name); 1031 return 1; 1032} 1033 1034int 1035ustar_wr(ARCHD *arcn) 1036{ 1037 HD_USTAR *hd; 1038 char *pt; 1039 char hdblk[sizeof(HD_USTAR)]; 1040 const char *user, *group; 1041 1042 switch (arcn->type) { 1043 case PAX_SCK: 1044 /* 1045 * check for those file system types ustar cannot store 1046 */ 1047 if (!is_gnutar) 1048 tty_warn(1, "Ustar cannot archive a socket %s", 1049 arcn->org_name); 1050 return(1); 1051 1052 case PAX_SLK: 1053 case PAX_HLK: 1054 case PAX_HRG: 1055 /* 1056 * check the length of the linkname 1057 */ 1058 if (arcn->ln_nlen >= sizeof(hd->linkname)) { 1059 if (is_gnutar) { 1060 longlink(arcn, PAX_GLL); 1061 } else { 1062 tty_warn(1, "Link name too long for ustar %s", 1063 arcn->ln_name); 1064 return(1); 1065 } 1066 } 1067 break; 1068 default: 1069 break; 1070 } 1071 1072 /* 1073 * split the path name into prefix and name fields (if needed). 
if 1074 * pt != arcn->name, the name has to be split 1075 */ 1076 if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) { 1077 if (is_gnutar) { 1078 longlink(arcn, PAX_GLF); 1079 pt = arcn->name; 1080 } else { 1081 tty_warn(1, "File name too long for ustar %s", 1082 arcn->name); 1083 return(1); 1084 } 1085 } 1086 1087 /* 1088 * zero out the header so we don't have to worry about zero fill below 1089 */ 1090 memset(hdblk, 0, sizeof(hdblk)); 1091 hd = (HD_USTAR *)hdblk; 1092 arcn->pad = 0L; 1093 1094 /* 1095 * split the name, or zero out the prefix 1096 */ 1097 if (pt != arcn->name) { 1098 /* 1099 * name was split, pt points at the / where the split is to 1100 * occur, we remove the / and copy the first part to the prefix 1101 */ 1102 *pt = '\0'; 1103 strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix)); 1104 *pt++ = '/'; 1105 } 1106 1107 /* 1108 * copy the name part. this may be the whole path or the part after 1109 * the prefix 1110 */ 1111 strlcpy(hd->name, pt, sizeof(hd->name)); 1112 1113 /* 1114 * set the fields in the header that are type dependent 1115 */ 1116 switch(arcn->type) { 1117 case PAX_DIR: 1118 hd->typeflag = DIRTYPE; 1119 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1120 return size_err("DIRTYPE", arcn); 1121 break; 1122 case PAX_CHR: 1123 case PAX_BLK: 1124 if (arcn->type == PAX_CHR) 1125 hd->typeflag = CHRTYPE; 1126 else 1127 hd->typeflag = BLKTYPE; 1128 if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor, 1129 sizeof(hd->devmajor), 3) || 1130 ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor, 1131 sizeof(hd->devminor), 3) || 1132 ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1133 return size_err("DEVTYPE", arcn); 1134 break; 1135 case PAX_FIF: 1136 hd->typeflag = FIFOTYPE; 1137 if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3)) 1138 return size_err("FIFOTYPE", arcn); 1139 break; 1140 case PAX_GLL: 1141 case PAX_SLK: 1142 case PAX_HLK: 1143 case PAX_HRG: 1144 if (arcn->type == PAX_SLK) 1145 hd->typeflag = SYMTYPE; 1146 
else if (arcn->type == PAX_GLL) 1147 hd->typeflag = LONGLINKTYPE; 1148 else 1149 hd->typeflag = LNKTYPE; 1150 strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname)); 1151 if (ul_oct((u_long)gnu_hack_len, hd->size, 1152 sizeof(hd->size), 3)) 1153 return size_err("LINKTYPE", arcn); 1154 break; 1155 case PAX_GLF: 1156 case PAX_REG: 1157 case PAX_CTG: 1158 default: 1159 /* 1160 * file data with this type, set the padding 1161 */ 1162 if (arcn->type == PAX_GLF) { 1163 hd->typeflag = LONGNAMETYPE; 1164 arcn->pad = TAR_PAD(gnu_hack_len); 1165 if (OFFT_OCT((u_long)gnu_hack_len, hd->size, 1166 sizeof(hd->size), 3)) { 1167 tty_warn(1,"File is too long for ustar %s", 1168 arcn->org_name); 1169 return(1); 1170 } 1171 } else { 1172 if (arcn->type == PAX_CTG) 1173 hd->typeflag = CONTTYPE; 1174 else 1175 hd->typeflag = REGTYPE; 1176 arcn->pad = TAR_PAD(arcn->sb.st_size); 1177 if (OFFT_OCT(arcn->sb.st_size, hd->size, 1178 sizeof(hd->size), 3)) { 1179 tty_warn(1,"File is too long for ustar %s", 1180 arcn->org_name); 1181 return(1); 1182 } 1183 } 1184 break; 1185 } 1186 1187 strncpy(hd->magic, TMAGIC, TMAGLEN); 1188 if (is_gnutar) 1189 hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' '; 1190 else 1191 strncpy(hd->version, TVERSION, TVERSLEN); 1192 1193 /* 1194 * set the remaining fields. Some versions want all 16 bits of mode 1195 * we better humor them (they really do not meet spec though).... 1196 */ 1197 if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3)) 1198 return size_err("MODE", arcn); 1199 if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)) 1200 return size_err("UID", arcn); 1201 if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3)) 1202 return size_err("GID", arcn); 1203 if (ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3)) 1204 return size_err("MTIME", arcn); 1205 user = user_from_uid(arcn->sb.st_uid, 1); 1206 group = group_from_gid(arcn->sb.st_gid, 1); 1207 strncpy(hd->uname, user ? 
user : "", sizeof(hd->uname)); 1208 strncpy(hd->gname, group ? group : "", sizeof(hd->gname)); 1209 1210 /* 1211 * calculate and store the checksum write the header to the archive 1212 * return 0 tells the caller to now write the file data, 1 says no data 1213 * needs to be written 1214 */ 1215 if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum, 1216 sizeof(hd->chksum), 3)) 1217 return size_err("CHKSUM", arcn); 1218 if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0) 1219 return(-1); 1220 if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0) 1221 return(-1); 1222 if (gnu_hack_string) { 1223 int res = wr_rdbuf(gnu_hack_string, gnu_hack_len); 1224 int pad = gnu_hack_len; 1225 gnu_hack_string = NULL; 1226 gnu_hack_len = 0; 1227 if (res < 0) 1228 return(-1); 1229 if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0) 1230 return(-1); 1231 } 1232 if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG)) 1233 return(0); 1234 return(1); 1235} 1236 1237/* 1238 * name_split() 1239 * see if the name has to be split for storage in a ustar header. We try 1240 * to fit the entire name in the name field without splitting if we can. 1241 * The split point is always at a / 1242 * Return 1243 * character pointer to split point (always the / that is to be removed 1244 * if the split is not needed, the points is set to the start of the file 1245 * name (it would violate the spec to split there). A NULL is returned if 1246 * the file name is too long 1247 */ 1248 1249static char * 1250name_split(char *name, int len) 1251{ 1252 char *start; 1253 1254 /* 1255 * check to see if the file name is small enough to fit in the name 1256 * field. if so just return a pointer to the name. 1257 */ 1258 if (len < TNMSZ) 1259 return(name); 1260 /* 1261 * GNU tar does not honor the prefix+name mode if the magic 1262 * is not "ustar\0". So in GNU tar compatibility mode, we don't 1263 * split the filename into prefix+name because we are setting 1264 * the magic to "ustar " as GNU tar does. 
This of course will 1265 * end up creating a LongLink record in cases where it does not 1266 * really need do, but we are behaving like GNU tar after all. 1267 */ 1268 if (is_gnutar || len > (TPFSZ + TNMSZ)) 1269 return(NULL); 1270 1271 /* 1272 * we start looking at the biggest sized piece that fits in the name 1273 * field. We walk forward looking for a slash to split at. The idea is 1274 * to find the biggest piece to fit in the name field (or the smallest 1275 * prefix we can find) (the -1 is correct the biggest piece would 1276 * include the slash between the two parts that gets thrown away) 1277 */ 1278 start = name + len - TNMSZ; 1279 while ((*start != '\0') && (*start != '/')) 1280 ++start; 1281 1282 /* 1283 * if we hit the end of the string, this name cannot be split, so we 1284 * cannot store this file. 1285 */ 1286 if (*start == '\0') 1287 return(NULL); 1288 len = start - name; 1289 1290 /* 1291 * NOTE: /str where the length of str == TNMSZ cannot be stored under 1292 * the p1003.1-1990 spec for ustar. We could force a prefix of / and 1293 * the file would then expand on extract to //str. The len == 0 below 1294 * makes this special case follow the spec to the letter. 1295 */ 1296 if ((len >= TPFSZ) || (len == 0)) 1297 return(NULL); 1298 1299 /* 1300 * ok have a split point, return it to the caller 1301 */ 1302 return(start); 1303} 1304 1305/* 1306 * convert a glob into a RE, and add it to the list. 
we convert to 1307 * four different RE's (because we're using BRE's and can't use | 1308 * alternation :-() with this padding: 1309 * .*\/ and $ 1310 * .*\/ and \/.* 1311 * ^ and $ 1312 * ^ and \/.* 1313 */ 1314static int 1315tar_gnutar_exclude_one(const char *line, size_t len) 1316{ 1317 /* 2 * buffer len + nul */ 1318 char sbuf[MAXPATHLEN * 2 + 1]; 1319 /* + / + // + .*""/\/ + \/.* */ 1320 char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4]; 1321 int i, j; 1322 1323 if (line[len - 1] == '\n') 1324 len--; 1325 strncpy(sbuf, ".*" "\\/", j = 4); 1326 for (i = 0; i < len; i++) { 1327 /* 1328 * convert glob to regexp, escaping everything 1329 */ 1330 if (line[i] == '*') 1331 sbuf[j++] = '.'; 1332 else if (line[i] == '?') { 1333 sbuf[j++] = '.'; 1334 continue; 1335 } else if (!isalnum((unsigned char)line[i]) && 1336 !isblank((unsigned char)line[i])) 1337 sbuf[j++] = '\\'; 1338 sbuf[j++] = line[i]; 1339 } 1340 sbuf[j] = '\0'; 1341 /* don't need the .*\/ ones if we start with /, i guess */ 1342 if (line[0] != '/') { 1343 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf); 1344 if (rep_add(rabuf) < 0) 1345 return (-1); 1346 (void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf); 1347 if (rep_add(rabuf) < 0) 1348 return (-1); 1349 } 1350 1351 (void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf); 1352 if (rep_add(rabuf) < 0) 1353 return (-1); 1354 (void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf); 1355 if (rep_add(rabuf) < 0) 1356 return (-1); 1357 1358 return (0); 1359} 1360 1361/* 1362 * deal with GNU tar -X/--exclude-from & --exclude switchs. basically, 1363 * we go through each line of the file, building a string from the "glob" 1364 * lines in the file into RE lines, of the form `/^RE$//', which we pass 1365 * to rep_add(), which will add a empty replacement (exclusion), for the 1366 * named files. 
/*
 * tar_gnutar_minus_minus_exclude()
 *	handle a single GNU tar --exclude argument: path is a glob pattern
 *	that is passed to tar_gnutar_exclude_one(). An empty pattern is
 *	ignored. A pattern longer than MAXPATHLEN is reported with a warning
 *	and ignored, because it cannot be converted safely.
 * Return
 *	0 if the pattern was added or ignored, otherwise whatever
 *	tar_gnutar_exclude_one() returned
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t len = strlen(path);

	if (len == 0)
		return (0);
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (0);
	}

	return (tar_gnutar_exclude_one(path, len));
}

/*
 * tar_gnutar_X_compat()
 *	handle GNU tar -X/--exclude-from: read the file named by path and
 *	pass every line to tar_gnutar_exclude_one() as a glob pattern. Blank
 *	lines are skipped. Lines longer than MAXPATHLEN (not counting the
 *	newline) are reported with a warning and skipped. The file is closed
 *	again before returning, on success and on failure.
 * Return
 *	0 if the whole file was processed, -1 if the file could not be
 *	opened or a pattern could not be added
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return (-1);
	}

	/*
	 * fgetln() hands back a pointer into the stream buffer that is not
	 * NUL terminated, so only the first len bytes of line may be used
	 */
	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		/* judge the pattern itself, not the line terminator */
		if (len > 0 && line[len - 1] == '\n')
			len--;
		if (len == 0)
			continue;
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	(void)fclose(fp);
	return (rval);
}