/* pax.c revision 3044 */
11590Srgrimes/*- 21590Srgrimes * Copyright (c) 1992 Keith Muller. 31590Srgrimes * Copyright (c) 1992, 1993 41590Srgrimes * The Regents of the University of California. All rights reserved. 51590Srgrimes * 61590Srgrimes * This code is derived from software contributed to Berkeley by 71590Srgrimes * Keith Muller of the University of California, San Diego. 81590Srgrimes * 91590Srgrimes * Redistribution and use in source and binary forms, with or without 101590Srgrimes * modification, are permitted provided that the following conditions 111590Srgrimes * are met: 121590Srgrimes * 1. Redistributions of source code must retain the above copyright 131590Srgrimes * notice, this list of conditions and the following disclaimer. 141590Srgrimes * 2. Redistributions in binary form must reproduce the above copyright 151590Srgrimes * notice, this list of conditions and the following disclaimer in the 161590Srgrimes * documentation and/or other materials provided with the distribution. 171590Srgrimes * 3. All advertising materials mentioning features or use of this software 181590Srgrimes * must display the following acknowledgement: 191590Srgrimes * This product includes software developed by the University of 201590Srgrimes * California, Berkeley and its contributors. 211590Srgrimes * 4. Neither the name of the University nor the names of its contributors 221590Srgrimes * may be used to endorse or promote products derived from this software 231590Srgrimes * without specific prior written permission. 241590Srgrimes * 251590Srgrimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 261590Srgrimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 271590Srgrimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 281590Srgrimes * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 291590Srgrimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 301590Srgrimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 311590Srgrimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 321590Srgrimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 331590Srgrimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 341590Srgrimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 351590Srgrimes * SUCH DAMAGE. 361590Srgrimes * 372537Spst * $Id$ 382537Spst */ 392537Spst 402537Spst#ifndef lint 412537Spststatic char copyright[] = 422537Spst"@(#) Copyright (c) 1992, 1993\n\ 432537Spst The Regents of the University of California. All rights reserved.\n"; 442537Spst#endif /* not lint */ 452537Spst 462537Spst#ifndef lint 471590Srgrimesstatic char sccsid[] = "@(#)pax.c 8.2 (Berkeley) 4/18/94"; 4867467Sru#endif /* not lint */ 491590Srgrimes 501590Srgrimes#include <stdio.h> 511590Srgrimes#include <sys/types.h> 521590Srgrimes#include <sys/param.h> 531590Srgrimes#include <sys/stat.h> 5427169Scharnier#include <sys/time.h> 5523693Speter#include <sys/resource.h> 5672109Scharnier#include <signal.h> 5727169Scharnier#include <unistd.h> 5850477Speter#include <stdlib.h> 591590Srgrimes#include <errno.h> 601590Srgrimes#include "pax.h" 611590Srgrimes#include "extern.h" 621590Srgrimesstatic int gen_init __P((void)); 631590Srgrimes 641590Srgrimes/* 651590Srgrimes * PAX main routines, general globals and some simple start up routines 661590Srgrimes */ 671590Srgrimes 681590Srgrimes/* 692537Spst * Variables that can be accessed by any routine within pax 702537Spst */ 712537Spstint act = DEFOP; /* read/write/append/copy */ 722537SpstFSUB *frmt = NULL; /* archive format type */ 731590Srgrimesint cflag; /* match all EXCEPT pattern/file */ 741590Srgrimesint dflag; /* directory member match 
only */ 7523693Speterint iflag; /* interactive file/archive rename */ 7623693Speterint kflag; /* do not overwrite existing files */ 771590Srgrimesint lflag; /* use hard links when possible */ 781590Srgrimesint nflag; /* select first archive member match */ 791590Srgrimesint tflag; /* restore access time after read */ 801590Srgrimesint uflag; /* ignore older modification time files */ 8123693Speterint vflag; /* produce verbose output */ 8223693Speterint Dflag; /* same as uflag except inode change time */ 8323693Speterint Hflag; /* follow command line symlinks (write only) */ 8411759Sacheint Lflag; /* follow symlinks when writing */ 8523693Speterint Xflag; /* archive files with same device id only */ 861590Srgrimesint Yflag; /* same as Dflg except after name mode */ 8764775Sbrianint Zflag; /* same as uflg except after name mode */ 881590Srgrimesint vfpart; /* is partial verbose output in progress */ 891590Srgrimesint patime = 1; /* preserve file access time */ 901590Srgrimesint pmtime = 1; /* preserve file modification times */ 9114631Solahint pmode; /* preserve file mode bits */ 921590Srgrimesint pids; /* preserve file uid/gid */ 931590Srgrimesint exit_val; /* exit value */ 941590Srgrimesint docrc; /* check/create file crc */ 9527169Scharnierchar *dirptr; /* destination dir in a copy */ 961590Srgrimeschar *ltmfrmt; /* -v locale time format (if any) */ 971590Srgrimeschar *argv0; /* root of argv[0] */ 982589Spstsigset_t s_mask; /* signal mask for cleanup critical sect */ 992589Spst 1001590Srgrimes/* 1011590Srgrimes * PAX - Portable Archive Interchange 1021590Srgrimes * 1031590Srgrimes * A utility to read, write, and write lists of the members of archive 1041590Srgrimes * files and copy directory hierarchies. 
A variety of archive formats 1052589Spst * are supported (some are described in POSIX 1003.1 10.1): 1062537Spst * 10724360Simp * ustar - 10.1.1 extended tar interchange format 1081590Srgrimes * cpio - 10.1.2 extended cpio interchange format 1091590Srgrimes * tar - old BSD 4.3 tar format 1101590Srgrimes * binary cpio - old cpio with binary header format 1111590Srgrimes * sysVR4 cpio - with and without CRC 1121590Srgrimes * 1131590Srgrimes * This version is a superset of IEEE Std 1003.2b-d3 1141590Srgrimes * 1151590Srgrimes * Summary of Extensions to the IEEE Standard: 1161590Srgrimes * 1171590Srgrimes * 1 READ ENHANCEMENTS 1181590Srgrimes * 1.1 Operations which read archives will continue to operate even when 1191590Srgrimes * processing archives which may be damaged, truncated, or fail to meet 1201590Srgrimes * format specs in several different ways. Damaged sections of archives 1212537Spst * are detected and avoided if possible. Attempts will be made to resync 1222537Spst * archive read operations even with badly damaged media. 1232537Spst * 1.2 Blocksize requirements are not strictly enforced on archive read. 1242537Spst * Tapes which have variable sized records can be read without errors. 1252537Spst * 1.3 The user can specify via the non-standard option flag -E if error 1262537Spst * resync operation should stop on a media error, try a specified number 12714631Solah * of times to correct, or try to correct forever. 12814631Solah * 1.4 Sparse files (lseek holes) stored on the archive (but stored with blocks 12914631Solah * of all zeros will be restored with holes appropriate for the target 1301590Srgrimes * filesystem 1311590Srgrimes * 1.5 The user is notified whenever something is found during archive 13227169Scharnier * read operations which violates spec (but the read will continue). 
1331590Srgrimes * 1.6 Multiple archive volumes can be read and may span over different 1341590Srgrimes * archive devices 1352589Spst * 1.7 Rigidly restores all file attributes exactly as they are stored on the 1362589Spst * archive. 1372589Spst * 1.8 Modification change time ranges can be specified via multiple -T 13827169Scharnier * options. These allow a user to select files whose modification time 13927169Scharnier * lies within a specific time range. 14027169Scharnier * 1.9 Files can be selected based on owner (user name or uid) via one or more 14127169Scharnier * -U options. 14227169Scharnier * 1.10 Files can be selected based on group (group name or gid) via one o 14327169Scharnier * more -G options. 14427169Scharnier * 1.11 File modification time can be checked against exisiting file after 14527169Scharnier * name modification (-Z) 1462589Spst * 1472589Spst * 2 WRITE ENHANCEMENTS 1482589Spst * 2.1 Write operation will stop instead of allowing a user to create a flawed 1492589Spst * flawed archive (due to any problem). 15027169Scharnier * 2.2 Archives writtens by pax are forced to strictly conform to both the 1512589Spst * archive and pax the spceific format specifications. 15246662Sobrien * 2.3 Blocking size and format is rigidly enforced on writes. 1532589Spst * 2.4 Formats which may exhibit header overflow problems (they have fields 15446662Sobrien * too small for large file systems, such as inode number storage), use 15546662Sobrien * routines designed to repair this problem. These techniques still 15646662Sobrien * conform to both pax and format specifications, but no longer truncate 15746662Sobrien * these fields. This removes any restrictions on using these archive 15846662Sobrien * formats on large file systems. 
15946662Sobrien * 2.5 Multiple archive volumes can be written and may span over different 16046662Sobrien * archive devices 16146662Sobrien * 2.6 A archive volume record limit allows the user to specify the number 16246662Sobrien * of bytes stored on an archive volume. When reached the user is 16346662Sobrien * prompted for the next archive volume. This is specified with the 16411811Sache * non-standard -B flag. THe limit is rounded up to the next blocksize. 16511759Sache * 2.7 All archive padding during write use zero filled sections. This makes 1662589Spst * it much easier to pull data out of flawed archive during read 1672589Spst * operations. 1682589Spst * 2.8 Access time reset with the -t applies to all file nodes (including 1692589Spst * directories). 1702589Spst * 2.9 Symbolic links can be followed with -L (optional in the spec). 1712589Spst * 2.10 Modification or inode change time ranges can be specified via 1722589Spst * multiple -T options. These allow a user to select files whose 1732589Spst * modification or inode change time lies within a specific time range. 1742589Spst * 2.11 Files can be selected based on owner (user name or uid) via one or more 1752589Spst * -U options. 1762589Spst * 2.12 Files can be selected based on group (group name or gid) via one o 1772589Spst * more -G options. 1782589Spst * 2.13 Symlinks which appear on the command line can be followed (without 1792589Spst * following other symlinks; -H flag) 1802589Spst * 1812589Spst * 3 COPY ENHANCEMENTS 1822589Spst * 3.1 Sparse files (lseek holes) can be copied without expanding the holes 1831590Srgrimes * into zero filled blocks. The file copy is created with holes which are 1841590Srgrimes * appropriate for the target filesystem 1851590Srgrimes * 3.2 Access time as well as modification time on copied file trees can be 1861590Srgrimes * preserved with the appropriate -p options. 
1871590Srgrimes * 3.3 Access time reset with the -t applies to all file nodes (including 1881590Srgrimes * directories). 1891590Srgrimes * 3.4 Symbolic links can be followed with -L (optional in the spec). 1901590Srgrimes * 3.5 Modification or inode change time ranges can be specified via 1911590Srgrimes * multiple -T options. These allow a user to select files whose 1921590Srgrimes * modification or inode change time lies within a specific time range. 1931590Srgrimes * 3.6 Files can be selected based on owner (user name or uid) via one or more 1941590Srgrimes * -U options. 1951590Srgrimes * 3.7 Files can be selected based on group (group name or gid) via one o 1961590Srgrimes * more -G options. 1971590Srgrimes * 3.8 Symlinks which appear on the command line can be followed (without 1981590Srgrimes * following other symlinks; -H flag) 1991590Srgrimes * 3.9 File inode change time can be checked against exisiting file before 2001590Srgrimes * name modification (-D) 2011590Srgrimes * 3.10 File inode change time can be checked against exisiting file after 2021590Srgrimes * name modification (-Y) 2031590Srgrimes * 3.11 File modification time can be checked against exisiting file after 2041590Srgrimes * name modification (-Z) 2051590Srgrimes * 20648566Sbillf * 4 GENERAL ENHANCEMENTS 2071590Srgrimes * 4.1 Internal structure is designed to isolate format dependent and 2081590Srgrimes * independent functions. Formats are selected via a format driver table. 2091590Srgrimes * This encourages the addition of new archive formats by only having to 2101590Srgrimes * write those routines which id, read and write the archive header. 21148566Sbillf */ 21223693Speter 2131590Srgrimes/* 2141590Srgrimes * main() 2151590Srgrimes * parse options, set up and operate as specified by the user. 
2161590Srgrimes * any operational flaw will set exit_val to non-zero 2171590Srgrimes * Return: 0 if ok, 1 otherwise 2181590Srgrimes */ 2191590Srgrimes 2201590Srgrimes#if __STDC__ 2211590Srgrimesint 2221590Srgrimesmain(int argc, char **argv) 2231590Srgrimes#else 2241590Srgrimesint 2251590Srgrimesmain(argc, argv) 22623693Speter int argc; 22727169Scharnier char **argv; 2281590Srgrimes#endif 2291590Srgrimes{ 2301590Srgrimes /* 2311590Srgrimes * parse options, determine operational mode, general init 2321590Srgrimes */ 2331590Srgrimes options(argc, argv); 2341590Srgrimes if ((gen_init() < 0) || (tty_init() < 0)) 2355369Sjkh return(exit_val); 2365369Sjkh 2371590Srgrimes /* 2381590Srgrimes * select a primary operation mode 2391590Srgrimes */ 2401590Srgrimes switch(act) { 2411590Srgrimes case EXTRACT: 2421590Srgrimes extract(); 24323693Speter break; 24423693Speter case ARCHIVE: 2451590Srgrimes archive(); 2461590Srgrimes break; 24723693Speter case APPND: 2481590Srgrimes append(); 2491590Srgrimes break; 25023693Speter case COPY: 25123693Speter copy(); 2521590Srgrimes break; 2531590Srgrimes default: 2541590Srgrimes case LIST: 2551590Srgrimes list(); 2561590Srgrimes break; 2571590Srgrimes } 2581590Srgrimes return(exit_val); 2591590Srgrimes} 2601590Srgrimes 2611590Srgrimes/* 2621590Srgrimes * sig_cleanup() 2631590Srgrimes * when interrupted we try to do whatever delayed processing we can. 2641590Srgrimes * This is not critical, but we really ought to limit our damage when we 2651590Srgrimes * are aborted by the user. 26664775Sbrian * Return: 26764775Sbrian * never.... 26864775Sbrian */ 26964775Sbrian 2701590Srgrimes#if __STDC__ 2711590Srgrimesvoid 2721590Srgrimessig_cleanup(int which_sig) 27323693Speter#else 2741590Srgrimesvoid 2751590Srgrimessig_cleanup(which_sig) 2761590Srgrimes int which_sig; 2771590Srgrimes#endif 2781590Srgrimes{ 2791590Srgrimes /* 2801590Srgrimes * restore modes and times for any dirs we may have created 2811590Srgrimes * or any dirs we may have read. 
Set vflag and vfpart so the user 2821590Srgrimes * will clearly see the message on a line by itself. 2831590Srgrimes */ 2841590Srgrimes vflag = vfpart = 1; 2851590Srgrimes if (which_sig == SIGXCPU) 2861590Srgrimes warn(0, "Cpu time limit reached, cleaning up."); 2871590Srgrimes else 2881590Srgrimes warn(0, "Signal caught, cleaning up."); 28966563Sbrian 29066563Sbrian ar_close(); 29166563Sbrian proc_dir(); 29266563Sbrian if (tflag) 29366563Sbrian atdir_end(); 29466563Sbrian exit(1); 29566563Sbrian} 29666563Sbrian 29766563Sbrian/* 29866563Sbrian * gen_init() 29964775Sbrian * general setup routines. Not all are required, but they really help 30064775Sbrian * when dealing with a medium to large sized archives. 30164775Sbrian */ 30264775Sbrian 30364775Sbrian#if __STDC__ 30464775Sbrianstatic int 30564775Sbriangen_init(void) 30664775Sbrian#else 30764775Sbrianstatic int 30864775Sbriangen_init() 30964775Sbrian#endif 31064775Sbrian{ 31164775Sbrian struct rlimit reslimit; 31264775Sbrian struct sigaction n_hand; 31364775Sbrian struct sigaction o_hand; 31464775Sbrian 31564775Sbrian /* 31664775Sbrian * Really needed to handle large archives. We can run out of memory for 31764775Sbrian * internal tables really fast when we have a whole lot of files... 31864775Sbrian */ 31964775Sbrian if (getrlimit(RLIMIT_DATA , &reslimit) == 0){ 32064775Sbrian reslimit.rlim_cur = reslimit.rlim_max; 32164775Sbrian (void)setrlimit(RLIMIT_DATA , &reslimit); 32264775Sbrian } 3231590Srgrimes 32465064Sbrian /* 32565064Sbrian * should file size limits be waived? 
if the os limits us, this is 32665064Sbrian * needed if we want to write a large archive 3271590Srgrimes */ 3281590Srgrimes if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){ 32966675Sru reslimit.rlim_cur = reslimit.rlim_max; 33066675Sru (void)setrlimit(RLIMIT_FSIZE , &reslimit); 33165064Sbrian } 33265064Sbrian 33366675Sru /* 33465064Sbrian * increase the size the stack can grow to 33565064Sbrian */ 33665064Sbrian if (getrlimit(RLIMIT_STACK , &reslimit) == 0){ 3371590Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 33865787Sbrian (void)setrlimit(RLIMIT_STACK , &reslimit); 3391590Srgrimes } 34065064Sbrian 34165787Sbrian /* 34265064Sbrian * not really needed, but doesn't hurt 34365787Sbrian */ 3441590Srgrimes if (getrlimit(RLIMIT_RSS , &reslimit) == 0){ 3451590Srgrimes reslimit.rlim_cur = reslimit.rlim_max; 3461590Srgrimes (void)setrlimit(RLIMIT_RSS , &reslimit); 3475369Sjkh } 3481590Srgrimes 3491590Srgrimes /* 35027169Scharnier * Handle posix locale 3511590Srgrimes * 3521590Srgrimes * set user defines time printing format for -v option 3531590Srgrimes */ 3544991Spst ltmfrmt = getenv("LC_TIME"); 3551590Srgrimes 3564991Spst /* 3574991Spst * signal handling to reset stored directory times and modes. Since 3584991Spst * we deal with broken pipes via failed writes we ignore it. We also 3591590Srgrimes * deal with any file size limit thorugh failed writes. Cpu time 3601590Srgrimes * limits are caught and a cleanup is forced. 
3611590Srgrimes */ 3621590Srgrimes if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) || 3631590Srgrimes (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) || 3641590Srgrimes (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) || 3651590Srgrimes (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) { 3661590Srgrimes warn(1, "Unable to set up signal mask"); 3671590Srgrimes return(-1); 36823693Speter } 3691590Srgrimes n_hand.sa_mask = s_mask; 3701590Srgrimes n_hand.sa_flags = 0; 3711590Srgrimes n_hand.sa_handler = sig_cleanup; 3721590Srgrimes 3731590Srgrimes if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) && 3741590Srgrimes (o_hand.sa_handler == SIG_IGN) && 3751590Srgrimes (sigaction(SIGHUP, &o_hand, &o_hand) < 0)) 3761590Srgrimes goto out; 3771590Srgrimes 37823693Speter if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) && 37923693Speter (o_hand.sa_handler == SIG_IGN) && 3801590Srgrimes (sigaction(SIGTERM, &o_hand, &o_hand) < 0)) 3811590Srgrimes goto out; 38223693Speter 3831590Srgrimes if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) && 3841590Srgrimes (o_hand.sa_handler == SIG_IGN) && 38523693Speter (sigaction(SIGINT, &o_hand, &o_hand) < 0)) 38623693Speter goto out; 3871590Srgrimes 3881590Srgrimes if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) && 389 (o_hand.sa_handler == SIG_IGN) && 390 (sigaction(SIGQUIT, &o_hand, &o_hand) < 0)) 391 goto out; 392 393 if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) && 394 (o_hand.sa_handler == SIG_IGN) && 395 (sigaction(SIGXCPU, &o_hand, &o_hand) < 0)) 396 goto out; 397 398 n_hand.sa_handler = SIG_IGN; 399 if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) || 400 (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0)) 401 goto out; 402 return(0); 403 404 out: 405 syswarn(1, errno, "Unable to set up signal handler"); 406 return(-1); 407} 408