pax.c revision 46684
1193326Sed/*-
2193326Sed * Copyright (c) 1992 Keith Muller.
3193326Sed * Copyright (c) 1992, 1993
4193326Sed *	The Regents of the University of California.  All rights reserved.
5193326Sed *
6193326Sed * This code is derived from software contributed to Berkeley by
7193326Sed * Keith Muller of the University of California, San Diego.
8193326Sed *
9193326Sed * Redistribution and use in source and binary forms, with or without
10193326Sed * modification, are permitted provided that the following conditions
11193326Sed * are met:
12193326Sed * 1. Redistributions of source code must retain the above copyright
13193326Sed *    notice, this list of conditions and the following disclaimer.
14193326Sed * 2. Redistributions in binary form must reproduce the above copyright
15193326Sed *    notice, this list of conditions and the following disclaimer in the
16193326Sed *    documentation and/or other materials provided with the distribution.
17198092Srdivacky * 3. All advertising materials mentioning features or use of this software
18234353Sdim *    must display the following acknowledgement:
19193326Sed *	This product includes software developed by the University of
20193326Sed *	California, Berkeley and its contributors.
21193326Sed * 4. Neither the name of the University nor the names of its contributors
22193326Sed *    may be used to endorse or promote products derived from this software
23193326Sed *    without specific prior written permission.
24193326Sed *
25249423Sdim * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26249423Sdim * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27249423Sdim * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28193326Sed * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29193326Sed * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30193326Sed * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31193326Sed * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32249423Sdim * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33249423Sdim * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34249423Sdim * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35193326Sed * SUCH DAMAGE.
36193326Sed */
37249423Sdim
/*
 * Identification strings embedded in the object file. They are compiled
 * out when the source is run through lint.
 */
#ifndef lint
/*
 * Built from adjacent literals so the exact bytes (including the tab that
 * starts the second line) do not depend on how this source is indented.
 */
static char const copyright[] =
"@(#) Copyright (c) 1992, 1993\n"
"\tThe Regents of the University of California.  All rights reserved.\n";
#endif /* not lint */

#ifndef lint
#if 0
static char sccsid[] = "@(#)pax.c	8.2 (Berkeley) 4/18/94";
#endif
static const char rcsid[] =
	"$Id: pax.c,v 1.11 1998/05/15 06:27:45 charnier Exp $";
#endif /* not lint */
51193326Sed
52193326Sed#include <sys/types.h>
53193326Sed#include <sys/stat.h>
54193326Sed#include <sys/time.h>
55193326Sed#include <sys/resource.h>
56193326Sed#include <errno.h>
57193326Sed#include <locale.h>
58199990Srdivacky#include <signal.h>
59193326Sed#include <stdio.h>
60193326Sed#include <stdlib.h>
61193326Sed#include <unistd.h>
62193326Sed#include "pax.h"
63193326Sed#include "extern.h"
64193326Sedstatic int gen_init __P((void));
65239462Sdim
66239462Sdim/*
67239462Sdim * PAX main routines, general globals and some simple start up routines
68193326Sed */
69193326Sed
70239462Sdim/*
71239462Sdim * Variables that can be accessed by any routine within pax
72239462Sdim */
73193326Sedint	act = DEFOP;		/* read/write/append/copy */
74193326SedFSUB	*frmt = NULL;		/* archive format type */
75193326Sedint	cflag;			/* match all EXCEPT pattern/file */
76193326Sedint	dflag;			/* directory member match only  */
77193326Sedint	iflag;			/* interactive file/archive rename */
78193326Sedint	kflag;			/* do not overwrite existing files */
79193326Sedint	lflag;			/* use hard links when possible */
80193326Sedint	nflag;			/* select first archive member match */
81193326Sedint	tflag;			/* restore access time after read */
82193326Sedint	uflag;			/* ignore older modification time files */
83193326Sedint	vflag;			/* produce verbose output */
84193326Sedint	Dflag;			/* same as uflag except inode change time */
85193326Sedint	Hflag;			/* follow command line symlinks (write only) */
86193326Sedint	Lflag;			/* follow symlinks when writing */
87193326Sedint	Xflag;			/* archive files with same device id only */
88193326Sedint	Yflag;			/* same as Dflg except after name mode */
89193326Sedint	Zflag;			/* same as uflg except after name mode */
90193326Sedint	vfpart;			/* is partial verbose output in progress */
91193326Sedint	patime = 1;		/* preserve file access time */
92193326Sedint	pmtime = 1;		/* preserve file modification times */
93193326Sedint	pmode;			/* preserve file mode bits */
94193326Sedint	pids;			/* preserve file uid/gid */
95193326Sedint	exit_val;		/* exit value */
96193326Sedint	docrc;			/* check/create file crc */
97193326Sedchar	*dirptr;		/* destination dir in a copy */
98193326Sedchar	*ltmfrmt;		/* -v locale time format (if any) */
99193326Sedchar	*argv0;			/* root of argv[0] */
100193326Sedsigset_t s_mask;		/* signal mask for cleanup critical sect */
101193326Sed
102193326Sed/*
103193326Sed *	PAX - Portable Archive Interchange
104193326Sed *
105193326Sed * 	A utility to read, write, and write lists of the members of archive
106193326Sed *	files and copy directory hierarchies. A variety of archive formats
107193326Sed *	are supported (some are described in POSIX 1003.1 10.1):
108193326Sed *
109193326Sed *		ustar - 10.1.1 extended tar interchange format
110193326Sed *		cpio  - 10.1.2 extended cpio interchange format
111193326Sed *		tar - old BSD 4.3 tar format
112193326Sed *		binary cpio - old cpio with binary header format
113193326Sed *		sysVR4 cpio -  with and without CRC
114193326Sed *
115193326Sed * This version is a superset of IEEE Std 1003.2b-d3
116239462Sdim *
117239462Sdim * Summary of Extensions to the IEEE Standard:
118193326Sed *
119193326Sed * 1	READ ENHANCEMENTS
120193326Sed * 1.1	Operations which read archives will continue to operate even when
121193326Sed *	processing archives which may be damaged, truncated, or fail to meet
122193326Sed *	format specs in several different ways. Damaged sections of archives
123193326Sed *	are detected and avoided if possible. Attempts will be made to resync
124193326Sed *	archive read operations even with badly damaged media.
125193326Sed * 1.2	Blocksize requirements are not strictly enforced on archive read.
126193326Sed *	Tapes which have variable sized records can be read without errors.
127193326Sed * 1.3	The user can specify via the non-standard option flag -E if error
128193326Sed *	resync operation should stop on a media error, try a specified number
129199990Srdivacky *	of times to correct, or try to correct forever.
 * 1.4	Sparse files (lseek holes) stored on the archive (but stored with blocks
 *	of all zeros) will be restored with holes appropriate for the target
 *	filesystem
133199990Srdivacky * 1.5	The user is notified whenever something is found during archive
134199990Srdivacky *	read operations which violates spec (but the read will continue).
135193326Sed * 1.6	Multiple archive volumes can be read and may span over different
136193326Sed *	archive devices
137198092Srdivacky * 1.7	Rigidly restores all file attributes exactly as they are stored on the
138198092Srdivacky *	archive.
139193326Sed * 1.8	Modification change time ranges can be specified via multiple -T
140193326Sed *	options. These allow a user to select files whose modification time
141193326Sed *	lies within a specific time range.
142193326Sed * 1.9	Files can be selected based on owner (user name or uid) via one or more
143193326Sed *	-U options.
 * 1.10	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
146198092Srdivacky * 1.11	File modification time can be checked against existing file after
147193326Sed *	name modification (-Z)
148193326Sed *
149193326Sed * 2	WRITE ENHANCEMENTS
 * 2.1	Write operation will stop instead of allowing a user to create a
 *	flawed archive (due to any problem).
 * 2.2	Archives written by pax are forced to strictly conform to both the
 *	pax and the specific archive format specifications.
154199990Srdivacky * 2.3	Blocking size and format is rigidly enforced on writes.
155199990Srdivacky * 2.4	Formats which may exhibit header overflow problems (they have fields
156199990Srdivacky *	too small for large file systems, such as inode number storage), use
157193326Sed *	routines designed to repair this problem. These techniques still
158193326Sed *	conform to both pax and format specifications, but no longer truncate
159193326Sed *	these fields. This removes any restrictions on using these archive
160193326Sed *	formats on large file systems.
161193326Sed * 2.5	Multiple archive volumes can be written and may span over different
162193326Sed *	archive devices
 * 2.6	An archive volume record limit allows the user to specify the number
164239462Sdim *	of bytes stored on an archive volume. When reached the user is
165239462Sdim *	prompted for the next archive volume. This is specified with the
166193326Sed *	non-standard -B flag. The limit is rounded up to the next blocksize.
167193326Sed * 2.7	All archive padding during write use zero filled sections. This makes
168193326Sed *	it much easier to pull data out of flawed archive during read
169193326Sed *	operations.
170193326Sed * 2.8	Access time reset with the -t applies to all file nodes (including
171193326Sed *	directories).
172198092Srdivacky * 2.9	Symbolic links can be followed with -L (optional in the spec).
173198092Srdivacky * 2.10	Modification or inode change time ranges can be specified via
174193326Sed *	multiple -T options. These allow a user to select files whose
175193326Sed *	modification or inode change time lies within a specific time range.
176193326Sed * 2.11	Files can be selected based on owner (user name or uid) via one or more
177193326Sed *	-U options.
 * 2.12	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
180193326Sed * 2.13	Symlinks which appear on the command line can be followed (without
181193326Sed *	following other symlinks; -H flag)
182193326Sed *
183193326Sed * 3	COPY ENHANCEMENTS
184193326Sed * 3.1	Sparse files (lseek holes) can be copied without expanding the holes
185263508Sdim *	into zero filled blocks. The file copy is created with holes which are
186198092Srdivacky *	appropriate for the target filesystem
187193326Sed * 3.2	Access time as well as modification time on copied file trees can be
188193326Sed *	preserved with the appropriate -p options.
189193326Sed * 3.3	Access time reset with the -t applies to all file nodes (including
190263508Sdim *	directories).
191263508Sdim * 3.4	Symbolic links can be followed with -L (optional in the spec).
192263508Sdim * 3.5	Modification or inode change time ranges can be specified via
193263508Sdim *	multiple -T options. These allow a user to select files whose
194263508Sdim *	modification or inode change time lies within a specific time range.
195193326Sed * 3.6	Files can be selected based on owner (user name or uid) via one or more
196193326Sed *	-U options.
 * 3.7	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
199193326Sed * 3.8	Symlinks which appear on the command line can be followed (without
200193326Sed *	following other symlinks; -H flag)
201193326Sed * 3.9  File inode change time can be checked against existing file before
202193326Sed *	name modification (-D)
203198092Srdivacky * 3.10 File inode change time can be checked against existing file after
204193326Sed *	name modification (-Y)
205193326Sed * 3.11	File modification time can be checked against existing file after
206193326Sed *	name modification (-Z)
207202379Srdivacky *
208202379Srdivacky * 4	GENERAL ENHANCEMENTS
209202379Srdivacky * 4.1	Internal structure is designed to isolate format dependent and
210202379Srdivacky *	independent functions. Formats are selected via a format driver table.
211202379Srdivacky *	This encourages the addition of new archive formats by only having to
212202379Srdivacky *	write those routines which id, read and write the archive header.
213202379Srdivacky */
214202379Srdivacky
215207619Srdivacky/*
216193326Sed * main()
217193326Sed *	parse options, set up and operate as specified by the user.
218193326Sed *	any operational flaw will set exit_val to non-zero
219193326Sed * Return: 0 if ok, 1 otherwise
220193326Sed */
221198092Srdivacky
222193326Sed#if __STDC__
223193326Sedint
224193326Sedmain(int argc, char **argv)
225193326Sed#else
226193326Sedint
227193326Sedmain(argc, argv)
228193326Sed	int argc;
229193326Sed	char **argv;
230193326Sed#endif
231193326Sed{
232193326Sed	(void) setlocale(LC_ALL, "");
233193326Sed	/*
234193326Sed	 * parse options, determine operational mode, general init
235198092Srdivacky	 */
236198092Srdivacky	options(argc, argv);
237198092Srdivacky        if ((gen_init() < 0) || (tty_init() < 0))
238198092Srdivacky		return(exit_val);
239198092Srdivacky
240199990Srdivacky	/*
241193326Sed	 * select a primary operation mode
242193326Sed	 */
243193326Sed	switch(act) {
244193326Sed	case EXTRACT:
245193326Sed		extract();
246198092Srdivacky		break;
247193326Sed	case ARCHIVE:
248193326Sed		archive();
249193326Sed		break;
250193326Sed	case APPND:
251193326Sed		append();
252193326Sed		break;
253193326Sed	case COPY:
254193326Sed		copy();
255193326Sed		break;
256193326Sed	default:
257199990Srdivacky	case LIST:
258199990Srdivacky		list();
259199990Srdivacky		break;
260199990Srdivacky	}
261193326Sed	return(exit_val);
262193326Sed}
263239462Sdim
264239462Sdim/*
265239462Sdim * sig_cleanup()
266239462Sdim *	when interrupted we try to do whatever delayed processing we can.
267239462Sdim *	This is not critical, but we really ought to limit our damage when we
268239462Sdim *	are aborted by the user.
269239462Sdim * Return:
270193326Sed *	never....
271193326Sed */
272193326Sed
273193326Sed#if __STDC__
274193326Sedvoid
275193326Sedsig_cleanup(int which_sig)
276239462Sdim#else
277239462Sdimvoid
278239462Sdimsig_cleanup(which_sig)
279239462Sdim	int which_sig;
280239462Sdim#endif
281193326Sed{
282193326Sed	/*
283193326Sed	 * restore modes and times for any dirs we may have created
284193326Sed	 * or any dirs we may have read. Set vflag and vfpart so the user
285193326Sed	 * will clearly see the message on a line by itself.
286193326Sed	 */
287193326Sed	vflag = vfpart = 1;
288193326Sed	if (which_sig == SIGXCPU)
289193326Sed		pax_warn(0, "Cpu time limit reached, cleaning up.");
290193326Sed	else
291193326Sed		pax_warn(0, "Signal caught, cleaning up.");
292193326Sed
293193326Sed	ar_close();
294193326Sed	proc_dir();
295193326Sed	if (tflag)
296193326Sed		atdir_end();
297193326Sed	exit(1);
298193326Sed}
299193326Sed
300193326Sed/*
301203955Srdivacky * gen_init()
302203955Srdivacky *	general setup routines. Not all are required, but they really help
303199482Srdivacky *	when dealing with a medium to large sized archives.
304199482Srdivacky */
305193326Sed
306193326Sed#if __STDC__
307263508Sdimstatic int
308263508Sdimgen_init(void)
309193326Sed#else
310193326Sedstatic int
311203955Srdivackygen_init()
312203955Srdivacky#endif
313203955Srdivacky{
314193326Sed	struct rlimit reslimit;
315193326Sed	struct sigaction n_hand;
316193326Sed	struct sigaction o_hand;
317193326Sed
318203955Srdivacky	/*
319193326Sed	 * Really needed to handle large archives. We can run out of memory for
320193326Sed	 * internal tables really fast when we have a whole lot of files...
321193326Sed	 */
322193326Sed	if (getrlimit(RLIMIT_DATA , &reslimit) == 0){
323193326Sed		reslimit.rlim_cur = reslimit.rlim_max;
324203955Srdivacky		(void)setrlimit(RLIMIT_DATA , &reslimit);
325193326Sed	}
326193326Sed
327193326Sed	/*
328193326Sed	 * should file size limits be waived? if the os limits us, this is
329193326Sed	 * needed if we want to write a large archive
330203955Srdivacky	 */
331193326Sed	if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){
332193326Sed		reslimit.rlim_cur = reslimit.rlim_max;
333193326Sed		(void)setrlimit(RLIMIT_FSIZE , &reslimit);
334193326Sed	}
335193326Sed
336193326Sed	/*
337193326Sed	 * increase the size the stack can grow to
338193326Sed	 */
339193326Sed	if (getrlimit(RLIMIT_STACK , &reslimit) == 0){
340218893Sdim		reslimit.rlim_cur = reslimit.rlim_max;
341193326Sed		(void)setrlimit(RLIMIT_STACK , &reslimit);
342193326Sed	}
343208600Srdivacky
344193326Sed	/*
345243830Sdim	 * not really needed, but doesn't hurt
346243830Sdim	 */
347193326Sed	if (getrlimit(RLIMIT_RSS , &reslimit) == 0){
348193326Sed		reslimit.rlim_cur = reslimit.rlim_max;
349218893Sdim		(void)setrlimit(RLIMIT_RSS , &reslimit);
350193326Sed	}
351193326Sed
352193326Sed	/*
353193326Sed	 * Handle posix locale
354199990Srdivacky	 *
355193326Sed	 * set user defines time printing format for -v option
356193326Sed	 */
357193326Sed	ltmfrmt = getenv("LC_TIME");
358193326Sed
359193326Sed	/*
360249423Sdim	 * signal handling to reset stored directory times and modes. Since
361193326Sed	 * we deal with broken pipes via failed writes we ignore it. We also
362193326Sed	 * deal with any file size limit thorugh failed writes. Cpu time
363193326Sed	 * limits are caught and a cleanup is forced.
364249423Sdim	 */
365193326Sed	if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) ||
366193326Sed	    (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) ||
367193326Sed	    (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) ||
368249423Sdim	    (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) {
369193326Sed		pax_warn(1, "Unable to set up signal mask");
370193326Sed		return(-1);
371193326Sed	}
372193326Sed	n_hand.sa_mask = s_mask;
373198092Srdivacky	n_hand.sa_flags = 0;
374198092Srdivacky	n_hand.sa_handler = sig_cleanup;
375193326Sed
376193326Sed	if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) &&
377193326Sed	    (o_hand.sa_handler == SIG_IGN) &&
378193326Sed	    (sigaction(SIGHUP, &o_hand, &o_hand) < 0))
379199990Srdivacky		goto out;
380199990Srdivacky
381199990Srdivacky	if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) &&
382199990Srdivacky	    (o_hand.sa_handler == SIG_IGN) &&
383198092Srdivacky	    (sigaction(SIGTERM, &o_hand, &o_hand) < 0))
384193326Sed		goto out;
385212904Sdim
386212904Sdim	if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) &&
387212904Sdim	    (o_hand.sa_handler == SIG_IGN) &&
388212904Sdim	    (sigaction(SIGINT, &o_hand, &o_hand) < 0))
389249423Sdim		goto out;
390249423Sdim
391212904Sdim	if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) &&
392249423Sdim	    (o_hand.sa_handler == SIG_IGN) &&
393249423Sdim	    (sigaction(SIGQUIT, &o_hand, &o_hand) < 0))
394249423Sdim		goto out;
395249423Sdim
396249423Sdim	if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) &&
397249423Sdim	    (o_hand.sa_handler == SIG_IGN) &&
398212904Sdim	    (sigaction(SIGXCPU, &o_hand, &o_hand) < 0))
399249423Sdim		goto out;
400249423Sdim
401249423Sdim	n_hand.sa_handler = SIG_IGN;
402249423Sdim	if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) ||
403249423Sdim	    (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0))
404212904Sdim		goto out;
405249423Sdim	return(0);
406249423Sdim
407249423Sdim    out:
408249423Sdim	sys_warn(1, errno, "Unable to set up signal handler");
409212904Sdim	return(-1);
410249423Sdim}
411249423Sdim