1/*	$OpenBSD: pax.c,v 1.28 2005/08/04 10:02:44 mpf Exp $	*/
2/*	$NetBSD: pax.c,v 1.5 1996/03/26 23:54:20 mrg Exp $	*/
3
4/*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#ifndef lint
39__used static const char copyright[] =
40"@(#) Copyright (c) 1992, 1993\n\
41	The Regents of the University of California.  All rights reserved.\n";
42#endif /* not lint */
43
44#ifndef lint
45#if 0
46static const char sccsid[] = "@(#)pax.c	8.2 (Berkeley) 4/18/94";
47#else
48__used static const char rcsid[] = "$OpenBSD: pax.c,v 1.28 2005/08/04 10:02:44 mpf Exp $";
49#endif
50#endif /* not lint */
51
52#include <stdio.h>
53#include <sys/types.h>
54#include <sys/param.h>
55#include <sys/stat.h>
56#include <sys/time.h>
57#include <sys/resource.h>
58#include <signal.h>
59#include <unistd.h>
60#include <stdlib.h>
61#include <string.h>
62#include <errno.h>
63#include <err.h>
64#include <fcntl.h>
65#include <paths.h>
66#include "pax.h"
67#include "extern.h"
68static int gen_init(void);
69
70/*
71 * PAX main routines, general globals and some simple start up routines
72 */
73
74/*
75 * Variables that can be accessed by any routine within pax
76 */
77int	act = DEFOP;		/* read/write/append/copy */
78const FSUB	*frmt = NULL;		/* archive format type */
79int	cflag;			/* match all EXCEPT pattern/file */
80int	cwdfd;			/* starting cwd */
81int	dflag;			/* directory member match only  */
82int	iflag;			/* interactive file/archive rename */
83int	kflag;			/* do not overwrite existing files */
84int	lflag;			/* use hard links when possible */
85int	nflag;			/* select first archive member match */
86int	tflag;			/* restore access time after read */
87int	uflag;			/* ignore older modification time files */
88int	vflag;			/* produce verbose output */
89int	Dflag;			/* same as uflag except inode change time */
90int	Hflag;			/* follow command line symlinks (write only) */
91int	Lflag;			/* follow symlinks when writing */
92int	Xflag;			/* archive files with same device id only */
93int	Yflag;			/* same as Dflag except after name mode */
94int	Zflag;			/* same as uflag except after name mode */
95int	zeroflag;		/* use \0 as pathname terminator */
96int	vfpart;			/* is partial verbose output in progress */
97int	patime = 1;		/* preserve file access time */
98int	pmtime = 1;		/* preserve file modification times */
99int	nodirs;			/* do not create directories as needed */
100int	pmode;			/* preserve file mode bits */
101int	pids;			/* preserve file uid/gid */
102int	rmleadslash = 0;	/* remove leading '/' from pathnames */
103int	secure = 1; 		/* don't extract names that contain .. */
104int	exit_val;		/* exit value */
105int	docrc;			/* check/create file crc */
106char	*dirptr;		/* destination dir in a copy */
107char	*ltmfrmt;		/* -v locale time format (if any) */
108char	*argv0;			/* root of argv[0] */
109sigset_t s_mask;		/* signal mask for cleanup critical sect */
110FILE	*listf;			/* file pointer to print file list to */
111char	*tempfile;		/* tempfile to use for mkstemp(3) */
112char	*tempbase;		/* basename of tempfile to use for mkstemp(3) */
113
114/*
115 *	PAX - Portable Archive Interchange
116 *
117 *	A utility to read, write, and write lists of the members of archive
118 *	files and copy directory hierarchies. A variety of archive formats
119 *	are supported (some are described in POSIX 1003.1 10.1):
120 *
121 *		ustar - 10.1.1 extended tar interchange format
122 *		cpio  - 10.1.2 extended cpio interchange format
123 *		tar - old BSD 4.3 tar format
124 *		binary cpio - old cpio with binary header format
125 *		sysVR4 cpio -  with and without CRC
126 *
127 * This version is a superset of IEEE Std 1003.2b-d3
128 *
129 * Summary of Extensions to the IEEE Standard:
130 *
131 * 1	READ ENHANCEMENTS
132 * 1.1	Operations which read archives will continue to operate even when
133 *	processing archives which may be damaged, truncated, or fail to meet
134 *	format specs in several different ways. Damaged sections of archives
135 *	are detected and avoided if possible. Attempts will be made to resync
136 *	archive read operations even with badly damaged media.
137 * 1.2	Blocksize requirements are not strictly enforced on archive read.
138 *	Tapes which have variable sized records can be read without errors.
139 * 1.3	The user can specify via the non-standard option flag -E if error
140 *	resync operation should stop on a media error, try a specified number
141 *	of times to correct, or try to correct forever.
 * 1.4	Sparse files (lseek holes) stored on the archive (but stored with blocks
 *	of all zeros) will be restored with holes appropriate for the target
 *	filesystem
145 * 1.5	The user is notified whenever something is found during archive
146 *	read operations which violates spec (but the read will continue).
147 * 1.6	Multiple archive volumes can be read and may span over different
148 *	archive devices
149 * 1.7	Rigidly restores all file attributes exactly as they are stored on the
150 *	archive.
151 * 1.8	Modification change time ranges can be specified via multiple -T
152 *	options. These allow a user to select files whose modification time
153 *	lies within a specific time range.
154 * 1.9	Files can be selected based on owner (user name or uid) via one or more
155 *	-U options.
 * 1.10	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
158 * 1.11	File modification time can be checked against existing file after
159 *	name modification (-Z)
160 *
161 * 2	WRITE ENHANCEMENTS
 * 2.1	Write operation will stop instead of allowing a user to create a
 *	flawed archive (due to any problem).
 * 2.2	Archives written by pax are forced to strictly conform to both the
 *	pax and the specific archive format specifications.
166 * 2.3	Blocking size and format is rigidly enforced on writes.
167 * 2.4	Formats which may exhibit header overflow problems (they have fields
168 *	too small for large file systems, such as inode number storage), use
169 *	routines designed to repair this problem. These techniques still
170 *	conform to both pax and format specifications, but no longer truncate
171 *	these fields. This removes any restrictions on using these archive
172 *	formats on large file systems.
173 * 2.5	Multiple archive volumes can be written and may span over different
174 *	archive devices
 * 2.6	An archive volume record limit allows the user to specify the number
176 *	of bytes stored on an archive volume. When reached the user is
177 *	prompted for the next archive volume. This is specified with the
178 *	non-standard -B flag. The limit is rounded up to the next blocksize.
179 * 2.7	All archive padding during write use zero filled sections. This makes
180 *	it much easier to pull data out of flawed archive during read
181 *	operations.
182 * 2.8	Access time reset with the -t applies to all file nodes (including
183 *	directories).
184 * 2.9	Symbolic links can be followed with -L (optional in the spec).
185 * 2.10	Modification or inode change time ranges can be specified via
186 *	multiple -T options. These allow a user to select files whose
187 *	modification or inode change time lies within a specific time range.
188 * 2.11	Files can be selected based on owner (user name or uid) via one or more
189 *	-U options.
 * 2.12	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
192 * 2.13	Symlinks which appear on the command line can be followed (without
193 *	following other symlinks; -H flag)
194 *
195 * 3	COPY ENHANCEMENTS
196 * 3.1	Sparse files (lseek holes) can be copied without expanding the holes
197 *	into zero filled blocks. The file copy is created with holes which are
198 *	appropriate for the target filesystem
199 * 3.2	Access time as well as modification time on copied file trees can be
200 *	preserved with the appropriate -p options.
201 * 3.3	Access time reset with the -t applies to all file nodes (including
202 *	directories).
203 * 3.4	Symbolic links can be followed with -L (optional in the spec).
204 * 3.5	Modification or inode change time ranges can be specified via
205 *	multiple -T options. These allow a user to select files whose
206 *	modification or inode change time lies within a specific time range.
207 * 3.6	Files can be selected based on owner (user name or uid) via one or more
208 *	-U options.
 * 3.7	Files can be selected based on group (group name or gid) via one or
 *	more -G options.
211 * 3.8	Symlinks which appear on the command line can be followed (without
212 *	following other symlinks; -H flag)
213 * 3.9  File inode change time can be checked against existing file before
214 *	name modification (-D)
215 * 3.10 File inode change time can be checked against existing file after
216 *	name modification (-Y)
217 * 3.11	File modification time can be checked against existing file after
218 *	name modification (-Z)
219 *
220 * 4	GENERAL ENHANCEMENTS
221 * 4.1	Internal structure is designed to isolate format dependent and
222 *	independent functions. Formats are selected via a format driver table.
223 *	This encourages the addition of new archive formats by only having to
224 *	write those routines which id, read and write the archive header.
225 */
226
227/*
228 * main()
229 *	parse options, set up and operate as specified by the user.
230 *	any operational flaw will set exit_val to non-zero
231 * Return: 0 if ok, 1 otherwise
232 */
233
234int
235main(int argc, char **argv)
236{
237	char *tmpdir;
238	size_t tdlen;
239
240	listf = stderr;
241	/*
242	 * Keep a reference to cwd, so we can always come back home.
243	 */
244	cwdfd = open(".", O_RDONLY);
245	if (cwdfd < 0) {
246		syswarn(1, errno, "Can't open current working directory.");
247		return(exit_val);
248	}
249
250	if (updatepath() == -1)
251		return exit_val;
252	/*
253	 * Where should we put temporary files?
254	 */
255	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
256		tmpdir = _PATH_TMP;
257	tdlen = strlen(tmpdir);
258	while (tdlen > 0 && tmpdir[tdlen - 1] == '/')
259		tdlen--;
260	tempfile = malloc(tdlen + 1 + sizeof(_TFILE_BASE));
261	if (tempfile == NULL) {
262		paxwarn(1, "Cannot allocate memory for temp file name.");
263		return(exit_val);
264	}
265	if (tdlen)
266		memcpy(tempfile, tmpdir, tdlen);
267	tempbase = tempfile + tdlen;
268	*tempbase++ = '/';
269
270	/*
271	 * parse options, determine operational mode, general init
272	 */
273	options(argc, argv);
274	if ((gen_init() < 0) || (tty_init() < 0))
275		return(exit_val);
276
277	/*
278	 * select a primary operation mode
279	 */
280	switch (act) {
281	case EXTRACT:
282		extract();
283		break;
284	case ARCHIVE:
285		archive();
286		break;
287	case APPND:
288		if (gzip_program != NULL)
289			errx(1, "can not gzip while appending");
290		append();
291		break;
292	case COPY:
293		copy();
294		break;
295	default:
296	case LIST:
297		list();
298		break;
299	}
300	return(exit_val);
301}
302
303/*
304 * sig_cleanup()
305 *	when interrupted we try to do whatever delayed processing we can.
306 *	This is not critical, but we really ought to limit our damage when we
307 *	are aborted by the user.
308 * Return:
309 *	never....
310 */
311
312void
313sig_cleanup(int which_sig)
314{
315	/* XXX signal races */
316
317	/*
318	 * restore modes and times for any dirs we may have created
319	 * or any dirs we may have read. Set vflag and vfpart so the user
320	 * will clearly see the message on a line by itself.
321	 */
322	vflag = vfpart = 1;
323	if (which_sig == SIGXCPU)
324		paxwarn(0, "Cpu time limit reached, cleaning up.");
325	else
326		paxwarn(0, "Signal caught, cleaning up.");
327
328	ar_close();
329	proc_dir();
330	if (tflag)
331		atdir_end();
332	exit(1);
333}
334
335/*
336 * gen_init()
337 *	general setup routines. Not all are required, but they really help
338 *	when dealing with a medium to large sized archives.
339 */
340
341static int
342gen_init(void)
343{
344	struct rlimit reslimit;
345	struct sigaction n_hand;
346	struct sigaction o_hand;
347
348	/*
349	 * Really needed to handle large archives. We can run out of memory for
350	 * internal tables really fast when we have a whole lot of files...
351	 */
352	if (getrlimit(RLIMIT_DATA , &reslimit) == 0){
353		reslimit.rlim_cur = reslimit.rlim_max;
354		(void)setrlimit(RLIMIT_DATA , &reslimit);
355	}
356
357	/*
358	 * should file size limits be waived? if the os limits us, this is
359	 * needed if we want to write a large archive
360	 */
361	if (getrlimit(RLIMIT_FSIZE , &reslimit) == 0){
362		reslimit.rlim_cur = reslimit.rlim_max;
363		(void)setrlimit(RLIMIT_FSIZE , &reslimit);
364	}
365
366	/*
367	 * increase the size the stack can grow to
368	 */
369	if (getrlimit(RLIMIT_STACK , &reslimit) == 0){
370		reslimit.rlim_cur = reslimit.rlim_max;
371		(void)setrlimit(RLIMIT_STACK , &reslimit);
372	}
373
374	/*
375	 * not really needed, but doesn't hurt
376	 */
377	if (getrlimit(RLIMIT_RSS , &reslimit) == 0){
378		reslimit.rlim_cur = reslimit.rlim_max;
379		(void)setrlimit(RLIMIT_RSS , &reslimit);
380	}
381
382	/*
383	 * Handle posix locale
384	 *
385	 * set user defines time printing format for -v option
386	 */
387	ltmfrmt = getenv("LC_TIME");
388
389	/*
390	 * signal handling to reset stored directory times and modes. Since
391	 * we deal with broken pipes via failed writes we ignore it. We also
392	 * deal with any file size limit through failed writes. Cpu time
393	 * limits are caught and a cleanup is forced.
394	 */
395	if ((sigemptyset(&s_mask) < 0) || (sigaddset(&s_mask, SIGTERM) < 0) ||
396	    (sigaddset(&s_mask,SIGINT) < 0)||(sigaddset(&s_mask,SIGHUP) < 0) ||
397	    (sigaddset(&s_mask,SIGPIPE) < 0)||(sigaddset(&s_mask,SIGQUIT)<0) ||
398	    (sigaddset(&s_mask,SIGXCPU) < 0)||(sigaddset(&s_mask,SIGXFSZ)<0)) {
399		paxwarn(1, "Unable to set up signal mask");
400		return(-1);
401	}
402	memset(&n_hand, 0, sizeof n_hand);
403	n_hand.sa_mask = s_mask;
404	n_hand.sa_flags = 0;
405	n_hand.sa_handler = sig_cleanup;
406
407	if ((sigaction(SIGHUP, &n_hand, &o_hand) < 0) &&
408	    (o_hand.sa_handler == SIG_IGN) &&
409	    (sigaction(SIGHUP, &o_hand, &o_hand) < 0))
410		goto out;
411
412	if ((sigaction(SIGTERM, &n_hand, &o_hand) < 0) &&
413	    (o_hand.sa_handler == SIG_IGN) &&
414	    (sigaction(SIGTERM, &o_hand, &o_hand) < 0))
415		goto out;
416
417	if ((sigaction(SIGINT, &n_hand, &o_hand) < 0) &&
418	    (o_hand.sa_handler == SIG_IGN) &&
419	    (sigaction(SIGINT, &o_hand, &o_hand) < 0))
420		goto out;
421
422	if ((sigaction(SIGQUIT, &n_hand, &o_hand) < 0) &&
423	    (o_hand.sa_handler == SIG_IGN) &&
424	    (sigaction(SIGQUIT, &o_hand, &o_hand) < 0))
425		goto out;
426
427	if ((sigaction(SIGXCPU, &n_hand, &o_hand) < 0) &&
428	    (o_hand.sa_handler == SIG_IGN) &&
429	    (sigaction(SIGXCPU, &o_hand, &o_hand) < 0))
430		goto out;
431
432	n_hand.sa_handler = SIG_IGN;
433	if ((sigaction(SIGPIPE, &n_hand, &o_hand) < 0) ||
434	    (sigaction(SIGXFSZ, &n_hand, &o_hand) < 0))
435		goto out;
436	return(0);
437
438    out:
439	syswarn(1, errno, "Unable to set up signal handler");
440	return(-1);
441}
442