1/*	$OpenBSD: ar_subs.c,v 1.32 2008/05/06 06:54:28 henning Exp $	*/
2/*	$NetBSD: ar_subs.c,v 1.5 1995/03/21 09:07:06 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#ifndef lint
39#if 0
40static const char sccsid[] = "@(#)ar_subs.c	8.2 (Berkeley) 4/18/94";
41#else
42__used static const char rcsid[] = "$OpenBSD: ar_subs.c,v 1.32 2008/05/06 06:54:28 henning Exp $";
43#endif
44#endif /* not lint */
45
46#include <sys/types.h>
47#include <sys/time.h>
48#include <sys/stat.h>
49#include <sys/param.h>
50#include <signal.h>
51#include <string.h>
52#include <stdio.h>
53#include <fcntl.h>
54#include <errno.h>
55#include <unistd.h>
56#include <stdlib.h>
57#ifdef __APPLE__
58#include <sys/param.h>
59#include <copyfile.h>
60#include <libgen.h>
61#include <sys/queue.h>
62#endif
63#include "pax.h"
64#include "options.h"
65#include "extern.h"
66
/*
 * Prototypes for the file-local helpers. path_check() and wr_archive() are
 * defined below; get_arc() and next_head() are only declared here and are
 * used by list(), extract() and append().
 */
static int path_check(ARCHD *, int);
static void wr_archive(ARCHD *, int is_app);
static int get_arc(void);
static int next_head(ARCHD *);
/*
 * signal mask defined elsewhere; list(), extract() and wr_archive() block
 * these signals with sigprocmask() before running their cleanup code
 */
extern sigset_t s_mask;

/*
 * Routines which control the overall operation modes of pax as specified by
 * the user: list, append, read ...
 */

static char hdbuf[BLKMULT];		/* space for archive header on read */
u_long flcnt;				/* number of files processed */

/*
 * cached working directory, refreshed by updatepath() and consulted by
 * path_check()
 */
static char	cwdpath[MAXPATHLEN];	/* current working directory path */
static size_t	cwdpathlen;		/* current working directory path len */
83
84int
85updatepath(void)
86{
87	if (getcwd(cwdpath, sizeof(cwdpath)) == NULL) {
88		syswarn(1, errno, "Cannot get working directory");
89		return -1;
90	}
91	cwdpathlen = strlen(cwdpath);
92	return 0;
93}
94
/*
 * fdochdir()
 *	change the working directory to the directory open on descriptor fcwd
 *	and refresh the cached working directory path.
 * Return:
 *	the result of updatepath() when fchdir() succeeds, -1 (after a
 *	warning) when it fails.
 */
int
fdochdir(int fcwd)
{
	if (fchdir(fcwd) != -1)
		return updatepath();

	syswarn(1, errno, "Cannot chdir to `.'");
	return -1;
}
104
/*
 * dochdir()
 *	change the working directory to name and refresh the cached working
 *	directory path.
 * Return:
 *	the result of updatepath() when chdir() succeeds, -1 (after a
 *	warning) when chdir() fails.
 */
int
dochdir(const char *name)
{
	if (chdir(name) == -1) {
		syswarn(1, errno, "Cannot chdir to `%s'", name);
		/*
		 * the working directory did not change, so the cached path
		 * needs no refresh; report the failure to the caller the
		 * same way fdochdir() does instead of claiming success
		 */
		return -1;
	}
	return updatepath();
}
112
113static int
114path_check(ARCHD *arcn, int level)
115{
116	char buf[MAXPATHLEN];
117	char *p;
118
119	if ((p = strrchr(arcn->name, '/')) == NULL)
120		return 0;
121	*p = '\0';
122
123	if (realpath(arcn->name, buf) == NULL) {
124		int error;
125		error = path_check(arcn, level + 1);
126		*p = '/';
127		if (error == 0)
128			return 0;
129		if (level == 0)
130			syswarn(1, 0, "Cannot resolve `%s'", arcn->name);
131		return -1;
132	}
133	if (cwdpathlen == 1) {	/* We're in the root */
134		*p = '/';
135		return 0;
136	}
137	if ((strncmp(buf, cwdpath, cwdpathlen) != 0) || (buf[cwdpathlen] != '\0' && buf[cwdpathlen] != '/')) {
138		*p = '/';
139		syswarn(1, 0, "Attempt to write file `%s' that resolves into "
140		    "`%s/%s' outside current working directory `%s' ignored",
141		    arcn->name, buf, p + 1, cwdpath);
142		return -1;
143	}
144	*p = '/';
145	return 0;
146}
147
148/*
149 * list()
150 *	list the contents of an archive which match user supplied pattern(s)
151 *	(no pattern matches all).
152 */
153
154void
155list(void)
156{
157	ARCHD *arcn;
158	int res;
159	ARCHD archd;
160	time_t now;
161
162	arcn = &archd;
163	/*
164	 * figure out archive type; pass any format specific options to the
165	 * archive option processing routine; call the format init routine. We
166	 * also save current time for ls_list() so we do not make a system
167	 * call for each file we need to print. If verbose (vflag) start up
168	 * the name and group caches.
169	 */
170	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
171	    ((*frmt->st_rd)() < 0))
172		return;
173
174	if (vflag && ((uidtb_start() < 0) || (gidtb_start() < 0)))
175		return;
176
177	now = time(NULL);
178
179	/*
180	 * step through the archive until the format says it is done
181	 */
182	while (next_head(arcn) == 0) {
183		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
184			/*
185			 * we need to read, to get the real filename
186			 */
187			off_t cnt;
188			if (!(*frmt->rd_data)(arcn, arcn->type == PAX_GLF
189			    ? -1 : -2, &cnt))
190				(void)rd_skip(cnt + arcn->pad);
191			continue;
192		}
193
194		/*
195		 * check for pattern, and user specified options match.
196		 * When all patterns are matched we are done.
197		 */
198		if ((res = pat_match(arcn)) < 0)
199			break;
200
201		if ((res == 0) && (sel_chk(arcn) == 0)) {
202			/*
203			 * pattern resulted in a selected file
204			 */
205			if (pat_sel(arcn) < 0)
206				break;
207
208			/*
209			 * modify the name as requested by the user if name
210			 * survives modification, do a listing of the file
211			 */
212			if ((res = mod_name(arcn)) < 0)
213				break;
214			if (res == 0)
215				ls_list(arcn, now, stdout);
216		}
217
218		/*
219		 * skip to next archive format header using values calculated
220		 * by the format header read routine
221		 */
222		if (rd_skip(arcn->skip + arcn->pad) == 1)
223			break;
224	}
225
226	/*
227	 * all done, let format have a chance to cleanup, and make sure that
228	 * the patterns supplied by the user were all matched
229	 */
230	(void)(*frmt->end_rd)();
231	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
232	ar_close();
233	pat_chk();
234}
235
236/*
237 * extract()
238 *	extract the member(s) of an archive as specified by user supplied
239 *	pattern(s) (no patterns extracts all members)
240 */
241
242void
243extract(void)
244{
245	ARCHD *arcn;
246	int res;
247	off_t cnt;
248	ARCHD archd;
249	struct stat sb;
250	int fd;
251	time_t now;
252
253#ifdef __APPLE__
254	int copyfile_disable = (getenv(COPYFILE_DISABLE_VAR) != NULL);
255	LIST_HEAD(copyfile_list_t, copyfile_list_entry_t) copyfile_list;
256	struct copyfile_list_entry_t {
257	    char *src;
258	    char *dst;
259	    LIST_ENTRY(copyfile_list_entry_t) link;
260	} *cle;
261
262	LIST_INIT(&copyfile_list);
263#endif
264
265	arcn = &archd;
266	/*
267	 * figure out archive type; pass any format specific options to the
268	 * archive option processing routine; call the format init routine;
269	 * start up the directory modification time and access mode database
270	 */
271	if ((get_arc() < 0) || ((*frmt->options)() < 0) ||
272	    ((*frmt->st_rd)() < 0) || (dir_start() < 0))
273		return;
274
275	/*
276	 * When we are doing interactive rename, we store the mapping of names
277	 * so we can fix up hard links files later in the archive.
278	 */
279	if (iflag && (name_start() < 0))
280		return;
281
282	now = time(NULL);
283
284	/*
285	 * step through each entry on the archive until the format read routine
286	 * says it is done
287	 */
288	while (next_head(arcn) == 0) {
289		if (arcn->type == PAX_GLL || arcn->type == PAX_GLF) {
290			/*
291			 * we need to read, to get the real filename
292			 */
293			if (!(*frmt->rd_data)(arcn, arcn->type == PAX_GLF
294			    ? -1 : -2, &cnt))
295				(void)rd_skip(cnt + arcn->pad);
296			continue;
297		}
298
299		/*
300		 * check for pattern, and user specified options match. When
301		 * all the patterns are matched we are done
302		 */
303		if ((res = pat_match(arcn)) < 0)
304			break;
305
306		if ((res > 0) || (sel_chk(arcn) != 0)) {
307			/*
308			 * file is not selected. skip past any file data and
309			 * padding and go back for the next archive member
310			 */
311			(void)rd_skip(arcn->skip + arcn->pad);
312			continue;
313		}
314
315		/*
316		 * with -u or -D only extract when the archive member is newer
317		 * than the file with the same name in the file system (no
318		 * test of being the same type is required).
319		 * NOTE: this test is done BEFORE name modifications as
320		 * specified by pax. this operation can be confusing to the
321		 * user who might expect the test to be done on an existing
322		 * file AFTER the name mod. In honesty the pax spec is probably
323		 * flawed in this respect.
324		 */
325		if ((uflag || Dflag) && ((lstat(arcn->name, &sb) == 0))) {
326			if (uflag && Dflag) {
327				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
328				    (arcn->sb.st_ctime <= sb.st_ctime)) {
329					(void)rd_skip(arcn->skip + arcn->pad);
330					continue;
331				}
332			} else if (Dflag) {
333				if (arcn->sb.st_ctime <= sb.st_ctime) {
334					(void)rd_skip(arcn->skip + arcn->pad);
335					continue;
336				}
337			} else if (arcn->sb.st_mtime <= sb.st_mtime) {
338				(void)rd_skip(arcn->skip + arcn->pad);
339				continue;
340			}
341		}
342
343		/*
344		 * this archive member is now been selected. modify the name.
345		 */
346		if ((pat_sel(arcn) < 0) || ((res = mod_name(arcn)) < 0))
347			break;
348		if (res > 0) {
349			/*
350			 * a bad name mod, skip and purge name from link table
351			 */
352			purg_lnk(arcn);
353			(void)rd_skip(arcn->skip + arcn->pad);
354			continue;
355		}
356
357		/*
358		 * Non standard -Y and -Z flag. When the existing file is
359		 * same age or newer skip
360		 */
361		if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) {
362			if (Yflag && Zflag) {
363				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
364				    (arcn->sb.st_ctime <= sb.st_ctime)) {
365					(void)rd_skip(arcn->skip + arcn->pad);
366					continue;
367				}
368			} else if (Yflag) {
369				if (arcn->sb.st_ctime <= sb.st_ctime) {
370					(void)rd_skip(arcn->skip + arcn->pad);
371					continue;
372				}
373			} else if (arcn->sb.st_mtime <= sb.st_mtime) {
374				(void)rd_skip(arcn->skip + arcn->pad);
375				continue;
376			}
377		}
378
379		if (vflag) {
380			if (vflag > 1)
381				ls_list(arcn, now, listf);
382			else {
383				(void)safe_print(arcn->name, listf);
384				vfpart = 1;
385			}
386		}
387
388		/*
389		 * if required, chdir around.
390		 */
391		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
392			dochdir(arcn->pat->chdname);
393
394		if (secure && path_check(arcn, 0) != 0) {
395			(void)rd_skip(arcn->skip + arcn->pad);
396			continue;
397		}
398
399		/*
400		 * all ok, extract this member based on type
401		 */
402		if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) {
403			/*
404			 * process archive members that are not regular files.
405			 * throw out padding and any data that might follow the
406			 * header (as determined by the format).
407			 */
408			if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
409				res = lnk_creat(arcn);
410			else
411				res = node_creat(arcn);
412
413			(void)rd_skip(arcn->skip + arcn->pad);
414			if (res < 0)
415				purg_lnk(arcn);
416
417			if (vflag && vfpart) {
418				(void)putc('\n', listf);
419				vfpart = 0;
420			}
421			continue;
422		}
423		/*
424		 * we have a file with data here. If we can not create it, skip
425		 * over the data and purge the name from hard link table
426		 */
427		if ((fd = file_creat(arcn)) < 0) {
428			(void)rd_skip(arcn->skip + arcn->pad);
429			purg_lnk(arcn);
430			continue;
431		}
432		/*
433		 * extract the file from the archive and skip over padding and
434		 * any unprocessed data
435		 */
436		res = (*frmt->rd_data)(arcn, fd, &cnt);
437		file_close(arcn, fd);
438		if (vflag && vfpart) {
439			(void)putc('\n', listf);
440			vfpart = 0;
441		}
442		if (!res)
443			(void)rd_skip(cnt + arcn->pad);
444
445#ifdef __APPLE__
446		if (!strncmp(basename(arcn->name), "._", 2)) {
447			cle = alloca(sizeof(struct copyfile_list_entry_t));
448			cle->src = strdup(arcn->name);
449
450			if (asprintf(&cle->dst, "%s/%s",
451				     dirname(arcn->name), basename(arcn->name) + 2) != -1) {
452				LIST_INSERT_HEAD(&copyfile_list, cle, link);
453			} else {
454				free(cle->src);
455			}
456		}
457#endif
458		/*
459		 * if required, chdir around.
460		 */
461		if ((arcn->pat != NULL) && (arcn->pat->chdname != NULL))
462			fdochdir(cwdfd);
463	}
464#ifdef __APPLE__
465	LIST_FOREACH(cle, &copyfile_list, link)
466	{
467		if(copyfile_disable || copyfile(cle->src, cle->dst, NULL,
468						COPYFILE_UNPACK | COPYFILE_XATTR | COPYFILE_ACL)) {
469			if (!copyfile_disable) {
470				syswarn(1, errno, "Unable to set metadata on %s", cle->dst);
471			}
472		} else {
473			unlink(cle->src);
474		}
475		free(cle->dst);
476		free(cle->src);
477	}
478#endif
479
480	/*
481	 * all done, restore directory modes and times as required; make sure
482	 * all patterns supplied by the user were matched; block off signals
483	 * to avoid chance for multiple entry into the cleanup code.
484	 */
485	(void)(*frmt->end_rd)();
486	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
487	ar_close();
488	proc_dir();
489	pat_chk();
490}
491
492/*
493 * wr_archive()
494 *	Write an archive. used in both creating a new archive and appends on
495 *	previously written archive.
496 */
497
498static void
499wr_archive(ARCHD *arcn, int is_app)
500{
501	int res;
502	int hlk;
503	int wr_one;
504	off_t cnt;
505	int (*wrf)(ARCHD *);
506	int fd = -1;
507	time_t now;
508
509#ifdef __APPLE__
510	int metadata = 0;
511	char *md_fname = NULL;
512	ARCHD arcn_copy;
513	char arcn_copy_name[PAXPATHLEN+1];
514#endif
515
516	/*
517	 * if this format supports hard link storage, start up the database
518	 * that detects them.
519	 */
520	if (((hlk = frmt->hlk) == 1) && (lnk_start() < 0))
521		return;
522
523	if (hlk && want_linkdata) hlk=0; /* Treat hard links as individual files */
524
525	/*
526	 * start up the file traversal code and format specific write
527	 */
528	if ((ftree_start() < 0) || ((*frmt->st_wr)() < 0))
529		return;
530	wrf = frmt->wr;
531
532	/*
533	 * When we are doing interactive rename, we store the mapping of names
534	 * so we can fix up hard links files later in the archive.
535	 */
536	if (iflag && (name_start() < 0))
537		return;
538
539	/*
540	 * if this is not append, and there are no files, we do not write a
541	 * trailer
542	 */
543	wr_one = is_app;
544
545	now = time(NULL);
546
547	/*
548	 * while there are files to archive, process them one at at time
549	 */
550	while (next_file(arcn) == 0) {
551		/*
552		 * check if this file meets user specified options match.
553		 */
554		if (sel_chk(arcn) != 0) {
555			ftree_notsel();
556			continue;
557		}
558		fd = -1;
559		if (uflag) {
560			/*
561			 * only archive if this file is newer than a file with
562			 * the same name that is already stored on the archive
563			 */
564			if ((res = chk_ftime(arcn)) < 0)
565				break;
566			if (res > 0)
567				continue;
568		}
569
570#ifdef __APPLE__
571		/*
572		 * synthesize ._ files for each node we encounter
573		 */
574		if (getenv(COPYFILE_DISABLE_VAR) == NULL
575		    && copyfile(arcn->name, NULL, NULL,
576			COPYFILE_CHECK | COPYFILE_XATTR | COPYFILE_ACL)
577		    && arcn->nlen + 2 < sizeof(arcn->name)) {
578			char *tmpdir = P_tmpdir, *TMPDIR;
579			int fd_src, fd_dst;
580
581			if (!issetugid() && (TMPDIR = getenv("TMPDIR"))) {
582				tmpdir = TMPDIR;
583			}
584			asprintf(&md_fname, "%s%s", tmpdir, "/pax-md-XXXXXX");
585			if (!md_fname) {
586				syswarn(1, errno, "Unable to create temporary file name");
587				return;
588			}
589			memcpy(&arcn_copy, arcn, sizeof(ARCHD));
590			strncpy(arcn_copy_name, arcn->name, PAXPATHLEN+1);
591
592			arcn->skip = 0;
593			arcn->pad = 0;
594			arcn->ln_nlen = 0;
595			arcn->ln_name[0] = '\0';
596			arcn->type = PAX_REG;
597			fd_dst = mkstemp(md_fname);
598			if (fd_dst >= 0) {
599				fd_src = open(arcn->name, O_RDONLY, 0);
600				if (fd_src < 0) {
601					syswarn(1, errno, "Unable to open %s for reading", arcn->name);
602					close(fd_dst);
603					unlink(md_fname);
604					free(md_fname);
605					md_fname = NULL;
606					goto next;
607				}
608				if(fcopyfile(fd_src, fd_dst, NULL,
609					     COPYFILE_PACK | COPYFILE_XATTR | COPYFILE_ACL) < 0) {
610					syswarn(1, errno,
611						"Unable to preserve metadata on %s", arcn->name);
612					close(fd_src);
613					close(fd_dst);
614					unlink(md_fname);
615					free(md_fname);
616					md_fname = NULL;
617					goto next;
618				}
619				close(fd_src);
620				fstat(fd_dst, &arcn->sb);
621				close(fd_dst);
622			} else {
623				syswarn(1, errno, "Unable to create temporary file %s", md_fname);
624				free(md_fname);
625				goto next;
626			}
627			arcn->skip = arcn->sb.st_size;
628
629			if (!strncmp(dirname(arcn->name), ".", 2)) {
630				snprintf(arcn->name, sizeof(arcn->name),
631					 "._%s", basename(arcn->name));
632			} else {
633				snprintf(arcn->name, sizeof(arcn->name),
634					 "%s/._%s",
635					 dirname(arcn->name), basename(arcn->name));
636			}
637			arcn->nlen = strlen(arcn->name);
638			arcn->org_name = arcn->name;
639			metadata = 1;
640		} else if (metadata) {
641next:
642			metadata = 0;
643			memcpy(arcn, &arcn_copy, sizeof(ARCHD));
644			strncpy(arcn->name, arcn_copy_name, PAXPATHLEN+1);
645		}
646#endif	/* __APPLE__ */
647
648		fd = -1;
649
650		/*
651		 * this file is considered selected now. see if this is a hard
652		 * link to a file already stored
653		 */
654		ftree_sel(arcn);
655		if (hlk && (chk_lnk(arcn) < 0)) {
656			if (md_fname) {
657				unlink(md_fname);
658				free(md_fname);
659				md_fname = NULL;
660			}
661			break;
662		}
663
664		if ((arcn->type == PAX_REG) || (arcn->type == PAX_HRG) ||
665		    (arcn->type == PAX_CTG)) {
666			/*
667			 * we will have to read this file. by opening it now we
668			 * can avoid writing a header to the archive for a file
669			 * we were later unable to read (we also purge it from
670			 * the link table).
671			 */
672#ifdef __APPLE__
673			if (metadata) {
674				fd = open(md_fname, O_RDONLY, 0);
675				unlink(md_fname);
676				free(md_fname);
677				md_fname = NULL;
678			} else
679				fd = open(arcn->org_name, O_RDONLY, 0);
680			if (fd < 0) {
681#else  /* !__APPLE__ */
682			if ((fd = open(arcn->org_name, O_RDONLY, 0)) < 0) {
683#endif	/* __APPLE__ */
684				syswarn(1,errno, "Unable to open %s to read",
685					arcn->org_name);
686				purg_lnk(arcn);
687				continue;
688			}
689		}
690
691		/*
692		 * Now modify the name as requested by the user
693		 */
694		if ((res = mod_name(arcn)) < 0) {
695			/*
696			 * name modification says to skip this file, close the
697			 * file and purge link table entry
698			 */
699			rdfile_close(arcn, &fd);
700			purg_lnk(arcn);
701			break;
702		}
703
704		if ((res > 0) || (docrc && (set_crc(arcn, fd) < 0))) {
705			/*
706			 * unable to obtain the crc we need, close the file,
707			 * purge link table entry
708			 */
709			rdfile_close(arcn, &fd);
710			purg_lnk(arcn);
711			continue;
712		}
713
714		if (vflag) {
715			if (vflag > 1)
716				ls_list(arcn, now, listf);
717			else {
718				(void)safe_print(arcn->name, listf);
719				vfpart = 1;
720			}
721		}
722		++flcnt;
723
724		/*
725		 * looks safe to store the file, have the format specific
726		 * routine write routine store the file header on the archive
727		 */
728		if ((res = (*wrf)(arcn)) < 0) {
729			rdfile_close(arcn, &fd);
730			break;
731		}
732		wr_one = 1;
733		if (res > 0) {
734			/*
735			 * format write says no file data needs to be stored
736			 * so we are done messing with this file
737			 */
738			if (vflag && vfpart) {
739				(void)putc('\n', listf);
740				vfpart = 0;
741			}
742			rdfile_close(arcn, &fd);
743			continue;
744		}
745
746		/*
747		 * Add file data to the archive, quit on write error. if we
748		 * cannot write the entire file contents to the archive we
749		 * must pad the archive to replace the missing file data
750		 * (otherwise during an extract the file header for the file
751		 * which FOLLOWS this one will not be where we expect it to
752		 * be).
753		 */
754		res = (*frmt->wr_data)(arcn, fd, &cnt);
755		rdfile_close(arcn, &fd);
756		if (vflag && vfpart) {
757			(void)putc('\n', listf);
758			vfpart = 0;
759		}
760		if (res < 0)
761			break;
762
763		/*
764		 * pad as required, cnt is number of bytes not written
765		 */
766		if (((cnt > 0) && (wr_skip(cnt) < 0)) ||
767		    ((arcn->pad > 0) && (wr_skip(arcn->pad) < 0)))
768			break;
769#ifdef __APPLE__
770		if (metadata)
771			goto next;
772#endif	/* __APPLE__ */
773	}
774
775	/*
776	 * tell format to write trailer; pad to block boundary; reset directory
777	 * mode/access times, and check if all patterns supplied by the user
778	 * were matched. block off signals to avoid chance for multiple entry
779	 * into the cleanup code
780	 */
781	if (wr_one) {
782		(*frmt->end_wr)();
783		wr_fin();
784	}
785	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
786	ar_close();
787	if (tflag)
788		proc_dir();
789	ftree_chk();
790}
791
792/*
793 * append()
794 *	Add file to previously written archive. Archive format specified by the
795 *	user must agree with archive. The archive is read first to collect
796 *	modification times (if -u) and locate the archive trailer. The archive
797 *	is positioned in front of the record with the trailer and wr_archive()
798 *	is called to add the new members.
799 *	PAX IMPLEMENTATION DETAIL NOTE:
800 *	-u is implemented by adding the new members to the end of the archive.
801 *	Care is taken so that these do not end up as links to the older
802 *	version of the same file already stored in the archive. It is expected
803 *	when extraction occurs these newer versions will over-write the older
804 *	ones stored "earlier" in the archive (this may be a bad assumption as
805 *	it depends on the implementation of the program doing the extraction).
806 *	It is really difficult to splice in members without either re-writing
807 *	the entire archive (from the point were the old version was), or having
808 *	assistance of the format specification in terms of a special update
809 *	header that invalidates a previous archive record. The POSIX spec left
810 *	the method used to implement -u unspecified. This pax is able to
811 *	over write existing files that it creates.
812 */
813
814void
815append(void)
816{
817	ARCHD *arcn;
818	int res;
819	ARCHD archd;
820	const FSUB *orgfrmt;
821	int udev;
822	off_t tlen;
823
824	arcn = &archd;
825	orgfrmt = frmt;
826
827	/*
828	 * Do not allow an append operation if the actual archive is of a
829	 * different format than the user specified format.
830	 */
831	if (get_arc() < 0)
832		return;
833	if ((orgfrmt != NULL) && (orgfrmt != frmt)) {
834		paxwarn(1, "Cannot mix current archive format %s with %s",
835		    frmt->name, orgfrmt->name);
836		return;
837	}
838
839	/*
840	 * pass the format any options and start up format
841	 */
842	if (((*frmt->options)() < 0) || ((*frmt->st_rd)() < 0))
843		return;
844
845	/*
846	 * if we only are adding members that are newer, we need to save the
847	 * mod times for all files we see.
848	 */
849	if (uflag && (ftime_start() < 0))
850		return;
851
852	/*
853	 * some archive formats encode hard links by recording the device and
854	 * file serial number (inode) but copy the file anyway (multiple times)
855	 * to the archive. When we append, we run the risk that newly added
856	 * files may have the same device and inode numbers as those recorded
857	 * on the archive but during a previous run. If this happens, when the
858	 * archive is extracted we get INCORRECT hard links. We avoid this by
859	 * remapping the device numbers so that newly added files will never
860	 * use the same device number as one found on the archive. remapping
861	 * allows new members to safely have links among themselves. remapping
862	 * also avoids problems with file inode (serial number) truncations
863	 * when the inode number is larger than storage space in the archive
864	 * header. See the remap routines for more details.
865	 */
866	if ((udev = frmt->udev) && (dev_start() < 0))
867		return;
868
869	/*
870	 * reading the archive may take a long time. If verbose tell the user
871	 */
872	if (vflag) {
873		(void)fprintf(listf,
874			"%s: Reading archive to position at the end...", argv0);
875		vfpart = 1;
876	}
877
878	/*
879	 * step through the archive until the format says it is done
880	 */
881	while (next_head(arcn) == 0) {
882		/*
883		 * check if this file meets user specified options.
884		 */
885		if (sel_chk(arcn) != 0) {
886			if (rd_skip(arcn->skip + arcn->pad) == 1)
887				break;
888			continue;
889		}
890
891		if (uflag) {
892			/*
893			 * see if this is the newest version of this file has
894			 * already been seen, if so skip.
895			 */
896			if ((res = chk_ftime(arcn)) < 0)
897				break;
898			if (res > 0) {
899				if (rd_skip(arcn->skip + arcn->pad) == 1)
900					break;
901				continue;
902			}
903		}
904
905		/*
906		 * Store this device number. Device numbers seen during the
907		 * read phase of append will cause newly appended files with a
908		 * device number seen in the old part of the archive to be
909		 * remapped to an unused device number.
910		 */
911		if ((udev && (add_dev(arcn) < 0)) ||
912		    (rd_skip(arcn->skip + arcn->pad) == 1))
913			break;
914	}
915
916	/*
917	 * done, finish up read and get the number of bytes to back up so we
918	 * can add new members. The format might have used the hard link table,
919	 * purge it.
920	 */
921	tlen = (*frmt->end_rd)();
922	lnk_end();
923
924	/*
925	 * try to position for write, if this fails quit. if any error occurs,
926	 * we will refuse to write
927	 */
928	if (appnd_start(tlen) < 0)
929		return;
930
931	/*
932	 * tell the user we are done reading.
933	 */
934	if (vflag && vfpart) {
935		(void)fputs("done.\n", listf);
936		vfpart = 0;
937	}
938
939	/*
940	 * go to the writing phase to add the new members
941	 */
942	wr_archive(arcn, 1);
943}
944
945/*
946 * archive()
947 *	write a new archive
948 */
949
950void
951archive(void)
952{
953	ARCHD archd;
954
955	/*
956	 * if we only are adding members that are newer, we need to save the
957	 * mod times for all files; set up for writing; pass the format any
958	 * options write the archive
959	 */
960	if ((uflag && (ftime_start() < 0)) || (wr_start() < 0))
961		return;
962	if ((*frmt->options)() < 0)
963		return;
964
965	wr_archive(&archd, 0);
966}
967
968/*
969 * copy()
970 *	copy files from one part of the file system to another. this does not
971 *	use any archive storage. The EFFECT OF THE COPY IS THE SAME as if an
972 *	archive was written and then extracted in the destination directory
973 *	(except the files are forced to be under the destination directory).
974 */
975
976void
977copy(void)
978{
979	ARCHD *arcn;
980	int res;
981	int fddest;
982	char *dest_pt;
983	int dlen;
984	int drem;
985	int fdsrc = -1;
986	struct stat sb;
987	ARCHD archd;
988	char dirbuf[PAXPATHLEN+1];
989
990	arcn = &archd;
991	if (frmt && strcmp(frmt->name, NM_PAX)==0) {
992		/* Copy using pax format:  must check if any -o options */
993		if ((*frmt->options)() < 0)
994			return;
995		if (pax_invalid_action==0)
996			pax_invalid_action = PAX_INVALID_ACTION_BYPASS;
997	}
998	/*
999	 * set up the destination dir path and make sure it is a directory. We
1000	 * make sure we have a trailing / on the destination
1001	 */
1002	dlen = strlcpy(dirbuf, dirptr, sizeof(dirbuf));
1003	if (dlen >= sizeof(dirbuf) ||
1004	    (dlen == sizeof(dirbuf) - 1 && dirbuf[dlen - 1] != '/')) {
1005		paxwarn(1, "directory name is too long %s", dirptr);
1006		return;
1007	}
1008	dest_pt = dirbuf + dlen;
1009	if (*(dest_pt-1) != '/') {
1010		*dest_pt++ = '/';
1011		*dest_pt = '\0';
1012		++dlen;
1013	}
1014	drem = PAXPATHLEN - dlen;
1015
1016	if (stat(dirptr, &sb) < 0) {
1017		syswarn(1, errno, "Cannot access destination directory %s",
1018			dirptr);
1019		return;
1020	}
1021	if (!S_ISDIR(sb.st_mode)) {
1022		paxwarn(1, "Destination is not a directory %s", dirptr);
1023		return;
1024	}
1025
1026	/*
1027	 * start up the hard link table; file traversal routines and the
1028	 * modification time and access mode database
1029	 */
1030	if ((lnk_start() < 0) || (ftree_start() < 0) || (dir_start() < 0))
1031		return;
1032
1033	/*
1034	 * When we are doing interactive rename, we store the mapping of names
1035	 * so we can fix up hard links files later in the archive.
1036	 */
1037	if (iflag && (name_start() < 0))
1038		return;
1039
1040	/*
1041	 * set up to cp file trees
1042	 */
1043	cp_start();
1044
1045	/*
1046	 * while there are files to archive, process them
1047	 */
1048	while (next_file(arcn) == 0) {
1049		fdsrc = -1;
1050
1051		/*
1052		 * Fill in arcn from any pax options
1053		 */
1054		adjust_copy_for_pax_options(arcn);
1055
1056		/*
1057		 * check if this file meets user specified options
1058		 */
1059		if (sel_chk(arcn) != 0) {
1060			ftree_notsel();
1061			continue;
1062		}
1063
1064		/*
1065		 * if there is already a file in the destination directory with
1066		 * the same name and it is newer, skip the one stored on the
1067		 * archive.
1068		 * NOTE: this test is done BEFORE name modifications as
1069		 * specified by pax. this can be confusing to the user who
1070		 * might expect the test to be done on an existing file AFTER
1071		 * the name mod. In honesty the pax spec is probably flawed in
1072		 * this respect
1073		 */
1074		if (uflag || Dflag) {
1075			/*
1076			 * create the destination name
1077			 */
1078			if (strlcpy(dest_pt, arcn->name + (*arcn->name == '/'),
1079			    drem + 1) > drem) {
1080				paxwarn(1, "Destination pathname too long %s",
1081					arcn->name);
1082				continue;
1083			}
1084
1085			/*
1086			 * if existing file is same age or newer skip
1087			 */
1088			res = lstat(dirbuf, &sb);
1089			*dest_pt = '\0';
1090
1091			if (res == 0) {
1092				if (uflag && Dflag) {
1093					if ((arcn->sb.st_mtime<=sb.st_mtime) &&
1094					    (arcn->sb.st_ctime<=sb.st_ctime))
1095						continue;
1096				} else if (Dflag) {
1097					if (arcn->sb.st_ctime <= sb.st_ctime)
1098						continue;
1099				} else if (arcn->sb.st_mtime <= sb.st_mtime)
1100					continue;
1101			}
1102		}
1103
1104		/*
1105		 * this file is considered selected. See if this is a hard link
1106		 * to a previous file; modify the name as requested by the
1107		 * user; set the final destination.
1108		 */
1109		ftree_sel(arcn);
1110		if ((chk_lnk(arcn) < 0) || ((res = mod_name(arcn)) < 0))
1111			break;
1112		if ((res > 0) || (set_dest(arcn, dirbuf, dlen) < 0)) {
1113			/*
1114			 * skip file, purge from link table
1115			 */
1116			purg_lnk(arcn);
1117			continue;
1118		}
1119
1120		/*
1121		 * Non standard -Y and -Z flag. When the existing file is
1122		 * same age or newer skip
1123		 */
1124		if ((Yflag || Zflag) && ((lstat(arcn->name, &sb) == 0))) {
1125			if (Yflag && Zflag) {
1126				if ((arcn->sb.st_mtime <= sb.st_mtime) &&
1127				    (arcn->sb.st_ctime <= sb.st_ctime))
1128					continue;
1129			} else if (Yflag) {
1130				if (arcn->sb.st_ctime <= sb.st_ctime)
1131					continue;
1132			} else if (arcn->sb.st_mtime <= sb.st_mtime)
1133				continue;
1134		}
1135
1136		if (vflag) {
1137			(void)safe_print(arcn->name, listf);
1138			vfpart = 1;
1139		}
1140		++flcnt;
1141
1142		/*
1143		 * try to create a hard link to the src file if requested
1144		 * but make sure we are not trying to overwrite ourselves.
1145		 */
1146		if (lflag)
1147			res = cross_lnk(arcn);
1148		else
1149			res = chk_same(arcn);
1150		if (res <= 0) {
1151			if (vflag && vfpart) {
1152				(void)putc('\n', listf);
1153				vfpart = 0;
1154			}
1155			continue;
1156		}
1157
1158		/*
1159		 * have to create a new file
1160		 */
1161		if ((arcn->type != PAX_REG) && (arcn->type != PAX_CTG)) {
1162			/*
1163			 * create a link or special file
1164			 */
1165			if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG))
1166				res = lnk_creat(arcn);
1167			else
1168				res = node_creat(arcn);
1169			if (res < 0)
1170				purg_lnk(arcn);
1171#ifdef __APPLE__
1172			if (res >= 0 &&
1173			    arcn->type == PAX_DIR &&
1174			    copyfile(arcn->org_name, arcn->name, NULL, COPYFILE_ACL | COPYFILE_XATTR) < 0)
1175				paxwarn(1, "Directory %s had metadata that could not be copied: %s", arcn->org_name, strerror(errno));
1176#endif	/* __APPLE__ */
1177			if (vflag && vfpart) {
1178				(void)putc('\n', listf);
1179				vfpart = 0;
1180			}
1181			continue;
1182		}
1183
1184		/*
1185		 * have to copy a regular file to the destination directory.
1186		 * first open source file and then create the destination file
1187		 */
1188		if ((fdsrc = open(arcn->org_name, O_RDONLY, 0)) < 0) {
1189			syswarn(1, errno, "Unable to open %s to read",
1190			    arcn->org_name);
1191			purg_lnk(arcn);
1192			continue;
1193		}
1194		if ((fddest = file_creat(arcn)) < 0) {
1195			rdfile_close(arcn, &fdsrc);
1196			purg_lnk(arcn);
1197			continue;
1198		}
1199
1200		/*
1201		 * copy source file data to the destination file
1202		 */
1203		cp_file(arcn, fdsrc, fddest);
1204#ifdef __APPLE__
1205		/* do this before file close so that mtimes are correct regardless */
1206		if (getenv(COPYFILE_DISABLE_VAR) == NULL) {
1207			if (fcopyfile(fdsrc, fddest, NULL, COPYFILE_ACL | COPYFILE_XATTR) < 0)
1208				paxwarn(1, "File %s had metadata that could not be copied: %s", arcn->org_name,
1209					strerror(errno));
1210		}
1211#endif
1212		file_close(arcn, fddest);
1213		rdfile_close(arcn, &fdsrc);
1214
1215		if (vflag && vfpart) {
1216			(void)putc('\n', listf);
1217			vfpart = 0;
1218		}
1219	}
1220
1221	/*
1222	 * restore directory modes and times as required; make sure all
1223	 * patterns were selected block off signals to avoid chance for
1224	 * multiple entry into the cleanup code.
1225	 */
1226	(void)sigprocmask(SIG_BLOCK, &s_mask, NULL);
1227	ar_close();
1228	proc_dir();
1229	ftree_chk();
1230}
1231
1232/*
1233 * next_head()
1234 *	try to find a valid header in the archive. Uses format specific
1235 *	routines to extract the header and id the trailer. Trailers may be
1236 *	located within a valid header or in an invalid header (the location
1237 *	is format specific. The inhead field from the option table tells us
1238 *	where to look for the trailer).
1239 *	We keep reading (and resyncing) until we get enough contiguous data
 *	to check for a header. If we cannot find one, we shift by a byte,
 *	add a new byte from the archive to the end of the buffer and try again.
1242 *	If we get a read error, we throw out what we have (as we must have
1243 *	contiguous data) and start over again.
1244 *	ASSUMED: headers fit within a BLKMULT header.
1245 * Return:
1246 *	0 if we got a header, -1 if we are unable to ever find another one
1247 *	(we reached the end of input, or we reached the limit on retries. see
1248 *	the specs for rd_wrbuf() for more details)
1249 */
1250
1251static int
1252next_head(ARCHD *arcn)
1253{
1254	int ret;
1255	char *hdend;
1256	int res;
1257	int shftsz;
1258	int hsz;
1259	int in_resync = 0;		/* set when we are in resync mode */
1260	int cnt = 0;			/* counter for trailer function */
1261	int first = 1;			/* on 1st read, EOF isn't premature. */
1262
1263	/*
1264	 * set up initial conditions, we want a whole frmt->hsz block as we
1265	 * have no data yet.
1266	 */
1267	res = hsz = frmt->hsz;
1268	hdend = hdbuf;
1269	shftsz = hsz - 1;
1270	for (;;) {
1271		/*
1272		 * keep looping until we get a contiguous FULL buffer
1273		 * (frmt->hsz is the proper size)
1274		 */
1275		for (;;) {
1276			if ((ret = rd_wrbuf(hdend, res)) == res)
1277				break;
1278
1279			/*
1280			 * If we read 0 bytes (EOF) from an archive when we
1281			 * expect to find a header, we have stepped upon
1282			 * an archive without the customary block of zeroes
1283			 * end marker.  It's just stupid to error out on
1284			 * them, so exit gracefully.
1285			 */
1286			if (first && ret == 0)
1287				return(-1);
1288			first = 0;
1289
1290			/*
1291			 * some kind of archive read problem, try to resync the
1292			 * storage device, better give the user the bad news.
1293			 */
1294			if ((ret == 0) || (rd_sync() < 0)) {
1295				paxwarn(1,"Premature end of file on archive read");
1296				return(-1);
1297			}
1298			if (!in_resync) {
1299				if (act == APPND) {
1300					paxwarn(1,
1301					  "Archive I/O error, cannot continue");
1302					return(-1);
1303				}
1304				paxwarn(1,"Archive I/O error. Trying to recover.");
1305				++in_resync;
1306			}
1307
1308			/*
1309			 * oh well, throw it all out and start over
1310			 */
1311			res = hsz;
1312			hdend = hdbuf;
1313		}
1314
1315		/*
1316		 * ok we have a contiguous buffer of the right size. Call the
1317		 * format read routine. If this was not a valid header and this
1318		 * format stores trailers outside of the header, call the
1319		 * format specific trailer routine to check for a trailer. We
1320		 * have to watch out that we do not mis-identify file data or
1321		 * block padding as a header or trailer. Format specific
1322		 * trailer functions must NOT check for the trailer while we
1323		 * are running in resync mode. Some trailer functions may tell
1324		 * us that this block cannot contain a valid header either, so
1325		 * we then throw out the entire block and start over.
1326		 */
1327		if ((*frmt->rd)(arcn, hdbuf) == 0)
1328			break;
1329
1330		if (!frmt->inhead) {
1331			/*
1332			 * this format has trailers outside of valid headers
1333			 */
1334			if ((ret = (*frmt->trail)(arcn,hdbuf,in_resync,&cnt)) == 0){
1335				/*
1336				 * valid trailer found, drain input as required
1337				 */
1338				ar_drain();
1339				return(-1);
1340			}
1341
1342			if (ret == 1) {
1343				/*
1344				 * we are in resync and we were told to throw
1345				 * the whole block out because none of the
1346				 * bytes in this block can be used to form a
1347				 * valid header
1348				 */
1349				res = hsz;
1350				hdend = hdbuf;
1351				continue;
1352			}
1353		}
1354
1355		/*
1356		 * Brute force section.
1357		 * not a valid header. We may be able to find a header yet. So
1358		 * we shift over by one byte, and set up to read one byte at a
1359		 * time from the archive and place it at the end of the buffer.
1360		 * We will keep moving byte at a time until we find a header or
1361		 * get a read error and have to start over.
1362		 */
1363		if (!in_resync) {
1364			if (act == APPND) {
1365				paxwarn(1,"Unable to append, archive header flaw");
1366				return(-1);
1367			}
1368			paxwarn(1,"Invalid header, starting valid header search.");
1369			++in_resync;
1370		}
1371		memmove(hdbuf, hdbuf+1, shftsz);
1372		res = 1;
1373		hdend = hdbuf + shftsz;
1374	}
1375
1376	/*
1377	 * ok got a valid header, check for trailer if format encodes it in the
1378	 * the header. NOTE: the parameters are different than trailer routines
1379	 * which encode trailers outside of the header!
1380	 */
1381	if (frmt->inhead && ((*frmt->trail)(arcn,NULL,0,NULL) == 0)) {
1382		/*
1383		 * valid trailer found, drain input as required
1384		 */
1385		ar_drain();
1386		return(-1);
1387	}
1388
1389	++flcnt;
1390	return(0);
1391}
1392
1393/*
1394 * get_arc()
1395 *	Figure out what format an archive is. Handles archive with flaws by
1396 *	brute force searches for a legal header in any supported format. The
1397 *	format id routines have to be careful to NOT mis-identify a format.
1398 *	ASSUMED: headers fit within a BLKMULT header.
1399 * Return:
 *	0 if archive found, -1 otherwise
1401 */
1402
1403static int
1404get_arc(void)
1405{
1406	int i;
1407	int hdsz = 0;
1408	int res;
1409	int minhd = BLKMULT;
1410	char *hdend;
1411	int notice = 0;
1412
1413	/*
1414	 * find the smallest header size in all archive formats and then set up
1415	 * to read the archive.
1416	 */
1417	for (i = 0; ford[i] >= 0; ++i) {
1418		if (fsub[ford[i]].hsz < minhd)
1419			minhd = fsub[ford[i]].hsz;
1420	}
1421	if (rd_start() < 0)
1422		return(-1);
1423	res = BLKMULT;
1424	hdsz = 0;
1425	hdend = hdbuf;
1426	for (;;) {
1427		for (;;) {
1428			/*
1429			 * fill the buffer with at least the smallest header
1430			 */
1431			i = rd_wrbuf(hdend, res);
1432			if (i > 0)
1433				hdsz += i;
1434			if (hdsz >= minhd)
1435				break;
1436
1437			/*
1438			 * if we cannot recover from a read error quit
1439			 */
1440			if ((i == 0) || (rd_sync() < 0))
1441				goto out;
1442
1443			/*
1444			 * when we get an error none of the data we already
1445			 * have can be used to create a legal header (we just
1446			 * got an error in the middle), so we throw it all out
1447			 * and refill the buffer with fresh data.
1448			 */
1449			res = BLKMULT;
1450			hdsz = 0;
1451			hdend = hdbuf;
1452			if (!notice) {
1453				if (act == APPND)
1454					return(-1);
1455				paxwarn(1,"Cannot identify format. Searching...");
1456				++notice;
1457			}
1458		}
1459
1460		/*
1461		 * we have at least the size of the smallest header in any
1462		 * archive format. Look to see if we have a match. The array
1463		 * ford[] is used to specify the header id order to reduce the
1464		 * chance of incorrectly id'ing a valid header (some formats
1465		 * may be subsets of each other and the order would then be
1466		 * important).
1467		 */
1468		for (i = 0; ford[i] >= 0; ++i) {
1469			if ((*fsub[ford[i]].id)(hdbuf, hdsz) < 0)
1470				continue;
1471			frmt = &(fsub[ford[i]]);
1472			/*
1473			 * yuck, to avoid slow special case code in the extract
1474			 * routines, just push this header back as if it was
1475			 * not seen. We have left extra space at start of the
1476			 * buffer for this purpose. This is a bit ugly, but
1477			 * adding all the special case code is far worse.
1478			 */
1479			pback(hdbuf, hdsz);
1480			return(0);
1481		}
1482
1483		/*
1484		 * We have a flawed archive, no match. we start searching, but
1485		 * we never allow additions to flawed archives
1486		 */
1487		if (!notice) {
1488			if (act == APPND)
1489				return(-1);
1490			paxwarn(1, "Cannot identify format. Searching...");
1491			++notice;
1492		}
1493
1494		/*
1495		 * brute force search for a header that we can id.
1496		 * we shift through byte at a time. this is slow, but we cannot
1497		 * determine the nature of the flaw in the archive in a
1498		 * portable manner
1499		 */
1500		if (--hdsz > 0) {
1501			memmove(hdbuf, hdbuf+1, hdsz);
1502			res = BLKMULT - hdsz;
1503			hdend = hdbuf + hdsz;
1504		} else {
1505			res = BLKMULT;
1506			hdend = hdbuf;
1507			hdsz = 0;
1508		}
1509	}
1510
1511    out:
1512	/*
1513	 * we cannot find a header, bow, apologize and quit
1514	 */
1515	paxwarn(1, "Sorry, unable to determine archive format.");
1516	return(-1);
1517}
1518