tar.c revision 1.32
1/*	$NetBSD: tar.c,v 1.32 2002/10/27 20:48:15 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40#include <sys/cdefs.h>
41#if defined(__RCSID) && !defined(lint)
42#if 0
43static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44#else
45__RCSID("$NetBSD: tar.c,v 1.32 2002/10/27 20:48:15 christos Exp $");
46#endif
47#endif /* not lint */
48
49#include <sys/types.h>
50#include <sys/time.h>
51#include <sys/stat.h>
52#include <sys/param.h>
53
54#include <ctype.h>
55#include <errno.h>
56#include <grp.h>
57#include <pwd.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "pax.h"
64#include "extern.h"
65#include "tar.h"
66
/*
 * Routines for reading, writing and identifying the headers of the various
 * versions of tar
 */
70
/*
 * Forward declarations for the file-local helpers defined further down.
 */
static int expandname(char *, size_t,  char **, const char *);
static void longlink(ARCHD *);
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * NOTE(review): presumably only needed where OFFT_OCT has to format a value
 * wider than u_long -- confirm against the OFFT_OCT definition in the headers.
 */
static int ull_oct(unsigned long long, char *, int, int);
#endif
79
80/*
81 * Routines common to all versions of tar
82 */
83
static int tar_nodir;			/* do not write dirs under old tar */
int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
					 * checks
					 */
static int seen_gnu_warning;		/* set once ustar_id() has warned
					 * about GNU magic with extensions
					 * off */
static char *gnu_hack_string;		/* ././@LongLink payload waiting to be
					 * written by ustar_wr(); points into
					 * the caller's ARCHD, never freed */
static int gnu_hack_len;		/* len of gnu_hack_string, set by
					 * longlink() to the name length
					 * plus one */
char *gnu_name_string;			/* ././@LongLink file name to use for
					 * the next header; freed by
					 * expandname() once consumed */
char *gnu_link_string;			/* ././@LongLink link name to use for
					 * the next header; freed by
					 * expandname() once consumed */
94
95/*
96 * tar_endwr()
97 *	add the tar trailer of two null blocks
98 * Return:
99 *	0 if ok, -1 otherwise (what wr_skip returns)
100 */
101
102int
103tar_endwr(void)
104{
105	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
106}
107
108/*
109 * tar_endrd()
110 *	no cleanup needed here, just return size of trailer (for append)
111 * Return:
112 *	size of trailer (2 * BLKMULT)
113 */
114
115off_t
116tar_endrd(void)
117{
118	return((off_t)(NULLCNT*BLKMULT));
119}
120
121/*
122 * tar_trail()
123 *	Called to determine if a header block is a valid trailer. We are passed
124 *	the block, the in_sync flag (which tells us we are in resync mode;
125 *	looking for a valid header), and cnt (which starts at zero) which is
126 *	used to count the number of empty blocks we have seen so far.
127 * Return:
128 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
129 *	could never contain a header.
130 */
131
132int
133tar_trail(char *buf, int in_resync, int *cnt)
134{
135	int i;
136
137	/*
138	 * look for all zero, trailer is two consecutive blocks of zero
139	 */
140	for (i = 0; i < BLKMULT; ++i) {
141		if (buf[i] != '\0')
142			break;
143	}
144
145	/*
146	 * if not all zero it is not a trailer, but MIGHT be a header.
147	 */
148	if (i != BLKMULT)
149		return(-1);
150
151	/*
152	 * When given a zero block, we must be careful!
153	 * If we are not in resync mode, check for the trailer. Have to watch
154	 * out that we do not mis-identify file data as the trailer, so we do
155	 * NOT try to id a trailer during resync mode. During resync mode we
156	 * might as well throw this block out since a valid header can NEVER be
157	 * a block of all 0 (we must have a valid file name).
158	 */
159	if (!in_resync && (++*cnt >= NULLCNT))
160		return(0);
161	return(1);
162}
163
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		0 (and any unknown value)	digits, ' ', '\0'
 *		1				digits, ' '
 *		2				digits, '\0', ' '
 *		3				digits, '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise (this includes a len
 *	that cannot even hold the terminator; nothing is written in that case)
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos;		/* next byte to fill, walking toward 0 */
	int need;		/* bytes taken by the terminator */

	/*
	 * refuse fields too short for the terminator instead of writing in
	 * front of str
	 */
	need = (term == 1 || term == 3) ? 1 : 2;
	if (len < need)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert, least significant digit first. An index is used rather
	 * than a moving pointer so that we never form a pointer in front of
	 * str.
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (int)(val & 0x7));
		val >>= 3;
		if (val == (u_long)0)
			break;
	}

	/*
	 * zero pad whatever is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * bits left over means the value did not fit
	 */
	return((val != (u_long)0) ? -1 : 0);
}
217
218#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	convert an unsigned long long to an octal string. one of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields. term selects which kind to use:
 *		0 (and any unknown value)	digits, ' ', '\0'
 *		1				digits, ' '
 *		2				digits, '\0', ' '
 *		3				digits, '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise (this includes a len
 *	that cannot even hold the terminator; nothing is written in that case)
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos;		/* next byte to fill, walking toward 0 */
	int need;		/* bytes taken by the terminator */

	/*
	 * refuse fields too short for the terminator instead of writing in
	 * front of str
	 */
	need = (term == 1 || term == 3) ? 1 : 2;
	if (len < need)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert, least significant digit first. An index is used rather
	 * than a moving pointer so that we never form a pointer in front of
	 * str.
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (int)(val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero pad whatever is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * bits left over means the value did not fit
	 */
	return((val != (unsigned long long)0) ? -1 : 0);
}
272#endif
273
274/*
275 * tar_chksm()
276 *	calculate the checksum for a tar block counting the checksum field as
277 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
278 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
279 *	pad headers with 0.
280 * Return:
281 *	unsigned long checksum
282 */
283
284static u_long
285tar_chksm(char *blk, int len)
286{
287	char *stop;
288	char *pt;
289	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
290
291	/*
292	 * add the part of the block before the checksum field
293	 */
294	pt = blk;
295	stop = blk + CHK_OFFSET;
296	while (pt < stop)
297		chksm += (u_long)(*pt++ & 0xff);
298	/*
299	 * move past the checksum field and keep going, spec counts the
300	 * checksum field as the sum of 8 blanks (which is pre-computed as
301	 * BLNKSUM).
302	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
303	 * starts, no point in summing zero's)
304	 */
305	pt += CHK_LEN;
306	stop = blk + len;
307	while (pt < stop)
308		chksm += (u_long)(*pt++ & 0xff);
309	return(chksm);
310}
311
312/*
313 * Routines for old BSD style tar (also made portable to sysV tar)
314 */
315
316/*
317 * tar_id()
318 *	determine if a block given to us is a valid tar header (and not a USTAR
319 *	header). We have to be on the lookout for those pesky blocks of	all
320 *	zero's.
321 * Return:
322 *	0 if a tar header, -1 otherwise
323 */
324
325int
326tar_id(char *blk, int size)
327{
328	HD_TAR *hd;
329	HD_USTAR *uhd;
330
331	if (size < BLKMULT)
332		return(-1);
333	hd = (HD_TAR *)blk;
334	uhd = (HD_USTAR *)blk;
335
336	/*
337	 * check for block of zero's first, a simple and fast test, then make
338	 * sure this is not a ustar header by looking for the ustar magic
339	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
340	 * wrong and create archives missing the \0. Last we check the
341	 * checksum. If this is ok we have to assume it is a valid header.
342	 */
343	if (hd->name[0] == '\0')
344		return(-1);
345	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
346		return(-1);
347	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
348		return(-1);
349	return(0);
350}
351
352/*
353 * tar_opt()
354 *	handle tar format specific -o options
355 * Return:
356 *	0 if ok -1 otherwise
357 */
358
359int
360tar_opt(void)
361{
362	OPLIST *opt;
363
364	while ((opt = opt_next()) != NULL) {
365		if (strcmp(opt->name, TAR_OPTION) ||
366		    strcmp(opt->value, TAR_NODIR)) {
367			tty_warn(1,
368			    "Unknown tar format -o option/value pair %s=%s",
369			    opt->name, opt->value);
370			tty_warn(1,
371			    "%s=%s is the only supported tar format option",
372			    TAR_OPTION, TAR_NODIR);
373			return(-1);
374		}
375
376		/*
377		 * we only support one option, and only when writing
378		 */
379		if ((act != APPND) && (act != ARCHIVE)) {
380			tty_warn(1, "%s=%s is only supported when writing.",
381			    opt->name, opt->value);
382			return(-1);
383		}
384		tar_nodir = 1;
385	}
386	return(0);
387}
388
389
390/*
391 * tar_rd()
392 *	extract the values out of block already determined to be a tar header.
393 *	store the values in the ARCHD parameter.
394 * Return:
395 *	0
396 */
397
398int
399tar_rd(ARCHD *arcn, char *buf)
400{
401	HD_TAR *hd;
402	char *pt;
403
404	/*
405	 * we only get proper sized buffers passed to us
406	 */
407	if (tar_id(buf, BLKMULT) < 0)
408		return(-1);
409	memset(arcn, 0, sizeof(*arcn));
410	arcn->org_name = arcn->name;
411	arcn->pat = NULL;
412	arcn->sb.st_nlink = 1;
413
414	/*
415	 * copy out the name and values in the stat buffer
416	 */
417	hd = (HD_TAR *)buf;
418	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
419		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
420		    &gnu_name_string, hd->name);
421		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
422		    &gnu_link_string, hd->linkname);
423	}
424	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
425	    0xfff);
426	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
427	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
428	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
429	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
430	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
431
432	/*
433	 * have to look at the last character, it may be a '/' and that is used
434	 * to encode this as a directory
435	 */
436	pt = &(arcn->name[arcn->nlen - 1]);
437	arcn->pad = 0;
438	arcn->skip = 0;
439	switch(hd->linkflag) {
440	case SYMTYPE:
441		/*
442		 * symbolic link, need to get the link name and set the type in
443		 * the st_mode so -v printing will look correct.
444		 */
445		arcn->type = PAX_SLK;
446		arcn->sb.st_mode |= S_IFLNK;
447		break;
448	case LNKTYPE:
449		/*
450		 * hard link, need to get the link name, set the type in the
451		 * st_mode and st_nlink so -v printing will look better.
452		 */
453		arcn->type = PAX_HLK;
454		arcn->sb.st_nlink = 2;
455
456		/*
457		 * no idea of what type this thing really points at, but
458		 * we set something for printing only.
459		 */
460		arcn->sb.st_mode |= S_IFREG;
461		break;
462	case LONGLINKTYPE:
463		arcn->type = PAX_GLL;
464		/* FALLTHROUGH */
465	case LONGNAMETYPE:
466		/*
467		 * GNU long link/file; we tag these here and let the
468		 * pax internals deal with it -- too ugly otherwise.
469		 */
470		if (hd->linkflag != LONGLINKTYPE)
471			arcn->type = PAX_GLF;
472		arcn->pad = TAR_PAD(arcn->sb.st_size);
473		arcn->skip = arcn->sb.st_size;
474		break;
475	case AREGTYPE:
476	case REGTYPE:
477	case DIRTYPE:	/* see below */
478	default:
479		/*
480		 * If we have a trailing / this is a directory and NOT a file.
481		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
482		 * reported that V7 archives using USTAR directories do exist.
483		 */
484		if (*pt == '/' || hd->linkflag == DIRTYPE) {
485			/*
486			 * it is a directory, set the mode for -v printing
487			 */
488			arcn->type = PAX_DIR;
489			arcn->sb.st_mode |= S_IFDIR;
490			arcn->sb.st_nlink = 2;
491		} else {
492			/*
493			 * have a file that will be followed by data. Set the
494			 * skip value to the size field and calculate the size
495			 * of the padding.
496			 */
497			arcn->type = PAX_REG;
498			arcn->sb.st_mode |= S_IFREG;
499			arcn->pad = TAR_PAD(arcn->sb.st_size);
500			arcn->skip = arcn->sb.st_size;
501		}
502		break;
503	}
504
505	/*
506	 * strip off any trailing slash.
507	 */
508	if (*pt == '/') {
509		*pt = '\0';
510		--arcn->nlen;
511	}
512	return(0);
513}
514
515/*
516 * tar_wr()
517 *	write a tar header for the file specified in the ARCHD to the archive.
518 *	Have to check for file types that cannot be stored and file names that
519 *	are too long. Be careful of the term (last arg) to ul_oct, each field
520 *	of tar has it own spec for the termination character(s).
521 *	ASSUMED: space after header in header block is zero filled
522 * Return:
523 *	0 if file has data to be written after the header, 1 if file has NO
524 *	data to write after the header, -1 if archive write failed
525 */
526
527int
528tar_wr(ARCHD *arcn)
529{
530	HD_TAR *hd;
531	int len;
532	char hdblk[sizeof(HD_TAR)];
533
534	/*
535	 * check for those file system types which tar cannot store
536	 */
537	switch(arcn->type) {
538	case PAX_DIR:
539		/*
540		 * user asked that dirs not be written to the archive
541		 */
542		if (tar_nodir)
543			return(1);
544		break;
545	case PAX_CHR:
546		tty_warn(1, "Tar cannot archive a character device %s",
547		    arcn->org_name);
548		return(1);
549	case PAX_BLK:
550		tty_warn(1,
551		    "Tar cannot archive a block device %s", arcn->org_name);
552		return(1);
553	case PAX_SCK:
554		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
555		return(1);
556	case PAX_FIF:
557		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
558		return(1);
559	case PAX_SLK:
560	case PAX_HLK:
561	case PAX_HRG:
562		if (arcn->ln_nlen > sizeof(hd->linkname)) {
563			tty_warn(1,"Link name too long for tar %s",
564			    arcn->ln_name);
565			return(1);
566		}
567		break;
568	case PAX_REG:
569	case PAX_CTG:
570	default:
571		break;
572	}
573
574	/*
575	 * check file name len, remember extra char for dirs (the / at the end)
576	 */
577	len = arcn->nlen;
578	if (arcn->type == PAX_DIR)
579		++len;
580	if (len >= sizeof(hd->name)) {
581		tty_warn(1, "File name too long for tar %s", arcn->name);
582		return(1);
583	}
584
585	/*
586	 * copy the data out of the ARCHD into the tar header based on the type
587	 * of the file. Remember many tar readers want the unused fields to be
588	 * padded with zero. We set the linkflag field (type), the linkname
589	 * (or zero if not used),the size, and set the padding (if any) to be
590	 * added after the file data (0 for all other types, as they only have
591	 * a header)
592	 */
593	memset(hdblk, 0, sizeof(hdblk));
594	hd = (HD_TAR *)hdblk;
595	strlcpy(hd->name, arcn->name, sizeof(hd->name));
596	arcn->pad = 0;
597
598	if (arcn->type == PAX_DIR) {
599		/*
600		 * directories are the same as files, except have a filename
601		 * that ends with a /, we add the slash here. No data follows,
602		 * dirs, so no pad.
603		 */
604		hd->linkflag = AREGTYPE;
605		hd->name[len-1] = '/';
606		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
607			goto out;
608	} else if (arcn->type == PAX_SLK) {
609		/*
610		 * no data follows this file, so no pad
611		 */
612		hd->linkflag = SYMTYPE;
613		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
614		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
615			goto out;
616	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
617		/*
618		 * no data follows this file, so no pad
619		 */
620		hd->linkflag = LNKTYPE;
621		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
622		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
623			goto out;
624	} else {
625		/*
626		 * data follows this file, so set the pad
627		 */
628		hd->linkflag = AREGTYPE;
629		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
630			tty_warn(1,"File is too large for tar %s",
631			    arcn->org_name);
632			return(1);
633		}
634		arcn->pad = TAR_PAD(arcn->sb.st_size);
635	}
636
637	/*
638	 * copy those fields that are independent of the type
639	 */
640	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
641	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
642	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
643	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
644		goto out;
645
646	/*
647	 * calculate and add the checksum, then write the header. A return of
648	 * 0 tells the caller to now write the file data, 1 says no data needs
649	 * to be written
650	 */
651	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
652	    sizeof(hd->chksum), 3))
653		goto out;			/* XXX Something's wrong here
654						 * because a zero-byte file can
655						 * cause this to be done and
656						 * yet the resulting warning
657						 * seems incorrect */
658
659	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
660		return(-1);
661	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
662		return(-1);
663	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
664		return(0);
665	return(1);
666
667    out:
668	/*
669	 * header field is out of range
670	 */
671	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
672	return(1);
673}
674
675/*
676 * Routines for POSIX ustar
677 */
678
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare
 * Return:
 *	0 if ok, -1 otherwise (as written it always returns 0)
 */

int
ustar_strd(void)
{
	const int status = 0;

	return(status);
}
691
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare
 * Return:
 *	0 if ok, -1 otherwise (as written it always returns 0)
 */

int
ustar_stwr(void)
{
	const int status = 0;

	return(status);
}
704
705/*
706 * ustar_id()
707 *	determine if a block given to us is a valid ustar header. We have to
708 *	be on the lookout for those pesky blocks of all zero's
709 * Return:
710 *	0 if a ustar header, -1 otherwise
711 */
712
713int
714ustar_id(char *blk, int size)
715{
716	HD_USTAR *hd;
717
718	if (size < BLKMULT)
719		return(-1);
720	hd = (HD_USTAR *)blk;
721
722	/*
723	 * check for block of zero's first, a simple and fast test then check
724	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
725	 * programs are fouled up and create archives missing the \0. Last we
726	 * check the checksum. If ok we have to assume it is a valid header.
727	 */
728	if (hd->name[0] == '\0')
729		return(-1);
730	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
731		return(-1);
732	/* This is GNU tar */
733	if (strncmp(hd->magic, "ustar  ", 8) == 0 && !is_gnutar &&
734	    !seen_gnu_warning) {
735		seen_gnu_warning = 1;
736		tty_warn(0,
737		    "Trying to read GNU tar archive with extensions off");
738	}
739	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
740		return(-1);
741	return(0);
742}
743
744/*
745 * ustar_rd()
746 *	extract the values out of block already determined to be a ustar header.
747 *	store the values in the ARCHD parameter.
748 * Return:
749 *	0
750 */
751
752int
753ustar_rd(ARCHD *arcn, char *buf)
754{
755	HD_USTAR *hd;
756	char *dest;
757	int cnt;
758	dev_t devmajor;
759	dev_t devminor;
760
761	/*
762	 * we only get proper sized buffers
763	 */
764	if (ustar_id(buf, BLKMULT) < 0)
765		return(-1);
766
767	memset(arcn, 0, sizeof(*arcn));
768	arcn->org_name = arcn->name;
769	arcn->pat = NULL;
770	arcn->sb.st_nlink = 1;
771	hd = (HD_USTAR *)buf;
772
773	/*
774	 * see if the filename is split into two parts. if, so joint the parts.
775	 * we copy the prefix first and add a / between the prefix and name.
776	 */
777	dest = arcn->name;
778	if (*(hd->prefix) != '\0') {
779		cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
780		dest += cnt;
781		*dest++ = '/';
782		cnt++;
783	} else {
784		cnt = 0;
785	}
786
787	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
788		arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
789		    &gnu_name_string, hd->name);
790		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
791		    &gnu_link_string, hd->linkname);
792	}
793
794	/*
795	 * follow the spec to the letter. we should only have mode bits, strip
796	 * off all other crud we may be passed.
797	 */
798	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
799	    0xfff);
800	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
801	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
802	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
803
804	/*
805	 * If we can find the ascii names for gname and uname in the password
806	 * and group files we will use the uid's and gid they bind. Otherwise
807	 * we use the uid and gid values stored in the header. (This is what
808	 * the posix spec wants).
809	 */
810	hd->gname[sizeof(hd->gname) - 1] = '\0';
811	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
812		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
813	hd->uname[sizeof(hd->uname) - 1] = '\0';
814	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
815		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
816
817	/*
818	 * set the defaults, these may be changed depending on the file type
819	 */
820	arcn->pad = 0;
821	arcn->skip = 0;
822	arcn->sb.st_rdev = (dev_t)0;
823
824	/*
825	 * set the mode and PAX type according to the typeflag in the header
826	 */
827	switch(hd->typeflag) {
828	case FIFOTYPE:
829		arcn->type = PAX_FIF;
830		arcn->sb.st_mode |= S_IFIFO;
831		break;
832	case DIRTYPE:
833		arcn->type = PAX_DIR;
834		arcn->sb.st_mode |= S_IFDIR;
835		arcn->sb.st_nlink = 2;
836
837		/*
838		 * Some programs that create ustar archives append a '/'
839		 * to the pathname for directories. This clearly violates
840		 * ustar specs, but we will silently strip it off anyway.
841		 */
842		if (arcn->name[arcn->nlen - 1] == '/')
843			arcn->name[--arcn->nlen] = '\0';
844		break;
845	case BLKTYPE:
846	case CHRTYPE:
847		/*
848		 * this type requires the rdev field to be set.
849		 */
850		if (hd->typeflag == BLKTYPE) {
851			arcn->type = PAX_BLK;
852			arcn->sb.st_mode |= S_IFBLK;
853		} else {
854			arcn->type = PAX_CHR;
855			arcn->sb.st_mode |= S_IFCHR;
856		}
857		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
858		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
859		arcn->sb.st_rdev = TODEV(devmajor, devminor);
860		break;
861	case SYMTYPE:
862	case LNKTYPE:
863		if (hd->typeflag == SYMTYPE) {
864			arcn->type = PAX_SLK;
865			arcn->sb.st_mode |= S_IFLNK;
866		} else {
867			arcn->type = PAX_HLK;
868			/*
869			 * so printing looks better
870			 */
871			arcn->sb.st_mode |= S_IFREG;
872			arcn->sb.st_nlink = 2;
873		}
874		break;
875	case LONGLINKTYPE:
876		if (is_gnutar)
877			arcn->type = PAX_GLL;
878		/* FALLTHROUGH */
879	case LONGNAMETYPE:
880		if (is_gnutar) {
881			/*
882			 * GNU long link/file; we tag these here and let the
883			 * pax internals deal with it -- too ugly otherwise.
884			 */
885			if (hd->typeflag != LONGLINKTYPE)
886				arcn->type = PAX_GLF;
887			arcn->pad = TAR_PAD(arcn->sb.st_size);
888			arcn->skip = arcn->sb.st_size;
889		} else {
890			tty_warn(1, "GNU Long %s found in posix ustar archive.",
891			    hd->typeflag == LONGLINKTYPE ? "Link" : "File");
892		}
893		break;
894	case CONTTYPE:
895	case AREGTYPE:
896	case REGTYPE:
897	default:
898		/*
899		 * these types have file data that follows. Set the skip and
900		 * pad fields.
901		 */
902		arcn->type = PAX_REG;
903		arcn->pad = TAR_PAD(arcn->sb.st_size);
904		arcn->skip = arcn->sb.st_size;
905		arcn->sb.st_mode |= S_IFREG;
906		break;
907	}
908	return(0);
909}
910
/*
 * expandname()
 *	store the name of an archive member in buf (len bytes large). When a
 *	GNU long name is pending in *gnu_name it is used in place of the
 *	header name, then freed and *gnu_name is reset to NULL. name must be
 *	NUL terminated. The result is truncated to fit buf and is always NUL
 *	terminated (as long as len is not 0).
 * Return:
 *	the length of the string actually stored in buf. This never exceeds
 *	len - 1, so it can safely be used to index buf.
 */

static int
expandname(char *buf, size_t len,  char **gnu_name, const char *name)
{
	const char *src;
	size_t stored = 0;

	src = (*gnu_name != NULL) ? *gnu_name : name;

	if (len > 0) {
		/*
		 * report what was stored, not how long the source was: the
		 * callers use the result to find the last character in buf.
		 */
		stored = strlen(src);
		if (stored >= len)
			stored = len - 1;
		memcpy(buf, src, stored);
		buf[stored] = '\0';
	}

	/*
	 * a pending long name is good for one header only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return((int)stored);
}
923
924static void
925longlink(ARCHD *arcn)
926{
927	ARCHD larc;
928
929	memset(&larc, 0, sizeof(larc));
930
931	switch (arcn->type) {
932	case PAX_SLK:
933	case PAX_HRG:
934	case PAX_HLK:
935		larc.type = PAX_GLL;
936		larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
937		    sizeof(larc.ln_name));
938		gnu_hack_string = arcn->ln_name;
939		gnu_hack_len = arcn->ln_nlen + 1;
940		break;
941	default:
942		larc.nlen = strlcpy(larc.name, "././@LongLink",
943		    sizeof(larc.name));
944		gnu_hack_string = arcn->name;
945		gnu_hack_len = arcn->nlen + 1;
946		larc.type = PAX_GLF;
947	}
948	/*
949	 * We need a longlink now.
950	 */
951	ustar_wr(&larc);
952}
953
954/*
955 * ustar_wr()
956 *	write a ustar header for the file specified in the ARCHD to the archive
957 *	Have to check for file types that cannot be stored and file names that
958 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
959 *	'\0' for the termination character (this is different than picky tar)
960 *	ASSUMED: space after header in header block is zero filled
961 * Return:
962 *	0 if file has data to be written after the header, 1 if file has NO
963 *	data to write after the header, -1 if archive write failed
964 */
965
966int
967ustar_wr(ARCHD *arcn)
968{
969	HD_USTAR *hd;
970	char *pt;
971	char hdblk[sizeof(HD_USTAR)];
972	const char *user, *group;
973
974	/*
975	 * check for those file system types ustar cannot store
976	 */
977	if (arcn->type == PAX_SCK) {
978		tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name);
979		return(1);
980	}
981
982	/*
983	 * check the length of the linkname
984	 */
985	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
986	    (arcn->type == PAX_HRG)) &&
987	    (arcn->ln_nlen >= sizeof(hd->linkname))){
988		if (is_gnutar) {
989			longlink(arcn);
990		} else {
991			tty_warn(1, "Link name too long for ustar %s",
992			    arcn->ln_name);
993			return(1);
994		}
995	}
996
997	/*
998	 * split the path name into prefix and name fields (if needed). if
999	 * pt != arcn->name, the name has to be split
1000	 */
1001	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1002		if (is_gnutar) {
1003			longlink(arcn);
1004			pt = arcn->name;
1005		} else {
1006			tty_warn(1, "File name too long for ustar %s",
1007			    arcn->name);
1008			return(1);
1009		}
1010	}
1011
1012	/*
1013	 * zero out the header so we don't have to worry about zero fill below
1014	 */
1015	memset(hdblk, 0, sizeof(hdblk));
1016	hd = (HD_USTAR *)hdblk;
1017	arcn->pad = 0L;
1018
1019	/*
1020	 * split the name, or zero out the prefix
1021	 */
1022	if (pt != arcn->name) {
1023		/*
1024		 * name was split, pt points at the / where the split is to
1025		 * occur, we remove the / and copy the first part to the prefix
1026		 */
1027		*pt = '\0';
1028		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1029		*pt++ = '/';
1030	}
1031
1032	/*
1033	 * copy the name part. this may be the whole path or the part after
1034	 * the prefix
1035	 */
1036	strlcpy(hd->name, pt, sizeof(hd->name));
1037
1038	/*
1039	 * set the fields in the header that are type dependent
1040	 */
1041	switch(arcn->type) {
1042	case PAX_DIR:
1043		hd->typeflag = DIRTYPE;
1044		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1045			goto out;
1046		break;
1047	case PAX_CHR:
1048	case PAX_BLK:
1049		if (arcn->type == PAX_CHR)
1050			hd->typeflag = CHRTYPE;
1051		else
1052			hd->typeflag = BLKTYPE;
1053		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1054		   sizeof(hd->devmajor), 3) ||
1055		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1056		   sizeof(hd->devminor), 3) ||
1057		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1058			goto out;
1059		break;
1060	case PAX_FIF:
1061		hd->typeflag = FIFOTYPE;
1062		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1063			goto out;
1064		break;
1065	case PAX_GLL:
1066	case PAX_SLK:
1067	case PAX_HLK:
1068	case PAX_HRG:
1069		if (arcn->type == PAX_SLK)
1070			hd->typeflag = SYMTYPE;
1071		else if (arcn->type == PAX_GLL)
1072			hd->typeflag = LONGLINKTYPE;
1073		else
1074			hd->typeflag = LNKTYPE;
1075		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1076		if (ul_oct((u_long)gnu_hack_len, hd->size,
1077		    sizeof(hd->size), 3))
1078			goto out;
1079		break;
1080	case PAX_GLF:
1081	case PAX_REG:
1082	case PAX_CTG:
1083	default:
1084		/*
1085		 * file data with this type, set the padding
1086		 */
1087		if (arcn->type == PAX_GLF) {
1088			hd->typeflag = LONGNAMETYPE;
1089			arcn->pad = TAR_PAD(gnu_hack_len);
1090			if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1091			    sizeof(hd->size), 3)) {
1092				tty_warn(1,"File is too long for ustar %s",
1093				    arcn->org_name);
1094				return(1);
1095			}
1096		} else {
1097			if (arcn->type == PAX_CTG)
1098				hd->typeflag = CONTTYPE;
1099			else
1100				hd->typeflag = REGTYPE;
1101			arcn->pad = TAR_PAD(arcn->sb.st_size);
1102			if (OFFT_OCT(arcn->sb.st_size, hd->size,
1103			    sizeof(hd->size), 3)) {
1104				tty_warn(1,"File is too long for ustar %s",
1105				    arcn->org_name);
1106				return(1);
1107			}
1108		}
1109		break;
1110	}
1111
1112	strncpy(hd->magic, TMAGIC, TMAGLEN);
1113	if (is_gnutar)
1114		hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1115	else
1116		strncpy(hd->version, TVERSION, TVERSLEN);
1117
1118	/*
1119	 * set the remaining fields. Some versions want all 16 bits of mode
1120	 * we better humor them (they really do not meet spec though)....
1121	 */
1122	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1123	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1124	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1125	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1126		goto out;
1127	user = user_from_uid(arcn->sb.st_uid, 1);
1128	group = group_from_gid(arcn->sb.st_gid, 1);
1129	strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1130	strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1131
1132	/*
1133	 * calculate and store the checksum write the header to the archive
1134	 * return 0 tells the caller to now write the file data, 1 says no data
1135	 * needs to be written
1136	 */
1137	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1138	   sizeof(hd->chksum), 3))
1139		goto out;
1140	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1141		return(-1);
1142	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1143		return(-1);
1144	if (gnu_hack_string) {
1145		int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1146		int pad = gnu_hack_len;
1147		gnu_hack_string = NULL;
1148		gnu_hack_len = 0;
1149		if (res < 0)
1150			return(-1);
1151		if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1152			return(-1);
1153	}
1154	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1155		return(0);
1156	return(1);
1157
1158    out:
1159	/*
1160	 * header field is out of range
1161	 */
1162	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1163	return(1);
1164}
1165
1166/*
1167 * name_split()
1168 *	see if the name has to be split for storage in a ustar header. We try
1169 *	to fit the entire name in the name field without splitting if we can.
1170 *	The split point is always at a /
1171 * Return
1172 *	character pointer to split point (always the / that is to be removed
1173 *	if the split is not needed, the points is set to the start of the file
1174 *	name (it would violate the spec to split there). A NULL is returned if
1175 *	the file name is too long
1176 */
1177
1178static char *
1179name_split(char *name, int len)
1180{
1181	char *start;
1182
1183	/*
1184	 * check to see if the file name is small enough to fit in the name
1185	 * field. if so just return a pointer to the name.
1186	 */
1187	if (len < TNMSZ)
1188		return(name);
1189	if (len > (TPFSZ + TNMSZ))
1190		return(NULL);
1191
1192	/*
1193	 * we start looking at the biggest sized piece that fits in the name
1194	 * field. We walk forward looking for a slash to split at. The idea is
1195	 * to find the biggest piece to fit in the name field (or the smallest
1196	 * prefix we can find) (the -1 is correct the biggest piece would
1197	 * include the slash between the two parts that gets thrown away)
1198	 */
1199	start = name + len - TNMSZ;
1200	while ((*start != '\0') && (*start != '/'))
1201		++start;
1202
1203	/*
1204	 * if we hit the end of the string, this name cannot be split, so we
1205	 * cannot store this file.
1206	 */
1207	if (*start == '\0')
1208		return(NULL);
1209	len = start - name;
1210
1211	/*
1212	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1213	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1214	 * the file would then expand on extract to //str. The len == 0 below
1215	 * makes this special case follow the spec to the letter.
1216	 */
1217	if ((len >= TPFSZ) || (len == 0))
1218		return(NULL);
1219
1220	/*
1221	 * ok have a split point, return it to the caller
1222	 */
1223	return(start);
1224}
1225
/*
 * tar_gnutar_X_compat()
 *	Deal with the GNU tar -X switch. Each line of the file named by path
 *	is taken as a glob pattern and rewritten as a replacement expression
 *	of the form `/^RE$//', which is passed to rep_add(). The empty
 *	replacement part is what turns the expression into an exclusion of
 *	the named files.
 *
 *	Glob to RE conversion: `*' becomes `.*', `?' becomes `.', and every
 *	other character that is neither alphanumeric nor blank is escaped
 *	with a backslash.
 * Return
 *	0 if the whole file was processed, -1 if the file could not be opened
 *	or rep_add() failed. Lines longer than MAXPATHLEN produce a warning
 *	and are skipped.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line, sbuf[MAXPATHLEN * 2 + 1 + 5];
	FILE *fp;
	int lineno = 0, ret = 0;
	size_t len, i, j;
	unsigned char ch;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "can not open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;

		/*
		 * fgetln() includes the newline in the line, drop it so that
		 * len is the length of the pattern itself
		 */
		if (len > 0 && line[len - 1] == '\n')
			len--;

		/*
		 * sbuf holds at most two bytes per input character plus the
		 * five framing characters and the NUL. A longer line would
		 * overrun it, so warn and skip the line.
		 */
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}

		/*
		 * convert glob to regexp, escaping everything; the first two
		 * slots of sbuf are reserved for the leading "/^"
		 */
		j = 2;
		for (i = 0; i < len; i++) {
			/*
			 * the ctype functions need a value representable as
			 * unsigned char, a plain (possibly negative) char is
			 * not safe to pass
			 */
			ch = (unsigned char)line[i];
			if (ch == '*') {
				sbuf[j++] = '.';
				sbuf[j++] = '*';
			} else if (ch == '?') {
				sbuf[j++] = '.';
			} else {
				if (!isalnum(ch) && !isblank(ch))
					sbuf[j++] = '\\';
				sbuf[j++] = line[i];
			}
		}

		/*
		 * frame the expression: /^RE$// (empty replacement)
		 */
		sbuf[0] = '/';
		sbuf[1] = '^';
		sbuf[j++] = '$';
		sbuf[j++] = '/';
		sbuf[j++] = '/';
		sbuf[j] = '\0';

		if (rep_add(sbuf) < 0) {
			ret = -1;
			break;
		}
	}

	/*
	 * the stream is only read from, nothing useful to do if close fails
	 */
	(void)fclose(fp);
	return(ret);
}
1277