tar.c revision 1.34
1/*	$NetBSD: tar.c,v 1.34 2002/12/08 02:00:10 mrg Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40#include <sys/cdefs.h>
41#if defined(__RCSID) && !defined(lint)
42#if 0
43static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44#else
45__RCSID("$NetBSD: tar.c,v 1.34 2002/12/08 02:00:10 mrg Exp $");
46#endif
47#endif /* not lint */
48
49#include <sys/types.h>
50#include <sys/time.h>
51#include <sys/stat.h>
52#include <sys/param.h>
53
54#include <ctype.h>
55#include <errno.h>
56#include <grp.h>
57#include <pwd.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "pax.h"
64#include "extern.h"
65#include "tar.h"
66
67/*
 * Routines for reading and writing the various versions of tar, and for
 * identifying their headers
69 */
70
71static int expandname(char *, size_t,  char **, const char *);
72static void longlink(ARCHD *);
73static u_long tar_chksm(char *, int);
74static char *name_split(char *, int);
75static int ul_oct(u_long, char *, int, int);
76#if !defined(NET2_STAT) && !defined(_LP64)
77static int ull_oct(unsigned long long, char *, int, int);
78#endif
79static int tar_gnutar_exclude_one(const char *, size_t);
80
81/*
82 * Routines common to all versions of tar
83 */
84
85static int tar_nodir;			/* do not write dirs under old tar */
86int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
88					 * checks
89					 */
90static int seen_gnu_warning;		/* Have we warned yet? */
91static char *gnu_hack_string;		/* ././@LongLink hackery */
92static int gnu_hack_len;		/* len of gnu_hack_string */
93char *gnu_name_string;			/* ././@LongLink hackery name */
94char *gnu_link_string;			/* ././@LongLink hackery link */
95
96/*
97 * tar_endwr()
98 *	add the tar trailer of two null blocks
99 * Return:
100 *	0 if ok, -1 otherwise (what wr_skip returns)
101 */
102
103int
104tar_endwr(void)
105{
106	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
107}
108
109/*
110 * tar_endrd()
111 *	no cleanup needed here, just return size of trailer (for append)
112 * Return:
113 *	size of trailer (2 * BLKMULT)
114 */
115
116off_t
117tar_endrd(void)
118{
119	return((off_t)(NULLCNT*BLKMULT));
120}
121
122/*
123 * tar_trail()
124 *	Called to determine if a header block is a valid trailer. We are passed
125 *	the block, the in_sync flag (which tells us we are in resync mode;
126 *	looking for a valid header), and cnt (which starts at zero) which is
127 *	used to count the number of empty blocks we have seen so far.
128 * Return:
129 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
130 *	could never contain a header.
131 */
132
133int
134tar_trail(char *buf, int in_resync, int *cnt)
135{
136	int i;
137
138	/*
139	 * look for all zero, trailer is two consecutive blocks of zero
140	 */
141	for (i = 0; i < BLKMULT; ++i) {
142		if (buf[i] != '\0')
143			break;
144	}
145
146	/*
147	 * if not all zero it is not a trailer, but MIGHT be a header.
148	 */
149	if (i != BLKMULT)
150		return(-1);
151
152	/*
153	 * When given a zero block, we must be careful!
154	 * If we are not in resync mode, check for the trailer. Have to watch
155	 * out that we do not mis-identify file data as the trailer, so we do
156	 * NOT try to id a trailer during resync mode. During resync mode we
157	 * might as well throw this block out since a valid header can NEVER be
158	 * a block of all 0 (we must have a valid file name).
159	 */
160	if (!in_resync && (++*cnt >= NULLCNT))
161		return(0);
162	return(1);
163}
164
165/*
166 * ul_oct()
167 *	convert an unsigned long to an octal string. many oddball field
168 *	termination characters are used by the various versions of tar in the
169 *	different fields. term selects which kind to use. str is '0' padded
170 *	at the front to len. we are unable to use only one format as many old
171 *	tar readers are very cranky about this.
172 * Return:
173 *	0 if the number fit into the string, -1 otherwise
174 */
175
176static int
177ul_oct(u_long val, char *str, int len, int term)
178{
179	char *pt;
180
181	/*
182	 * term selects the appropriate character(s) for the end of the string
183	 */
184	pt = str + len - 1;
185	switch(term) {
186	case 3:
187		*pt-- = '\0';
188		break;
189	case 2:
190		*pt-- = ' ';
191		*pt-- = '\0';
192		break;
193	case 1:
194		*pt-- = ' ';
195		break;
196	case 0:
197	default:
198		*pt-- = '\0';
199		*pt-- = ' ';
200		break;
201	}
202
203	/*
204	 * convert and blank pad if there is space
205	 */
206	while (pt >= str) {
207		*pt-- = '0' + (char)(val & 0x7);
208		if ((val = val >> 3) == (u_long)0)
209			break;
210	}
211
212	while (pt >= str)
213		*pt-- = '0';
214	if (val != (u_long)0)
215		return(-1);
216	return(0);
217}
218
219#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	convert an unsigned long long to an octal string. one of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields. term selects which kind to use:
 *		0 (and any unknown value)	digits, ' ', '\0'
 *		1				digits, ' '
 *		2				digits, '\0', ' '
 *		3				digits, '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	Nothing is written when len cannot even hold the terminator.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos;		/* next byte to fill, moving right to left */
	int termlen;		/* bytes used by the terminator */

	/*
	 * refuse fields that are too short for the terminator instead of
	 * writing in front of str
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * convert, least significant digit first. An index is used rather
	 * than a pointer so that nothing ever points in front of str.
	 */
	while (pos >= 0) {
		str[pos--] = (char)('0' + (int)(val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * '0' pad whatever space is left at the front
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (unsigned long long)0)
		return(-1);
	return(0);
}
273#endif
274
275/*
276 * tar_chksm()
277 *	calculate the checksum for a tar block counting the checksum field as
278 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
279 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
280 *	pad headers with 0.
281 * Return:
282 *	unsigned long checksum
283 */
284
285static u_long
286tar_chksm(char *blk, int len)
287{
288	char *stop;
289	char *pt;
290	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
291
292	/*
293	 * add the part of the block before the checksum field
294	 */
295	pt = blk;
296	stop = blk + CHK_OFFSET;
297	while (pt < stop)
298		chksm += (u_long)(*pt++ & 0xff);
299	/*
300	 * move past the checksum field and keep going, spec counts the
301	 * checksum field as the sum of 8 blanks (which is pre-computed as
302	 * BLNKSUM).
303	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
304	 * starts, no point in summing zero's)
305	 */
306	pt += CHK_LEN;
307	stop = blk + len;
308	while (pt < stop)
309		chksm += (u_long)(*pt++ & 0xff);
310	return(chksm);
311}
312
313/*
314 * Routines for old BSD style tar (also made portable to sysV tar)
315 */
316
317/*
318 * tar_id()
319 *	determine if a block given to us is a valid tar header (and not a USTAR
320 *	header). We have to be on the lookout for those pesky blocks of	all
321 *	zero's.
322 * Return:
323 *	0 if a tar header, -1 otherwise
324 */
325
326int
327tar_id(char *blk, int size)
328{
329	HD_TAR *hd;
330	HD_USTAR *uhd;
331
332	if (size < BLKMULT)
333		return(-1);
334	hd = (HD_TAR *)blk;
335	uhd = (HD_USTAR *)blk;
336
337	/*
338	 * check for block of zero's first, a simple and fast test, then make
339	 * sure this is not a ustar header by looking for the ustar magic
340	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
341	 * wrong and create archives missing the \0. Last we check the
342	 * checksum. If this is ok we have to assume it is a valid header.
343	 */
344	if (hd->name[0] == '\0')
345		return(-1);
346	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
347		return(-1);
348	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
349		return(-1);
350	return(0);
351}
352
353/*
354 * tar_opt()
355 *	handle tar format specific -o options
356 * Return:
357 *	0 if ok -1 otherwise
358 */
359
360int
361tar_opt(void)
362{
363	OPLIST *opt;
364
365	while ((opt = opt_next()) != NULL) {
366		if (strcmp(opt->name, TAR_OPTION) ||
367		    strcmp(opt->value, TAR_NODIR)) {
368			tty_warn(1,
369			    "Unknown tar format -o option/value pair %s=%s",
370			    opt->name, opt->value);
371			tty_warn(1,
372			    "%s=%s is the only supported tar format option",
373			    TAR_OPTION, TAR_NODIR);
374			return(-1);
375		}
376
377		/*
378		 * we only support one option, and only when writing
379		 */
380		if ((act != APPND) && (act != ARCHIVE)) {
381			tty_warn(1, "%s=%s is only supported when writing.",
382			    opt->name, opt->value);
383			return(-1);
384		}
385		tar_nodir = 1;
386	}
387	return(0);
388}
389
390
391/*
392 * tar_rd()
393 *	extract the values out of block already determined to be a tar header.
394 *	store the values in the ARCHD parameter.
395 * Return:
396 *	0
397 */
398
399int
400tar_rd(ARCHD *arcn, char *buf)
401{
402	HD_TAR *hd;
403	char *pt;
404
405	/*
406	 * we only get proper sized buffers passed to us
407	 */
408	if (tar_id(buf, BLKMULT) < 0)
409		return(-1);
410	memset(arcn, 0, sizeof(*arcn));
411	arcn->org_name = arcn->name;
412	arcn->pat = NULL;
413	arcn->sb.st_nlink = 1;
414
415	/*
416	 * copy out the name and values in the stat buffer
417	 */
418	hd = (HD_TAR *)buf;
419	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
420		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
421		    &gnu_name_string, hd->name);
422		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
423		    &gnu_link_string, hd->linkname);
424	}
425	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
426	    0xfff);
427	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
428	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
429	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
430	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
431	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
432
433	/*
434	 * have to look at the last character, it may be a '/' and that is used
435	 * to encode this as a directory
436	 */
437	pt = &(arcn->name[arcn->nlen - 1]);
438	arcn->pad = 0;
439	arcn->skip = 0;
440	switch(hd->linkflag) {
441	case SYMTYPE:
442		/*
443		 * symbolic link, need to get the link name and set the type in
444		 * the st_mode so -v printing will look correct.
445		 */
446		arcn->type = PAX_SLK;
447		arcn->sb.st_mode |= S_IFLNK;
448		break;
449	case LNKTYPE:
450		/*
451		 * hard link, need to get the link name, set the type in the
452		 * st_mode and st_nlink so -v printing will look better.
453		 */
454		arcn->type = PAX_HLK;
455		arcn->sb.st_nlink = 2;
456
457		/*
458		 * no idea of what type this thing really points at, but
459		 * we set something for printing only.
460		 */
461		arcn->sb.st_mode |= S_IFREG;
462		break;
463	case LONGLINKTYPE:
464		arcn->type = PAX_GLL;
465		/* FALLTHROUGH */
466	case LONGNAMETYPE:
467		/*
468		 * GNU long link/file; we tag these here and let the
469		 * pax internals deal with it -- too ugly otherwise.
470		 */
471		if (hd->linkflag != LONGLINKTYPE)
472			arcn->type = PAX_GLF;
473		arcn->pad = TAR_PAD(arcn->sb.st_size);
474		arcn->skip = arcn->sb.st_size;
475		break;
476	case AREGTYPE:
477	case REGTYPE:
478	case DIRTYPE:	/* see below */
479	default:
480		/*
481		 * If we have a trailing / this is a directory and NOT a file.
482		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
483		 * reported that V7 archives using USTAR directories do exist.
484		 */
485		if (*pt == '/' || hd->linkflag == DIRTYPE) {
486			/*
487			 * it is a directory, set the mode for -v printing
488			 */
489			arcn->type = PAX_DIR;
490			arcn->sb.st_mode |= S_IFDIR;
491			arcn->sb.st_nlink = 2;
492		} else {
493			/*
494			 * have a file that will be followed by data. Set the
495			 * skip value to the size field and calculate the size
496			 * of the padding.
497			 */
498			arcn->type = PAX_REG;
499			arcn->sb.st_mode |= S_IFREG;
500			arcn->pad = TAR_PAD(arcn->sb.st_size);
501			arcn->skip = arcn->sb.st_size;
502		}
503		break;
504	}
505
506	/*
507	 * strip off any trailing slash.
508	 */
509	if (*pt == '/') {
510		*pt = '\0';
511		--arcn->nlen;
512	}
513	return(0);
514}
515
516/*
517 * tar_wr()
518 *	write a tar header for the file specified in the ARCHD to the archive.
519 *	Have to check for file types that cannot be stored and file names that
520 *	are too long. Be careful of the term (last arg) to ul_oct, each field
521 *	of tar has it own spec for the termination character(s).
522 *	ASSUMED: space after header in header block is zero filled
523 * Return:
524 *	0 if file has data to be written after the header, 1 if file has NO
525 *	data to write after the header, -1 if archive write failed
526 */
527
528int
529tar_wr(ARCHD *arcn)
530{
531	HD_TAR *hd;
532	int len;
533	char hdblk[sizeof(HD_TAR)];
534
535	/*
536	 * check for those file system types which tar cannot store
537	 */
538	switch(arcn->type) {
539	case PAX_DIR:
540		/*
541		 * user asked that dirs not be written to the archive
542		 */
543		if (tar_nodir)
544			return(1);
545		break;
546	case PAX_CHR:
547		tty_warn(1, "Tar cannot archive a character device %s",
548		    arcn->org_name);
549		return(1);
550	case PAX_BLK:
551		tty_warn(1,
552		    "Tar cannot archive a block device %s", arcn->org_name);
553		return(1);
554	case PAX_SCK:
555		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
556		return(1);
557	case PAX_FIF:
558		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
559		return(1);
560	case PAX_SLK:
561	case PAX_HLK:
562	case PAX_HRG:
563		if (arcn->ln_nlen > sizeof(hd->linkname)) {
564			tty_warn(1,"Link name too long for tar %s",
565			    arcn->ln_name);
566			return(1);
567		}
568		break;
569	case PAX_REG:
570	case PAX_CTG:
571	default:
572		break;
573	}
574
575	/*
576	 * check file name len, remember extra char for dirs (the / at the end)
577	 */
578	len = arcn->nlen;
579	if (arcn->type == PAX_DIR)
580		++len;
581	if (len >= sizeof(hd->name)) {
582		tty_warn(1, "File name too long for tar %s", arcn->name);
583		return(1);
584	}
585
586	/*
587	 * copy the data out of the ARCHD into the tar header based on the type
588	 * of the file. Remember many tar readers want the unused fields to be
589	 * padded with zero. We set the linkflag field (type), the linkname
590	 * (or zero if not used),the size, and set the padding (if any) to be
591	 * added after the file data (0 for all other types, as they only have
592	 * a header)
593	 */
594	memset(hdblk, 0, sizeof(hdblk));
595	hd = (HD_TAR *)hdblk;
596	strlcpy(hd->name, arcn->name, sizeof(hd->name));
597	arcn->pad = 0;
598
599	if (arcn->type == PAX_DIR) {
600		/*
601		 * directories are the same as files, except have a filename
602		 * that ends with a /, we add the slash here. No data follows,
603		 * dirs, so no pad.
604		 */
605		hd->linkflag = AREGTYPE;
606		hd->name[len-1] = '/';
607		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
608			goto out;
609	} else if (arcn->type == PAX_SLK) {
610		/*
611		 * no data follows this file, so no pad
612		 */
613		hd->linkflag = SYMTYPE;
614		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
615		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
616			goto out;
617	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
618		/*
619		 * no data follows this file, so no pad
620		 */
621		hd->linkflag = LNKTYPE;
622		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
623		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
624			goto out;
625	} else {
626		/*
627		 * data follows this file, so set the pad
628		 */
629		hd->linkflag = AREGTYPE;
630		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
631			tty_warn(1,"File is too large for tar %s",
632			    arcn->org_name);
633			return(1);
634		}
635		arcn->pad = TAR_PAD(arcn->sb.st_size);
636	}
637
638	/*
639	 * copy those fields that are independent of the type
640	 */
641	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
642	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
643	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
644	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
645		goto out;
646
647	/*
648	 * calculate and add the checksum, then write the header. A return of
649	 * 0 tells the caller to now write the file data, 1 says no data needs
650	 * to be written
651	 */
652	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
653	    sizeof(hd->chksum), 3))
654		goto out;			/* XXX Something's wrong here
655						 * because a zero-byte file can
656						 * cause this to be done and
657						 * yet the resulting warning
658						 * seems incorrect */
659
660	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
661		return(-1);
662	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
663		return(-1);
664	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
665		return(0);
666	return(1);
667
668    out:
669	/*
670	 * header field is out of range
671	 */
672	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
673	return(1);
674}
675
676/*
677 * Routines for POSIX ustar
678 */
679
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_strd(void)
{
	return 0;
}
692
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_stwr(void)
{
	return 0;
}
705
706/*
707 * ustar_id()
708 *	determine if a block given to us is a valid ustar header. We have to
709 *	be on the lookout for those pesky blocks of all zero's
710 * Return:
711 *	0 if a ustar header, -1 otherwise
712 */
713
714int
715ustar_id(char *blk, int size)
716{
717	HD_USTAR *hd;
718
719	if (size < BLKMULT)
720		return(-1);
721	hd = (HD_USTAR *)blk;
722
723	/*
724	 * check for block of zero's first, a simple and fast test then check
725	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
726	 * programs are fouled up and create archives missing the \0. Last we
727	 * check the checksum. If ok we have to assume it is a valid header.
728	 */
729	if (hd->name[0] == '\0')
730		return(-1);
731	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
732		return(-1);
733	/* This is GNU tar */
734	if (strncmp(hd->magic, "ustar  ", 8) == 0 && !is_gnutar &&
735	    !seen_gnu_warning) {
736		seen_gnu_warning = 1;
737		tty_warn(0,
738		    "Trying to read GNU tar archive with extensions off");
739	}
740	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
741		return(-1);
742	return(0);
743}
744
745/*
746 * ustar_rd()
747 *	extract the values out of block already determined to be a ustar header.
748 *	store the values in the ARCHD parameter.
749 * Return:
750 *	0
751 */
752
753int
754ustar_rd(ARCHD *arcn, char *buf)
755{
756	HD_USTAR *hd;
757	char *dest;
758	int cnt;
759	dev_t devmajor;
760	dev_t devminor;
761
762	/*
763	 * we only get proper sized buffers
764	 */
765	if (ustar_id(buf, BLKMULT) < 0)
766		return(-1);
767
768	memset(arcn, 0, sizeof(*arcn));
769	arcn->org_name = arcn->name;
770	arcn->pat = NULL;
771	arcn->sb.st_nlink = 1;
772	hd = (HD_USTAR *)buf;
773
774	/*
775	 * see if the filename is split into two parts. if, so joint the parts.
776	 * we copy the prefix first and add a / between the prefix and name.
777	 */
778	dest = arcn->name;
779	if (*(hd->prefix) != '\0') {
780		cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
781		dest += cnt;
782		*dest++ = '/';
783		cnt++;
784	} else {
785		cnt = 0;
786	}
787
788	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
789		arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
790		    &gnu_name_string, hd->name);
791		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
792		    &gnu_link_string, hd->linkname);
793	}
794
795	/*
796	 * follow the spec to the letter. we should only have mode bits, strip
797	 * off all other crud we may be passed.
798	 */
799	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
800	    0xfff);
801	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
802	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
803	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
804
805	/*
806	 * If we can find the ascii names for gname and uname in the password
807	 * and group files we will use the uid's and gid they bind. Otherwise
808	 * we use the uid and gid values stored in the header. (This is what
809	 * the posix spec wants).
810	 */
811	hd->gname[sizeof(hd->gname) - 1] = '\0';
812	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
813		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
814	hd->uname[sizeof(hd->uname) - 1] = '\0';
815	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
816		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
817
818	/*
819	 * set the defaults, these may be changed depending on the file type
820	 */
821	arcn->pad = 0;
822	arcn->skip = 0;
823	arcn->sb.st_rdev = (dev_t)0;
824
825	/*
826	 * set the mode and PAX type according to the typeflag in the header
827	 */
828	switch(hd->typeflag) {
829	case FIFOTYPE:
830		arcn->type = PAX_FIF;
831		arcn->sb.st_mode |= S_IFIFO;
832		break;
833	case DIRTYPE:
834		arcn->type = PAX_DIR;
835		arcn->sb.st_mode |= S_IFDIR;
836		arcn->sb.st_nlink = 2;
837
838		/*
839		 * Some programs that create ustar archives append a '/'
840		 * to the pathname for directories. This clearly violates
841		 * ustar specs, but we will silently strip it off anyway.
842		 */
843		if (arcn->name[arcn->nlen - 1] == '/')
844			arcn->name[--arcn->nlen] = '\0';
845		break;
846	case BLKTYPE:
847	case CHRTYPE:
848		/*
849		 * this type requires the rdev field to be set.
850		 */
851		if (hd->typeflag == BLKTYPE) {
852			arcn->type = PAX_BLK;
853			arcn->sb.st_mode |= S_IFBLK;
854		} else {
855			arcn->type = PAX_CHR;
856			arcn->sb.st_mode |= S_IFCHR;
857		}
858		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
859		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
860		arcn->sb.st_rdev = TODEV(devmajor, devminor);
861		break;
862	case SYMTYPE:
863	case LNKTYPE:
864		if (hd->typeflag == SYMTYPE) {
865			arcn->type = PAX_SLK;
866			arcn->sb.st_mode |= S_IFLNK;
867		} else {
868			arcn->type = PAX_HLK;
869			/*
870			 * so printing looks better
871			 */
872			arcn->sb.st_mode |= S_IFREG;
873			arcn->sb.st_nlink = 2;
874		}
875		break;
876	case LONGLINKTYPE:
877		if (is_gnutar)
878			arcn->type = PAX_GLL;
879		/* FALLTHROUGH */
880	case LONGNAMETYPE:
881		if (is_gnutar) {
882			/*
883			 * GNU long link/file; we tag these here and let the
884			 * pax internals deal with it -- too ugly otherwise.
885			 */
886			if (hd->typeflag != LONGLINKTYPE)
887				arcn->type = PAX_GLF;
888			arcn->pad = TAR_PAD(arcn->sb.st_size);
889			arcn->skip = arcn->sb.st_size;
890		} else {
891			tty_warn(1, "GNU Long %s found in posix ustar archive.",
892			    hd->typeflag == LONGLINKTYPE ? "Link" : "File");
893		}
894		break;
895	case CONTTYPE:
896	case AREGTYPE:
897	case REGTYPE:
898	default:
899		/*
900		 * these types have file data that follows. Set the skip and
901		 * pad fields.
902		 */
903		arcn->type = PAX_REG;
904		arcn->pad = TAR_PAD(arcn->sb.st_size);
905		arcn->skip = arcn->sb.st_size;
906		arcn->sb.st_mode |= S_IFREG;
907		break;
908	}
909	return(0);
910}
911
/*
 * expandname()
 *	store a path name in buf (len bytes large). When a GNU long name is
 *	pending in *gnu_name it is used, then freed and *gnu_name is set to
 *	NULL; otherwise the '\0' terminated string name is used. The result
 *	is truncated to fit and is always '\0' terminated when len > 0.
 * Return:
 *	number of characters actually stored in buf, not counting the '\0'.
 *	This is never more than len - 1, so callers can safely use it to index
 *	buf even when the name had to be truncated.
 */

static int
expandname(char *buf, size_t len,  char **gnu_name, const char *name)
{
	const char *src;
	size_t n;

	src = (*gnu_name != NULL) ? *gnu_name : name;

	/*
	 * bounded copy; unlike the return value of strlcpy() the count kept
	 * here is what was stored, not the length of the source
	 */
	n = 0;
	if (len > 0) {
		while (n < len - 1 && src[n] != '\0') {
			buf[n] = src[n];
			n++;
		}
		buf[n] = '\0';
	}

	/*
	 * a pending GNU long name is consumed by this call
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return (int)n;
}
924
925static void
926longlink(ARCHD *arcn)
927{
928	ARCHD larc;
929
930	memset(&larc, 0, sizeof(larc));
931
932	switch (arcn->type) {
933	case PAX_SLK:
934	case PAX_HRG:
935	case PAX_HLK:
936		larc.type = PAX_GLL;
937		larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
938		    sizeof(larc.ln_name));
939		gnu_hack_string = arcn->ln_name;
940		gnu_hack_len = arcn->ln_nlen + 1;
941		break;
942	default:
943		larc.nlen = strlcpy(larc.name, "././@LongLink",
944		    sizeof(larc.name));
945		gnu_hack_string = arcn->name;
946		gnu_hack_len = arcn->nlen + 1;
947		larc.type = PAX_GLF;
948	}
949	/*
950	 * We need a longlink now.
951	 */
952	ustar_wr(&larc);
953}
954
955/*
956 * ustar_wr()
957 *	write a ustar header for the file specified in the ARCHD to the archive
958 *	Have to check for file types that cannot be stored and file names that
959 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
960 *	'\0' for the termination character (this is different than picky tar)
961 *	ASSUMED: space after header in header block is zero filled
962 * Return:
963 *	0 if file has data to be written after the header, 1 if file has NO
964 *	data to write after the header, -1 if archive write failed
965 */
966
967int
968ustar_wr(ARCHD *arcn)
969{
970	HD_USTAR *hd;
971	char *pt;
972	char hdblk[sizeof(HD_USTAR)];
973	const char *user, *group;
974
975	/*
976	 * check for those file system types ustar cannot store
977	 */
978	if (arcn->type == PAX_SCK) {
979		tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name);
980		return(1);
981	}
982
983	/*
984	 * check the length of the linkname
985	 */
986	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
987	    (arcn->type == PAX_HRG)) &&
988	    (arcn->ln_nlen >= sizeof(hd->linkname))){
989		if (is_gnutar) {
990			longlink(arcn);
991		} else {
992			tty_warn(1, "Link name too long for ustar %s",
993			    arcn->ln_name);
994			return(1);
995		}
996	}
997
998	/*
999	 * split the path name into prefix and name fields (if needed). if
1000	 * pt != arcn->name, the name has to be split
1001	 */
1002	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1003		if (is_gnutar) {
1004			longlink(arcn);
1005			pt = arcn->name;
1006		} else {
1007			tty_warn(1, "File name too long for ustar %s",
1008			    arcn->name);
1009			return(1);
1010		}
1011	}
1012
1013	/*
1014	 * zero out the header so we don't have to worry about zero fill below
1015	 */
1016	memset(hdblk, 0, sizeof(hdblk));
1017	hd = (HD_USTAR *)hdblk;
1018	arcn->pad = 0L;
1019
1020	/*
1021	 * split the name, or zero out the prefix
1022	 */
1023	if (pt != arcn->name) {
1024		/*
1025		 * name was split, pt points at the / where the split is to
1026		 * occur, we remove the / and copy the first part to the prefix
1027		 */
1028		*pt = '\0';
1029		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1030		*pt++ = '/';
1031	}
1032
1033	/*
1034	 * copy the name part. this may be the whole path or the part after
1035	 * the prefix
1036	 */
1037	strlcpy(hd->name, pt, sizeof(hd->name));
1038
1039	/*
1040	 * set the fields in the header that are type dependent
1041	 */
1042	switch(arcn->type) {
1043	case PAX_DIR:
1044		hd->typeflag = DIRTYPE;
1045		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1046			goto out;
1047		break;
1048	case PAX_CHR:
1049	case PAX_BLK:
1050		if (arcn->type == PAX_CHR)
1051			hd->typeflag = CHRTYPE;
1052		else
1053			hd->typeflag = BLKTYPE;
1054		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1055		   sizeof(hd->devmajor), 3) ||
1056		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1057		   sizeof(hd->devminor), 3) ||
1058		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1059			goto out;
1060		break;
1061	case PAX_FIF:
1062		hd->typeflag = FIFOTYPE;
1063		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1064			goto out;
1065		break;
1066	case PAX_GLL:
1067	case PAX_SLK:
1068	case PAX_HLK:
1069	case PAX_HRG:
1070		if (arcn->type == PAX_SLK)
1071			hd->typeflag = SYMTYPE;
1072		else if (arcn->type == PAX_GLL)
1073			hd->typeflag = LONGLINKTYPE;
1074		else
1075			hd->typeflag = LNKTYPE;
1076		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1077		if (ul_oct((u_long)gnu_hack_len, hd->size,
1078		    sizeof(hd->size), 3))
1079			goto out;
1080		break;
1081	case PAX_GLF:
1082	case PAX_REG:
1083	case PAX_CTG:
1084	default:
1085		/*
1086		 * file data with this type, set the padding
1087		 */
1088		if (arcn->type == PAX_GLF) {
1089			hd->typeflag = LONGNAMETYPE;
1090			arcn->pad = TAR_PAD(gnu_hack_len);
1091			if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1092			    sizeof(hd->size), 3)) {
1093				tty_warn(1,"File is too long for ustar %s",
1094				    arcn->org_name);
1095				return(1);
1096			}
1097		} else {
1098			if (arcn->type == PAX_CTG)
1099				hd->typeflag = CONTTYPE;
1100			else
1101				hd->typeflag = REGTYPE;
1102			arcn->pad = TAR_PAD(arcn->sb.st_size);
1103			if (OFFT_OCT(arcn->sb.st_size, hd->size,
1104			    sizeof(hd->size), 3)) {
1105				tty_warn(1,"File is too long for ustar %s",
1106				    arcn->org_name);
1107				return(1);
1108			}
1109		}
1110		break;
1111	}
1112
1113	strncpy(hd->magic, TMAGIC, TMAGLEN);
1114	if (is_gnutar)
1115		hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1116	else
1117		strncpy(hd->version, TVERSION, TVERSLEN);
1118
1119	/*
1120	 * set the remaining fields. Some versions want all 16 bits of mode
1121	 * we better humor them (they really do not meet spec though)....
1122	 */
1123	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1124	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1125	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1126	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1127		goto out;
1128	user = user_from_uid(arcn->sb.st_uid, 1);
1129	group = group_from_gid(arcn->sb.st_gid, 1);
1130	strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1131	strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1132
1133	/*
1134	 * calculate and store the checksum write the header to the archive
1135	 * return 0 tells the caller to now write the file data, 1 says no data
1136	 * needs to be written
1137	 */
1138	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1139	   sizeof(hd->chksum), 3))
1140		goto out;
1141	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1142		return(-1);
1143	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1144		return(-1);
1145	if (gnu_hack_string) {
1146		int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1147		int pad = gnu_hack_len;
1148		gnu_hack_string = NULL;
1149		gnu_hack_len = 0;
1150		if (res < 0)
1151			return(-1);
1152		if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1153			return(-1);
1154	}
1155	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1156		return(0);
1157	return(1);
1158
1159    out:
1160	/*
1161	 * header field is out of range
1162	 */
1163	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1164	return(1);
1165}
1166
1167/*
1168 * name_split()
1169 *	see if the name has to be split for storage in a ustar header. We try
1170 *	to fit the entire name in the name field without splitting if we can.
1171 *	The split point is always at a /
1172 * Return
1173 *	character pointer to split point (always the / that is to be removed
1174 *	if the split is not needed, the points is set to the start of the file
1175 *	name (it would violate the spec to split there). A NULL is returned if
1176 *	the file name is too long
1177 */
1178
1179static char *
1180name_split(char *name, int len)
1181{
1182	char *start;
1183
1184	/*
1185	 * check to see if the file name is small enough to fit in the name
1186	 * field. if so just return a pointer to the name.
1187	 */
1188	if (len < TNMSZ)
1189		return(name);
1190	if (len > (TPFSZ + TNMSZ))
1191		return(NULL);
1192
1193	/*
1194	 * we start looking at the biggest sized piece that fits in the name
1195	 * field. We walk forward looking for a slash to split at. The idea is
1196	 * to find the biggest piece to fit in the name field (or the smallest
1197	 * prefix we can find) (the -1 is correct the biggest piece would
1198	 * include the slash between the two parts that gets thrown away)
1199	 */
1200	start = name + len - TNMSZ;
1201	while ((*start != '\0') && (*start != '/'))
1202		++start;
1203
1204	/*
1205	 * if we hit the end of the string, this name cannot be split, so we
1206	 * cannot store this file.
1207	 */
1208	if (*start == '\0')
1209		return(NULL);
1210	len = start - name;
1211
1212	/*
1213	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1214	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1215	 * the file would then expand on extract to //str. The len == 0 below
1216	 * makes this special case follow the spec to the letter.
1217	 */
1218	if ((len >= TPFSZ) || (len == 0))
1219		return(NULL);
1220
1221	/*
1222	 * ok have a split point, return it to the caller
1223	 */
1224	return(start);
1225}
1226
/*
 * tar_gnutar_exclude_one()
 *	convert one glob pattern into a regular expression line of the form
 *	`/^RE$//' and add it to the list with rep_add().
 *	line holds the pattern and len is its length in bytes; one trailing
 *	newline, if present, is not treated as part of the pattern. `*' is
 *	turned into `.*', `?' into `.', alphanumerics and blanks are copied
 *	unchanged and every other character is escaped with a backslash.
 * Return
 *	0 if the expression was added, -1 if the pattern is longer than
 *	MAXPATHLEN bytes or rep_add() failed
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/*
	 * worst case every pattern byte becomes two bytes; on top of that
	 * come "/^" in front and "$//" plus the terminating NUL behind
	 */
	char sbuf[MAXPATHLEN * 2 + 1 + 5];
	size_t i, j;
	unsigned char c;

	/*
	 * an empty pattern has no last byte to look at
	 */
	if ((len > 0) && (line[len - 1] == '\n'))
		len--;

	/*
	 * sbuf is sized for MAXPATHLEN pattern bytes, anything longer would
	 * run off its end
	 */
	if (len > MAXPATHLEN)
		return (-1);

	sbuf[0] = '/';
	sbuf[1] = '^';
	for (i = 0, j = 2; i < len; i++) {
		/*
		 * convert glob to regexp, escaping everything. The ctype
		 * tests need an unsigned char value, a negative char is not
		 * a valid argument for them.
		 */
		c = (unsigned char)line[i];
		if (c == '*')
			sbuf[j++] = '.';
		else if (c == '?') {
			sbuf[j++] = '.';
			continue;
		} else if (!isalnum(c) && !isblank(c))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}
	sbuf[j++] = '$';
	sbuf[j++] = '/';
	sbuf[j++] = '/';
	sbuf[j] = '\0';
	if (rep_add(sbuf) < 0)
		return (-1);

	return (0);
}
1258
/*
 * deal with GNU tar -X/--exclude-from & --exclude switches.  basically,
 * we take each "glob" (every line of the exclude file, or the argument of
 * --exclude) and build an RE line of the form `/^RE$//' from it, which we
 * pass to rep_add(), which will add an empty replacement (exclusion), for
 * the named files.
 */

/*
 * tar_gnutar_minus_minus_exclude()
 *	add the glob pattern in path (the argument of --exclude) as an
 *	exclusion. A pattern longer than MAXPATHLEN is warned about and then
 *	skipped, since tar_gnutar_exclude_one() builds the expression in a
 *	fixed size buffer that cannot hold it.
 * Return
 *	0 if the pattern was added or skipped as too long, otherwise the
 *	(non-zero) result of tar_gnutar_exclude_one()
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t	len = strlen(path);

	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (0);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1277
/*
 * tar_gnutar_X_compat()
 *	read the file named by path and add every line of it as an exclusion
 *	pattern with tar_gnutar_exclude_one(). A line longer than MAXPATHLEN
 *	is warned about and skipped, the remaining lines are still processed.
 *	The file is closed again before returning.
 * Return
 *	0 if the whole file was processed, -1 if the file could not be opened
 *	or a pattern could not be added (reading stops at that line)
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "can not open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		if (len > MAXPATHLEN) {
			/*
			 * too big for the fixed size buffer the pattern is
			 * converted in, so do not pass it on
			 */
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			continue;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/*
	 * only read from, so a close failure loses nothing
	 */
	(void)fclose(fp);
	return (rval);
}
1305