tar.c revision 1.47.2.7
1/*	$NetBSD: tar.c,v 1.47.2.7 2004/11/12 04:59:17 jmc Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if HAVE_NBTOOL_CONFIG_H
37#include "nbtool_config.h"
38#endif
39
40#include <sys/cdefs.h>
41#if !defined(lint)
42#if 0
43static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44#else
45__RCSID("$NetBSD: tar.c,v 1.47.2.7 2004/11/12 04:59:17 jmc Exp $");
46#endif
47#endif /* not lint */
48
49#include <sys/types.h>
50#include <sys/time.h>
51#include <sys/stat.h>
52#include <sys/param.h>
53
54#include <ctype.h>
55#include <errno.h>
56#include <grp.h>
57#include <pwd.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "pax.h"
64#include "extern.h"
65#include "tar.h"
66
/*
 * Routines for reading, writing, and identifying the headers of the
 * various versions of tar
 */
70
/*
 * Forward declarations for the helper routines private to this file.
 */
static int expandname(char *, size_t,  char **, const char *, size_t);
static void longlink(ARCHD *, int);
static u_long tar_chksm(char *, int);
static char *name_split(char *, int);
static int ul_oct(u_long, char *, int, int);
/* ull_oct() only exists when neither NET2_STAT nor _LP64 is defined */
#if !defined(NET2_STAT) && !defined(_LP64)
static int ull_oct(unsigned long long, char *, int, int);
#endif
static int tar_gnutar_exclude_one(const char *, size_t);
static int check_sum(char *, size_t, char *, size_t, int);
81
82/*
83 * Routines common to all versions of tar
84 */
85
static int tar_nodir;			/* do not write dirs under old tar
					 * (set by tar_opt())
					 */
int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
					 * checks
					 */
static int seen_gnu_warning;		/* Have we warned yet? (set once by
					 * ustar_id())
					 */
static char *gnu_hack_string;		/* ././@LongLink hackery: data that
					 * ustar_wr() writes after the next
					 * header (set by longlink())
					 */
static int gnu_hack_len;		/* len of gnu_hack_string, including
					 * its terminating NUL
					 */
char *gnu_name_string;			/* ././@LongLink hackery name;
					 * consumed and freed by expandname()
					 */
char *gnu_link_string;			/* ././@LongLink hackery link;
					 * consumed and freed by expandname()
					 */
static int gnu_short_trailer;		/* gnu short trailer: set by
					 * tar_trail(), read by tar_endrd()
					 */

/* name stored in the extra header that longlink() writes */
static const char LONG_LINK[] = "././@LongLink";
99
100static int
101check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet)
102{
103	u_long hdck, blck;
104
105	hdck = asc_ul(hd, hdlen, OCT);
106	blck = tar_chksm(bl, bllen);
107
108	if (hdck != blck) {
109		if (!quiet)
110			tty_warn(0, "Header checksum %lo does not match %lo",
111			    hdck, blck);
112		return(-1);
113	}
114	return(0);
115}
116
117
118/*
119 * tar_endwr()
120 *	add the tar trailer of two null blocks
121 * Return:
122 *	0 if ok, -1 otherwise (what wr_skip returns)
123 */
124
125int
126tar_endwr(void)
127{
128	return(wr_skip((off_t)(NULLCNT * BLKMULT)));
129}
130
131/*
132 * tar_endrd()
133 *	no cleanup needed here, just return size of trailer (for append)
134 * Return:
135 *	size of trailer BLKMULT
136 */
137
138off_t
139tar_endrd(void)
140{
141	return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT));
142}
143
144/*
145 * tar_trail()
146 *	Called to determine if a header block is a valid trailer. We are passed
147 *	the block, the in_sync flag (which tells us we are in resync mode;
148 *	looking for a valid header), and cnt (which starts at zero) which is
149 *	used to count the number of empty blocks we have seen so far.
150 * Return:
151 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
152 *	could never contain a header.
153 */
154
155int
156tar_trail(char *buf, int in_resync, int *cnt)
157{
158	int i;
159
160	gnu_short_trailer = 0;
161	/*
162	 * look for all zero, trailer is two consecutive blocks of zero
163	 */
164	for (i = 0; i < BLKMULT; ++i) {
165		if (buf[i] != '\0')
166			break;
167	}
168
169	/*
170	 * if not all zero it is not a trailer, but MIGHT be a header.
171	 */
172	if (i != BLKMULT)
173		return(-1);
174
175	/*
176	 * When given a zero block, we must be careful!
177	 * If we are not in resync mode, check for the trailer. Have to watch
178	 * out that we do not mis-identify file data as the trailer, so we do
179	 * NOT try to id a trailer during resync mode. During resync mode we
180	 * might as well throw this block out since a valid header can NEVER be
181	 * a block of all 0 (we must have a valid file name).
182	 */
183	if (!in_resync) {
184		++*cnt;
185		/*
186		 * old GNU tar (up through 1.13) only writes one block of
187		 * trailers, so we pretend we got another
188		 */
189		if (is_gnutar) {
190			gnu_short_trailer = 1;
191			++*cnt;
192		}
193		if (*cnt >= NULLCNT)
194			return(0);
195	}
196	return(1);
197}
198
/*
 * ul_oct()
 *	render an unsigned long as a '0'-padded octal string filling exactly
 *	len bytes of str. The various tar flavours end their numeric fields
 *	differently and old readers are picky about it, so term selects the
 *	terminator written at the tail of the field:
 *	    3: NUL            2: NUL, space
 *	    1: space          0 (or anything else): space, NUL
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int idx = len - 1;	/* next slot to fill, moving right to left */

	/*
	 * lay down the terminator first, last byte of the field first
	 */
	if (term == 3) {
		str[idx--] = '\0';
	} else if (term == 2) {
		str[idx--] = ' ';
		str[idx--] = '\0';
	} else if (term == 1) {
		str[idx--] = ' ';
	} else {
		str[idx--] = '\0';
		str[idx--] = ' ';
	}

	/*
	 * emit the octal digits, least significant first; at least one digit
	 * is produced when there is room for it
	 */
	while (idx >= 0) {
		str[idx--] = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero fill whatever is left at the front of the field
	 */
	if (idx >= 0)
		memset(str, '0', (size_t)idx + 1);

	return (val == 0) ? 0 : -1;
}
252
253#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	render an unsigned long long as a '0'-padded octal string filling
 *	exactly len bytes of str. As with ul_oct(), term selects the
 *	terminator written at the tail of the field, because old tar readers
 *	are picky about it:
 *	    3: NUL            2: NUL, space
 *	    1: space          0 (or anything else): space, NUL
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int idx = len - 1;	/* next slot to fill, moving right to left */

	/*
	 * lay down the terminator first, last byte of the field first
	 */
	if (term == 3) {
		str[idx--] = '\0';
	} else if (term == 2) {
		str[idx--] = ' ';
		str[idx--] = '\0';
	} else if (term == 1) {
		str[idx--] = ' ';
	} else {
		str[idx--] = '\0';
		str[idx--] = ' ';
	}

	/*
	 * emit the octal digits, least significant first; at least one digit
	 * is produced when there is room for it
	 */
	while (idx >= 0) {
		str[idx--] = (char)('0' + (val & 0x7));
		val >>= 3;
		if (val == 0)
			break;
	}

	/*
	 * zero fill whatever is left at the front of the field
	 */
	if (idx >= 0)
		memset(str, '0', (size_t)idx + 1);

	return (val == 0) ? 0 : -1;
}
307#endif
308
309/*
310 * tar_chksm()
311 *	calculate the checksum for a tar block counting the checksum field as
312 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
313 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
314 *	pad headers with 0.
315 * Return:
316 *	unsigned long checksum
317 */
318
319static u_long
320tar_chksm(char *blk, int len)
321{
322	char *stop;
323	char *pt;
324	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
325
326	/*
327	 * add the part of the block before the checksum field
328	 */
329	pt = blk;
330	stop = blk + CHK_OFFSET;
331	while (pt < stop)
332		chksm += (u_long)(*pt++ & 0xff);
333	/*
334	 * move past the checksum field and keep going, spec counts the
335	 * checksum field as the sum of 8 blanks (which is pre-computed as
336	 * BLNKSUM).
337	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
338	 * starts, no point in summing zero's)
339	 */
340	pt += CHK_LEN;
341	stop = blk + len;
342	while (pt < stop)
343		chksm += (u_long)(*pt++ & 0xff);
344	return(chksm);
345}
346
347/*
348 * Routines for old BSD style tar (also made portable to sysV tar)
349 */
350
351/*
352 * tar_id()
353 *	determine if a block given to us is a valid tar header (and not a USTAR
354 *	header). We have to be on the lookout for those pesky blocks of	all
355 *	zero's.
356 * Return:
357 *	0 if a tar header, -1 otherwise
358 */
359
360int
361tar_id(char *blk, int size)
362{
363	HD_TAR *hd;
364	HD_USTAR *uhd;
365
366	if (size < BLKMULT)
367		return(-1);
368	hd = (HD_TAR *)blk;
369	uhd = (HD_USTAR *)blk;
370
371	/*
372	 * check for block of zero's first, a simple and fast test, then make
373	 * sure this is not a ustar header by looking for the ustar magic
374	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
375	 * wrong and create archives missing the \0. Last we check the
376	 * checksum. If this is ok we have to assume it is a valid header.
377	 */
378	if (hd->name[0] == '\0')
379		return(-1);
380	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
381		return(-1);
382	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1);
383}
384
385/*
386 * tar_opt()
387 *	handle tar format specific -o options
388 * Return:
389 *	0 if ok -1 otherwise
390 */
391
392int
393tar_opt(void)
394{
395	OPLIST *opt;
396
397	while ((opt = opt_next()) != NULL) {
398		if (strcmp(opt->name, TAR_OPTION) ||
399		    strcmp(opt->value, TAR_NODIR)) {
400			tty_warn(1,
401			    "Unknown tar format -o option/value pair %s=%s",
402			    opt->name, opt->value);
403			tty_warn(1,
404			    "%s=%s is the only supported tar format option",
405			    TAR_OPTION, TAR_NODIR);
406			return(-1);
407		}
408
409		/*
410		 * we only support one option, and only when writing
411		 */
412		if ((act != APPND) && (act != ARCHIVE)) {
413			tty_warn(1, "%s=%s is only supported when writing.",
414			    opt->name, opt->value);
415			return(-1);
416		}
417		tar_nodir = 1;
418	}
419	return(0);
420}
421
422
423/*
424 * tar_rd()
425 *	extract the values out of block already determined to be a tar header.
426 *	store the values in the ARCHD parameter.
427 * Return:
428 *	0
429 */
430
431int
432tar_rd(ARCHD *arcn, char *buf)
433{
434	HD_TAR *hd;
435	char *pt;
436
437	/*
438	 * we only get proper sized buffers passed to us
439	 */
440	if (tar_id(buf, BLKMULT) < 0)
441		return(-1);
442	memset(arcn, 0, sizeof(*arcn));
443	arcn->org_name = arcn->name;
444	arcn->pat = NULL;
445	arcn->sb.st_nlink = 1;
446
447	/*
448	 * copy out the name and values in the stat buffer
449	 */
450	hd = (HD_TAR *)buf;
451	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
452		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
453		    &gnu_name_string, hd->name, sizeof(hd->name));
454		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
455		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
456	}
457	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
458	    0xfff);
459	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
460	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
461	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
462	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
463	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
464
465	/*
466	 * have to look at the last character, it may be a '/' and that is used
467	 * to encode this as a directory
468	 */
469	pt = &(arcn->name[arcn->nlen - 1]);
470	arcn->pad = 0;
471	arcn->skip = 0;
472	switch(hd->linkflag) {
473	case SYMTYPE:
474		/*
475		 * symbolic link, need to get the link name and set the type in
476		 * the st_mode so -v printing will look correct.
477		 */
478		arcn->type = PAX_SLK;
479		arcn->sb.st_mode |= S_IFLNK;
480		break;
481	case LNKTYPE:
482		/*
483		 * hard link, need to get the link name, set the type in the
484		 * st_mode and st_nlink so -v printing will look better.
485		 */
486		arcn->type = PAX_HLK;
487		arcn->sb.st_nlink = 2;
488
489		/*
490		 * no idea of what type this thing really points at, but
491		 * we set something for printing only.
492		 */
493		arcn->sb.st_mode |= S_IFREG;
494		break;
495	case LONGLINKTYPE:
496	case LONGNAMETYPE:
497		/*
498		 * GNU long link/file; we tag these here and let the
499		 * pax internals deal with it -- too ugly otherwise.
500		 */
501		if (hd->linkflag != LONGLINKTYPE)
502			arcn->type = PAX_GLF;
503		else
504			arcn->type = PAX_GLL;
505		arcn->pad = TAR_PAD(arcn->sb.st_size);
506		arcn->skip = arcn->sb.st_size;
507		break;
508	case AREGTYPE:
509	case REGTYPE:
510	case DIRTYPE:	/* see below */
511	default:
512		/*
513		 * If we have a trailing / this is a directory and NOT a file.
514		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
515		 * reported that V7 archives using USTAR directories do exist.
516		 */
517		if (*pt == '/' || hd->linkflag == DIRTYPE) {
518			/*
519			 * it is a directory, set the mode for -v printing
520			 */
521			arcn->type = PAX_DIR;
522			arcn->sb.st_mode |= S_IFDIR;
523			arcn->sb.st_nlink = 2;
524		} else {
525			/*
526			 * have a file that will be followed by data. Set the
527			 * skip value to the size field and calculate the size
528			 * of the padding.
529			 */
530			arcn->type = PAX_REG;
531			arcn->sb.st_mode |= S_IFREG;
532			arcn->pad = TAR_PAD(arcn->sb.st_size);
533			arcn->skip = arcn->sb.st_size;
534		}
535		break;
536	}
537
538	/*
539	 * strip off any trailing slash.
540	 */
541	if (*pt == '/') {
542		*pt = '\0';
543		--arcn->nlen;
544	}
545	return(0);
546}
547
548/*
549 * tar_wr()
550 *	write a tar header for the file specified in the ARCHD to the archive.
551 *	Have to check for file types that cannot be stored and file names that
552 *	are too long. Be careful of the term (last arg) to ul_oct, each field
553 *	of tar has it own spec for the termination character(s).
554 *	ASSUMED: space after header in header block is zero filled
555 * Return:
556 *	0 if file has data to be written after the header, 1 if file has NO
557 *	data to write after the header, -1 if archive write failed
558 */
559
560int
561tar_wr(ARCHD *arcn)
562{
563	HD_TAR *hd;
564	int len;
565	char hdblk[sizeof(HD_TAR)];
566
567	/*
568	 * check for those file system types which tar cannot store
569	 */
570	switch(arcn->type) {
571	case PAX_DIR:
572		/*
573		 * user asked that dirs not be written to the archive
574		 */
575		if (tar_nodir)
576			return(1);
577		break;
578	case PAX_CHR:
579		tty_warn(1, "Tar cannot archive a character device %s",
580		    arcn->org_name);
581		return(1);
582	case PAX_BLK:
583		tty_warn(1,
584		    "Tar cannot archive a block device %s", arcn->org_name);
585		return(1);
586	case PAX_SCK:
587		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
588		return(1);
589	case PAX_FIF:
590		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
591		return(1);
592	case PAX_SLK:
593	case PAX_HLK:
594	case PAX_HRG:
595		if (arcn->ln_nlen > sizeof(hd->linkname)) {
596			tty_warn(1,"Link name too long for tar %s",
597			    arcn->ln_name);
598			return(1);
599		}
600		break;
601	case PAX_REG:
602	case PAX_CTG:
603	default:
604		break;
605	}
606
607	/*
608	 * check file name len, remember extra char for dirs (the / at the end)
609	 */
610	len = arcn->nlen;
611	if (arcn->type == PAX_DIR)
612		++len;
613	if (len >= sizeof(hd->name)) {
614		tty_warn(1, "File name too long for tar %s", arcn->name);
615		return(1);
616	}
617
618	/*
619	 * copy the data out of the ARCHD into the tar header based on the type
620	 * of the file. Remember many tar readers want the unused fields to be
621	 * padded with zero. We set the linkflag field (type), the linkname
622	 * (or zero if not used),the size, and set the padding (if any) to be
623	 * added after the file data (0 for all other types, as they only have
624	 * a header)
625	 */
626	memset(hdblk, 0, sizeof(hdblk));
627	hd = (HD_TAR *)hdblk;
628	strlcpy(hd->name, arcn->name, sizeof(hd->name));
629	arcn->pad = 0;
630
631	if (arcn->type == PAX_DIR) {
632		/*
633		 * directories are the same as files, except have a filename
634		 * that ends with a /, we add the slash here. No data follows,
635		 * dirs, so no pad.
636		 */
637		hd->linkflag = AREGTYPE;
638		hd->name[len-1] = '/';
639		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
640			goto out;
641	} else if (arcn->type == PAX_SLK) {
642		/*
643		 * no data follows this file, so no pad
644		 */
645		hd->linkflag = SYMTYPE;
646		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
647		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
648			goto out;
649	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
650		/*
651		 * no data follows this file, so no pad
652		 */
653		hd->linkflag = LNKTYPE;
654		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
655		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
656			goto out;
657	} else {
658		/*
659		 * data follows this file, so set the pad
660		 */
661		hd->linkflag = AREGTYPE;
662		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
663			tty_warn(1,"File is too large for tar %s",
664			    arcn->org_name);
665			return(1);
666		}
667		arcn->pad = TAR_PAD(arcn->sb.st_size);
668	}
669
670	/*
671	 * copy those fields that are independent of the type
672	 */
673	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
674	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
675	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
676	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
677		goto out;
678
679	/*
680	 * calculate and add the checksum, then write the header. A return of
681	 * 0 tells the caller to now write the file data, 1 says no data needs
682	 * to be written
683	 */
684	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
685	    sizeof(hd->chksum), 3))
686		goto out;			/* XXX Something's wrong here
687						 * because a zero-byte file can
688						 * cause this to be done and
689						 * yet the resulting warning
690						 * seems incorrect */
691
692	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
693		return(-1);
694	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
695		return(-1);
696	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
697		return(0);
698	return(1);
699
700    out:
701	/*
702	 * header field is out of range
703	 */
704	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
705	return(1);
706}
707
708/*
709 * Routines for POSIX ustar
710 */
711
/*
 * ustar_strd()
 *	hook called to initialise a ustar read; there is nothing to set up
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_strd(void)
{
	/* no per-archive state to prepare */
	return 0;
}
724
/*
 * ustar_stwr()
 *	hook called to initialise a ustar write; there is nothing to set up
 * Return:
 *	0 if ok, -1 otherwise (this implementation always returns 0)
 */

int
ustar_stwr(void)
{
	/* no per-archive state to prepare */
	return 0;
}
737
738/*
739 * ustar_id()
740 *	determine if a block given to us is a valid ustar header. We have to
741 *	be on the lookout for those pesky blocks of all zero's
742 * Return:
743 *	0 if a ustar header, -1 otherwise
744 */
745
746int
747ustar_id(char *blk, int size)
748{
749	HD_USTAR *hd;
750
751	if (size < BLKMULT)
752		return(-1);
753	hd = (HD_USTAR *)blk;
754
755	/*
756	 * check for block of zero's first, a simple and fast test then check
757	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
758	 * programs are fouled up and create archives missing the \0. Last we
759	 * check the checksum. If ok we have to assume it is a valid header.
760	 */
761	if (hd->name[0] == '\0')
762		return(-1);
763	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
764		return(-1);
765	/* This is GNU tar */
766	if (strncmp(hd->magic, "ustar  ", 8) == 0 && !is_gnutar &&
767	    !seen_gnu_warning) {
768		seen_gnu_warning = 1;
769		tty_warn(0,
770		    "Trying to read GNU tar archive with extensions off");
771	}
772	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0);
773}
774
775/*
776 * ustar_rd()
777 *	extract the values out of block already determined to be a ustar header.
778 *	store the values in the ARCHD parameter.
779 * Return:
780 *	0
781 */
782
783int
784ustar_rd(ARCHD *arcn, char *buf)
785{
786	HD_USTAR *hd;
787	char *dest;
788	int cnt;
789	dev_t devmajor;
790	dev_t devminor;
791
792	/*
793	 * we only get proper sized buffers
794	 */
795	if (ustar_id(buf, BLKMULT) < 0)
796		return(-1);
797
798	memset(arcn, 0, sizeof(*arcn));
799	arcn->org_name = arcn->name;
800	arcn->pat = NULL;
801	arcn->sb.st_nlink = 1;
802	hd = (HD_USTAR *)buf;
803
804	/*
805	 * see if the filename is split into two parts. if, so joint the parts.
806	 * we copy the prefix first and add a / between the prefix and name.
807	 */
808	dest = arcn->name;
809	if (*(hd->prefix) != '\0') {
810		cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
811		dest += cnt;
812		*dest++ = '/';
813		cnt++;
814	} else {
815		cnt = 0;
816	}
817
818	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
819		arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
820		    &gnu_name_string, hd->name, sizeof(hd->name)) + cnt;
821		arcn->ln_nlen = expandname(arcn->ln_name,
822		    sizeof(arcn->ln_name), &gnu_link_string, hd->linkname,
823		    sizeof(hd->linkname));
824	}
825
826	/*
827	 * follow the spec to the letter. we should only have mode bits, strip
828	 * off all other crud we may be passed.
829	 */
830	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
831	    0xfff);
832	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
833	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
834	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
835
836	/*
837	 * If we can find the ascii names for gname and uname in the password
838	 * and group files we will use the uid's and gid they bind. Otherwise
839	 * we use the uid and gid values stored in the header. (This is what
840	 * the posix spec wants).
841	 */
842	hd->gname[sizeof(hd->gname) - 1] = '\0';
843	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
844		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
845	hd->uname[sizeof(hd->uname) - 1] = '\0';
846	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
847		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
848
849	/*
850	 * set the defaults, these may be changed depending on the file type
851	 */
852	arcn->pad = 0;
853	arcn->skip = 0;
854	arcn->sb.st_rdev = (dev_t)0;
855
856	/*
857	 * set the mode and PAX type according to the typeflag in the header
858	 */
859	switch(hd->typeflag) {
860	case FIFOTYPE:
861		arcn->type = PAX_FIF;
862		arcn->sb.st_mode |= S_IFIFO;
863		break;
864	case DIRTYPE:
865		arcn->type = PAX_DIR;
866		arcn->sb.st_mode |= S_IFDIR;
867		arcn->sb.st_nlink = 2;
868
869		/*
870		 * Some programs that create ustar archives append a '/'
871		 * to the pathname for directories. This clearly violates
872		 * ustar specs, but we will silently strip it off anyway.
873		 */
874		if (arcn->name[arcn->nlen - 1] == '/')
875			arcn->name[--arcn->nlen] = '\0';
876		break;
877	case BLKTYPE:
878	case CHRTYPE:
879		/*
880		 * this type requires the rdev field to be set.
881		 */
882		if (hd->typeflag == BLKTYPE) {
883			arcn->type = PAX_BLK;
884			arcn->sb.st_mode |= S_IFBLK;
885		} else {
886			arcn->type = PAX_CHR;
887			arcn->sb.st_mode |= S_IFCHR;
888		}
889		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
890		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
891		arcn->sb.st_rdev = TODEV(devmajor, devminor);
892		break;
893	case SYMTYPE:
894	case LNKTYPE:
895		if (hd->typeflag == SYMTYPE) {
896			arcn->type = PAX_SLK;
897			arcn->sb.st_mode |= S_IFLNK;
898		} else {
899			arcn->type = PAX_HLK;
900			/*
901			 * so printing looks better
902			 */
903			arcn->sb.st_mode |= S_IFREG;
904			arcn->sb.st_nlink = 2;
905		}
906		break;
907	case LONGLINKTYPE:
908	case LONGNAMETYPE:
909		if (is_gnutar) {
910			/*
911			 * GNU long link/file; we tag these here and let the
912			 * pax internals deal with it -- too ugly otherwise.
913			 */
914			if (hd->typeflag != LONGLINKTYPE)
915				arcn->type = PAX_GLF;
916			else
917				arcn->type = PAX_GLL;
918			arcn->pad = TAR_PAD(arcn->sb.st_size);
919			arcn->skip = arcn->sb.st_size;
920		} else {
921			tty_warn(1, "GNU Long %s found in posix ustar archive.",
922			    hd->typeflag == LONGLINKTYPE ? "Link" : "File");
923		}
924		break;
925	case CONTTYPE:
926	case AREGTYPE:
927	case REGTYPE:
928	default:
929		/*
930		 * these types have file data that follows. Set the skip and
931		 * pad fields.
932		 */
933		arcn->type = PAX_REG;
934		arcn->pad = TAR_PAD(arcn->sb.st_size);
935		arcn->skip = arcn->sb.st_size;
936		arcn->sb.st_mode |= S_IFREG;
937		break;
938	}
939	return(0);
940}
941
/*
 * expandname()
 *	store a path name in buf (capacity len bytes, terminator included).
 *	If a GNU long name is pending in *gnu_name it is used, then freed and
 *	*gnu_name is reset to NULL. Otherwise the name comes from the header
 *	field name, which is nlen bytes wide and need NOT be NUL terminated
 *	when it is completely full; nothing beyond the field is read.
 *	The result is truncated to fit and is always NUL terminated (unless
 *	len is 0, in which case buf is left untouched).
 * Return:
 *	number of characters actually stored in buf, not counting the NUL
 */

static int
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t nlen)
{
	const char *src;
	const char *nul;
	size_t srclen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		srclen = strlen(src);
	} else {
		/*
		 * measure the field without running off its end
		 */
		src = name;
		nul = memchr(name, '\0', nlen);
		srclen = (nul != NULL) ? (size_t)(nul - name) : nlen;
	}

	if (len == 0) {
		/* no room for anything, not even the terminator */
		srclen = 0;
	} else {
		if (srclen >= len)
			srclen = len - 1;
		memcpy(buf, src, srclen);
		buf[srclen] = '\0';
	}

	/*
	 * a pending GNU name is good for one use only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return (int)srclen;
}
957
958static void
959longlink(ARCHD *arcn, int type)
960{
961	ARCHD larc;
962
963	(void)memset(&larc, 0, sizeof(larc));
964
965	larc.type = type;
966	larc.nlen = strlcpy(larc.name, LONG_LINK, sizeof(larc.name));
967
968	switch (type) {
969	case PAX_GLL:
970		gnu_hack_string = arcn->ln_name;
971		gnu_hack_len = arcn->ln_nlen + 1;
972		break;
973	case PAX_GLF:
974		gnu_hack_string = arcn->name;
975		gnu_hack_len = arcn->nlen + 1;
976		break;
977	default:
978		errx(1, "Invalid type in GNU longlink %d\n", type);
979	}
980
981	/*
982	 * We need a longlink now.
983	 */
984	ustar_wr(&larc);
985}
986
987/*
988 * ustar_wr()
989 *	write a ustar header for the file specified in the ARCHD to the archive
990 *	Have to check for file types that cannot be stored and file names that
991 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
992 *	'\0' for the termination character (this is different than picky tar)
993 *	ASSUMED: space after header in header block is zero filled
994 * Return:
995 *	0 if file has data to be written after the header, 1 if file has NO
996 *	data to write after the header, -1 if archive write failed
997 */
998
999int
1000ustar_wr(ARCHD *arcn)
1001{
1002	HD_USTAR *hd;
1003	char *pt;
1004	char hdblk[sizeof(HD_USTAR)];
1005	const char *user, *group;
1006
1007	switch (arcn->type) {
1008	case PAX_SCK:
1009		/*
1010		 * check for those file system types ustar cannot store
1011		 */
1012		if (!is_gnutar)
1013			tty_warn(1, "Ustar cannot archive a socket %s",
1014			    arcn->org_name);
1015		return(1);
1016
1017	case PAX_SLK:
1018	case PAX_HLK:
1019	case PAX_HRG:
1020		/*
1021		 * check the length of the linkname
1022		 */
1023		if (arcn->ln_nlen >= sizeof(hd->linkname)) {
1024			if (is_gnutar) {
1025				longlink(arcn, PAX_GLL);
1026			} else {
1027				tty_warn(1, "Link name too long for ustar %s",
1028				    arcn->ln_name);
1029				return(1);
1030			}
1031		}
1032		break;
1033	default:
1034		break;
1035	}
1036
1037	/*
1038	 * split the path name into prefix and name fields (if needed). if
1039	 * pt != arcn->name, the name has to be split
1040	 */
1041	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1042		if (is_gnutar) {
1043			longlink(arcn, PAX_GLF);
1044			pt = arcn->name;
1045		} else {
1046			tty_warn(1, "File name too long for ustar %s",
1047			    arcn->name);
1048			return(1);
1049		}
1050	}
1051
1052	/*
1053	 * zero out the header so we don't have to worry about zero fill below
1054	 */
1055	memset(hdblk, 0, sizeof(hdblk));
1056	hd = (HD_USTAR *)hdblk;
1057	arcn->pad = 0L;
1058
1059	/*
1060	 * split the name, or zero out the prefix
1061	 */
1062	if (pt != arcn->name) {
1063		/*
1064		 * name was split, pt points at the / where the split is to
1065		 * occur, we remove the / and copy the first part to the prefix
1066		 */
1067		*pt = '\0';
1068		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1069		*pt++ = '/';
1070	}
1071
1072	/*
1073	 * copy the name part. this may be the whole path or the part after
1074	 * the prefix
1075	 */
1076	strlcpy(hd->name, pt, sizeof(hd->name));
1077
1078	/*
1079	 * set the fields in the header that are type dependent
1080	 */
1081	switch(arcn->type) {
1082	case PAX_DIR:
1083		hd->typeflag = DIRTYPE;
1084		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1085			goto out;
1086		break;
1087	case PAX_CHR:
1088	case PAX_BLK:
1089		if (arcn->type == PAX_CHR)
1090			hd->typeflag = CHRTYPE;
1091		else
1092			hd->typeflag = BLKTYPE;
1093		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1094		   sizeof(hd->devmajor), 3) ||
1095		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1096		   sizeof(hd->devminor), 3) ||
1097		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1098			goto out;
1099		break;
1100	case PAX_FIF:
1101		hd->typeflag = FIFOTYPE;
1102		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1103			goto out;
1104		break;
1105	case PAX_GLL:
1106	case PAX_SLK:
1107	case PAX_HLK:
1108	case PAX_HRG:
1109		if (arcn->type == PAX_SLK)
1110			hd->typeflag = SYMTYPE;
1111		else if (arcn->type == PAX_GLL)
1112			hd->typeflag = LONGLINKTYPE;
1113		else
1114			hd->typeflag = LNKTYPE;
1115		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1116		if (ul_oct((u_long)gnu_hack_len, hd->size,
1117		    sizeof(hd->size), 3))
1118			goto out;
1119		break;
1120	case PAX_GLF:
1121	case PAX_REG:
1122	case PAX_CTG:
1123	default:
1124		/*
1125		 * file data with this type, set the padding
1126		 */
1127		if (arcn->type == PAX_GLF) {
1128			hd->typeflag = LONGNAMETYPE;
1129			arcn->pad = TAR_PAD(gnu_hack_len);
1130			if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1131			    sizeof(hd->size), 3)) {
1132				tty_warn(1,"File is too long for ustar %s",
1133				    arcn->org_name);
1134				return(1);
1135			}
1136		} else {
1137			if (arcn->type == PAX_CTG)
1138				hd->typeflag = CONTTYPE;
1139			else
1140				hd->typeflag = REGTYPE;
1141			arcn->pad = TAR_PAD(arcn->sb.st_size);
1142			if (OFFT_OCT(arcn->sb.st_size, hd->size,
1143			    sizeof(hd->size), 3)) {
1144				tty_warn(1,"File is too long for ustar %s",
1145				    arcn->org_name);
1146				return(1);
1147			}
1148		}
1149		break;
1150	}
1151
1152	strncpy(hd->magic, TMAGIC, TMAGLEN);
1153	if (is_gnutar)
1154		hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1155	else
1156		strncpy(hd->version, TVERSION, TVERSLEN);
1157
1158	/*
1159	 * set the remaining fields. Some versions want all 16 bits of mode
1160	 * we better humor them (they really do not meet spec though)....
1161	 */
1162	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1163	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1164	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1165	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1166		goto out;
1167	user = user_from_uid(arcn->sb.st_uid, 1);
1168	group = group_from_gid(arcn->sb.st_gid, 1);
1169	strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1170	strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1171
1172	/*
1173	 * calculate and store the checksum write the header to the archive
1174	 * return 0 tells the caller to now write the file data, 1 says no data
1175	 * needs to be written
1176	 */
1177	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1178	   sizeof(hd->chksum), 3))
1179		goto out;
1180	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1181		return(-1);
1182	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1183		return(-1);
1184	if (gnu_hack_string) {
1185		int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1186		int pad = gnu_hack_len;
1187		gnu_hack_string = NULL;
1188		gnu_hack_len = 0;
1189		if (res < 0)
1190			return(-1);
1191		if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1192			return(-1);
1193	}
1194	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1195		return(0);
1196	return(1);
1197
1198    out:
1199	/*
1200	 * header field is out of range
1201	 */
1202	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1203	return(1);
1204}
1205
1206/*
1207 * name_split()
1208 *	see if the name has to be split for storage in a ustar header. We try
1209 *	to fit the entire name in the name field without splitting if we can.
1210 *	The split point is always at a /
1211 * Return
1212 *	character pointer to split point (always the / that is to be removed
1213 *	if the split is not needed, the points is set to the start of the file
1214 *	name (it would violate the spec to split there). A NULL is returned if
1215 *	the file name is too long
1216 */
1217
1218static char *
1219name_split(char *name, int len)
1220{
1221	char *start;
1222
1223	/*
1224	 * check to see if the file name is small enough to fit in the name
1225	 * field. if so just return a pointer to the name.
1226	 */
1227	if (len < TNMSZ)
1228		return(name);
1229	/*
1230	 * GNU tar does not honor the prefix+name mode if the magic
1231	 * is not "ustar\0". So in GNU tar compatibility mode, we don't
1232	 * split the filename into prefix+name because we are setting
1233	 * the magic to "ustar " as GNU tar does. This of course will
1234	 * end up creating a LongLink record in cases where it does not
1235	 * really need do, but we are behaving like GNU tar after all.
1236	 */
1237	if (is_gnutar || len > (TPFSZ + TNMSZ))
1238		return(NULL);
1239
1240	/*
1241	 * we start looking at the biggest sized piece that fits in the name
1242	 * field. We walk forward looking for a slash to split at. The idea is
1243	 * to find the biggest piece to fit in the name field (or the smallest
1244	 * prefix we can find) (the -1 is correct the biggest piece would
1245	 * include the slash between the two parts that gets thrown away)
1246	 */
1247	start = name + len - TNMSZ;
1248	while ((*start != '\0') && (*start != '/'))
1249		++start;
1250
1251	/*
1252	 * if we hit the end of the string, this name cannot be split, so we
1253	 * cannot store this file.
1254	 */
1255	if (*start == '\0')
1256		return(NULL);
1257	len = start - name;
1258
1259	/*
1260	 * NOTE: /str where the length of str == TNMSZ cannot be stored under
1261	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1262	 * the file would then expand on extract to //str. The len == 0 below
1263	 * makes this special case follow the spec to the letter.
1264	 */
1265	if ((len >= TPFSZ) || (len == 0))
1266		return(NULL);
1267
1268	/*
1269	 * ok have a split point, return it to the caller
1270	 */
1271	return(start);
1272}
1273
1274/*
1275 * convert a glob into a RE, and add it to the list.  we convert to
1276 * four different RE's (because we're using BRE's and can't use |
1277 * alternation :-() with this padding:
1278 *	.*\/ and $
1279 *	.*\/ and \/.*
1280 *	^ and $
1281 *	^ and \/.*
1282 */
1283static int
1284tar_gnutar_exclude_one(const char *line, size_t len)
1285{
1286	/* 2 * buffer len + nul */
1287	char sbuf[MAXPATHLEN * 2 + 1];
1288	/* + / + // + .*""/\/ + \/.* */
1289	char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4];
1290	int i, j;
1291
1292	if (line[len - 1] == '\n')
1293		len--;
1294	strncpy(sbuf, ".*" "\\/", j = 4);
1295	for (i = 0; i < len; i++) {
1296		/*
1297		 * convert glob to regexp, escaping everything
1298		 */
1299		if (line[i] == '*')
1300			sbuf[j++] = '.';
1301		else if (line[i] == '?') {
1302			sbuf[j++] = '.';
1303			continue;
1304		} else if (!isalnum((unsigned char)line[i]) &&
1305		    !isblank((unsigned char)line[i]))
1306			sbuf[j++] = '\\';
1307		sbuf[j++] = line[i];
1308	}
1309	sbuf[j] = '\0';
1310	/* don't need the .*\/ ones if we start with /, i guess */
1311	if (line[0] != '/') {
1312		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf);
1313		if (rep_add(rabuf) < 0)
1314			return (-1);
1315		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf);
1316		if (rep_add(rabuf) < 0)
1317			return (-1);
1318	}
1319
1320	(void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf);
1321	if (rep_add(rabuf) < 0)
1322		return (-1);
1323	(void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf);
1324	if (rep_add(rabuf) < 0)
1325		return (-1);
1326
1327	return (0);
1328}
1329
/*
 * tar_gnutar_minus_minus_exclude()
 *	deal with the GNU tar --exclude switch.  the "glob" given in path is
 *	handed to tar_gnutar_exclude_one(), which builds RE lines of the
 *	form `/^RE$//' from it and passes them to rep_add(), which will add
 *	an empty replacement (exclusion) for the named files.
 * Return
 *	the result of tar_gnutar_exclude_one(), or -1 if path is longer than
 *	MAXPATHLEN (a warning is printed and nothing is added).
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t	len = strlen(path);

	/*
	 * the conversion buffers are sized for MAXPATHLEN characters of
	 * input, so an over-long glob must not be passed on.
	 */
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1348
/*
 * tar_gnutar_X_compat()
 *	deal with the GNU tar -X/--exclude-from switch.  path names a file
 *	that is read line by line; each line is passed as a "glob" to
 *	tar_gnutar_exclude_one().
 * Return
 *	0 if every line was handled, -1 if the file cannot be opened, a line
 *	is longer than MAXPATHLEN, or tar_gnutar_exclude_one() reports a
 *	failure. Reading stops at the first failing line.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rv = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		/*
		 * the conversion buffers are sized for MAXPATHLEN characters
		 * of input, so an over-long line must not be passed on.
		 */
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rv = -1;
			break;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rv = -1;
			break;
		}
	}

	/* the stream was only read from; release it on every path */
	(void)fclose(fp);
	return (rv);
}
1376