tar.c revision 1.53
1/*	$NetBSD: tar.c,v 1.53 2004/05/11 17:12:26 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. Neither the name of the University nor the names of its contributors
20 *    may be used to endorse or promote products derived from this software
21 *    without specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
24 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
27 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
28 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
29 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
32 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 * SUCH DAMAGE.
34 */
35
36#if HAVE_NBTOOL_CONFIG_H
37#include "nbtool_config.h"
38#endif
39
40#include <sys/cdefs.h>
41#if !defined(lint)
42#if 0
43static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44#else
45__RCSID("$NetBSD: tar.c,v 1.53 2004/05/11 17:12:26 christos Exp $");
46#endif
47#endif /* not lint */
48
49#include <sys/types.h>
50#include <sys/time.h>
51#include <sys/stat.h>
52#include <sys/param.h>
53
54#include <ctype.h>
55#include <errno.h>
56#include <grp.h>
57#include <pwd.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "pax.h"
64#include "extern.h"
65#include "tar.h"
66
/*
 * Routines for reading, writing, and identifying the headers of various
 * versions of tar
 */
70
71static int expandname(char *, size_t,  char **, const char *, size_t);
72static void longlink(ARCHD *);
73static u_long tar_chksm(char *, int);
74static char *name_split(char *, int);
75static int ul_oct(u_long, char *, int, int);
76#if !defined(NET2_STAT) && !defined(_LP64)
77static int ull_oct(unsigned long long, char *, int, int);
78#endif
79static int tar_gnutar_exclude_one(const char *, size_t);
80static int check_sum(char *, size_t, char *, size_t, int);
81
82/*
83 * Routines common to all versions of tar
84 */
85
86static int tar_nodir;			/* do not write dirs under old tar */
87int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
89					 * checks
90					 */
91static int seen_gnu_warning;		/* Have we warned yet? */
92static char *gnu_hack_string;		/* ././@LongLink hackery */
93static int gnu_hack_len;		/* len of gnu_hack_string */
94char *gnu_name_string;			/* ././@LongLink hackery name */
95char *gnu_link_string;			/* ././@LongLink hackery link */
96static int gnu_short_trailer;		/* gnu short trailer */
97
98#ifdef _PAX_
99char DEV_0[] = "/dev/rst0";
100char DEV_1[] = "/dev/rst1";
101char DEV_4[] = "/dev/rst4";
102char DEV_5[] = "/dev/rst5";
103char DEV_7[] = "/dev/rst7";
104char DEV_8[] = "/dev/rst8";
105#endif
106
107static int
108check_sum(char *hd, size_t hdlen, char *bl, size_t bllen, int quiet)
109{
110	u_long hdck, blck;
111
112	hdck = asc_ul(hd, hdlen, OCT);
113	blck = tar_chksm(bl, bllen);
114
115	if (hdck != blck) {
116		if (!quiet)
117			tty_warn(0, "Header checksum %lo does not match %lo",
118			    hdck, blck);
119		return(-1);
120	}
121	return(0);
122}
123
124
125/*
126 * tar_endwr()
127 *	add the tar trailer of two null blocks
128 * Return:
129 *	0 if ok, -1 otherwise (what wr_skip returns)
130 */
131
132int
133tar_endwr(void)
134{
135	return(wr_skip((off_t)(NULLCNT * BLKMULT)));
136}
137
138/*
139 * tar_endrd()
140 *	no cleanup needed here, just return size of trailer (for append)
141 * Return:
142 *	size of trailer BLKMULT
143 */
144
145off_t
146tar_endrd(void)
147{
148	return((off_t)((gnu_short_trailer ? 1 : NULLCNT) * BLKMULT));
149}
150
151/*
152 * tar_trail()
153 *	Called to determine if a header block is a valid trailer. We are passed
154 *	the block, the in_sync flag (which tells us we are in resync mode;
155 *	looking for a valid header), and cnt (which starts at zero) which is
156 *	used to count the number of empty blocks we have seen so far.
157 * Return:
158 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
159 *	could never contain a header.
160 */
161
162int
163tar_trail(char *buf, int in_resync, int *cnt)
164{
165	int i;
166
167	gnu_short_trailer = 0;
168	/*
169	 * look for all zero, trailer is two consecutive blocks of zero
170	 */
171	for (i = 0; i < BLKMULT; ++i) {
172		if (buf[i] != '\0')
173			break;
174	}
175
176	/*
177	 * if not all zero it is not a trailer, but MIGHT be a header.
178	 */
179	if (i != BLKMULT)
180		return(-1);
181
182	/*
183	 * When given a zero block, we must be careful!
184	 * If we are not in resync mode, check for the trailer. Have to watch
185	 * out that we do not mis-identify file data as the trailer, so we do
186	 * NOT try to id a trailer during resync mode. During resync mode we
187	 * might as well throw this block out since a valid header can NEVER be
188	 * a block of all 0 (we must have a valid file name).
189	 */
190	if (!in_resync) {
191		++*cnt;
192		/*
193		 * old GNU tar (up through 1.13) only writes one block of
194		 * trailers, so we pretend we got another
195		 */
196		if (is_gnutar) {
197			gnu_short_trailer = 1;
198			++*cnt;
199		}
200		if (*cnt >= NULLCNT)
201			return(0);
202	}
203	return(1);
204}
205
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		3	digits followed by '\0'
 *		2	digits followed by '\0' and ' '
 *		1	digits followed by ' '
 *		0	(and anything else) digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise. -1 is also
 *	returned, with str untouched, when len cannot even hold the
 *	terminator.
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int termlen;
	int i;

	/*
	 * refuse fields too short for their terminator instead of writing in
	 * front of the buffer
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	i = len - 1;
	switch(term) {
	case 3:
		str[i--] = '\0';
		break;
	case 2:
		str[i--] = ' ';
		str[i--] = '\0';
		break;
	case 1:
		str[i--] = ' ';
		break;
	case 0:
	default:
		str[i--] = '\0';
		str[i--] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards. an index is
	 * used (not a pointer) so we never form an address below str.
	 */
	while (i >= 0) {
		str[i--] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * zero pad whatever space is left at the front
	 */
	while (i >= 0)
		str[i--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
259
260#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	convert an unsigned long long to an octal string. one of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields. term selects which kind to use:
 *		3	digits followed by '\0'
 *		2	digits followed by '\0' and ' '
 *		1	digits followed by ' '
 *		0	(and anything else) digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise. -1 is also
 *	returned, with str untouched, when len cannot even hold the
 *	terminator.
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int termlen;
	int i;

	/*
	 * refuse fields too short for their terminator instead of writing in
	 * front of the buffer
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	i = len - 1;
	switch(term) {
	case 3:
		str[i--] = '\0';
		break;
	case 2:
		str[i--] = ' ';
		str[i--] = '\0';
		break;
	case 1:
		str[i--] = ' ';
		break;
	case 0:
	default:
		str[i--] = '\0';
		str[i--] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards. an index is
	 * used (not a pointer) so we never form an address below str.
	 */
	while (i >= 0) {
		str[i--] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == 0)
			break;
	}

	/*
	 * zero pad whatever space is left at the front
	 */
	while (i >= 0)
		str[i--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (unsigned long long)0)
		return(-1);
	return(0);
}
314#endif
315
316/*
317 * tar_chksm()
318 *	calculate the checksum for a tar block counting the checksum field as
319 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
320 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
321 *	pad headers with 0.
322 * Return:
323 *	unsigned long checksum
324 */
325
326static u_long
327tar_chksm(char *blk, int len)
328{
329	char *stop;
330	char *pt;
331	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
332
333	/*
334	 * add the part of the block before the checksum field
335	 */
336	pt = blk;
337	stop = blk + CHK_OFFSET;
338	while (pt < stop)
339		chksm += (u_long)(*pt++ & 0xff);
340	/*
341	 * move past the checksum field and keep going, spec counts the
342	 * checksum field as the sum of 8 blanks (which is pre-computed as
343	 * BLNKSUM).
344	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
345	 * starts, no point in summing zero's)
346	 */
347	pt += CHK_LEN;
348	stop = blk + len;
349	while (pt < stop)
350		chksm += (u_long)(*pt++ & 0xff);
351	return(chksm);
352}
353
354/*
355 * Routines for old BSD style tar (also made portable to sysV tar)
356 */
357
358/*
359 * tar_id()
360 *	determine if a block given to us is a valid tar header (and not a USTAR
361 *	header). We have to be on the lookout for those pesky blocks of	all
362 *	zero's.
363 * Return:
364 *	0 if a tar header, -1 otherwise
365 */
366
367int
368tar_id(char *blk, int size)
369{
370	HD_TAR *hd;
371	HD_USTAR *uhd;
372
373	if (size < BLKMULT)
374		return(-1);
375	hd = (HD_TAR *)blk;
376	uhd = (HD_USTAR *)blk;
377
378	/*
379	 * check for block of zero's first, a simple and fast test, then make
380	 * sure this is not a ustar header by looking for the ustar magic
381	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
382	 * wrong and create archives missing the \0. Last we check the
383	 * checksum. If this is ok we have to assume it is a valid header.
384	 */
385	if (hd->name[0] == '\0')
386		return(-1);
387	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
388		return(-1);
389	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 1);
390}
391
392/*
393 * tar_opt()
394 *	handle tar format specific -o options
395 * Return:
396 *	0 if ok -1 otherwise
397 */
398
399int
400tar_opt(void)
401{
402	OPLIST *opt;
403
404	while ((opt = opt_next()) != NULL) {
405		if (strcmp(opt->name, TAR_OPTION) ||
406		    strcmp(opt->value, TAR_NODIR)) {
407			tty_warn(1,
408			    "Unknown tar format -o option/value pair %s=%s",
409			    opt->name, opt->value);
410			tty_warn(1,
411			    "%s=%s is the only supported tar format option",
412			    TAR_OPTION, TAR_NODIR);
413			return(-1);
414		}
415
416		/*
417		 * we only support one option, and only when writing
418		 */
419		if ((act != APPND) && (act != ARCHIVE)) {
420			tty_warn(1, "%s=%s is only supported when writing.",
421			    opt->name, opt->value);
422			return(-1);
423		}
424		tar_nodir = 1;
425	}
426	return(0);
427}
428
429
430/*
431 * tar_rd()
432 *	extract the values out of block already determined to be a tar header.
433 *	store the values in the ARCHD parameter.
434 * Return:
435 *	0
436 */
437
438int
439tar_rd(ARCHD *arcn, char *buf)
440{
441	HD_TAR *hd;
442	char *pt;
443
444	/*
445	 * we only get proper sized buffers passed to us
446	 */
447	if (tar_id(buf, BLKMULT) < 0)
448		return(-1);
449	memset(arcn, 0, sizeof(*arcn));
450	arcn->org_name = arcn->name;
451	arcn->pat = NULL;
452	arcn->sb.st_nlink = 1;
453
454	/*
455	 * copy out the name and values in the stat buffer
456	 */
457	hd = (HD_TAR *)buf;
458	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
459		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
460		    &gnu_name_string, hd->name, sizeof(hd->name));
461		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
462		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
463	}
464	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
465	    0xfff);
466	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
467	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
468	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
469	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
470	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
471
472	/*
473	 * have to look at the last character, it may be a '/' and that is used
474	 * to encode this as a directory
475	 */
476	pt = &(arcn->name[arcn->nlen - 1]);
477	arcn->pad = 0;
478	arcn->skip = 0;
479	switch(hd->linkflag) {
480	case SYMTYPE:
481		/*
482		 * symbolic link, need to get the link name and set the type in
483		 * the st_mode so -v printing will look correct.
484		 */
485		arcn->type = PAX_SLK;
486		arcn->sb.st_mode |= S_IFLNK;
487		break;
488	case LNKTYPE:
489		/*
490		 * hard link, need to get the link name, set the type in the
491		 * st_mode and st_nlink so -v printing will look better.
492		 */
493		arcn->type = PAX_HLK;
494		arcn->sb.st_nlink = 2;
495
496		/*
497		 * no idea of what type this thing really points at, but
498		 * we set something for printing only.
499		 */
500		arcn->sb.st_mode |= S_IFREG;
501		break;
502	case LONGLINKTYPE:
503		arcn->type = PAX_GLL;
504		/* FALLTHROUGH */
505	case LONGNAMETYPE:
506		/*
507		 * GNU long link/file; we tag these here and let the
508		 * pax internals deal with it -- too ugly otherwise.
509		 */
510		if (hd->linkflag != LONGLINKTYPE)
511			arcn->type = PAX_GLF;
512		arcn->pad = TAR_PAD(arcn->sb.st_size);
513		arcn->skip = arcn->sb.st_size;
514		break;
515	case AREGTYPE:
516	case REGTYPE:
517	case DIRTYPE:	/* see below */
518	default:
519		/*
520		 * If we have a trailing / this is a directory and NOT a file.
521		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
522		 * reported that V7 archives using USTAR directories do exist.
523		 */
524		if (*pt == '/' || hd->linkflag == DIRTYPE) {
525			/*
526			 * it is a directory, set the mode for -v printing
527			 */
528			arcn->type = PAX_DIR;
529			arcn->sb.st_mode |= S_IFDIR;
530			arcn->sb.st_nlink = 2;
531		} else {
532			/*
533			 * have a file that will be followed by data. Set the
534			 * skip value to the size field and calculate the size
535			 * of the padding.
536			 */
537			arcn->type = PAX_REG;
538			arcn->sb.st_mode |= S_IFREG;
539			arcn->pad = TAR_PAD(arcn->sb.st_size);
540			arcn->skip = arcn->sb.st_size;
541		}
542		break;
543	}
544
545	/*
546	 * strip off any trailing slash.
547	 */
548	if (*pt == '/') {
549		*pt = '\0';
550		--arcn->nlen;
551	}
552	return(0);
553}
554
555/*
556 * tar_wr()
557 *	write a tar header for the file specified in the ARCHD to the archive.
558 *	Have to check for file types that cannot be stored and file names that
559 *	are too long. Be careful of the term (last arg) to ul_oct, each field
560 *	of tar has it own spec for the termination character(s).
561 *	ASSUMED: space after header in header block is zero filled
562 * Return:
563 *	0 if file has data to be written after the header, 1 if file has NO
564 *	data to write after the header, -1 if archive write failed
565 */
566
567int
568tar_wr(ARCHD *arcn)
569{
570	HD_TAR *hd;
571	int len;
572	char hdblk[sizeof(HD_TAR)];
573
574	/*
575	 * check for those file system types which tar cannot store
576	 */
577	switch(arcn->type) {
578	case PAX_DIR:
579		/*
580		 * user asked that dirs not be written to the archive
581		 */
582		if (tar_nodir)
583			return(1);
584		break;
585	case PAX_CHR:
586		tty_warn(1, "Tar cannot archive a character device %s",
587		    arcn->org_name);
588		return(1);
589	case PAX_BLK:
590		tty_warn(1,
591		    "Tar cannot archive a block device %s", arcn->org_name);
592		return(1);
593	case PAX_SCK:
594		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
595		return(1);
596	case PAX_FIF:
597		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
598		return(1);
599	case PAX_SLK:
600	case PAX_HLK:
601	case PAX_HRG:
602		if (arcn->ln_nlen > sizeof(hd->linkname)) {
603			tty_warn(1,"Link name too long for tar %s",
604			    arcn->ln_name);
605			return(1);
606		}
607		break;
608	case PAX_REG:
609	case PAX_CTG:
610	default:
611		break;
612	}
613
614	/*
615	 * check file name len, remember extra char for dirs (the / at the end)
616	 */
617	len = arcn->nlen;
618	if (arcn->type == PAX_DIR)
619		++len;
620	if (len >= sizeof(hd->name)) {
621		tty_warn(1, "File name too long for tar %s", arcn->name);
622		return(1);
623	}
624
625	/*
626	 * copy the data out of the ARCHD into the tar header based on the type
627	 * of the file. Remember many tar readers want the unused fields to be
628	 * padded with zero. We set the linkflag field (type), the linkname
629	 * (or zero if not used),the size, and set the padding (if any) to be
630	 * added after the file data (0 for all other types, as they only have
631	 * a header)
632	 */
633	memset(hdblk, 0, sizeof(hdblk));
634	hd = (HD_TAR *)hdblk;
635	strlcpy(hd->name, arcn->name, sizeof(hd->name));
636	arcn->pad = 0;
637
638	if (arcn->type == PAX_DIR) {
639		/*
640		 * directories are the same as files, except have a filename
641		 * that ends with a /, we add the slash here. No data follows,
642		 * dirs, so no pad.
643		 */
644		hd->linkflag = AREGTYPE;
645		hd->name[len-1] = '/';
646		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
647			goto out;
648	} else if (arcn->type == PAX_SLK) {
649		/*
650		 * no data follows this file, so no pad
651		 */
652		hd->linkflag = SYMTYPE;
653		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
654		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
655			goto out;
656	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
657		/*
658		 * no data follows this file, so no pad
659		 */
660		hd->linkflag = LNKTYPE;
661		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
662		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
663			goto out;
664	} else {
665		/*
666		 * data follows this file, so set the pad
667		 */
668		hd->linkflag = AREGTYPE;
669		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
670			tty_warn(1,"File is too large for tar %s",
671			    arcn->org_name);
672			return(1);
673		}
674		arcn->pad = TAR_PAD(arcn->sb.st_size);
675	}
676
677	/*
678	 * copy those fields that are independent of the type
679	 */
680	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
681	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
682	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
683	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
684		goto out;
685
686	/*
687	 * calculate and add the checksum, then write the header. A return of
688	 * 0 tells the caller to now write the file data, 1 says no data needs
689	 * to be written
690	 */
691	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
692	    sizeof(hd->chksum), 3))
693		goto out;			/* XXX Something's wrong here
694						 * because a zero-byte file can
695						 * cause this to be done and
696						 * yet the resulting warning
697						 * seems incorrect */
698
699	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
700		return(-1);
701	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
702		return(-1);
703	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
704		return(0);
705	return(1);
706
707    out:
708	/*
709	 * header field is out of range
710	 */
711	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
712	return(1);
713}
714
715/*
716 * Routines for POSIX ustar
717 */
718
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare.
 * Return:
 *	always 0 (ok)
 */

int
ustar_strd(void)
{
	return 0;
}
731
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare.
 * Return:
 *	always 0 (ok)
 */

int
ustar_stwr(void)
{
	return 0;
}
744
745/*
746 * ustar_id()
747 *	determine if a block given to us is a valid ustar header. We have to
748 *	be on the lookout for those pesky blocks of all zero's
749 * Return:
750 *	0 if a ustar header, -1 otherwise
751 */
752
753int
754ustar_id(char *blk, int size)
755{
756	HD_USTAR *hd;
757
758	if (size < BLKMULT)
759		return(-1);
760	hd = (HD_USTAR *)blk;
761
762	/*
763	 * check for block of zero's first, a simple and fast test then check
764	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
765	 * programs are fouled up and create archives missing the \0. Last we
766	 * check the checksum. If ok we have to assume it is a valid header.
767	 */
768	if (hd->name[0] == '\0')
769		return(-1);
770	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
771		return(-1);
772	/* This is GNU tar */
773	if (strncmp(hd->magic, "ustar  ", 8) == 0 && !is_gnutar &&
774	    !seen_gnu_warning) {
775		seen_gnu_warning = 1;
776		tty_warn(0,
777		    "Trying to read GNU tar archive with extensions off");
778	}
779	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT, 0);
780}
781
782/*
783 * ustar_rd()
784 *	extract the values out of block already determined to be a ustar header.
785 *	store the values in the ARCHD parameter.
786 * Return:
787 *	0
788 */
789
790int
791ustar_rd(ARCHD *arcn, char *buf)
792{
793	HD_USTAR *hd;
794	char *dest;
795	int cnt;
796	dev_t devmajor;
797	dev_t devminor;
798
799	/*
800	 * we only get proper sized buffers
801	 */
802	if (ustar_id(buf, BLKMULT) < 0)
803		return(-1);
804
805	memset(arcn, 0, sizeof(*arcn));
806	arcn->org_name = arcn->name;
807	arcn->pat = NULL;
808	arcn->sb.st_nlink = 1;
809	hd = (HD_USTAR *)buf;
810
811	/*
812	 * see if the filename is split into two parts. if, so joint the parts.
813	 * we copy the prefix first and add a / between the prefix and name.
814	 */
815	dest = arcn->name;
816	if (*(hd->prefix) != '\0') {
817		cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
818		dest += cnt;
819		*dest++ = '/';
820		cnt++;
821	} else {
822		cnt = 0;
823	}
824
825	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
826		arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
827		    &gnu_name_string, hd->name, sizeof(hd->name)) + cnt;
828		arcn->ln_nlen = expandname(arcn->ln_name,
829		    sizeof(arcn->ln_name), &gnu_link_string, hd->linkname,
830		    sizeof(hd->linkname));
831	}
832
833	/*
834	 * follow the spec to the letter. we should only have mode bits, strip
835	 * off all other crud we may be passed.
836	 */
837	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
838	    0xfff);
839	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
840	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
841	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
842
843	/*
844	 * If we can find the ascii names for gname and uname in the password
845	 * and group files we will use the uid's and gid they bind. Otherwise
846	 * we use the uid and gid values stored in the header. (This is what
847	 * the posix spec wants).
848	 */
849	hd->gname[sizeof(hd->gname) - 1] = '\0';
850	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
851		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
852	hd->uname[sizeof(hd->uname) - 1] = '\0';
853	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
854		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
855
856	/*
857	 * set the defaults, these may be changed depending on the file type
858	 */
859	arcn->pad = 0;
860	arcn->skip = 0;
861	arcn->sb.st_rdev = (dev_t)0;
862
863	/*
864	 * set the mode and PAX type according to the typeflag in the header
865	 */
866	switch(hd->typeflag) {
867	case FIFOTYPE:
868		arcn->type = PAX_FIF;
869		arcn->sb.st_mode |= S_IFIFO;
870		break;
871	case DIRTYPE:
872		arcn->type = PAX_DIR;
873		arcn->sb.st_mode |= S_IFDIR;
874		arcn->sb.st_nlink = 2;
875
876		/*
877		 * Some programs that create ustar archives append a '/'
878		 * to the pathname for directories. This clearly violates
879		 * ustar specs, but we will silently strip it off anyway.
880		 */
881		if (arcn->name[arcn->nlen - 1] == '/')
882			arcn->name[--arcn->nlen] = '\0';
883		break;
884	case BLKTYPE:
885	case CHRTYPE:
886		/*
887		 * this type requires the rdev field to be set.
888		 */
889		if (hd->typeflag == BLKTYPE) {
890			arcn->type = PAX_BLK;
891			arcn->sb.st_mode |= S_IFBLK;
892		} else {
893			arcn->type = PAX_CHR;
894			arcn->sb.st_mode |= S_IFCHR;
895		}
896		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
897		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
898		arcn->sb.st_rdev = TODEV(devmajor, devminor);
899		break;
900	case SYMTYPE:
901	case LNKTYPE:
902		if (hd->typeflag == SYMTYPE) {
903			arcn->type = PAX_SLK;
904			arcn->sb.st_mode |= S_IFLNK;
905		} else {
906			arcn->type = PAX_HLK;
907			/*
908			 * so printing looks better
909			 */
910			arcn->sb.st_mode |= S_IFREG;
911			arcn->sb.st_nlink = 2;
912		}
913		break;
914	case LONGLINKTYPE:
915		if (is_gnutar)
916			arcn->type = PAX_GLL;
917		/* FALLTHROUGH */
918	case LONGNAMETYPE:
919		if (is_gnutar) {
920			/*
921			 * GNU long link/file; we tag these here and let the
922			 * pax internals deal with it -- too ugly otherwise.
923			 */
924			if (hd->typeflag != LONGLINKTYPE)
925				arcn->type = PAX_GLF;
926			arcn->pad = TAR_PAD(arcn->sb.st_size);
927			arcn->skip = arcn->sb.st_size;
928		} else {
929			tty_warn(1, "GNU Long %s found in posix ustar archive.",
930			    hd->typeflag == LONGLINKTYPE ? "Link" : "File");
931		}
932		break;
933	case CONTTYPE:
934	case AREGTYPE:
935	case REGTYPE:
936	default:
937		/*
938		 * these types have file data that follows. Set the skip and
939		 * pad fields.
940		 */
941		arcn->type = PAX_REG;
942		arcn->pad = TAR_PAD(arcn->sb.st_size);
943		arcn->skip = arcn->sb.st_size;
944		arcn->sb.st_mode |= S_IFREG;
945		break;
946	}
947	return(0);
948}
949
/*
 * expandname()
 *	store a name into buf (at most len bytes, always '\0' terminated when
 *	len is not 0). If a GNU long name is pending in *gnu_name it is used,
 *	then freed and *gnu_name is reset to NULL. Otherwise the name comes
 *	from the header field name, which is nlen bytes wide and need NOT be
 *	'\0' terminated; it is never read past nlen bytes.
 * Return:
 *	number of characters stored in buf, not counting the terminator (a
 *	name that did not fit is truncated and the truncated length is
 *	returned)
 */

static int
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t nlen)
{
	const char *src;
	const char *nul;
	size_t srclen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		srclen = strlen(src);
	} else {
		/*
		 * the field may use all nlen bytes without a terminator, so
		 * the search for its end is bounded to the field itself
		 */
		src = name;
		nul = memchr(name, '\0', nlen);
		srclen = (nul != NULL) ? (size_t)(nul - name) : nlen;
	}

	if (len == 0) {
		/* no room at all, not even for the terminator */
		srclen = 0;
	} else {
		if (srclen >= len)
			srclen = len - 1;
		memcpy(buf, src, srclen);
		buf[srclen] = '\0';
	}

	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return (int)srclen;
}
965
966static void
967longlink(ARCHD *arcn)
968{
969	ARCHD larc;
970
971	memset(&larc, 0, sizeof(larc));
972
973	switch (arcn->type) {
974	case PAX_SLK:
975	case PAX_HRG:
976	case PAX_HLK:
977		larc.type = PAX_GLL;
978		larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
979		    sizeof(larc.ln_name));
980		gnu_hack_string = arcn->ln_name;
981		gnu_hack_len = arcn->ln_nlen + 1;
982		break;
983	default:
984		larc.nlen = strlcpy(larc.name, "././@LongLink",
985		    sizeof(larc.name));
986		gnu_hack_string = arcn->name;
987		gnu_hack_len = arcn->nlen + 1;
988		larc.type = PAX_GLF;
989	}
990	/*
991	 * We need a longlink now.
992	 */
993	ustar_wr(&larc);
994}
995
996/*
997 * ustar_wr()
998 *	write a ustar header for the file specified in the ARCHD to the archive
999 *	Have to check for file types that cannot be stored and file names that
1000 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
1001 *	'\0' for the termination character (this is different than picky tar)
1002 *	ASSUMED: space after header in header block is zero filled
1003 * Return:
1004 *	0 if file has data to be written after the header, 1 if file has NO
1005 *	data to write after the header, -1 if archive write failed
1006 */
1007
1008int
1009ustar_wr(ARCHD *arcn)
1010{
1011	HD_USTAR *hd;
1012	char *pt;
1013	char hdblk[sizeof(HD_USTAR)];
1014	const char *user, *group;
1015
1016	/*
1017	 * check for those file system types ustar cannot store
1018	 */
1019	if (arcn->type == PAX_SCK) {
1020		if (!is_gnutar)
1021			tty_warn(1, "Ustar cannot archive a socket %s",
1022			    arcn->org_name);
1023		return(1);
1024	}
1025
1026	/*
1027	 * check the length of the linkname
1028	 */
1029	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
1030	    (arcn->type == PAX_HRG)) &&
1031	    (arcn->ln_nlen >= sizeof(hd->linkname))){
1032		if (is_gnutar) {
1033			longlink(arcn);
1034		} else {
1035			tty_warn(1, "Link name too long for ustar %s",
1036			    arcn->ln_name);
1037			return(1);
1038		}
1039	}
1040
1041	/*
1042	 * split the path name into prefix and name fields (if needed). if
1043	 * pt != arcn->name, the name has to be split
1044	 */
1045	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1046		if (is_gnutar) {
1047			longlink(arcn);
1048			pt = arcn->name;
1049		} else {
1050			tty_warn(1, "File name too long for ustar %s",
1051			    arcn->name);
1052			return(1);
1053		}
1054	}
1055
1056	/*
1057	 * zero out the header so we don't have to worry about zero fill below
1058	 */
1059	memset(hdblk, 0, sizeof(hdblk));
1060	hd = (HD_USTAR *)hdblk;
1061	arcn->pad = 0L;
1062
1063	/*
1064	 * split the name, or zero out the prefix
1065	 */
1066	if (pt != arcn->name) {
1067		/*
1068		 * name was split, pt points at the / where the split is to
1069		 * occur, we remove the / and copy the first part to the prefix
1070		 */
1071		*pt = '\0';
1072		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1073		*pt++ = '/';
1074	}
1075
1076	/*
1077	 * copy the name part. this may be the whole path or the part after
1078	 * the prefix
1079	 */
1080	strlcpy(hd->name, pt, sizeof(hd->name));
1081
1082	/*
1083	 * set the fields in the header that are type dependent
1084	 */
1085	switch(arcn->type) {
1086	case PAX_DIR:
1087		hd->typeflag = DIRTYPE;
1088		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1089			goto out;
1090		break;
1091	case PAX_CHR:
1092	case PAX_BLK:
1093		if (arcn->type == PAX_CHR)
1094			hd->typeflag = CHRTYPE;
1095		else
1096			hd->typeflag = BLKTYPE;
1097		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1098		   sizeof(hd->devmajor), 3) ||
1099		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1100		   sizeof(hd->devminor), 3) ||
1101		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1102			goto out;
1103		break;
1104	case PAX_FIF:
1105		hd->typeflag = FIFOTYPE;
1106		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1107			goto out;
1108		break;
1109	case PAX_GLL:
1110	case PAX_SLK:
1111	case PAX_HLK:
1112	case PAX_HRG:
1113		if (arcn->type == PAX_SLK)
1114			hd->typeflag = SYMTYPE;
1115		else if (arcn->type == PAX_GLL)
1116			hd->typeflag = LONGLINKTYPE;
1117		else
1118			hd->typeflag = LNKTYPE;
1119		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1120		if (ul_oct((u_long)gnu_hack_len, hd->size,
1121		    sizeof(hd->size), 3))
1122			goto out;
1123		break;
1124	case PAX_GLF:
1125	case PAX_REG:
1126	case PAX_CTG:
1127	default:
1128		/*
1129		 * file data with this type, set the padding
1130		 */
1131		if (arcn->type == PAX_GLF) {
1132			hd->typeflag = LONGNAMETYPE;
1133			arcn->pad = TAR_PAD(gnu_hack_len);
1134			if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1135			    sizeof(hd->size), 3)) {
1136				tty_warn(1,"File is too long for ustar %s",
1137				    arcn->org_name);
1138				return(1);
1139			}
1140		} else {
1141			if (arcn->type == PAX_CTG)
1142				hd->typeflag = CONTTYPE;
1143			else
1144				hd->typeflag = REGTYPE;
1145			arcn->pad = TAR_PAD(arcn->sb.st_size);
1146			if (OFFT_OCT(arcn->sb.st_size, hd->size,
1147			    sizeof(hd->size), 3)) {
1148				tty_warn(1,"File is too long for ustar %s",
1149				    arcn->org_name);
1150				return(1);
1151			}
1152		}
1153		break;
1154	}
1155
1156	strncpy(hd->magic, TMAGIC, TMAGLEN);
1157	if (is_gnutar)
1158		hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1159	else
1160		strncpy(hd->version, TVERSION, TVERSLEN);
1161
1162	/*
1163	 * set the remaining fields. Some versions want all 16 bits of mode
1164	 * we better humor them (they really do not meet spec though)....
1165	 */
1166	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1167	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1168	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1169	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1170		goto out;
1171	user = user_from_uid(arcn->sb.st_uid, 1);
1172	group = group_from_gid(arcn->sb.st_gid, 1);
1173	strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1174	strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1175
1176	/*
1177	 * calculate and store the checksum write the header to the archive
1178	 * return 0 tells the caller to now write the file data, 1 says no data
1179	 * needs to be written
1180	 */
1181	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1182	   sizeof(hd->chksum), 3))
1183		goto out;
1184	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1185		return(-1);
1186	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1187		return(-1);
1188	if (gnu_hack_string) {
1189		int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1190		int pad = gnu_hack_len;
1191		gnu_hack_string = NULL;
1192		gnu_hack_len = 0;
1193		if (res < 0)
1194			return(-1);
1195		if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1196			return(-1);
1197	}
1198	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1199		return(0);
1200	return(1);
1201
1202    out:
1203	/*
1204	 * header field is out of range
1205	 */
1206	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1207	return(1);
1208}
1209
1210/*
1211 * name_split()
1212 *	see if the name has to be split for storage in a ustar header. We try
1213 *	to fit the entire name in the name field without splitting if we can.
1214 *	The split point is always at a /
1215 * Return
1216 *	character pointer to split point (always the / that is to be removed
1217 *	if the split is not needed, the points is set to the start of the file
1218 *	name (it would violate the spec to split there). A NULL is returned if
1219 *	the file name is too long
1220 */
1221
1222static char *
1223name_split(char *name, int len)
1224{
1225	char *start;
1226
1227	/*
1228	 * check to see if the file name is small enough to fit in the name
1229	 * field. if so just return a pointer to the name.
1230	 */
1231	if (len < TNMSZ)
1232		return(name);
1233	if (len > (TPFSZ + TNMSZ))
1234		return(NULL);
1235
1236	/*
1237	 * we start looking at the biggest sized piece that fits in the name
1238	 * field. We walk forward looking for a slash to split at. The idea is
1239	 * to find the biggest piece to fit in the name field (or the smallest
1240	 * prefix we can find) (the -1 is correct the biggest piece would
1241	 * include the slash between the two parts that gets thrown away)
1242	 */
1243	start = name + len - TNMSZ;
1244	while ((*start != '\0') && (*start != '/'))
1245		++start;
1246
1247	/*
1248	 * if we hit the end of the string, this name cannot be split, so we
1249	 * cannot store this file.
1250	 */
1251	if (*start == '\0')
1252		return(NULL);
1253	len = start - name;
1254
1255	/*
1256	 * NOTE: /str where the length of str == TNMSZ cannot be stored under
1257	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1258	 * the file would then expand on extract to //str. The len == 0 below
1259	 * makes this special case follow the spec to the letter.
1260	 */
1261	if ((len >= TPFSZ) || (len == 0))
1262		return(NULL);
1263
1264	/*
1265	 * ok have a split point, return it to the caller
1266	 */
1267	return(start);
1268}
1269
1270/*
1271 * convert a glob into a RE, and add it to the list.  we convert to
1272 * four different RE's (because we're using BRE's and can't use |
1273 * alternation :-() with this padding:
1274 *	.*\/ and $
1275 *	.*\/ and \/.*
1276 *	^ and $
1277 *	^ and \/.*
1278 */
1279static int
1280tar_gnutar_exclude_one(const char *line, size_t len)
1281{
1282	/* 2 * buffer len + nul */
1283	char sbuf[MAXPATHLEN * 2 + 1];
1284	/* + / + // + .*""/\/ + \/.* */
1285	char rabuf[MAXPATHLEN * 2 + 1 + 1 + 2 + 4 + 4];
1286	int i, j;
1287
1288	if (line[len - 1] == '\n')
1289		len--;
1290	strncpy(sbuf, ".*" "\\/", j = 4);
1291	for (i = 0; i < len; i++) {
1292		/*
1293		 * convert glob to regexp, escaping everything
1294		 */
1295		if (line[i] == '*')
1296			sbuf[j++] = '.';
1297		else if (line[i] == '?') {
1298			sbuf[j++] = '.';
1299			continue;
1300		} else if (!isalnum(line[i]) && !isblank(line[i]))
1301			sbuf[j++] = '\\';
1302		sbuf[j++] = line[i];
1303	}
1304	/* don't need the .*\/ ones if we start with /, i guess */
1305	if (line[0] != '/') {
1306		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s$//", sbuf);
1307		if (rep_add(rabuf) < 0)
1308			return (-1);
1309		(void)snprintf(rabuf, sizeof rabuf, "/.*\\/%s\\/.*//", sbuf);
1310		if (rep_add(rabuf) < 0)
1311			return (-1);
1312	}
1313
1314	(void)snprintf(rabuf, sizeof rabuf, "/^%s$//", sbuf);
1315	if (rep_add(rabuf) < 0)
1316		return (-1);
1317	(void)snprintf(rabuf, sizeof rabuf, "/^%s\\/.*//", sbuf);
1318	if (rep_add(rabuf) < 0)
1319		return (-1);
1320
1321	return (0);
1322}
1323
/*
 * deal with GNU tar -X/--exclude-from & --exclude switches.  basically,
 * we go through each line of the file, building a string from the "glob"
 * lines in the file into RE lines, of the form `/^RE$//', which we pass
 * to rep_add(), which will add an empty replacement (exclusion), for the
 * named files.
 *
 * tar_gnutar_minus_minus_exclude() handles one --exclude pattern.
 * Return
 *	0 when the pattern was added, or was empty and ignored; -1 when it
 *	is longer than MAXPATHLEN or tar_gnutar_exclude_one() fails.
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t	len = strlen(path);

	/*
	 * nothing to exclude; tar_gnutar_exclude_one() indexes
	 * line[len - 1], so it must not be handed an empty string.
	 */
	if (len == 0)
		return (0);

	/*
	 * the conversion buffers are sized from MAXPATHLEN: refuse the
	 * pattern rather than warn and convert it anyway.
	 */
	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1342
/*
 * tar_gnutar_X_compat()
 *	read the file named by path and hand every line to
 *	tar_gnutar_exclude_one() as an exclusion pattern.
 * Return
 *	0 when every line was accepted; -1 when the file cannot be opened,
 *	a line is longer than MAXPATHLEN, or a pattern is rejected. The
 *	file is closed on every path once it has been opened.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rv = 0;
	size_t len;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "cannot open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;
		/*
		 * the conversion buffers are sized from MAXPATHLEN: stop
		 * here rather than warn and convert the line anyway.
		 */
		if (len > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rv = -1;
			break;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rv = -1;
			break;
		}
	}

	/* read-only stream: nothing useful to do if the close fails */
	(void)fclose(fp);
	return (rv);
}
1370