tar.c revision 1.35
1/*	$NetBSD: tar.c,v 1.35 2003/01/09 17:22:26 christos Exp $	*/
2
3/*-
4 * Copyright (c) 1992 Keith Muller.
5 * Copyright (c) 1992, 1993
6 *	The Regents of the University of California.  All rights reserved.
7 *
8 * This code is derived from software contributed to Berkeley by
9 * Keith Muller of the University of California, San Diego.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 *    notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 *    notice, this list of conditions and the following disclaimer in the
18 *    documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 *    must display the following acknowledgement:
21 *	This product includes software developed by the University of
22 *	California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 *    may be used to endorse or promote products derived from this software
25 *    without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 */
39
40#include <sys/cdefs.h>
41#if defined(__RCSID) && !defined(lint)
42#if 0
43static char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
44#else
45__RCSID("$NetBSD: tar.c,v 1.35 2003/01/09 17:22:26 christos Exp $");
46#endif
47#endif /* not lint */
48
49#include <sys/types.h>
50#include <sys/time.h>
51#include <sys/stat.h>
52#include <sys/param.h>
53
54#include <ctype.h>
55#include <errno.h>
56#include <grp.h>
57#include <pwd.h>
58#include <stdio.h>
59#include <stdlib.h>
60#include <string.h>
61#include <unistd.h>
62
63#include "pax.h"
64#include "extern.h"
65#include "tar.h"
66
/*
 * Routines for reading, writing, and identifying the headers of the various
 * versions of tar
 */
70
71static int expandname(char *, size_t,  char **, const char *);
72static void longlink(ARCHD *);
73static u_long tar_chksm(char *, int);
74static char *name_split(char *, int);
75static int ul_oct(u_long, char *, int, int);
76#if !defined(NET2_STAT) && !defined(_LP64)
77static int ull_oct(unsigned long long, char *, int, int);
78#endif
79static int tar_gnutar_exclude_one(const char *, size_t);
80static int check_sum(char *, size_t, char *, size_t);
81
82/*
83 * Routines common to all versions of tar
84 */
85
static int tar_nodir;			/* do not write dirs under old tar;
					 * set by tar_opt(), tested by
					 * tar_wr()
					 */
int is_gnutar;				/* behave like gnu tar; enable gnu
					 * extensions and skip end-of-volume
					 * checks
					 */
static int seen_gnu_warning;		/* Have we warned yet? (GNU magic
					 * seen by ustar_id() with
					 * extensions off)
					 */
static char *gnu_hack_string;		/* ././@LongLink hackery: pending
					 * long name/link, set by longlink()
					 * and written out by ustar_wr()
					 */
static int gnu_hack_len;		/* len of gnu_hack_string, counting
					 * its trailing NUL
					 */
char *gnu_name_string;			/* ././@LongLink hackery name;
					 * consumed and freed by expandname()
					 */
char *gnu_link_string;			/* ././@LongLink hackery link;
					 * consumed and freed by expandname()
					 */
96
97static int
98check_sum(char *hd, size_t hdlen, char *bl, size_t bllen)
99{
100	u_long hdck, blck;
101
102	hdck = asc_ul(hd, hdlen, OCT);
103	blck = tar_chksm(bl, bllen);
104
105	if (hdck != blck) {
106		tty_warn(0, "Header checksum %lo does not match %lo",
107		    hdck, blck);
108		return(-1);
109	}
110	return(0);
111}
112
113
114/*
115 * tar_endwr()
116 *	add the tar trailer of two null blocks
117 * Return:
118 *	0 if ok, -1 otherwise (what wr_skip returns)
119 */
120
121int
122tar_endwr(void)
123{
124	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
125}
126
127/*
128 * tar_endrd()
129 *	no cleanup needed here, just return size of trailer (for append)
130 * Return:
131 *	size of trailer (2 * BLKMULT)
132 */
133
134off_t
135tar_endrd(void)
136{
137	return((off_t)(NULLCNT*BLKMULT));
138}
139
140/*
141 * tar_trail()
142 *	Called to determine if a header block is a valid trailer. We are passed
143 *	the block, the in_sync flag (which tells us we are in resync mode;
144 *	looking for a valid header), and cnt (which starts at zero) which is
145 *	used to count the number of empty blocks we have seen so far.
146 * Return:
147 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
148 *	could never contain a header.
149 */
150
151int
152tar_trail(char *buf, int in_resync, int *cnt)
153{
154	int i;
155
156	/*
157	 * look for all zero, trailer is two consecutive blocks of zero
158	 */
159	for (i = 0; i < BLKMULT; ++i) {
160		if (buf[i] != '\0') {
161			fprintf(stderr, "non zero at %d\n", i);
162			break;
163		}
164	}
165
166	/*
167	 * if not all zero it is not a trailer, but MIGHT be a header.
168	 */
169	if (i != BLKMULT)
170		return(-1);
171
172	/*
173	 * When given a zero block, we must be careful!
174	 * If we are not in resync mode, check for the trailer. Have to watch
175	 * out that we do not mis-identify file data as the trailer, so we do
176	 * NOT try to id a trailer during resync mode. During resync mode we
177	 * might as well throw this block out since a valid header can NEVER be
178	 * a block of all 0 (we must have a valid file name).
179	 */
180	if (!in_resync && (++*cnt >= NULLCNT))
181		return(0);
182	return(1);
183}
184
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *		0 (and any unlisted value): digits, ' ', '\0'
 *		1: digits, ' '
 *		2: digits, '\0', ' '
 *		3: digits, '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise (including when
 *	len is too small to hold even the terminator; str is then untouched)
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int pos;
	int termlen;

	/*
	 * refuse fields that cannot hold the terminator; filling them would
	 * write in front of str
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string.
	 * The field is filled from its last byte towards its first.
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * emit the octal digits, least significant first, while there is room
	 */
	while (pos >= 0) {
		str[pos--] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == (u_long)0)
			break;
	}

	/*
	 * zero pad whatever is left at the front of the field
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
238
239#if !defined(NET2_STAT) && !defined(_LP64)
/*
 * ull_oct()
 *	convert an unsigned long long to an octal string. one of many oddball
 *	field termination characters are used by the various versions of tar
 *	in the different fields. term selects which kind to use:
 *		0 (and any unlisted value): digits, ' ', '\0'
 *		1: digits, ' '
 *		2: digits, '\0', ' '
 *		3: digits, '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 * Return:
 *	0 if the number fit into the string, -1 otherwise (including when
 *	len is too small to hold even the terminator; str is then untouched)
 */

static int
ull_oct(unsigned long long val, char *str, int len, int term)
{
	int pos;
	int termlen;

	/*
	 * refuse fields that cannot hold the terminator; filling them would
	 * write in front of str
	 */
	termlen = (term == 1 || term == 3) ? 1 : 2;
	if (len < termlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string.
	 * The field is filled from its last byte towards its first.
	 */
	pos = len - 1;
	switch(term) {
	case 3:
		str[pos--] = '\0';
		break;
	case 2:
		str[pos--] = ' ';
		str[pos--] = '\0';
		break;
	case 1:
		str[pos--] = ' ';
		break;
	case 0:
	default:
		str[pos--] = '\0';
		str[pos--] = ' ';
		break;
	}

	/*
	 * emit the octal digits, least significant first, while there is room
	 */
	while (pos >= 0) {
		str[pos--] = '0' + (char)(val & 0x7);
		if ((val = val >> 3) == 0)
			break;
	}

	/*
	 * zero pad whatever is left at the front of the field
	 */
	while (pos >= 0)
		str[pos--] = '0';

	/*
	 * digits left over means the value did not fit
	 */
	if (val != (unsigned long long)0)
		return(-1);
	return(0);
}
293#endif
294
295/*
296 * tar_chksm()
297 *	calculate the checksum for a tar block counting the checksum field as
298 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
299 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
300 *	pad headers with 0.
301 * Return:
302 *	unsigned long checksum
303 */
304
305static u_long
306tar_chksm(char *blk, int len)
307{
308	char *stop;
309	char *pt;
310	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
311
312	/*
313	 * add the part of the block before the checksum field
314	 */
315	pt = blk;
316	stop = blk + CHK_OFFSET;
317	while (pt < stop)
318		chksm += (u_long)(*pt++ & 0xff);
319	/*
320	 * move past the checksum field and keep going, spec counts the
321	 * checksum field as the sum of 8 blanks (which is pre-computed as
322	 * BLNKSUM).
323	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
324	 * starts, no point in summing zero's)
325	 */
326	pt += CHK_LEN;
327	stop = blk + len;
328	while (pt < stop)
329		chksm += (u_long)(*pt++ & 0xff);
330	return(chksm);
331}
332
333/*
334 * Routines for old BSD style tar (also made portable to sysV tar)
335 */
336
337/*
338 * tar_id()
339 *	determine if a block given to us is a valid tar header (and not a USTAR
340 *	header). We have to be on the lookout for those pesky blocks of	all
341 *	zero's.
342 * Return:
343 *	0 if a tar header, -1 otherwise
344 */
345
346int
347tar_id(char *blk, int size)
348{
349	HD_TAR *hd;
350	HD_USTAR *uhd;
351
352	if (size < BLKMULT)
353		return(-1);
354	hd = (HD_TAR *)blk;
355	uhd = (HD_USTAR *)blk;
356
357	/*
358	 * check for block of zero's first, a simple and fast test, then make
359	 * sure this is not a ustar header by looking for the ustar magic
360	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
361	 * wrong and create archives missing the \0. Last we check the
362	 * checksum. If this is ok we have to assume it is a valid header.
363	 */
364	if (hd->name[0] == '\0')
365		return(-1);
366	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
367		return(-1);
368	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
369}
370
371/*
372 * tar_opt()
373 *	handle tar format specific -o options
374 * Return:
375 *	0 if ok -1 otherwise
376 */
377
378int
379tar_opt(void)
380{
381	OPLIST *opt;
382
383	while ((opt = opt_next()) != NULL) {
384		if (strcmp(opt->name, TAR_OPTION) ||
385		    strcmp(opt->value, TAR_NODIR)) {
386			tty_warn(1,
387			    "Unknown tar format -o option/value pair %s=%s",
388			    opt->name, opt->value);
389			tty_warn(1,
390			    "%s=%s is the only supported tar format option",
391			    TAR_OPTION, TAR_NODIR);
392			return(-1);
393		}
394
395		/*
396		 * we only support one option, and only when writing
397		 */
398		if ((act != APPND) && (act != ARCHIVE)) {
399			tty_warn(1, "%s=%s is only supported when writing.",
400			    opt->name, opt->value);
401			return(-1);
402		}
403		tar_nodir = 1;
404	}
405	return(0);
406}
407
408
409/*
410 * tar_rd()
411 *	extract the values out of block already determined to be a tar header.
412 *	store the values in the ARCHD parameter.
413 * Return:
414 *	0
415 */
416
417int
418tar_rd(ARCHD *arcn, char *buf)
419{
420	HD_TAR *hd;
421	char *pt;
422
423	/*
424	 * we only get proper sized buffers passed to us
425	 */
426	if (tar_id(buf, BLKMULT) < 0)
427		return(-1);
428	memset(arcn, 0, sizeof(*arcn));
429	arcn->org_name = arcn->name;
430	arcn->pat = NULL;
431	arcn->sb.st_nlink = 1;
432
433	/*
434	 * copy out the name and values in the stat buffer
435	 */
436	hd = (HD_TAR *)buf;
437	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
438		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
439		    &gnu_name_string, hd->name);
440		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
441		    &gnu_link_string, hd->linkname);
442	}
443	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
444	    0xfff);
445	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
446	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
447	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
448	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
449	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
450
451	/*
452	 * have to look at the last character, it may be a '/' and that is used
453	 * to encode this as a directory
454	 */
455	pt = &(arcn->name[arcn->nlen - 1]);
456	arcn->pad = 0;
457	arcn->skip = 0;
458	switch(hd->linkflag) {
459	case SYMTYPE:
460		/*
461		 * symbolic link, need to get the link name and set the type in
462		 * the st_mode so -v printing will look correct.
463		 */
464		arcn->type = PAX_SLK;
465		arcn->sb.st_mode |= S_IFLNK;
466		break;
467	case LNKTYPE:
468		/*
469		 * hard link, need to get the link name, set the type in the
470		 * st_mode and st_nlink so -v printing will look better.
471		 */
472		arcn->type = PAX_HLK;
473		arcn->sb.st_nlink = 2;
474
475		/*
476		 * no idea of what type this thing really points at, but
477		 * we set something for printing only.
478		 */
479		arcn->sb.st_mode |= S_IFREG;
480		break;
481	case LONGLINKTYPE:
482		arcn->type = PAX_GLL;
483		/* FALLTHROUGH */
484	case LONGNAMETYPE:
485		/*
486		 * GNU long link/file; we tag these here and let the
487		 * pax internals deal with it -- too ugly otherwise.
488		 */
489		if (hd->linkflag != LONGLINKTYPE)
490			arcn->type = PAX_GLF;
491		arcn->pad = TAR_PAD(arcn->sb.st_size);
492		arcn->skip = arcn->sb.st_size;
493		break;
494	case AREGTYPE:
495	case REGTYPE:
496	case DIRTYPE:	/* see below */
497	default:
498		/*
499		 * If we have a trailing / this is a directory and NOT a file.
500		 * Note: V7 tar doesn't actually have DIRTYPE, but it was
501		 * reported that V7 archives using USTAR directories do exist.
502		 */
503		if (*pt == '/' || hd->linkflag == DIRTYPE) {
504			/*
505			 * it is a directory, set the mode for -v printing
506			 */
507			arcn->type = PAX_DIR;
508			arcn->sb.st_mode |= S_IFDIR;
509			arcn->sb.st_nlink = 2;
510		} else {
511			/*
512			 * have a file that will be followed by data. Set the
513			 * skip value to the size field and calculate the size
514			 * of the padding.
515			 */
516			arcn->type = PAX_REG;
517			arcn->sb.st_mode |= S_IFREG;
518			arcn->pad = TAR_PAD(arcn->sb.st_size);
519			arcn->skip = arcn->sb.st_size;
520		}
521		break;
522	}
523
524	/*
525	 * strip off any trailing slash.
526	 */
527	if (*pt == '/') {
528		*pt = '\0';
529		--arcn->nlen;
530	}
531	return(0);
532}
533
534/*
535 * tar_wr()
536 *	write a tar header for the file specified in the ARCHD to the archive.
537 *	Have to check for file types that cannot be stored and file names that
538 *	are too long. Be careful of the term (last arg) to ul_oct, each field
539 *	of tar has it own spec for the termination character(s).
540 *	ASSUMED: space after header in header block is zero filled
541 * Return:
542 *	0 if file has data to be written after the header, 1 if file has NO
543 *	data to write after the header, -1 if archive write failed
544 */
545
546int
547tar_wr(ARCHD *arcn)
548{
549	HD_TAR *hd;
550	int len;
551	char hdblk[sizeof(HD_TAR)];
552
553	/*
554	 * check for those file system types which tar cannot store
555	 */
556	switch(arcn->type) {
557	case PAX_DIR:
558		/*
559		 * user asked that dirs not be written to the archive
560		 */
561		if (tar_nodir)
562			return(1);
563		break;
564	case PAX_CHR:
565		tty_warn(1, "Tar cannot archive a character device %s",
566		    arcn->org_name);
567		return(1);
568	case PAX_BLK:
569		tty_warn(1,
570		    "Tar cannot archive a block device %s", arcn->org_name);
571		return(1);
572	case PAX_SCK:
573		tty_warn(1, "Tar cannot archive a socket %s", arcn->org_name);
574		return(1);
575	case PAX_FIF:
576		tty_warn(1, "Tar cannot archive a fifo %s", arcn->org_name);
577		return(1);
578	case PAX_SLK:
579	case PAX_HLK:
580	case PAX_HRG:
581		if (arcn->ln_nlen > sizeof(hd->linkname)) {
582			tty_warn(1,"Link name too long for tar %s",
583			    arcn->ln_name);
584			return(1);
585		}
586		break;
587	case PAX_REG:
588	case PAX_CTG:
589	default:
590		break;
591	}
592
593	/*
594	 * check file name len, remember extra char for dirs (the / at the end)
595	 */
596	len = arcn->nlen;
597	if (arcn->type == PAX_DIR)
598		++len;
599	if (len >= sizeof(hd->name)) {
600		tty_warn(1, "File name too long for tar %s", arcn->name);
601		return(1);
602	}
603
604	/*
605	 * copy the data out of the ARCHD into the tar header based on the type
606	 * of the file. Remember many tar readers want the unused fields to be
607	 * padded with zero. We set the linkflag field (type), the linkname
608	 * (or zero if not used),the size, and set the padding (if any) to be
609	 * added after the file data (0 for all other types, as they only have
610	 * a header)
611	 */
612	memset(hdblk, 0, sizeof(hdblk));
613	hd = (HD_TAR *)hdblk;
614	strlcpy(hd->name, arcn->name, sizeof(hd->name));
615	arcn->pad = 0;
616
617	if (arcn->type == PAX_DIR) {
618		/*
619		 * directories are the same as files, except have a filename
620		 * that ends with a /, we add the slash here. No data follows,
621		 * dirs, so no pad.
622		 */
623		hd->linkflag = AREGTYPE;
624		hd->name[len-1] = '/';
625		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
626			goto out;
627	} else if (arcn->type == PAX_SLK) {
628		/*
629		 * no data follows this file, so no pad
630		 */
631		hd->linkflag = SYMTYPE;
632		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
633		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
634			goto out;
635	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
636		/*
637		 * no data follows this file, so no pad
638		 */
639		hd->linkflag = LNKTYPE;
640		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
641		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
642			goto out;
643	} else {
644		/*
645		 * data follows this file, so set the pad
646		 */
647		hd->linkflag = AREGTYPE;
648		if (OFFT_OCT(arcn->sb.st_size, hd->size, sizeof(hd->size), 1)) {
649			tty_warn(1,"File is too large for tar %s",
650			    arcn->org_name);
651			return(1);
652		}
653		arcn->pad = TAR_PAD(arcn->sb.st_size);
654	}
655
656	/*
657	 * copy those fields that are independent of the type
658	 */
659	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
660	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
661	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
662	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
663		goto out;
664
665	/*
666	 * calculate and add the checksum, then write the header. A return of
667	 * 0 tells the caller to now write the file data, 1 says no data needs
668	 * to be written
669	 */
670	if (ul_oct(tar_chksm(hdblk, sizeof(HD_TAR)), hd->chksum,
671	    sizeof(hd->chksum), 3))
672		goto out;			/* XXX Something's wrong here
673						 * because a zero-byte file can
674						 * cause this to be done and
675						 * yet the resulting warning
676						 * seems incorrect */
677
678	if (wr_rdbuf(hdblk, sizeof(HD_TAR)) < 0)
679		return(-1);
680	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
681		return(-1);
682	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
683		return(0);
684	return(1);
685
686    out:
687	/*
688	 * header field is out of range
689	 */
690	tty_warn(1, "Tar header field is too small for %s", arcn->org_name);
691	return(1);
692}
693
694/*
695 * Routines for POSIX ustar
696 */
697
/*
 * ustar_strd()
 *	set up for reading a ustar archive; there is nothing to prepare.
 * Return:
 *	0 if ok, -1 otherwise (currently always 0)
 */

int
ustar_strd(void)
{
	return 0;
}
710
/*
 * ustar_stwr()
 *	set up for writing a ustar archive; there is nothing to prepare.
 * Return:
 *	0 if ok, -1 otherwise (currently always 0)
 */

int
ustar_stwr(void)
{
	return 0;
}
723
724/*
725 * ustar_id()
726 *	determine if a block given to us is a valid ustar header. We have to
727 *	be on the lookout for those pesky blocks of all zero's
728 * Return:
729 *	0 if a ustar header, -1 otherwise
730 */
731
732int
733ustar_id(char *blk, int size)
734{
735	HD_USTAR *hd;
736
737	if (size < BLKMULT) {
738fprintf(stderr, "not a block multiple\n");
739		return(-1);
740	}
741	hd = (HD_USTAR *)blk;
742fprintf(stderr, "hd->name = %s\n", hd->name);
743
744	/*
745	 * check for block of zero's first, a simple and fast test then check
746	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
747	 * programs are fouled up and create archives missing the \0. Last we
748	 * check the checksum. If ok we have to assume it is a valid header.
749	 */
750	if (hd->name[0] == '\0') {
751		int i;
752fprintf(stderr, "null name\n");
753		for (i = 0; i < BLKMULT; i++)
754			if (blk[i] != '\0')
755				fprintf(stderr, "%c", blk[i]);
756		printf("\n");
757		return(-1);
758	}
759	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0) {
760fprintf(stderr, "bad magic %s\n", hd->magic);
761		return(-1);
762	}
763	/* This is GNU tar */
764	if (strncmp(hd->magic, "ustar  ", 8) == 0 && !is_gnutar &&
765	    !seen_gnu_warning) {
766		seen_gnu_warning = 1;
767		tty_warn(0,
768		    "Trying to read GNU tar archive with extensions off");
769	}
770	return check_sum(hd->chksum, sizeof(hd->chksum), blk, BLKMULT);
771}
772
773/*
774 * ustar_rd()
775 *	extract the values out of block already determined to be a ustar header.
776 *	store the values in the ARCHD parameter.
777 * Return:
778 *	0
779 */
780
781int
782ustar_rd(ARCHD *arcn, char *buf)
783{
784	HD_USTAR *hd;
785	char *dest;
786	int cnt;
787	dev_t devmajor;
788	dev_t devminor;
789
790	/*
791	 * we only get proper sized buffers
792	 */
793	if (ustar_id(buf, BLKMULT) < 0)
794		return(-1);
795
796	memset(arcn, 0, sizeof(*arcn));
797	arcn->org_name = arcn->name;
798	arcn->pat = NULL;
799	arcn->sb.st_nlink = 1;
800	hd = (HD_USTAR *)buf;
801
802	/*
803	 * see if the filename is split into two parts. if, so joint the parts.
804	 * we copy the prefix first and add a / between the prefix and name.
805	 */
806	dest = arcn->name;
807	if (*(hd->prefix) != '\0') {
808		cnt = strlcpy(arcn->name, hd->prefix, sizeof(arcn->name));
809		dest += cnt;
810		*dest++ = '/';
811		cnt++;
812	} else {
813		cnt = 0;
814	}
815
816	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
817		arcn->nlen = expandname(dest, sizeof(arcn->name) - cnt,
818		    &gnu_name_string, hd->name);
819		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
820		    &gnu_link_string, hd->linkname);
821	}
822
823	/*
824	 * follow the spec to the letter. we should only have mode bits, strip
825	 * off all other crud we may be passed.
826	 */
827	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
828	    0xfff);
829	arcn->sb.st_size = (off_t)ASC_OFFT(hd->size, sizeof(hd->size), OCT);
830	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
831	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
832
833	/*
834	 * If we can find the ascii names for gname and uname in the password
835	 * and group files we will use the uid's and gid they bind. Otherwise
836	 * we use the uid and gid values stored in the header. (This is what
837	 * the posix spec wants).
838	 */
839	hd->gname[sizeof(hd->gname) - 1] = '\0';
840	if (gid_from_group(hd->gname, &(arcn->sb.st_gid)) < 0)
841		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
842	hd->uname[sizeof(hd->uname) - 1] = '\0';
843	if (uid_from_user(hd->uname, &(arcn->sb.st_uid)) < 0)
844		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
845
846	/*
847	 * set the defaults, these may be changed depending on the file type
848	 */
849	arcn->pad = 0;
850	arcn->skip = 0;
851	arcn->sb.st_rdev = (dev_t)0;
852
853	/*
854	 * set the mode and PAX type according to the typeflag in the header
855	 */
856	switch(hd->typeflag) {
857	case FIFOTYPE:
858		arcn->type = PAX_FIF;
859		arcn->sb.st_mode |= S_IFIFO;
860		break;
861	case DIRTYPE:
862		arcn->type = PAX_DIR;
863		arcn->sb.st_mode |= S_IFDIR;
864		arcn->sb.st_nlink = 2;
865
866		/*
867		 * Some programs that create ustar archives append a '/'
868		 * to the pathname for directories. This clearly violates
869		 * ustar specs, but we will silently strip it off anyway.
870		 */
871		if (arcn->name[arcn->nlen - 1] == '/')
872			arcn->name[--arcn->nlen] = '\0';
873		break;
874	case BLKTYPE:
875	case CHRTYPE:
876		/*
877		 * this type requires the rdev field to be set.
878		 */
879		if (hd->typeflag == BLKTYPE) {
880			arcn->type = PAX_BLK;
881			arcn->sb.st_mode |= S_IFBLK;
882		} else {
883			arcn->type = PAX_CHR;
884			arcn->sb.st_mode |= S_IFCHR;
885		}
886		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
887		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
888		arcn->sb.st_rdev = TODEV(devmajor, devminor);
889		break;
890	case SYMTYPE:
891	case LNKTYPE:
892		if (hd->typeflag == SYMTYPE) {
893			arcn->type = PAX_SLK;
894			arcn->sb.st_mode |= S_IFLNK;
895		} else {
896			arcn->type = PAX_HLK;
897			/*
898			 * so printing looks better
899			 */
900			arcn->sb.st_mode |= S_IFREG;
901			arcn->sb.st_nlink = 2;
902		}
903		break;
904	case LONGLINKTYPE:
905		if (is_gnutar)
906			arcn->type = PAX_GLL;
907		/* FALLTHROUGH */
908	case LONGNAMETYPE:
909		if (is_gnutar) {
910			/*
911			 * GNU long link/file; we tag these here and let the
912			 * pax internals deal with it -- too ugly otherwise.
913			 */
914			if (hd->typeflag != LONGLINKTYPE)
915				arcn->type = PAX_GLF;
916			arcn->pad = TAR_PAD(arcn->sb.st_size);
917			arcn->skip = arcn->sb.st_size;
918		} else {
919			tty_warn(1, "GNU Long %s found in posix ustar archive.",
920			    hd->typeflag == LONGLINKTYPE ? "Link" : "File");
921		}
922		break;
923	case CONTTYPE:
924	case AREGTYPE:
925	case REGTYPE:
926	default:
927		/*
928		 * these types have file data that follows. Set the skip and
929		 * pad fields.
930		 */
931		arcn->type = PAX_REG;
932		arcn->pad = TAR_PAD(arcn->sb.st_size);
933		arcn->skip = arcn->sb.st_size;
934		arcn->sb.st_mode |= S_IFREG;
935		break;
936	}
937	return(0);
938}
939
940static int
941expandname(char *buf, size_t len,  char **gnu_name, const char *name)
942{
943	if (*gnu_name) {
944		len = strlcpy(buf, *gnu_name, len);
945		free(*gnu_name);
946		*gnu_name = NULL;
947	} else {
948		len = strlcpy(buf, name, len);
949	}
950	return len;
951}
952
953static void
954longlink(ARCHD *arcn)
955{
956	ARCHD larc;
957
958	memset(&larc, 0, sizeof(larc));
959
960	switch (arcn->type) {
961	case PAX_SLK:
962	case PAX_HRG:
963	case PAX_HLK:
964		larc.type = PAX_GLL;
965		larc.ln_nlen = strlcpy(larc.ln_name, "././@LongLink",
966		    sizeof(larc.ln_name));
967		gnu_hack_string = arcn->ln_name;
968		gnu_hack_len = arcn->ln_nlen + 1;
969		break;
970	default:
971		larc.nlen = strlcpy(larc.name, "././@LongLink",
972		    sizeof(larc.name));
973		gnu_hack_string = arcn->name;
974		gnu_hack_len = arcn->nlen + 1;
975		larc.type = PAX_GLF;
976	}
977	/*
978	 * We need a longlink now.
979	 */
980	ustar_wr(&larc);
981}
982
983/*
984 * ustar_wr()
985 *	write a ustar header for the file specified in the ARCHD to the archive
986 *	Have to check for file types that cannot be stored and file names that
987 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
988 *	'\0' for the termination character (this is different than picky tar)
989 *	ASSUMED: space after header in header block is zero filled
990 * Return:
991 *	0 if file has data to be written after the header, 1 if file has NO
992 *	data to write after the header, -1 if archive write failed
993 */
994
995int
996ustar_wr(ARCHD *arcn)
997{
998	HD_USTAR *hd;
999	char *pt;
1000	char hdblk[sizeof(HD_USTAR)];
1001	const char *user, *group;
1002
1003	/*
1004	 * check for those file system types ustar cannot store
1005	 */
1006	if (arcn->type == PAX_SCK) {
1007		tty_warn(1, "Ustar cannot archive a socket %s", arcn->org_name);
1008		return(1);
1009	}
1010
1011	/*
1012	 * check the length of the linkname
1013	 */
1014	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
1015	    (arcn->type == PAX_HRG)) &&
1016	    (arcn->ln_nlen >= sizeof(hd->linkname))){
1017		if (is_gnutar) {
1018			longlink(arcn);
1019		} else {
1020			tty_warn(1, "Link name too long for ustar %s",
1021			    arcn->ln_name);
1022			return(1);
1023		}
1024	}
1025
1026	/*
1027	 * split the path name into prefix and name fields (if needed). if
1028	 * pt != arcn->name, the name has to be split
1029	 */
1030	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
1031		if (is_gnutar) {
1032			longlink(arcn);
1033			pt = arcn->name;
1034		} else {
1035			tty_warn(1, "File name too long for ustar %s",
1036			    arcn->name);
1037			return(1);
1038		}
1039	}
1040
1041	/*
1042	 * zero out the header so we don't have to worry about zero fill below
1043	 */
1044	memset(hdblk, 0, sizeof(hdblk));
1045	hd = (HD_USTAR *)hdblk;
1046	arcn->pad = 0L;
1047
1048	/*
1049	 * split the name, or zero out the prefix
1050	 */
1051	if (pt != arcn->name) {
1052		/*
1053		 * name was split, pt points at the / where the split is to
1054		 * occur, we remove the / and copy the first part to the prefix
1055		 */
1056		*pt = '\0';
1057		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
1058		*pt++ = '/';
1059	}
1060
1061	/*
1062	 * copy the name part. this may be the whole path or the part after
1063	 * the prefix
1064	 */
1065	strlcpy(hd->name, pt, sizeof(hd->name));
1066
1067	/*
1068	 * set the fields in the header that are type dependent
1069	 */
1070	switch(arcn->type) {
1071	case PAX_DIR:
1072		hd->typeflag = DIRTYPE;
1073		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1074			goto out;
1075		break;
1076	case PAX_CHR:
1077	case PAX_BLK:
1078		if (arcn->type == PAX_CHR)
1079			hd->typeflag = CHRTYPE;
1080		else
1081			hd->typeflag = BLKTYPE;
1082		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1083		   sizeof(hd->devmajor), 3) ||
1084		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1085		   sizeof(hd->devminor), 3) ||
1086		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1087			goto out;
1088		break;
1089	case PAX_FIF:
1090		hd->typeflag = FIFOTYPE;
1091		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 3))
1092			goto out;
1093		break;
1094	case PAX_GLL:
1095	case PAX_SLK:
1096	case PAX_HLK:
1097	case PAX_HRG:
1098		if (arcn->type == PAX_SLK)
1099			hd->typeflag = SYMTYPE;
1100		else if (arcn->type == PAX_GLL)
1101			hd->typeflag = LONGLINKTYPE;
1102		else
1103			hd->typeflag = LNKTYPE;
1104		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1105		if (ul_oct((u_long)gnu_hack_len, hd->size,
1106		    sizeof(hd->size), 3))
1107			goto out;
1108		break;
1109	case PAX_GLF:
1110	case PAX_REG:
1111	case PAX_CTG:
1112	default:
1113		/*
1114		 * file data with this type, set the padding
1115		 */
1116		if (arcn->type == PAX_GLF) {
1117			hd->typeflag = LONGNAMETYPE;
1118			arcn->pad = TAR_PAD(gnu_hack_len);
1119			if (OFFT_OCT((u_long)gnu_hack_len, hd->size,
1120			    sizeof(hd->size), 3)) {
1121				tty_warn(1,"File is too long for ustar %s",
1122				    arcn->org_name);
1123				return(1);
1124			}
1125		} else {
1126			if (arcn->type == PAX_CTG)
1127				hd->typeflag = CONTTYPE;
1128			else
1129				hd->typeflag = REGTYPE;
1130			arcn->pad = TAR_PAD(arcn->sb.st_size);
1131			if (OFFT_OCT(arcn->sb.st_size, hd->size,
1132			    sizeof(hd->size), 3)) {
1133				tty_warn(1,"File is too long for ustar %s",
1134				    arcn->org_name);
1135				return(1);
1136			}
1137		}
1138		break;
1139	}
1140
1141	strncpy(hd->magic, TMAGIC, TMAGLEN);
1142	if (is_gnutar)
1143		hd->magic[TMAGLEN - 1] = hd->magic[TMAGLEN] = ' ';
1144	else
1145		strncpy(hd->version, TVERSION, TVERSLEN);
1146
1147	/*
1148	 * set the remaining fields. Some versions want all 16 bits of mode
1149	 * we better humor them (they really do not meet spec though)....
1150	 */
1151	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 3) ||
1152	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 3)  ||
1153	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 3) ||
1154	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),3))
1155		goto out;
1156	user = user_from_uid(arcn->sb.st_uid, 1);
1157	group = group_from_gid(arcn->sb.st_gid, 1);
1158	strncpy(hd->uname, user ? user : "", sizeof(hd->uname));
1159	strncpy(hd->gname, group ? group : "", sizeof(hd->gname));
1160
1161	/*
1162	 * calculate and store the checksum write the header to the archive
1163	 * return 0 tells the caller to now write the file data, 1 says no data
1164	 * needs to be written
1165	 */
1166	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1167	   sizeof(hd->chksum), 3))
1168		goto out;
1169	if (wr_rdbuf(hdblk, sizeof(HD_USTAR)) < 0)
1170		return(-1);
1171	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1172		return(-1);
1173	if (gnu_hack_string) {
1174		int res = wr_rdbuf(gnu_hack_string, gnu_hack_len);
1175		int pad = gnu_hack_len;
1176		gnu_hack_string = NULL;
1177		gnu_hack_len = 0;
1178		if (res < 0)
1179			return(-1);
1180		if (wr_skip((off_t)(BLKMULT - (pad % BLKMULT))) < 0)
1181			return(-1);
1182	}
1183	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1184		return(0);
1185	return(1);
1186
1187    out:
1188	/*
1189	 * header field is out of range
1190	 */
1191	tty_warn(1, "Ustar header field is too small for %s", arcn->org_name);
1192	return(1);
1193}
1194
1195/*
1196 * name_split()
1197 *	see if the name has to be split for storage in a ustar header. We try
1198 *	to fit the entire name in the name field without splitting if we can.
1199 *	The split point is always at a /
1200 * Return
1201 *	character pointer to split point (always the / that is to be removed
1202 *	if the split is not needed, the points is set to the start of the file
1203 *	name (it would violate the spec to split there). A NULL is returned if
1204 *	the file name is too long
1205 */
1206
1207static char *
1208name_split(char *name, int len)
1209{
1210	char *start;
1211
1212	/*
1213	 * check to see if the file name is small enough to fit in the name
1214	 * field. if so just return a pointer to the name.
1215	 */
1216	if (len < TNMSZ)
1217		return(name);
1218	if (len > (TPFSZ + TNMSZ))
1219		return(NULL);
1220
1221	/*
1222	 * we start looking at the biggest sized piece that fits in the name
1223	 * field. We walk forward looking for a slash to split at. The idea is
1224	 * to find the biggest piece to fit in the name field (or the smallest
1225	 * prefix we can find) (the -1 is correct the biggest piece would
1226	 * include the slash between the two parts that gets thrown away)
1227	 */
1228	start = name + len - TNMSZ;
1229	while ((*start != '\0') && (*start != '/'))
1230		++start;
1231
1232	/*
1233	 * if we hit the end of the string, this name cannot be split, so we
1234	 * cannot store this file.
1235	 */
1236	if (*start == '\0')
1237		return(NULL);
1238	len = start - name;
1239
1240	/*
1241	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1242	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1243	 * the file would then expand on extract to //str. The len == 0 below
1244	 * makes this special case follow the spec to the letter.
1245	 */
1246	if ((len >= TPFSZ) || (len == 0))
1247		return(NULL);
1248
1249	/*
1250	 * ok have a split point, return it to the caller
1251	 */
1252	return(start);
1253}
1254
/*
 * tar_gnutar_exclude_one()
 *	convert one glob pattern into an RE of the form /^RE$// and add it to
 *	the replacement list with rep_add(). In the pattern, * becomes .*,
 *	? becomes . and every other character that is neither alphanumeric
 *	nor blank is escaped with a backslash.
 * Arguments
 *	line - the pattern; it need not be NUL terminated
 *	len  - number of bytes in line; one trailing newline, if present, is
 *	       not treated as part of the pattern
 * Return
 *	0 if the pattern was added, -1 if the pattern is longer than
 *	MAXPATHLEN or rep_add() failed. No message is printed here for the
 *	too-long case.
 */
static int
tar_gnutar_exclude_one(const char *line, size_t len)
{
	/* "/^" + at most two bytes per pattern byte + "$//" + NUL */
	char sbuf[MAXPATHLEN * 2 + 1 + 5];
	size_t i, j;
	unsigned char ch;

	/*
	 * drop the trailing newline; the len > 0 test keeps an empty pattern
	 * from looking at line[-1]
	 */
	if (len > 0 && line[len - 1] == '\n')
		len--;

	/*
	 * sbuf only has room for MAXPATHLEN pattern bytes. A longer pattern
	 * would be written past the end of the buffer, so refuse it.
	 */
	if (len > MAXPATHLEN)
		return (-1);

	sbuf[0] = '/';
	sbuf[1] = '^';
	for (i = 0, j = 2; i < len; i++) {
		/*
		 * convert glob to regexp, escaping everything. The ctype
		 * functions need an unsigned char value, a plain char may be
		 * negative.
		 */
		ch = (unsigned char)line[i];
		if (ch == '*')
			sbuf[j++] = '.';
		else if (ch == '?') {
			sbuf[j++] = '.';
			continue;
		} else if (!isalnum(ch) && !isblank(ch))
			sbuf[j++] = '\\';
		sbuf[j++] = line[i];
	}
	sbuf[j++] = '$';
	sbuf[j++] = '/';
	sbuf[j++] = '/';
	sbuf[j] = '\0';

	/*
	 * NOTE(review): sbuf lives on the stack; presumably rep_add() copies
	 * whatever it needs to keep - confirm against rep_add().
	 */
	if (rep_add(sbuf) < 0)
		return (-1);

	return (0);
}
1286
/*
 * deal with the GNU tar -X/--exclude-from and --exclude switches.
 * basically, each "glob" pattern (one per line of the exclude file, or the
 * single --exclude argument) is converted into an RE of the form
 * `/^RE$//', which we pass to rep_add(); that adds an empty replacement
 * (an exclusion) for the named files.
 */
/*
 * tar_gnutar_minus_minus_exclude()
 *	add a single exclusion pattern (the argument of --exclude).
 * Arguments
 *	path - NUL terminated glob pattern
 * Return
 *	the result of tar_gnutar_exclude_one(), or -1 (after a warning) when
 *	the pattern is longer than MAXPATHLEN. Such a pattern is not passed
 *	on, as the conversion buffer used there is sized for MAXPATHLEN bytes.
 */
int
tar_gnutar_minus_minus_exclude(const char *path)
{
	size_t	len = strlen(path);

	if (len > MAXPATHLEN) {
		tty_warn(0, "pathname too long: %s", path);
		return (-1);
	}

	return (tar_gnutar_exclude_one(path, len));
}
1305
/*
 * tar_gnutar_X_compat()
 *	read exclusion patterns from a file, one per line, and add each of
 *	them with tar_gnutar_exclude_one().
 * Arguments
 *	path - name of the file holding the patterns
 * Return
 *	0 if every line was added. -1 if the file can not be opened, a line
 *	(not counting its newline) is longer than MAXPATHLEN, or a pattern
 *	could not be added; processing stops at the first such line. The file
 *	is closed on every path once it has been opened.
 */
int
tar_gnutar_X_compat(const char *path)
{
	char *line;
	FILE *fp;
	int lineno = 0;
	int rval = 0;
	size_t len, patlen;

	fp = fopen(path, "r");
	if (fp == NULL) {
		tty_warn(1, "can not open %s: %s", path,
		    strerror(errno));
		return(-1);
	}

	while ((line = fgetln(fp, &len)) != NULL) {
		lineno++;

		/*
		 * the newline is not part of the pattern, so do not count
		 * it against the limit
		 */
		patlen = len;
		if (patlen > 0 && line[patlen - 1] == '\n')
			patlen--;

		/*
		 * an over-long line must not be converted: the conversion
		 * buffer is sized for MAXPATHLEN bytes
		 */
		if (patlen > MAXPATHLEN) {
			tty_warn(0, "pathname too long, line %d of %s",
			    lineno, path);
			rval = -1;
			break;
		}
		if (tar_gnutar_exclude_one(line, len)) {
			rval = -1;
			break;
		}
	}

	/* opened read-only, nothing to flush; just release the stream */
	(void)fclose(fp);
	return (rval);
}
1333