1/*	$OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $	*/
2/*	$NetBSD: tar.c,v 1.5 1995/03/21 09:07:49 cgd Exp $	*/
3
4/*-
5 * Copyright (c) 1992 Keith Muller.
6 * Copyright (c) 1992, 1993
7 *	The Regents of the University of California.  All rights reserved.
8 *
9 * This code is derived from software contributed to Berkeley by
10 * Keith Muller of the University of California, San Diego.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 *    notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 *    notice, this list of conditions and the following disclaimer in the
19 *    documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 *    may be used to endorse or promote products derived from this software
22 *    without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 */
36
37#include <sys/cdefs.h>
38#ifndef lint
39#if 0
40static const char sccsid[] = "@(#)tar.c	8.2 (Berkeley) 4/18/94";
41#else
42__used static const char rcsid[] = "$OpenBSD: tar.c,v 1.41 2006/03/04 20:24:55 otto Exp $";
43#endif
44#endif /* not lint */
45
46#include <sys/types.h>
47#include <sys/time.h>
48#include <sys/stat.h>
49#include <sys/param.h>
50#include <string.h>
51#include <stdio.h>
52#include <unistd.h>
53#include <stdlib.h>
54#include "pax.h"
55#include "extern.h"
56#include "tar.h"
57
/*
 * Routines for reading and writing the various versions of tar archives
 * and for identifying their headers
 */
61
62static size_t expandname(char *, size_t, char **, const char *, size_t);
63static u_long tar_chksm(char *, int);
64static char *name_split(char *, int);
65static int ul_oct(u_long, char *, int, int);
66#ifndef LONG_OFF_T
67static int uqd_oct(u_quad_t, char *, int, int);
68#endif
69
70static uid_t uid_nobody;
71static uid_t uid_warn;
72static gid_t gid_nobody;
73static gid_t gid_warn;
74
75/*
76 * Routines common to all versions of tar
77 */
78
79static int tar_nodir;			/* do not write dirs under old tar */
80char *gnu_name_string;			/* GNU ././@LongLink hackery name */
81char *gnu_link_string;			/* GNU ././@LongLink hackery link */
82
83/*
84 * tar_endwr()
85 *	add the tar trailer of two null blocks
86 * Return:
87 *	0 if ok, -1 otherwise (what wr_skip returns)
88 */
89
90int
91tar_endwr(void)
92{
93	return(wr_skip((off_t)(NULLCNT*BLKMULT)));
94}
95
96/*
97 * tar_endrd()
98 *	no cleanup needed here, just return size of trailer (for append)
99 * Return:
100 *	size of trailer (2 * BLKMULT)
101 */
102
103off_t
104tar_endrd(void)
105{
106	return((off_t)(NULLCNT*BLKMULT));
107}
108
109/*
110 * tar_trail()
111 *	Called to determine if a header block is a valid trailer. We are passed
112 *	the block, the in_sync flag (which tells us we are in resync mode;
113 *	looking for a valid header), and cnt (which starts at zero) which is
114 *	used to count the number of empty blocks we have seen so far.
115 * Return:
116 *	0 if a valid trailer, -1 if not a valid trailer, or 1 if the block
117 *	could never contain a header.
118 */
119
120int
121tar_trail(ARCHD *ignore, char *buf, int in_resync, int *cnt)
122{
123	int i;
124
125	/*
126	 * look for all zero, trailer is two consecutive blocks of zero
127	 */
128	for (i = 0; i < BLKMULT; ++i) {
129		if (buf[i] != '\0')
130			break;
131	}
132
133	/*
134	 * if not all zero it is not a trailer, but MIGHT be a header.
135	 */
136	if (i != BLKMULT)
137		return(-1);
138
139	/*
140	 * When given a zero block, we must be careful!
141	 * If we are not in resync mode, check for the trailer. Have to watch
142	 * out that we do not mis-identify file data as the trailer, so we do
143	 * NOT try to id a trailer during resync mode. During resync mode we
144	 * might as well throw this block out since a valid header can NEVER be
145	 * a block of all 0 (we must have a valid file name).
146	 */
147	if (!in_resync && (++*cnt >= NULLCNT))
148		return(0);
149	return(1);
150}
151
/*
 * ul_oct()
 *	convert an unsigned long to an octal string. many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *	    3: digits followed by '\0'
 *	    2: digits followed by '\0' and ' '
 *	    1: digits followed by ' '
 *	    0 (and anything else): digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	If len is too small to hold even the terminator, nothing is written.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
ul_oct(u_long val, char *str, int len, int term)
{
	int tlen;	/* number of bytes used by the terminator */
	int i;

	/*
	 * refuse fields that cannot hold the terminator; storing it anyway
	 * would write in front of str
	 */
	tlen = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < tlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	switch (term) {
	case 3:
		str[len - 1] = '\0';
		break;
	case 2:
		str[len - 1] = ' ';
		str[len - 2] = '\0';
		break;
	case 1:
		str[len - 1] = ' ';
		break;
	case 0:
	default:
		str[len - 1] = '\0';
		str[len - 2] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards. Once val runs
	 * out the remaining positions receive '0', which is the zero padding
	 * at the front of the field.
	 */
	for (i = len - tlen - 1; i >= 0; --i) {
		str[i] = '0' + (char)(val & 0x7);
		val >>= 3;
	}

	/*
	 * bits left over means the value needs more digits than the field has
	 */
	if (val != (u_long)0)
		return(-1);
	return(0);
}
205
206#ifndef LONG_OFF_T
/*
 * uqd_oct()
 *	convert an u_quad_t to an octal string. one of many oddball field
 *	termination characters are used by the various versions of tar in the
 *	different fields. term selects which kind to use:
 *	    3: digits followed by '\0'
 *	    2: digits followed by '\0' and ' '
 *	    1: digits followed by ' '
 *	    0 (and anything else): digits followed by ' ' and '\0'
 *	str is '0' padded at the front to len. we are unable to use only one
 *	format as many old tar readers are very cranky about this.
 *	If len is too small to hold even the terminator, nothing is written.
 * Return:
 *	0 if the number fit into the string, -1 otherwise
 */

static int
uqd_oct(u_quad_t val, char *str, int len, int term)
{
	int tlen;	/* number of bytes used by the terminator */
	int i;

	/*
	 * refuse fields that cannot hold the terminator; storing it anyway
	 * would write in front of str
	 */
	tlen = ((term == 1) || (term == 3)) ? 1 : 2;
	if (len < tlen)
		return(-1);

	/*
	 * term selects the appropriate character(s) for the end of the string
	 */
	switch (term) {
	case 3:
		str[len - 1] = '\0';
		break;
	case 2:
		str[len - 1] = ' ';
		str[len - 2] = '\0';
		break;
	case 1:
		str[len - 1] = ' ';
		break;
	case 0:
	default:
		str[len - 1] = '\0';
		str[len - 2] = ' ';
		break;
	}

	/*
	 * convert from the least significant digit backwards. Once val runs
	 * out the remaining positions receive '0', which is the zero padding
	 * at the front of the field.
	 */
	for (i = len - tlen - 1; i >= 0; --i) {
		str[i] = '0' + (char)(val & 0x7);
		val >>= 3;
	}

	/*
	 * bits left over means the value needs more digits than the field has
	 */
	if (val != (u_quad_t)0)
		return(-1);
	return(0);
}
260#endif
261
262/*
263 * tar_chksm()
264 *	calculate the checksum for a tar block counting the checksum field as
265 *	all blanks (BLNKSUM is that value pre-calculated, the sum of 8 blanks).
266 *	NOTE: we use len to short circuit summing 0's on write since we ALWAYS
267 *	pad headers with 0.
268 * Return:
269 *	unsigned long checksum
270 */
271
272static u_long
273tar_chksm(char *blk, int len)
274{
275	char *stop;
276	char *pt;
277	u_long chksm = BLNKSUM;	/* initial value is checksum field sum */
278
279	/*
280	 * add the part of the block before the checksum field
281	 */
282	pt = blk;
283	stop = blk + CHK_OFFSET;
284	while (pt < stop)
285		chksm += (u_long)(*pt++ & 0xff);
286	/*
287	 * move past the checksum field and keep going, spec counts the
288	 * checksum field as the sum of 8 blanks (which is pre-computed as
289	 * BLNKSUM).
290	 * ASSUMED: len is greater than CHK_OFFSET. (len is where our 0 padding
291	 * starts, no point in summing zero's)
292	 */
293	pt += CHK_LEN;
294	stop = blk + len;
295	while (pt < stop)
296		chksm += (u_long)(*pt++ & 0xff);
297	return(chksm);
298}
299
300/*
301 * Routines for old BSD style tar (also made portable to sysV tar)
302 */
303
304/*
305 * tar_id()
306 *	determine if a block given to us is a valid tar header (and not a USTAR
307 *	header). We have to be on the lookout for those pesky blocks of	all
308 *	zero's.
309 * Return:
310 *	0 if a tar header, -1 otherwise
311 */
312
313int
314tar_id(char *blk, int size)
315{
316	HD_TAR *hd;
317	HD_USTAR *uhd;
318
319	if (size < BLKMULT)
320		return(-1);
321	hd = (HD_TAR *)blk;
322	uhd = (HD_USTAR *)blk;
323
324	/*
325	 * check for block of zero's first, a simple and fast test, then make
326	 * sure this is not a ustar header by looking for the ustar magic
327	 * cookie. We should use TMAGLEN, but some USTAR archive programs are
328	 * wrong and create archives missing the \0. Last we check the
329	 * checksum. If this is ok we have to assume it is a valid header.
330	 */
331	if (hd->name[0] == '\0')
332		return(-1);
333	if (strncmp(uhd->magic, TMAGIC, TMAGLEN - 1) == 0)
334		return(-1);
335	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
336		return(-1);
337	force_one_volume = 1;
338	return(0);
339}
340
341/*
342 * tar_opt()
343 *	handle tar format specific -o options
344 * Return:
345 *	0 if ok -1 otherwise
346 */
347
348int
349tar_opt(void)
350{
351	OPLIST *opt;
352
353	while ((opt = opt_next()) != NULL) {
354		if (strcmp(opt->name, TAR_OPTION) ||
355		    strcmp(opt->value, TAR_NODIR)) {
356			paxwarn(1, "Unknown tar format -o option/value pair %s=%s",
357			    opt->name, opt->value);
358			paxwarn(1,"%s=%s is the only supported tar format option",
359			    TAR_OPTION, TAR_NODIR);
360			return(-1);
361		}
362
363		/*
364		 * we only support one option, and only when writing
365		 */
366		if ((act != APPND) && (act != ARCHIVE)) {
367			paxwarn(1, "%s=%s is only supported when writing.",
368			    opt->name, opt->value);
369			return(-1);
370		}
371		tar_nodir = 1;
372	}
373	return(0);
374}
375
376
377/*
378 * tar_rd()
379 *	extract the values out of block already determined to be a tar header.
380 *	store the values in the ARCHD parameter.
381 * Return:
382 *	0
383 */
384
385int
386tar_rd(ARCHD *arcn, char *buf)
387{
388	HD_TAR *hd;
389	char *pt;
390
391	/*
392	 * we only get proper sized buffers passed to us
393	 */
394	if (tar_id(buf, BLKMULT) < 0)
395		return(-1);
396	memset(arcn, 0, sizeof(*arcn));
397	arcn->org_name = arcn->name;
398	arcn->sb.st_nlink = 1;
399
400	/*
401	 * copy out the name and values in the stat buffer
402	 */
403	hd = (HD_TAR *)buf;
404	if (hd->linkflag != LONGLINKTYPE && hd->linkflag != LONGNAMETYPE) {
405		arcn->nlen = expandname(arcn->name, sizeof(arcn->name),
406		    &gnu_name_string, hd->name, sizeof(hd->name));
407		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
408		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
409	}
410	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode,sizeof(hd->mode),OCT) &
411	    0xfff);
412	arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
413	arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
414#ifdef LONG_OFF_T
415	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
416#else
417	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
418#endif
419	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
420	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
421
422	/*
423	 * have to look at the last character, it may be a '/' and that is used
424	 * to encode this as a directory
425	 */
426	pt = &(arcn->name[arcn->nlen - 1]);
427	arcn->pad = 0;
428	arcn->skip = 0;
429	switch (hd->linkflag) {
430	case SYMTYPE:
431		/*
432		 * symbolic link, need to get the link name and set the type in
433		 * the st_mode so -v printing will look correct.
434		 */
435		arcn->type = PAX_SLK;
436		arcn->sb.st_mode |= S_IFLNK;
437		arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
438		break;
439	case LNKTYPE:
440		/*
441		 * hard link, need to get the link name, set the type in the
442		 * st_mode and st_nlink so -v printing will look better.
443		 */
444		arcn->type = PAX_HLK;
445		arcn->sb.st_nlink = 2;
446		arcn->ln_nlen = strlcpy(arcn->ln_name, hd->linkname, sizeof(arcn->ln_name));
447
448		/*
449		 * no idea of what type this thing really points at, but
450		 * we set something for printing only.
451		 */
452		arcn->sb.st_mode |= S_IFREG;
453		break;
454	case LONGLINKTYPE:
455	case LONGNAMETYPE:
456		/*
457		 * GNU long link/file; we tag these here and let the
458		 * pax internals deal with it -- too ugly otherwise.
459		 */
460		arcn->type =
461		    hd->linkflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
462		arcn->pad = TAR_PAD(arcn->sb.st_size);
463		arcn->skip = arcn->sb.st_size;
464		break;
465	case DIRTYPE:
466		/*
467		 * It is a directory, set the mode for -v printing
468		 */
469		arcn->type = PAX_DIR;
470		arcn->sb.st_mode |= S_IFDIR;
471		arcn->sb.st_nlink = 2;
472		break;
473	case AREGTYPE:
474	case REGTYPE:
475	default:
476		/*
477		 * If we have a trailing / this is a directory and NOT a file.
478		 */
479		arcn->ln_name[0] = '\0';
480		arcn->ln_nlen = 0;
481		if (*pt == '/') {
482			/*
483			 * it is a directory, set the mode for -v printing
484			 */
485			arcn->type = PAX_DIR;
486			arcn->sb.st_mode |= S_IFDIR;
487			arcn->sb.st_nlink = 2;
488		} else {
489			/*
490			 * have a file that will be followed by data. Set the
491			 * skip value to the size field and calculate the size
492			 * of the padding.
493			 */
494			arcn->type = PAX_REG;
495			arcn->sb.st_mode |= S_IFREG;
496			arcn->pad = TAR_PAD(arcn->sb.st_size);
497			arcn->skip = arcn->sb.st_size;
498		}
499		break;
500	}
501
502	/*
503	 * strip off any trailing slash.
504	 */
505	if (*pt == '/') {
506		*pt = '\0';
507		--arcn->nlen;
508	}
509	return(0);
510}
511
512/*
513 * tar_wr()
514 *	write a tar header for the file specified in the ARCHD to the archive.
515 *	Have to check for file types that cannot be stored and file names that
516 *	are too long. Be careful of the term (last arg) to ul_oct, each field
517 *	of tar has it own spec for the termination character(s).
518 *	ASSUMED: space after header in header block is zero filled
519 * Return:
520 *	0 if file has data to be written after the header, 1 if file has NO
521 *	data to write after the header, -1 if archive write failed
522 */
523
524int
525tar_wr(ARCHD *arcn)
526{
527	HD_TAR *hd;
528	int len;
529	HD_TAR hdblk;
530
531	/*
532	 * check for those file system types which tar cannot store
533	 */
534	switch (arcn->type) {
535	case PAX_DIR:
536		/*
537		 * user asked that dirs not be written to the archive
538		 */
539		if (tar_nodir)
540			return(1);
541		break;
542	case PAX_CHR:
543		paxwarn(1, "Tar cannot archive a character device %s",
544		    arcn->org_name);
545		return(1);
546	case PAX_BLK:
547		paxwarn(1, "Tar cannot archive a block device %s", arcn->org_name);
548		return(1);
549	case PAX_SCK:
550		paxwarn(1, "Tar cannot archive a socket %s", arcn->org_name);
551		return(1);
552	case PAX_FIF:
553		paxwarn(1, "Tar cannot archive a fifo %s", arcn->org_name);
554		return(1);
555	case PAX_SLK:
556	case PAX_HLK:
557	case PAX_HRG:
558		if (arcn->ln_nlen >= sizeof(hd->linkname)) {
559			paxwarn(1, "Link name too long for tar %s",
560			    arcn->ln_name);
561			return(1);
562		}
563		break;
564	case PAX_REG:
565	case PAX_CTG:
566	default:
567		break;
568	}
569
570	/*
571	 * check file name len, remember extra char for dirs (the / at the end)
572	 */
573	len = arcn->nlen;
574	if (arcn->type == PAX_DIR)
575		++len;
576	if (len >= sizeof(hd->name)) {
577		paxwarn(1, "File name too long for tar %s", arcn->name);
578		return(1);
579	}
580
581	/*
582	 * Copy the data out of the ARCHD into the tar header based on the type
583	 * of the file. Remember, many tar readers want all fields to be
584	 * padded with zero so we zero the header first.  We then set the
585	 * linkflag field (type), the linkname, the size, and set the padding
586	 * (if any) to be added after the file data (0 for all other types,
587	 * as they only have a header).
588	 */
589	memset(&hdblk, 0, sizeof(hdblk));
590	hd = (HD_TAR *)&hdblk;
591	strlcpy(hd->name,  arcn->name, sizeof(hd->name));
592	arcn->pad = 0;
593
594	if (arcn->type == PAX_DIR) {
595		/*
596		 * directories are the same as files, except have a filename
597		 * that ends with a /, we add the slash here. No data follows
598		 * dirs, so no pad.
599		 */
600		hd->linkflag = AREGTYPE;
601		hd->name[len-1] = '/';
602		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
603			goto out;
604	} else if (arcn->type == PAX_SLK) {
605		/*
606		 * no data follows this file, so no pad
607		 */
608		hd->linkflag = SYMTYPE;
609		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
610		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
611			goto out;
612	} else if ((arcn->type == PAX_HLK) || (arcn->type == PAX_HRG)) {
613		/*
614		 * no data follows this file, so no pad
615		 */
616		hd->linkflag = LNKTYPE;
617		strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
618		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), 1))
619			goto out;
620	} else {
621		/*
622		 * data follows this file, so set the pad
623		 */
624		hd->linkflag = AREGTYPE;
625#		ifdef LONG_OFF_T
626		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
627		    sizeof(hd->size), 1)) {
628#		else
629		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
630		    sizeof(hd->size), 1)) {
631#		endif
632			paxwarn(1,"File is too large for tar %s", arcn->org_name);
633			return(1);
634		}
635		arcn->pad = TAR_PAD(arcn->sb.st_size);
636	}
637
638	/*
639	 * copy those fields that are independent of the type
640	 */
641	if (ul_oct((u_long)arcn->sb.st_mode, hd->mode, sizeof(hd->mode), 0) ||
642	    ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), 0) ||
643	    ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), 0) ||
644	    ul_oct((u_long)arcn->sb.st_mtime, hd->mtime, sizeof(hd->mtime), 1))
645		goto out;
646
647	/*
648	 * calculate and add the checksum, then write the header. A return of
649	 * 0 tells the caller to now write the file data, 1 says no data needs
650	 * to be written
651	 */
652	if (ul_oct(tar_chksm((char *)&hdblk, sizeof(HD_TAR)), hd->chksum,
653	    sizeof(hd->chksum), 3))
654		goto out;
655	if (wr_rdbuf((char *)&hdblk, sizeof(HD_TAR)) < 0)
656		return(-1);
657	if (wr_skip((off_t)(BLKMULT - sizeof(HD_TAR))) < 0)
658		return(-1);
659	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
660		return(0);
661	return(1);
662
663    out:
664	/*
665	 * header field is out of range
666	 */
667	paxwarn(1, "Tar header field is too small for %s", arcn->org_name);
668	return(1);
669}
670
671/*
672 * Routines for POSIX ustar
673 */
674
/*
 * ustar_strd()
 *	Set up for reading ustar: start the user name and group name tables
 *	(usrtb_start() and grptb_start()).
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_strd(void)
{
	if (usrtb_start() < 0)
		return(-1);
	if (grptb_start() < 0)
		return(-1);
	return(0);
}
689
/*
 * ustar_stwr()
 *	Set up for writing ustar: start the uid and gid tables
 *	(uidtb_start() and gidtb_start()).
 * Return:
 *	0 if ok, -1 otherwise
 */

int
ustar_stwr(void)
{
	if (uidtb_start() < 0)
		return(-1);
	if (gidtb_start() < 0)
		return(-1);
	return(0);
}
704
705/*
706 * ustar_id()
707 *	determine if a block given to us is a valid ustar header. We have to
708 *	be on the lookout for those pesky blocks of all zero's
709 * Return:
710 *	0 if a ustar header, -1 otherwise
711 */
712
713int
714ustar_id(char *blk, int size)
715{
716	HD_USTAR *hd;
717
718	if (size < BLKMULT)
719		return(-1);
720	hd = (HD_USTAR *)blk;
721
722	/*
723	 * check for block of zero's first, a simple and fast test then check
724	 * ustar magic cookie. We should use TMAGLEN, but some USTAR archive
725	 * programs are fouled up and create archives missing the \0. Last we
726	 * check the checksum. If ok we have to assume it is a valid header.
727	 */
728	if (hd->name[0] == '\0')
729		return(-1);
730	if (strncmp(hd->magic, TMAGIC, TMAGLEN - 1) != 0)
731		return(-1);
732	if (asc_ul(hd->chksum,sizeof(hd->chksum),OCT) != tar_chksm(blk,BLKMULT))
733		return(-1);
734	return(0);
735}
736
737/*
738 * ustar_rd()
739 *	extract the values out of block already determined to be a ustar header.
740 *	store the values in the ARCHD parameter.
741 * Return:
742 *	0
743 */
744
745int
746ustar_rd(ARCHD *arcn, char *buf)
747{
748	HD_USTAR *hd;
749	char *dest;
750	int cnt = 0;
751	dev_t devmajor;
752	dev_t devminor;
753
754	/*
755	 * we only get proper sized buffers
756	 */
757	if (ustar_id(buf, BLKMULT) < 0)
758		return(-1);
759	memset(arcn, 0, sizeof(*arcn));
760	arcn->org_name = arcn->name;
761	arcn->sb.st_nlink = 1;
762	hd = (HD_USTAR *)buf;
763
764	/*
765	 * see if the filename is split into two parts. if, so joint the parts.
766	 * we copy the prefix first and add a / between the prefix and name.
767	 */
768	dest = arcn->name;
769	if (*(hd->prefix) != '\0') {
770		cnt = strlcpy(dest, hd->prefix, sizeof(arcn->name) - 1);
771		dest += cnt;
772		*dest++ = '/';
773		cnt++;
774	} else {
775		cnt = 0;
776	}
777
778	if (hd->typeflag != LONGLINKTYPE && hd->typeflag != LONGNAMETYPE) {
779		arcn->nlen = cnt + expandname(dest, sizeof(arcn->name) - cnt,
780		    &gnu_name_string, hd->name, sizeof(hd->name));
781		arcn->ln_nlen = expandname(arcn->ln_name, sizeof(arcn->ln_name),
782		    &gnu_link_string, hd->linkname, sizeof(hd->linkname));
783	}
784
785	/*
786	 * follow the spec to the letter. we should only have mode bits, strip
787	 * off all other crud we may be passed.
788	 */
789	arcn->sb.st_mode = (mode_t)(asc_ul(hd->mode, sizeof(hd->mode), OCT) &
790	    0xfff);
791#ifdef LONG_OFF_T
792	arcn->sb.st_size = (off_t)asc_ul(hd->size, sizeof(hd->size), OCT);
793#else
794	arcn->sb.st_size = (off_t)asc_uqd(hd->size, sizeof(hd->size), OCT);
795#endif
796	arcn->sb.st_mtime = (time_t)asc_ul(hd->mtime, sizeof(hd->mtime), OCT);
797	arcn->sb.st_ctime = arcn->sb.st_atime = arcn->sb.st_mtime;
798
799	/*
800	 * If we can find the ascii names for gname and uname in the password
801	 * and group files we will use the uid's and gid they bind. Otherwise
802	 * we use the uid and gid values stored in the header. (This is what
803	 * the POSIX spec wants).
804	 */
805	hd->gname[sizeof(hd->gname) - 1] = '\0';
806	if (gid_name(hd->gname, &(arcn->sb.st_gid)) < 0)
807		arcn->sb.st_gid = (gid_t)asc_ul(hd->gid, sizeof(hd->gid), OCT);
808	hd->uname[sizeof(hd->uname) - 1] = '\0';
809	if (uid_name(hd->uname, &(arcn->sb.st_uid)) < 0)
810		arcn->sb.st_uid = (uid_t)asc_ul(hd->uid, sizeof(hd->uid), OCT);
811
812	/*
813	 * set the defaults, these may be changed depending on the file type
814	 */
815	arcn->pad = 0;
816	arcn->skip = 0;
817	arcn->sb.st_rdev = (dev_t)0;
818
819	/*
820	 * set the mode and PAX type according to the typeflag in the header
821	 */
822	switch (hd->typeflag) {
823	case FIFOTYPE:
824		arcn->type = PAX_FIF;
825		arcn->sb.st_mode |= S_IFIFO;
826		break;
827	case DIRTYPE:
828		arcn->type = PAX_DIR;
829		arcn->sb.st_mode |= S_IFDIR;
830		arcn->sb.st_nlink = 2;
831
832		/*
833		 * Some programs that create ustar archives append a '/'
834		 * to the pathname for directories. This clearly violates
835		 * ustar specs, but we will silently strip it off anyway.
836		 */
837		if (arcn->name[arcn->nlen - 1] == '/')
838			arcn->name[--arcn->nlen] = '\0';
839		break;
840	case BLKTYPE:
841	case CHRTYPE:
842		/*
843		 * this type requires the rdev field to be set.
844		 */
845		if (hd->typeflag == BLKTYPE) {
846			arcn->type = PAX_BLK;
847			arcn->sb.st_mode |= S_IFBLK;
848		} else {
849			arcn->type = PAX_CHR;
850			arcn->sb.st_mode |= S_IFCHR;
851		}
852		devmajor = (dev_t)asc_ul(hd->devmajor,sizeof(hd->devmajor),OCT);
853		devminor = (dev_t)asc_ul(hd->devminor,sizeof(hd->devminor),OCT);
854		arcn->sb.st_rdev = TODEV(devmajor, devminor);
855		break;
856	case SYMTYPE:
857	case LNKTYPE:
858		if (hd->typeflag == SYMTYPE) {
859			arcn->type = PAX_SLK;
860			arcn->sb.st_mode |= S_IFLNK;
861		} else {
862			arcn->type = PAX_HLK;
863			/*
864			 * so printing looks better
865			 */
866			arcn->sb.st_mode |= S_IFREG;
867			arcn->sb.st_nlink = 2;
868		}
869		break;
870	case LONGLINKTYPE:
871	case LONGNAMETYPE:
872		/*
873		 * GNU long link/file; we tag these here and let the
874		 * pax internals deal with it -- too ugly otherwise.
875		 */
876		arcn->type =
877		    hd->typeflag == LONGLINKTYPE ? PAX_GLL : PAX_GLF;
878		arcn->pad = TAR_PAD(arcn->sb.st_size);
879		arcn->skip = arcn->sb.st_size;
880		break;
881	case CONTTYPE:
882	case AREGTYPE:
883	case REGTYPE:
884	default:
885		/*
886		 * these types have file data that follows. Set the skip and
887		 * pad fields.
888		 */
889		arcn->type = PAX_REG;
890		arcn->pad = TAR_PAD(arcn->sb.st_size);
891		arcn->skip = arcn->sb.st_size;
892		arcn->sb.st_mode |= S_IFREG;
893		break;
894	}
895	return(0);
896}
897
898/*
899 * ustar_wr()
900 *	write a ustar header for the file specified in the ARCHD to the archive
901 *	Have to check for file types that cannot be stored and file names that
902 *	are too long. Be careful of the term (last arg) to ul_oct, we only use
903 *	'\0' for the termination character (this is different than picky tar)
904 *	ASSUMED: space after header in header block is zero filled
905 * Return:
906 *	0 if file has data to be written after the header, 1 if file has NO
907 *	data to write after the header, -1 if archive write failed
908 */
909
910int
911ustar_wr(ARCHD *arcn)
912{
913	HD_USTAR *hd;
914	char *pt;
915	char hdblk[sizeof(HD_USTAR)];
916	mode_t mode12only;
917	int term_char=3;	/* orignal setting */
918	term_char=1;		/* To pass conformance tests 274, 301 */
919
920	/*
921	 * check for those file system types ustar cannot store
922	 */
923	if (arcn->type == PAX_SCK) {
924		paxwarn(1, "Ustar cannot archive a socket %s", arcn->org_name);
925		return(1);
926	}
927
928	/*
929	 * check the length of the linkname
930	 */
931	if (((arcn->type == PAX_SLK) || (arcn->type == PAX_HLK) ||
932	    (arcn->type == PAX_HRG)) && (arcn->ln_nlen > sizeof(hd->linkname))){
933		paxwarn(1, "Link name too long for ustar %s", arcn->ln_name);
934		/*
935		 * Conformance: test pax:285 wants error code to be non-zero, and
936		 * test tar:12 wants error code from pax to be 0
937		 */
938		return(1);
939	}
940
941	/*
942	 * split the path name into prefix and name fields (if needed). if
943	 * pt != arcn->name, the name has to be split
944	 */
945	if ((pt = name_split(arcn->name, arcn->nlen)) == NULL) {
946		paxwarn(1, "File name too long for ustar %s", arcn->name);
947		return(1);
948	}
949
950	/*
951	 * zero out the header so we don't have to worry about zero fill below
952	 */
953	memset(hdblk, 0, sizeof(hdblk));
954	hd = (HD_USTAR *)hdblk;
955	arcn->pad = 0L;
956
957	/* To pass conformance tests 274/301, always set these fields to "zero" */
958	ul_oct(0, hd->devmajor, sizeof(hd->devmajor), term_char);
959	ul_oct(0, hd->devminor, sizeof(hd->devminor), term_char);
960
961	/*
962	 * split the name, or zero out the prefix
963	 */
964	if (pt != arcn->name) {
965		/*
966		 * name was split, pt points at the / where the split is to
967		 * occur, we remove the / and copy the first part to the prefix
968		 */
969		*pt = '\0';
970		strlcpy(hd->prefix, arcn->name, sizeof(hd->prefix));
971		*pt++ = '/';
972	}
973
974	/*
975	 * copy the name part. this may be the whole path or the part after
976	 * the prefix.  both the name and prefix may fill the entire field.
977	 */
978	if (strlen(pt) == sizeof(hd->name)) {	/* must account for name just fits in buffer */
979		strncpy(hd->name, pt, sizeof(hd->name));
980	} else {
981		strlcpy(hd->name, pt, sizeof(hd->name));
982	}
983
984	/*
985	 * set the fields in the header that are type dependent
986	 */
987	switch (arcn->type) {
988	case PAX_DIR:
989		hd->typeflag = DIRTYPE;
990		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
991			goto out;
992		break;
993	case PAX_CHR:
994	case PAX_BLK:
995		if (arcn->type == PAX_CHR)
996			hd->typeflag = CHRTYPE;
997		else
998			hd->typeflag = BLKTYPE;
999		if (ul_oct((u_long)MAJOR(arcn->sb.st_rdev), hd->devmajor,
1000		   sizeof(hd->devmajor), term_char) ||
1001		   ul_oct((u_long)MINOR(arcn->sb.st_rdev), hd->devminor,
1002		   sizeof(hd->devminor), term_char) ||
1003		   ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1004			goto out;
1005		break;
1006	case PAX_FIF:
1007		hd->typeflag = FIFOTYPE;
1008		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1009			goto out;
1010		break;
1011	case PAX_SLK:
1012	case PAX_HLK:
1013	case PAX_HRG:
1014		if (arcn->type == PAX_SLK)
1015			hd->typeflag = SYMTYPE;
1016		else
1017			hd->typeflag = LNKTYPE;
1018		if (strlen(arcn->ln_name) == sizeof(hd->linkname)) {	/* must account for name just fits in buffer */
1019			strncpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1020		} else {
1021			strlcpy(hd->linkname, arcn->ln_name, sizeof(hd->linkname));
1022		}
1023		if (ul_oct((u_long)0L, hd->size, sizeof(hd->size), term_char))
1024			goto out;
1025		break;
1026	case PAX_REG:
1027	case PAX_CTG:
1028	default:
1029		/*
1030		 * file data with this type, set the padding
1031		 */
1032		if (arcn->type == PAX_CTG)
1033			hd->typeflag = CONTTYPE;
1034		else
1035			hd->typeflag = REGTYPE;
1036		arcn->pad = TAR_PAD(arcn->sb.st_size);
1037#		ifdef LONG_OFF_T
1038		if (ul_oct((u_long)arcn->sb.st_size, hd->size,
1039		    sizeof(hd->size), term_char)) {
1040#		else
1041		if (uqd_oct((u_quad_t)arcn->sb.st_size, hd->size,
1042		    sizeof(hd->size), term_char)) {
1043#		endif
1044			paxwarn(1,"File is too long for ustar %s",arcn->org_name);
1045			return(1);
1046		}
1047		break;
1048	}
1049
1050	strncpy(hd->magic, TMAGIC, TMAGLEN);
1051	strncpy(hd->version, TVERSION, TVERSLEN);
1052
1053	/*
1054	 * set the remaining fields. Some versions want all 16 bits of mode
1055	 * we better humor them (they really do not meet spec though)....
1056	 */
1057	if (ul_oct((u_long)arcn->sb.st_uid, hd->uid, sizeof(hd->uid), term_char)) {
1058		if (uid_nobody == 0) {
1059			if (uid_name("nobody", &uid_nobody) == -1)
1060				goto out;
1061		}
1062		if (uid_warn != arcn->sb.st_uid) {
1063			uid_warn = arcn->sb.st_uid;
1064			paxwarn(1,
1065			    "Ustar header field is too small for uid %lu, "
1066			    "using nobody", (u_long)arcn->sb.st_uid);
1067		}
1068		if (ul_oct((u_long)uid_nobody, hd->uid, sizeof(hd->uid), term_char))
1069			goto out;
1070	}
1071	if (ul_oct((u_long)arcn->sb.st_gid, hd->gid, sizeof(hd->gid), term_char)) {
1072		if (gid_nobody == 0) {
1073			if (gid_name("nobody", &gid_nobody) == -1)
1074				goto out;
1075		}
1076		if (gid_warn != arcn->sb.st_gid) {
1077			gid_warn = arcn->sb.st_gid;
1078			paxwarn(1,
1079			    "Ustar header field is too small for gid %lu, "
1080			    "using nobody", (u_long)arcn->sb.st_gid);
1081		}
1082		if (ul_oct((u_long)gid_nobody, hd->gid, sizeof(hd->gid), term_char))
1083			goto out;
1084	}
1085	/* However, Unix conformance tests do not like MORE than 12 mode bits:
1086	   remove all beyond (see definition of stat.st_mode structure)		*/
1087	mode12only = ((u_long)arcn->sb.st_mode) & 0x00000fff;
1088	if (ul_oct((u_long)mode12only, hd->mode, sizeof(hd->mode), term_char) ||
1089	    ul_oct((u_long)arcn->sb.st_mtime,hd->mtime,sizeof(hd->mtime),term_char))
1090		goto out;
1091	strncpy(hd->uname, name_uid(arcn->sb.st_uid, 0), sizeof(hd->uname));
1092	strncpy(hd->gname, name_gid(arcn->sb.st_gid, 0), sizeof(hd->gname));
1093
1094	/*
1095	 * calculate and store the checksum write the header to the archive
1096	 * return 0 tells the caller to now write the file data, 1 says no data
1097	 * needs to be written
1098	 */
1099	if (ul_oct(tar_chksm(hdblk, sizeof(HD_USTAR)), hd->chksum,
1100	   sizeof(hd->chksum), term_char))
1101		goto out;
1102	if (wr_rdbuf((char *)&hdblk, sizeof(HD_USTAR)) < 0)
1103		return(-1);
1104	if (wr_skip((off_t)(BLKMULT - sizeof(HD_USTAR))) < 0)
1105		return(-1);
1106	if ((arcn->type == PAX_CTG) || (arcn->type == PAX_REG))
1107		return(0);
1108	return(1);
1109
1110    out:
1111	/*
1112	 * header field is out of range
1113	 */
1114	paxwarn(1, "Ustar header field is too small for %s", arcn->org_name);
1115	return(1);
1116}
1117
1118/*
1119 * name_split()
1120 *	see if the name has to be split for storage in a ustar header. We try
1121 *	to fit the entire name in the name field without splitting if we can.
1122 *	The split point is always at a /
1123 * Return
1124 *	character pointer to split point (always the / that is to be removed
1125 *	if the split is not needed, the points is set to the start of the file
1126 *	name (it would violate the spec to split there). A NULL is returned if
1127 *	the file name is too long
1128 */
1129
1130static char *
1131name_split(char *name, int len)
1132{
1133	char *start;
1134
1135	/*
1136	 * check to see if the file name is small enough to fit in the name
1137	 * field. if so just return a pointer to the name.
1138	 * The strings can fill the complete name and prefix fields
1139	 * without a NUL terminator.
1140	 */
1141	if (len <= TNMSZ)
1142		return(name);
1143	if (len > (TPFSZ + TNMSZ + 1))
1144		return(NULL);
1145
1146	/*
1147	 * we start looking at the biggest sized piece that fits in the name
1148	 * field. We walk forward looking for a slash to split at. The idea is
1149	 * to find the biggest piece to fit in the name field (or the smallest
1150	 * prefix we can find) (the -1 is correct the biggest piece would
1151	 * include the slash between the two parts that gets thrown away)
1152	 */
1153	start = name + len - TNMSZ - 1;
1154	if ((*start == '/') && (start == name))
1155		++start;	/* 101 byte paths with leading '/' are dinged otherwise */
1156	while ((*start != '\0') && (*start != '/'))
1157		++start;
1158
1159	/*
1160	 * if we hit the end of the string, this name cannot be split, so we
1161	 * cannot store this file.
1162	 */
1163	if (*start == '\0')
1164		return(NULL);
1165	len = start - name;
1166
1167	/*
1168	 * NOTE: /str where the length of str == TNMSZ can not be stored under
1169	 * the p1003.1-1990 spec for ustar. We could force a prefix of / and
1170	 * the file would then expand on extract to //str. The len == 0 below
1171	 * makes this special case follow the spec to the letter.
1172	 */
1173	if ((len > TPFSZ) || (len == 0))
1174		return(NULL);
1175
1176	/*
1177	 * ok have a split point, return it to the caller
1178	 */
1179	return(start);
1180}
1181
/*
 * expandname()
 *	Store a name taken from a header into buf (of size len), always
 *	'\0' terminated. If *gnu_name is set (a pending GNU long name, which
 *	is '\0' terminated) it is used instead of the header field; it is
 *	then freed and *gnu_name is reset to NULL. Otherwise the name comes
 *	from the header field name of name_len bytes, which may fill the
 *	whole field and then has no terminator; the field is never read
 *	beyond name_len bytes. The result is truncated to fit in buf.
 * Return:
 *	number of characters stored in buf, not counting the terminator
 */

static size_t
expandname(char *buf, size_t len, char **gnu_name, const char *name,
    size_t name_len)
{
	const char *src;
	const char *nul;
	size_t nlen;

	if (*gnu_name != NULL) {
		src = *gnu_name;
		nlen = strlen(src);
	} else {
		/*
		 * look for the terminator inside the field only, a name as
		 * big as the field has none
		 */
		src = name;
		nul = memchr(name, '\0', name_len);
		nlen = (nul != NULL) ? (size_t)(nul - name) : name_len;
	}

	if (len == 0) {
		/* no room at all, not even for the terminator */
		nlen = 0;
	} else {
		if (nlen >= len)
			nlen = len - 1;
		memcpy(buf, src, nlen);
		buf[nlen] = '\0';
	}

	/*
	 * a GNU long name is good for one header only
	 */
	if (*gnu_name != NULL) {
		free(*gnu_name);
		*gnu_name = NULL;
	}
	return(nlen);
}
1207