1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27229592Smm__FBSDID("$FreeBSD$");
28228753Smm
29228753Smm#ifdef HAVE_ERRNO_H
30228753Smm#include <errno.h>
31228753Smm#endif
32228753Smm#include <stddef.h>
33228753Smm/* #include <stdint.h> */ /* See archive_platform.h */
34228753Smm#ifdef HAVE_STDLIB_H
35228753Smm#include <stdlib.h>
36228753Smm#endif
37228753Smm#ifdef HAVE_STRING_H
38228753Smm#include <string.h>
39228753Smm#endif
40228753Smm
41228753Smm/* Obtain suitable wide-character manipulation functions. */
42228753Smm#ifdef HAVE_WCHAR_H
43228753Smm#include <wchar.h>
44228753Smm#else
/*
 * Fallback wcscmp() used only when <wchar.h> is unavailable.
 * Returns the difference of the first pair of characters that differ
 * (or 0 when both strings end together); callers here only rely on
 * the zero / non-zero distinction.
 */
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
	int delta;

	for (;;) {
		delta = (int)*s1 - (int)*s2;
		/* Stop at the first mismatch or at the end of s1. */
		if (delta != 0 || *s1 == 0)
			return delta;
		s1++;
		s2++;
	}
}
/*
 * Fallback wcsncmp() used only when <wchar.h> is unavailable.
 *
 * Compares at most n wide characters of s1 and s2 and returns the
 * difference of the first pair that differs, or 0 when the first n
 * characters match (or both strings end before that).  A count of
 * zero always compares equal and dereferences neither string.
 */
static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n)
{
	int diff;

	for (; n > 0; n--, s1++, s2++) {
		diff = (int)*s1 - (int)*s2;
		/* Stop at the first mismatch or at the end of s1. */
		if (diff != 0 || *s1 == 0)
			return diff;
	}
	return 0;
}
/*
 * Fallback wcslen() used only when <wchar.h> is unavailable:
 * counts the wide characters that precede the terminating zero.
 */
static size_t wcslen(const wchar_t *s)
{
	size_t count = 0;

	while (s[count] != 0)
		count++;
	return count;
}
68228753Smm#endif
69228753Smm
70228753Smm#include "archive.h"
71228753Smm#include "archive_entry.h"
72228753Smm#include "archive_private.h"
73228753Smm#include "archive_read_private.h"
74228753Smm
/* Smaller of two values; note that each argument is evaluated twice. */
#define tar_min(a,b) (((b) > (a)) ? (a) : (b))
76228753Smm
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a plain char array, so the structure has no padding
 * and occupies the first 500 bytes of a 512-byte header record.  The
 * number in each trailing comment is the member's byte offset.
 */
struct archive_entry_header_ustar {
	char name[100];		/*   0 */
	char mode[8];		/* 100 */
	char uid[8];		/* 108 */
	char gid[8];		/* 116 */
	char size[12];		/* 124 */
	char mtime[12];		/* 136 */
	char checksum[8];	/* 148 */
	char typeflag[1];	/* 156 */
	char linkname[100];	/* 157; "old format" header ends here */
	char magic[6];		/* 257; for POSIX: "ustar\0" */
	char version[2];	/* 263; for POSIX: "00" */
	char uname[32];		/* 265 */
	char gname[32];		/* 297 */
	char rdevmajor[8];	/* 329 */
	char rdevminor[8];	/* 337 */
	char prefix[155];	/* 345 */
};
98228753Smm
/*
 * On-disk layout of a GNU tar header.
 */

/* One sparse-map slot: a pair of 12-byte text fields. */
struct gnu_sparse {
	char offset[12];
	char numbytes[12];
};

/*
 * The members up to 'linkname' line up with the ustar header; the
 * remainder is GNU-specific.  All members are char arrays, so there
 * is no padding (495 bytes total).  Trailing numbers are byte offsets.
 */
struct archive_entry_header_gnutar {
	char name[100];		/*   0 */
	char mode[8];		/* 100 */
	char uid[8];		/* 108 */
	char gid[8];		/* 116 */
	char size[12];		/* 124 */
	char mtime[12];		/* 136 */
	char checksum[8];	/* 148 */
	char typeflag[1];	/* 156 */
	char linkname[100];	/* 157 */
	char magic[8];		/* 257; "ustar  \0" (note blank/blank/null at end) */
	char uname[32];		/* 265 */
	char gname[32];		/* 297 */
	char rdevmajor[8];	/* 329 */
	char rdevminor[8];	/* 337 */
	char atime[12];		/* 345 */
	char ctime[12];		/* 357 */
	char offset[12];	/* 369 */
	char longnames[4];	/* 381 */
	char unused[1];		/* 385 */
	struct gnu_sparse sparse[4];	/* 386 */
	char isextended[1];	/* 482 */
	char realsize[12];	/* 483 */
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
135228753Smm
/*
 * Data specific to this format.
 */

/*
 * One node of the singly-linked sparse map kept in struct tar.
 * The data reader reports 'offset' as the position of the next chunk
 * and then advances 'offset' / shrinks 'remaining' by the bytes
 * delivered; nodes whose 'remaining' reaches zero are discarded.
 */
struct sparse_block {
	struct sparse_block *next;	/* Following node, or NULL. */
	off_t offset;			/* Position of the next byte to report. */
	off_t remaining;		/* Bytes still to deliver from this node. */
};
144228753Smm
145228753Smmstruct tar {
146228753Smm	struct archive_string	 acl_text;
147228753Smm	struct archive_string	 entry_pathname;
148228753Smm	/* For "GNU.sparse.name" and other similar path extensions. */
149228753Smm	struct archive_string	 entry_pathname_override;
150228753Smm	struct archive_string	 entry_linkpath;
151228753Smm	struct archive_string	 entry_uname;
152228753Smm	struct archive_string	 entry_gname;
153228753Smm	struct archive_string	 longlink;
154228753Smm	struct archive_string	 longname;
155228753Smm	struct archive_string	 pax_header;
156228753Smm	struct archive_string	 pax_global;
157228753Smm	struct archive_string	 line;
158228753Smm	int			 pax_hdrcharset_binary;
159228753Smm	wchar_t 		*pax_entry;
160228753Smm	size_t			 pax_entry_length;
161228753Smm	int			 header_recursion_depth;
162228753Smm	int64_t			 entry_bytes_remaining;
163228753Smm	int64_t			 entry_offset;
164228753Smm	int64_t			 entry_padding;
165228753Smm	int64_t			 realsize;
166228753Smm	struct sparse_block	*sparse_list;
167228753Smm	struct sparse_block	*sparse_last;
168228753Smm	int64_t			 sparse_offset;
169228753Smm	int64_t			 sparse_numbytes;
170228753Smm	int			 sparse_gnu_major;
171228753Smm	int			 sparse_gnu_minor;
172228753Smm	char			 sparse_gnu_pending;
173228753Smm};
174228753Smm
/*
 * Forward declarations for the file-local functions, grouped by role.
 */

/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_tar_bid(struct archive_read *);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_cleanup(struct archive_read *);

/* Header-record validation and dispatch. */
static int	archive_block_is_null(const unsigned char *p);
static int	checksum(struct archive_read *, const void *);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h);

/* header_*() routines. */
static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_common(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);

/* pax_*() routines. */
static int 	pax_attribute(struct tar *, struct archive_entry *,
		    char *key, char *value);
static int 	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const char *, int64_t *sec, long *nanos);

/* gnu_*() sparse-map routines. */
static void	 gnu_add_sparse_entry(struct tar *,
		    off_t offset, off_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header);
static void	gnu_sparse_old_parse(struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct tar *, const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *);

/* Numeric-field and text helpers. */
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);
static int64_t	tar_atol10(const char *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit);
static char	*base64_decode(const char *, size_t, size_t *);
static char	*url_decode(const char *);
static int	tohex(int c);
static ssize_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static wchar_t	*utf8_decode(struct tar *, const char *, size_t length);
233228753Smm
/*
 * Enable GNU tar support.  There is no separate GNU reader: this just
 * forwards to archive_read_support_format_tar() and returns its status.
 */
int
archive_read_support_format_gnutar(struct archive *a)
{
	int status;

	status = archive_read_support_format_tar(a);
	return (status);
}
239228753Smm
240228753Smm
241228753Smmint
242228753Smmarchive_read_support_format_tar(struct archive *_a)
243228753Smm{
244228753Smm	struct archive_read *a = (struct archive_read *)_a;
245228753Smm	struct tar *tar;
246228753Smm	int r;
247228753Smm
248228753Smm	tar = (struct tar *)malloc(sizeof(*tar));
249228753Smm	if (tar == NULL) {
250228753Smm		archive_set_error(&a->archive, ENOMEM,
251228753Smm		    "Can't allocate tar data");
252228753Smm		return (ARCHIVE_FATAL);
253228753Smm	}
254228753Smm	memset(tar, 0, sizeof(*tar));
255228753Smm
256228753Smm	r = __archive_read_register_format(a, tar, "tar",
257228753Smm	    archive_read_format_tar_bid,
258228753Smm	    NULL,
259228753Smm	    archive_read_format_tar_read_header,
260228753Smm	    archive_read_format_tar_read_data,
261228753Smm	    archive_read_format_tar_skip,
262228753Smm	    archive_read_format_tar_cleanup);
263228753Smm
264228753Smm	if (r != ARCHIVE_OK)
265228753Smm		free(tar);
266228753Smm	return (ARCHIVE_OK);
267228753Smm}
268228753Smm
269228753Smmstatic int
270228753Smmarchive_read_format_tar_cleanup(struct archive_read *a)
271228753Smm{
272228753Smm	struct tar *tar;
273228753Smm
274228753Smm	tar = (struct tar *)(a->format->data);
275228753Smm	gnu_clear_sparse_list(tar);
276228753Smm	archive_string_free(&tar->acl_text);
277228753Smm	archive_string_free(&tar->entry_pathname);
278228753Smm	archive_string_free(&tar->entry_pathname_override);
279228753Smm	archive_string_free(&tar->entry_linkpath);
280228753Smm	archive_string_free(&tar->entry_uname);
281228753Smm	archive_string_free(&tar->entry_gname);
282228753Smm	archive_string_free(&tar->line);
283228753Smm	archive_string_free(&tar->pax_global);
284228753Smm	archive_string_free(&tar->pax_header);
285228753Smm	archive_string_free(&tar->longname);
286228753Smm	archive_string_free(&tar->longlink);
287228753Smm	free(tar->pax_entry);
288228753Smm	free(tar);
289228753Smm	(a->format->data) = NULL;
290228753Smm	return (ARCHIVE_OK);
291228753Smm}
292228753Smm
293228753Smm
294228753Smmstatic int
295228753Smmarchive_read_format_tar_bid(struct archive_read *a)
296228753Smm{
297228753Smm	int bid;
298228753Smm	const void *h;
299228753Smm	const struct archive_entry_header_ustar *header;
300228753Smm
301228753Smm	bid = 0;
302228753Smm
303228753Smm	/* Now let's look at the actual header and see if it matches. */
304228753Smm	h = __archive_read_ahead(a, 512, NULL);
305228753Smm	if (h == NULL)
306228753Smm		return (-1);
307228753Smm
308228753Smm	/* If it's an end-of-archive mark, we can handle it. */
309228753Smm	if ((*(const char *)h) == 0
310228753Smm	    && archive_block_is_null((const unsigned char *)h)) {
311228753Smm		/*
312228753Smm		 * Usually, I bid the number of bits verified, but
313228753Smm		 * in this case, 4096 seems excessive so I picked 10 as
314228753Smm		 * an arbitrary but reasonable-seeming value.
315228753Smm		 */
316228753Smm		return (10);
317228753Smm	}
318228753Smm
319228753Smm	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
320228753Smm	if (!checksum(a, h))
321228753Smm		return (0);
322228753Smm	bid += 48;  /* Checksum is usually 6 octal digits. */
323228753Smm
324228753Smm	header = (const struct archive_entry_header_ustar *)h;
325228753Smm
326228753Smm	/* Recognize POSIX formats. */
327228753Smm	if ((memcmp(header->magic, "ustar\0", 6) == 0)
328228753Smm	    &&(memcmp(header->version, "00", 2)==0))
329228753Smm		bid += 56;
330228753Smm
331228753Smm	/* Recognize GNU tar format. */
332228753Smm	if ((memcmp(header->magic, "ustar ", 6) == 0)
333228753Smm	    &&(memcmp(header->version, " \0", 2)==0))
334228753Smm		bid += 56;
335228753Smm
336228753Smm	/* Type flag must be null, digit or A-Z, a-z. */
337228753Smm	if (header->typeflag[0] != 0 &&
338228753Smm	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
339228753Smm	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
340228753Smm	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
341228753Smm		return (0);
342228753Smm	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
343228753Smm
344228753Smm	/* Sanity check: Look at first byte of mode field. */
345228753Smm	switch (255 & (unsigned)header->mode[0]) {
346228753Smm	case 0: case 255:
347228753Smm		/* Base-256 value: No further verification possible! */
348228753Smm		break;
349228753Smm	case ' ': /* Not recommended, but not illegal, either. */
350228753Smm		break;
351228753Smm	case '0': case '1': case '2': case '3':
352228753Smm	case '4': case '5': case '6': case '7':
353228753Smm		/* Octal Value. */
354228753Smm		/* TODO: Check format of remainder of this field. */
355228753Smm		break;
356228753Smm	default:
357228753Smm		/* Not a valid mode; bail out here. */
358228753Smm		return (0);
359228753Smm	}
360228753Smm	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
361228753Smm
362228753Smm	return (bid);
363228753Smm}
364228753Smm
365228753Smm/*
366228753Smm * The function invoked by archive_read_header().  This
367228753Smm * just sets up a few things and then calls the internal
368228753Smm * tar_read_header() function below.
369228753Smm */
370228753Smmstatic int
371228753Smmarchive_read_format_tar_read_header(struct archive_read *a,
372228753Smm    struct archive_entry *entry)
373228753Smm{
374228753Smm	/*
375228753Smm	 * When converting tar archives to cpio archives, it is
376228753Smm	 * essential that each distinct file have a distinct inode
377228753Smm	 * number.  To simplify this, we keep a static count here to
378228753Smm	 * assign fake dev/inode numbers to each tar entry.  Note that
379228753Smm	 * pax format archives may overwrite this with something more
380228753Smm	 * useful.
381228753Smm	 *
382228753Smm	 * Ideally, we would track every file read from the archive so
383228753Smm	 * that we could assign the same dev/ino pair to hardlinks,
384228753Smm	 * but the memory required to store a complete lookup table is
385228753Smm	 * probably not worthwhile just to support the relatively
386228753Smm	 * obscure tar->cpio conversion case.
387228753Smm	 */
388228753Smm	static int default_inode;
389228753Smm	static int default_dev;
390228753Smm	struct tar *tar;
391228753Smm	struct sparse_block *sp;
392228753Smm	const char *p;
393228753Smm	int r;
394228753Smm	size_t l;
395228753Smm
396228753Smm	/* Assign default device/inode values. */
397228753Smm	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
398228753Smm	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
399228753Smm	/* Limit generated st_ino number to 16 bits. */
400228753Smm	if (default_inode >= 0xffff) {
401228753Smm		++default_dev;
402228753Smm		default_inode = 0;
403228753Smm	}
404228753Smm
405228753Smm	tar = (struct tar *)(a->format->data);
406228753Smm	tar->entry_offset = 0;
407228753Smm	while (tar->sparse_list != NULL) {
408228753Smm		sp = tar->sparse_list;
409228753Smm		tar->sparse_list = sp->next;
410228753Smm		free(sp);
411228753Smm	}
412228753Smm	tar->sparse_last = NULL;
413228753Smm	tar->realsize = -1; /* Mark this as "unset" */
414228753Smm
415228753Smm	r = tar_read_header(a, tar, entry);
416228753Smm
417228753Smm	/*
418228753Smm	 * "non-sparse" files are really just sparse files with
419228753Smm	 * a single block.
420228753Smm	 */
421228753Smm	if (tar->sparse_list == NULL)
422228753Smm		gnu_add_sparse_entry(tar, 0, tar->entry_bytes_remaining);
423228753Smm
424228753Smm	if (r == ARCHIVE_OK) {
425228753Smm		/*
426228753Smm		 * "Regular" entry with trailing '/' is really
427228753Smm		 * directory: This is needed for certain old tar
428228753Smm		 * variants and even for some broken newer ones.
429228753Smm		 */
430228753Smm		p = archive_entry_pathname(entry);
431228753Smm		l = strlen(p);
432228753Smm		if (archive_entry_filetype(entry) == AE_IFREG
433228753Smm		    && p[l-1] == '/')
434228753Smm			archive_entry_set_filetype(entry, AE_IFDIR);
435228753Smm	}
436228753Smm	return (r);
437228753Smm}
438228753Smm
439228753Smmstatic int
440228753Smmarchive_read_format_tar_read_data(struct archive_read *a,
441228753Smm    const void **buff, size_t *size, off_t *offset)
442228753Smm{
443228753Smm	ssize_t bytes_read;
444228753Smm	struct tar *tar;
445228753Smm	struct sparse_block *p;
446228753Smm
447228753Smm	tar = (struct tar *)(a->format->data);
448228753Smm
449228753Smm	if (tar->sparse_gnu_pending) {
450228753Smm		if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) {
451228753Smm			tar->sparse_gnu_pending = 0;
452228753Smm			/* Read initial sparse map. */
453228753Smm			bytes_read = gnu_sparse_10_read(a, tar);
454228753Smm			tar->entry_bytes_remaining -= bytes_read;
455228753Smm			if (bytes_read < 0)
456228753Smm				return (bytes_read);
457228753Smm		} else {
458228753Smm			*size = 0;
459228753Smm			*offset = 0;
460228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
461228753Smm			    "Unrecognized GNU sparse file format");
462228753Smm			return (ARCHIVE_WARN);
463228753Smm		}
464228753Smm		tar->sparse_gnu_pending = 0;
465228753Smm	}
466228753Smm
467228753Smm	/* Remove exhausted entries from sparse list. */
468228753Smm	while (tar->sparse_list != NULL &&
469228753Smm	    tar->sparse_list->remaining == 0) {
470228753Smm		p = tar->sparse_list;
471228753Smm		tar->sparse_list = p->next;
472228753Smm		free(p);
473228753Smm	}
474228753Smm
475228753Smm	/* If we're at end of file, return EOF. */
476228753Smm	if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) {
477228753Smm		if (__archive_read_skip(a, tar->entry_padding) < 0)
478228753Smm			return (ARCHIVE_FATAL);
479228753Smm		tar->entry_padding = 0;
480228753Smm		*buff = NULL;
481228753Smm		*size = 0;
482228753Smm		*offset = tar->realsize;
483228753Smm		return (ARCHIVE_EOF);
484228753Smm	}
485228753Smm
486228753Smm	*buff = __archive_read_ahead(a, 1, &bytes_read);
487228753Smm	if (bytes_read < 0)
488228753Smm		return (ARCHIVE_FATAL);
489228753Smm	if (*buff == NULL) {
490228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
491228753Smm		    "Truncated tar archive");
492228753Smm		return (ARCHIVE_FATAL);
493228753Smm	}
494228753Smm	if (bytes_read > tar->entry_bytes_remaining)
495228753Smm		bytes_read = tar->entry_bytes_remaining;
496228753Smm	/* Don't read more than is available in the
497228753Smm	 * current sparse block. */
498228753Smm	if (tar->sparse_list->remaining < bytes_read)
499228753Smm		bytes_read = tar->sparse_list->remaining;
500228753Smm	*size = bytes_read;
501228753Smm	*offset = tar->sparse_list->offset;
502228753Smm	tar->sparse_list->remaining -= bytes_read;
503228753Smm	tar->sparse_list->offset += bytes_read;
504228753Smm	tar->entry_bytes_remaining -= bytes_read;
505228753Smm	__archive_read_consume(a, bytes_read);
506228753Smm	return (ARCHIVE_OK);
507228753Smm}
508228753Smm
509228753Smmstatic int
510228753Smmarchive_read_format_tar_skip(struct archive_read *a)
511228753Smm{
512228753Smm	int64_t bytes_skipped;
513228753Smm	struct tar* tar;
514228753Smm
515228753Smm	tar = (struct tar *)(a->format->data);
516228753Smm
517228753Smm	/*
518228753Smm	 * Compression layer skip functions are required to either skip the
519228753Smm	 * length requested or fail, so we can rely upon the entire entry
520228753Smm	 * plus padding being skipped.
521228753Smm	 */
522228753Smm	bytes_skipped = __archive_read_skip(a,
523228753Smm	    tar->entry_bytes_remaining + tar->entry_padding);
524228753Smm	if (bytes_skipped < 0)
525228753Smm		return (ARCHIVE_FATAL);
526228753Smm
527228753Smm	tar->entry_bytes_remaining = 0;
528228753Smm	tar->entry_padding = 0;
529228753Smm
530228753Smm	/* Free the sparse list. */
531228753Smm	gnu_clear_sparse_list(tar);
532228753Smm
533228753Smm	return (ARCHIVE_OK);
534228753Smm}
535228753Smm
536228753Smm/*
537228753Smm * This function recursively interprets all of the headers associated
538228753Smm * with a single entry.
539228753Smm */
540228753Smmstatic int
541228753Smmtar_read_header(struct archive_read *a, struct tar *tar,
542228753Smm    struct archive_entry *entry)
543228753Smm{
544228753Smm	ssize_t bytes;
545228753Smm	int err;
546228753Smm	const void *h;
547228753Smm	const struct archive_entry_header_ustar *header;
548228753Smm
549228753Smm	/* Read 512-byte header record */
550228753Smm	h = __archive_read_ahead(a, 512, &bytes);
551228753Smm	if (bytes < 0)
552228753Smm		return (bytes);
553228753Smm	if (bytes < 512) {  /* Short read or EOF. */
554228753Smm		/* Try requesting just one byte and see what happens. */
555228753Smm		(void)__archive_read_ahead(a, 1, &bytes);
556228753Smm		if (bytes == 0) {
557228753Smm			/*
558228753Smm			 * The archive ends at a 512-byte boundary but
559228753Smm			 * without a proper end-of-archive marker.
560228753Smm			 * Yes, there are tar writers that do this;
561228753Smm			 * hold our nose and accept it.
562228753Smm			 */
563228753Smm			return (ARCHIVE_EOF);
564228753Smm		}
565228753Smm		/* Archive ends with a partial block; this is bad. */
566228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
567228753Smm		    "Truncated tar archive");
568228753Smm		return (ARCHIVE_FATAL);
569228753Smm	}
570228753Smm	__archive_read_consume(a, 512);
571228753Smm
572228753Smm
573228753Smm	/* Check for end-of-archive mark. */
574228753Smm	if (((*(const char *)h)==0) && archive_block_is_null((const unsigned char *)h)) {
575228753Smm		/* Try to consume a second all-null record, as well. */
576228753Smm		h = __archive_read_ahead(a, 512, NULL);
577228753Smm		if (h != NULL)
578228753Smm			__archive_read_consume(a, 512);
579228753Smm		archive_clear_error(&a->archive);
580228753Smm		if (a->archive.archive_format_name == NULL) {
581228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
582228753Smm			a->archive.archive_format_name = "tar";
583228753Smm		}
584228753Smm		return (ARCHIVE_EOF);
585228753Smm	}
586228753Smm
587228753Smm	/*
588228753Smm	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
589228753Smm	 * then the client is likely to just retry.  This is a very
590228753Smm	 * crude way to search for the next valid header!
591228753Smm	 *
592228753Smm	 * TODO: Improve this by implementing a real header scan.
593228753Smm	 */
594228753Smm	if (!checksum(a, h)) {
595228753Smm		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
596228753Smm		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
597228753Smm	}
598228753Smm
599228753Smm	if (++tar->header_recursion_depth > 32) {
600228753Smm		archive_set_error(&a->archive, EINVAL, "Too many special headers");
601228753Smm		return (ARCHIVE_WARN);
602228753Smm	}
603228753Smm
604228753Smm	/* Determine the format variant. */
605228753Smm	header = (const struct archive_entry_header_ustar *)h;
606228753Smm	switch(header->typeflag[0]) {
607228753Smm	case 'A': /* Solaris tar ACL */
608228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
609228753Smm		a->archive.archive_format_name = "Solaris tar";
610228753Smm		err = header_Solaris_ACL(a, tar, entry, h);
611228753Smm		break;
612228753Smm	case 'g': /* POSIX-standard 'g' header. */
613228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
614228753Smm		a->archive.archive_format_name = "POSIX pax interchange format";
615228753Smm		err = header_pax_global(a, tar, entry, h);
616228753Smm		break;
617228753Smm	case 'K': /* Long link name (GNU tar, others) */
618228753Smm		err = header_longlink(a, tar, entry, h);
619228753Smm		break;
620228753Smm	case 'L': /* Long filename (GNU tar, others) */
621228753Smm		err = header_longname(a, tar, entry, h);
622228753Smm		break;
623228753Smm	case 'V': /* GNU volume header */
624228753Smm		err = header_volume(a, tar, entry, h);
625228753Smm		break;
626228753Smm	case 'X': /* Used by SUN tar; same as 'x'. */
627228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
628228753Smm		a->archive.archive_format_name =
629228753Smm		    "POSIX pax interchange format (Sun variant)";
630228753Smm		err = header_pax_extensions(a, tar, entry, h);
631228753Smm		break;
632228753Smm	case 'x': /* POSIX-standard 'x' header. */
633228753Smm		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
634228753Smm		a->archive.archive_format_name = "POSIX pax interchange format";
635228753Smm		err = header_pax_extensions(a, tar, entry, h);
636228753Smm		break;
637228753Smm	default:
638228753Smm		if (memcmp(header->magic, "ustar  \0", 8) == 0) {
639228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
640228753Smm			a->archive.archive_format_name = "GNU tar format";
641228753Smm			err = header_gnutar(a, tar, entry, h);
642228753Smm		} else if (memcmp(header->magic, "ustar", 5) == 0) {
643228753Smm			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
644228753Smm				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
645228753Smm				a->archive.archive_format_name = "POSIX ustar format";
646228753Smm			}
647228753Smm			err = header_ustar(a, tar, entry, h);
648228753Smm		} else {
649228753Smm			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
650228753Smm			a->archive.archive_format_name = "tar (non-POSIX)";
651228753Smm			err = header_old_tar(a, tar, entry, h);
652228753Smm		}
653228753Smm	}
654228753Smm	--tar->header_recursion_depth;
655228753Smm	/* We return warnings or success as-is.  Anything else is fatal. */
656228753Smm	if (err == ARCHIVE_WARN || err == ARCHIVE_OK)
657228753Smm		return (err);
658228753Smm	if (err == ARCHIVE_EOF)
659228753Smm		/* EOF when recursively reading a header is bad. */
660228753Smm		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
661228753Smm	return (ARCHIVE_FATAL);
662228753Smm}
663228753Smm
664228753Smm/*
665228753Smm * Return true if block checksum is correct.
666228753Smm */
667228753Smmstatic int
668228753Smmchecksum(struct archive_read *a, const void *h)
669228753Smm{
670228753Smm	const unsigned char *bytes;
671228753Smm	const struct archive_entry_header_ustar	*header;
672228753Smm	int check, i, sum;
673228753Smm
674228753Smm	(void)a; /* UNUSED */
675228753Smm	bytes = (const unsigned char *)h;
676228753Smm	header = (const struct archive_entry_header_ustar *)h;
677228753Smm
678228753Smm	/*
679228753Smm	 * Test the checksum.  Note that POSIX specifies _unsigned_
680228753Smm	 * bytes for this calculation.
681228753Smm	 */
682228753Smm	sum = tar_atol(header->checksum, sizeof(header->checksum));
683228753Smm	check = 0;
684228753Smm	for (i = 0; i < 148; i++)
685228753Smm		check += (unsigned char)bytes[i];
686228753Smm	for (; i < 156; i++)
687228753Smm		check += 32;
688228753Smm	for (; i < 512; i++)
689228753Smm		check += (unsigned char)bytes[i];
690228753Smm	if (sum == check)
691228753Smm		return (1);
692228753Smm
693228753Smm	/*
694228753Smm	 * Repeat test with _signed_ bytes, just in case this archive
695228753Smm	 * was created by an old BSD, Solaris, or HP-UX tar with a
696228753Smm	 * broken checksum calculation.
697228753Smm	 */
698228753Smm	check = 0;
699228753Smm	for (i = 0; i < 148; i++)
700228753Smm		check += (signed char)bytes[i];
701228753Smm	for (; i < 156; i++)
702228753Smm		check += 32;
703228753Smm	for (; i < 512; i++)
704228753Smm		check += (signed char)bytes[i];
705228753Smm	if (sum == check)
706228753Smm		return (1);
707228753Smm
708228753Smm	return (0);
709228753Smm}
710228753Smm
/*
 * Return true (1) if the 512 bytes starting at p are all zero,
 * false (0) as soon as a non-zero byte is found.
 */
static int
archive_block_is_null(const unsigned char *p)
{
	const unsigned char *end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		p++;
	}
	return (1);
}
724228753Smm
725228753Smm/*
726228753Smm * Interpret 'A' Solaris ACL header
727228753Smm */
728228753Smmstatic int
729228753Smmheader_Solaris_ACL(struct archive_read *a, struct tar *tar,
730228753Smm    struct archive_entry *entry, const void *h)
731228753Smm{
732228753Smm	const struct archive_entry_header_ustar *header;
733228753Smm	size_t size;
734228753Smm	int err;
735228753Smm	int64_t type;
736228753Smm	char *acl, *p;
737228753Smm	wchar_t *wp;
738228753Smm
739228753Smm	/*
740228753Smm	 * read_body_to_string adds a NUL terminator, but we need a little
741228753Smm	 * more to make sure that we don't overrun acl_text later.
742228753Smm	 */
743228753Smm	header = (const struct archive_entry_header_ustar *)h;
744228753Smm	size = tar_atol(header->size, sizeof(header->size));
745228753Smm	err = read_body_to_string(a, tar, &(tar->acl_text), h);
746228753Smm	if (err != ARCHIVE_OK)
747228753Smm		return (err);
748228753Smm	/* Recursively read next header */
749228753Smm	err = tar_read_header(a, tar, entry);
750228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
751228753Smm		return (err);
752228753Smm
753228753Smm	/* TODO: Examine the first characters to see if this
754228753Smm	 * is an AIX ACL descriptor.  We'll likely never support
755228753Smm	 * them, but it would be polite to recognize and warn when
756228753Smm	 * we do see them. */
757228753Smm
758228753Smm	/* Leading octal number indicates ACL type and number of entries. */
759228753Smm	p = acl = tar->acl_text.s;
760228753Smm	type = 0;
761228753Smm	while (*p != '\0' && p < acl + size) {
762228753Smm		if (*p < '0' || *p > '7') {
763228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
764228753Smm			    "Malformed Solaris ACL attribute (invalid digit)");
765228753Smm			return(ARCHIVE_WARN);
766228753Smm		}
767228753Smm		type <<= 3;
768228753Smm		type += *p - '0';
769228753Smm		if (type > 077777777) {
770228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
771228753Smm			    "Malformed Solaris ACL attribute (count too large)");
772228753Smm			return (ARCHIVE_WARN);
773228753Smm		}
774228753Smm		p++;
775228753Smm	}
776228753Smm	switch ((int)type & ~0777777) {
777228753Smm	case 01000000:
778228753Smm		/* POSIX.1e ACL */
779228753Smm		break;
780228753Smm	case 03000000:
781228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
782228753Smm		    "Solaris NFSv4 ACLs not supported");
783228753Smm		return (ARCHIVE_WARN);
784228753Smm	default:
785228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
786228753Smm		    "Malformed Solaris ACL attribute (unsupported type %o)",
787228753Smm		    (int)type);
788228753Smm		return (ARCHIVE_WARN);
789228753Smm	}
790228753Smm	p++;
791228753Smm
792228753Smm	if (p >= acl + size) {
793228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
794228753Smm		    "Malformed Solaris ACL attribute (body overflow)");
795228753Smm		return(ARCHIVE_WARN);
796228753Smm	}
797228753Smm
798228753Smm	/* ACL text is null-terminated; find the end. */
799228753Smm	size -= (p - acl);
800228753Smm	acl = p;
801228753Smm
802228753Smm	while (*p != '\0' && p < acl + size)
803228753Smm		p++;
804228753Smm
805228753Smm	wp = utf8_decode(tar, acl, p - acl);
806228753Smm	err = __archive_entry_acl_parse_w(entry, wp,
807228753Smm	    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
808228753Smm	if (err != ARCHIVE_OK)
809228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
810228753Smm		    "Malformed Solaris ACL attribute (unparsable)");
811228753Smm	return (err);
812228753Smm}
813228753Smm
814228753Smm/*
815228753Smm * Interpret 'K' long linkname header.
816228753Smm */
817228753Smmstatic int
818228753Smmheader_longlink(struct archive_read *a, struct tar *tar,
819228753Smm    struct archive_entry *entry, const void *h)
820228753Smm{
821228753Smm	int err;
822228753Smm
823228753Smm	err = read_body_to_string(a, tar, &(tar->longlink), h);
824228753Smm	if (err != ARCHIVE_OK)
825228753Smm		return (err);
826228753Smm	err = tar_read_header(a, tar, entry);
827228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
828228753Smm		return (err);
829228753Smm	/* Set symlink if symlink already set, else hardlink. */
830228753Smm	archive_entry_copy_link(entry, tar->longlink.s);
831228753Smm	return (ARCHIVE_OK);
832228753Smm}
833228753Smm
834228753Smm/*
835228753Smm * Interpret 'L' long filename header.
836228753Smm */
837228753Smmstatic int
838228753Smmheader_longname(struct archive_read *a, struct tar *tar,
839228753Smm    struct archive_entry *entry, const void *h)
840228753Smm{
841228753Smm	int err;
842228753Smm
843228753Smm	err = read_body_to_string(a, tar, &(tar->longname), h);
844228753Smm	if (err != ARCHIVE_OK)
845228753Smm		return (err);
846228753Smm	/* Read and parse "real" header, then override name. */
847228753Smm	err = tar_read_header(a, tar, entry);
848228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
849228753Smm		return (err);
850228753Smm	archive_entry_copy_pathname(entry, tar->longname.s);
851228753Smm	return (ARCHIVE_OK);
852228753Smm}
853228753Smm
854228753Smm
/*
 * Interpret 'V' GNU tar volume header.
 *
 * Nothing from the volume header itself is used; the next header is
 * read in its place and that result is returned unchanged.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	int r;

	(void)h; /* UNUSED */

	r = tar_read_header(a, tar, entry);
	return (r);
}
867228753Smm
868228753Smm/*
869228753Smm * Read body of an archive entry into an archive_string object.
870228753Smm */
871228753Smmstatic int
872228753Smmread_body_to_string(struct archive_read *a, struct tar *tar,
873228753Smm    struct archive_string *as, const void *h)
874228753Smm{
875228753Smm	off_t size, padded_size;
876228753Smm	const struct archive_entry_header_ustar *header;
877228753Smm	const void *src;
878228753Smm
879228753Smm	(void)tar; /* UNUSED */
880228753Smm	header = (const struct archive_entry_header_ustar *)h;
881228753Smm	size  = tar_atol(header->size, sizeof(header->size));
882228753Smm	if ((size > 1048576) || (size < 0)) {
883228753Smm		archive_set_error(&a->archive, EINVAL,
884228753Smm		    "Special header too large");
885228753Smm		return (ARCHIVE_FATAL);
886228753Smm	}
887228753Smm
888228753Smm	/* Fail if we can't make our buffer big enough. */
889228753Smm	if (archive_string_ensure(as, size+1) == NULL) {
890228753Smm		archive_set_error(&a->archive, ENOMEM,
891228753Smm		    "No memory");
892228753Smm		return (ARCHIVE_FATAL);
893228753Smm	}
894228753Smm
895228753Smm 	/* Read the body into the string. */
896228753Smm	padded_size = (size + 511) & ~ 511;
897228753Smm	src = __archive_read_ahead(a, padded_size, NULL);
898228753Smm	if (src == NULL)
899228753Smm		return (ARCHIVE_FATAL);
900228753Smm	memcpy(as->s, src, size);
901228753Smm	__archive_read_consume(a, padded_size);
902228753Smm	as->s[size] = '\0';
903228753Smm	return (ARCHIVE_OK);
904228753Smm}
905228753Smm
906228753Smm/*
907228753Smm * Parse out common header elements.
908228753Smm *
909228753Smm * This would be the same as header_old_tar, except that the
910228753Smm * filename is handled slightly differently for old and POSIX
911228753Smm * entries  (POSIX entries support a 'prefix').  This factoring
912228753Smm * allows header_old_tar and header_ustar
913228753Smm * to handle filenames differently, while still putting most of the
914228753Smm * common parsing into one place.
915228753Smm */
916228753Smmstatic int
917228753Smmheader_common(struct archive_read *a, struct tar *tar,
918228753Smm    struct archive_entry *entry, const void *h)
919228753Smm{
920228753Smm	const struct archive_entry_header_ustar	*header;
921228753Smm	char	tartype;
922228753Smm
923228753Smm	(void)a; /* UNUSED */
924228753Smm
925228753Smm	header = (const struct archive_entry_header_ustar *)h;
926228753Smm	if (header->linkname[0])
927228753Smm		archive_strncpy(&(tar->entry_linkpath), header->linkname,
928228753Smm		    sizeof(header->linkname));
929228753Smm	else
930228753Smm		archive_string_empty(&(tar->entry_linkpath));
931228753Smm
932228753Smm	/* Parse out the numeric fields (all are octal) */
933228753Smm	archive_entry_set_mode(entry, tar_atol(header->mode, sizeof(header->mode)));
934228753Smm	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
935228753Smm	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
936228753Smm	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
937228753Smm	tar->realsize = tar->entry_bytes_remaining;
938228753Smm	archive_entry_set_size(entry, tar->entry_bytes_remaining);
939228753Smm	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
940228753Smm
941228753Smm	/* Handle the tar type flag appropriately. */
942228753Smm	tartype = header->typeflag[0];
943228753Smm
944228753Smm	switch (tartype) {
945228753Smm	case '1': /* Hard link */
946228753Smm		archive_entry_copy_hardlink(entry, tar->entry_linkpath.s);
947228753Smm		/*
948228753Smm		 * The following may seem odd, but: Technically, tar
949228753Smm		 * does not store the file type for a "hard link"
950228753Smm		 * entry, only the fact that it is a hard link.  So, I
951228753Smm		 * leave the type zero normally.  But, pax interchange
952228753Smm		 * format allows hard links to have data, which
953228753Smm		 * implies that the underlying entry is a regular
954228753Smm		 * file.
955228753Smm		 */
956228753Smm		if (archive_entry_size(entry) > 0)
957228753Smm			archive_entry_set_filetype(entry, AE_IFREG);
958228753Smm
959228753Smm		/*
960228753Smm		 * A tricky point: Traditionally, tar readers have
961228753Smm		 * ignored the size field when reading hardlink
962228753Smm		 * entries, and some writers put non-zero sizes even
963228753Smm		 * though the body is empty.  POSIX blessed this
964228753Smm		 * convention in the 1988 standard, but broke with
965228753Smm		 * this tradition in 2001 by permitting hardlink
966228753Smm		 * entries to store valid bodies in pax interchange
967228753Smm		 * format, but not in ustar format.  Since there is no
968228753Smm		 * hard and fast way to distinguish pax interchange
969228753Smm		 * from earlier archives (the 'x' and 'g' entries are
970228753Smm		 * optional, after all), we need a heuristic.
971228753Smm		 */
972228753Smm		if (archive_entry_size(entry) == 0) {
973228753Smm			/* If the size is already zero, we're done. */
974228753Smm		}  else if (a->archive.archive_format
975228753Smm		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
976228753Smm			/* Definitely pax extended; must obey hardlink size. */
977228753Smm		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
978228753Smm		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
979228753Smm		{
980228753Smm			/* Old-style or GNU tar: we must ignore the size. */
981228753Smm			archive_entry_set_size(entry, 0);
982228753Smm			tar->entry_bytes_remaining = 0;
983228753Smm		} else if (archive_read_format_tar_bid(a) > 50) {
984228753Smm			/*
985228753Smm			 * We don't know if it's pax: If the bid
986228753Smm			 * function sees a valid ustar header
987228753Smm			 * immediately following, then let's ignore
988228753Smm			 * the hardlink size.
989228753Smm			 */
990228753Smm			archive_entry_set_size(entry, 0);
991228753Smm			tar->entry_bytes_remaining = 0;
992228753Smm		}
993228753Smm		/*
994228753Smm		 * TODO: There are still two cases I'd like to handle:
995228753Smm		 *   = a ustar non-pax archive with a hardlink entry at
996228753Smm		 *     end-of-archive.  (Look for block of nulls following?)
997228753Smm		 *   = a pax archive that has not seen any pax headers
998228753Smm		 *     and has an entry which is a hardlink entry storing
999228753Smm		 *     a body containing an uncompressed tar archive.
1000228753Smm		 * The first is worth addressing; I don't see any reliable
1001228753Smm		 * way to deal with the second possibility.
1002228753Smm		 */
1003228753Smm		break;
1004228753Smm	case '2': /* Symlink */
1005228753Smm		archive_entry_set_filetype(entry, AE_IFLNK);
1006228753Smm		archive_entry_set_size(entry, 0);
1007228753Smm		tar->entry_bytes_remaining = 0;
1008228753Smm		archive_entry_copy_symlink(entry, tar->entry_linkpath.s);
1009228753Smm		break;
1010228753Smm	case '3': /* Character device */
1011228753Smm		archive_entry_set_filetype(entry, AE_IFCHR);
1012228753Smm		archive_entry_set_size(entry, 0);
1013228753Smm		tar->entry_bytes_remaining = 0;
1014228753Smm		break;
1015228753Smm	case '4': /* Block device */
1016228753Smm		archive_entry_set_filetype(entry, AE_IFBLK);
1017228753Smm		archive_entry_set_size(entry, 0);
1018228753Smm		tar->entry_bytes_remaining = 0;
1019228753Smm		break;
1020228753Smm	case '5': /* Dir */
1021228753Smm		archive_entry_set_filetype(entry, AE_IFDIR);
1022228753Smm		archive_entry_set_size(entry, 0);
1023228753Smm		tar->entry_bytes_remaining = 0;
1024228753Smm		break;
1025228753Smm	case '6': /* FIFO device */
1026228753Smm		archive_entry_set_filetype(entry, AE_IFIFO);
1027228753Smm		archive_entry_set_size(entry, 0);
1028228753Smm		tar->entry_bytes_remaining = 0;
1029228753Smm		break;
1030228753Smm	case 'D': /* GNU incremental directory type */
1031228753Smm		/*
1032228753Smm		 * No special handling is actually required here.
1033228753Smm		 * It might be nice someday to preprocess the file list and
1034228753Smm		 * provide it to the client, though.
1035228753Smm		 */
1036228753Smm		archive_entry_set_filetype(entry, AE_IFDIR);
1037228753Smm		break;
1038228753Smm	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1039228753Smm		/*
1040228753Smm		 * As far as I can tell, this is just like a regular file
1041228753Smm		 * entry, except that the contents should be _appended_ to
1042228753Smm		 * the indicated file at the indicated offset.  This may
1043228753Smm		 * require some API work to fully support.
1044228753Smm		 */
1045228753Smm		break;
1046228753Smm	case 'N': /* Old GNU "long filename" entry. */
1047228753Smm		/* The body of this entry is a script for renaming
1048228753Smm		 * previously-extracted entries.  Ugh.  It will never
1049228753Smm		 * be supported by libarchive. */
1050228753Smm		archive_entry_set_filetype(entry, AE_IFREG);
1051228753Smm		break;
1052228753Smm	case 'S': /* GNU sparse files */
1053228753Smm		/*
1054228753Smm		 * Sparse files are really just regular files with
1055228753Smm		 * sparse information in the extended area.
1056228753Smm		 */
1057228753Smm		/* FALLTHROUGH */
1058228753Smm	default: /* Regular file  and non-standard types */
1059228753Smm		/*
1060228753Smm		 * Per POSIX: non-recognized types should always be
1061228753Smm		 * treated as regular files.
1062228753Smm		 */
1063228753Smm		archive_entry_set_filetype(entry, AE_IFREG);
1064228753Smm		break;
1065228753Smm	}
1066228753Smm	return (0);
1067228753Smm}
1068228753Smm
1069228753Smm/*
1070228753Smm * Parse out header elements for "old-style" tar archives.
1071228753Smm */
1072228753Smmstatic int
1073228753Smmheader_old_tar(struct archive_read *a, struct tar *tar,
1074228753Smm    struct archive_entry *entry, const void *h)
1075228753Smm{
1076228753Smm	const struct archive_entry_header_ustar	*header;
1077228753Smm
1078228753Smm	/* Copy filename over (to ensure null termination). */
1079228753Smm	header = (const struct archive_entry_header_ustar *)h;
1080228753Smm	archive_strncpy(&(tar->entry_pathname), header->name, sizeof(header->name));
1081228753Smm	archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1082228753Smm
1083228753Smm	/* Grab rest of common fields */
1084228753Smm	header_common(a, tar, entry, h);
1085228753Smm
1086228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1087228753Smm	return (0);
1088228753Smm}
1089228753Smm
1090228753Smm/*
1091228753Smm * Parse a file header for a pax extended archive entry.
1092228753Smm */
1093228753Smmstatic int
1094228753Smmheader_pax_global(struct archive_read *a, struct tar *tar,
1095228753Smm    struct archive_entry *entry, const void *h)
1096228753Smm{
1097228753Smm	int err;
1098228753Smm
1099228753Smm	err = read_body_to_string(a, tar, &(tar->pax_global), h);
1100228753Smm	if (err != ARCHIVE_OK)
1101228753Smm		return (err);
1102228753Smm	err = tar_read_header(a, tar, entry);
1103228753Smm	return (err);
1104228753Smm}
1105228753Smm
1106228753Smmstatic int
1107228753Smmheader_pax_extensions(struct archive_read *a, struct tar *tar,
1108228753Smm    struct archive_entry *entry, const void *h)
1109228753Smm{
1110228753Smm	int err, err2;
1111228753Smm
1112228753Smm	err = read_body_to_string(a, tar, &(tar->pax_header), h);
1113228753Smm	if (err != ARCHIVE_OK)
1114228753Smm		return (err);
1115228753Smm
1116228753Smm	/* Parse the next header. */
1117228753Smm	err = tar_read_header(a, tar, entry);
1118228753Smm	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1119228753Smm		return (err);
1120228753Smm
1121228753Smm	/*
1122228753Smm	 * TODO: Parse global/default options into 'entry' struct here
1123228753Smm	 * before handling file-specific options.
1124228753Smm	 *
1125228753Smm	 * This design (parse standard header, then overwrite with pax
1126228753Smm	 * extended attribute data) usually works well, but isn't ideal;
1127228753Smm	 * it would be better to parse the pax extended attributes first
1128228753Smm	 * and then skip any fields in the standard header that were
1129228753Smm	 * defined in the pax header.
1130228753Smm	 */
1131228753Smm	err2 = pax_header(a, tar, entry, tar->pax_header.s);
1132228753Smm	err =  err_combine(err, err2);
1133228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1134228753Smm	return (err);
1135228753Smm}
1136228753Smm
1137228753Smm
1138228753Smm/*
1139228753Smm * Parse a file header for a Posix "ustar" archive entry.  This also
1140228753Smm * handles "pax" or "extended ustar" entries.
1141228753Smm */
1142228753Smmstatic int
1143228753Smmheader_ustar(struct archive_read *a, struct tar *tar,
1144228753Smm    struct archive_entry *entry, const void *h)
1145228753Smm{
1146228753Smm	const struct archive_entry_header_ustar	*header;
1147228753Smm	struct archive_string *as;
1148228753Smm
1149228753Smm	header = (const struct archive_entry_header_ustar *)h;
1150228753Smm
1151228753Smm	/* Copy name into an internal buffer to ensure null-termination. */
1152228753Smm	as = &(tar->entry_pathname);
1153228753Smm	if (header->prefix[0]) {
1154228753Smm		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1155228753Smm		if (as->s[archive_strlen(as) - 1] != '/')
1156228753Smm			archive_strappend_char(as, '/');
1157228753Smm		archive_strncat(as, header->name, sizeof(header->name));
1158228753Smm	} else
1159228753Smm		archive_strncpy(as, header->name, sizeof(header->name));
1160228753Smm
1161228753Smm	archive_entry_copy_pathname(entry, as->s);
1162228753Smm
1163228753Smm	/* Handle rest of common fields. */
1164228753Smm	header_common(a, tar, entry, h);
1165228753Smm
1166228753Smm	/* Handle POSIX ustar fields. */
1167228753Smm	archive_strncpy(&(tar->entry_uname), header->uname,
1168228753Smm	    sizeof(header->uname));
1169228753Smm	archive_entry_copy_uname(entry, tar->entry_uname.s);
1170228753Smm
1171228753Smm	archive_strncpy(&(tar->entry_gname), header->gname,
1172228753Smm	    sizeof(header->gname));
1173228753Smm	archive_entry_copy_gname(entry, tar->entry_gname.s);
1174228753Smm
1175228753Smm	/* Parse out device numbers only for char and block specials. */
1176228753Smm	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1177228753Smm		archive_entry_set_rdevmajor(entry,
1178228753Smm		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1179228753Smm		archive_entry_set_rdevminor(entry,
1180228753Smm		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1181228753Smm	}
1182228753Smm
1183228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1184228753Smm
1185228753Smm	return (0);
1186228753Smm}
1187228753Smm
1188228753Smm
1189228753Smm/*
1190228753Smm * Parse the pax extended attributes record.
1191228753Smm *
1192228753Smm * Returns non-zero if there's an error in the data.
1193228753Smm */
1194228753Smmstatic int
1195228753Smmpax_header(struct archive_read *a, struct tar *tar,
1196228753Smm    struct archive_entry *entry, char *attr)
1197228753Smm{
1198228753Smm	size_t attr_length, l, line_length;
1199228753Smm	char *p;
1200228753Smm	char *key, *value;
1201228753Smm	int err, err2;
1202228753Smm
1203228753Smm	attr_length = strlen(attr);
1204228753Smm	tar->pax_hdrcharset_binary = 0;
1205228753Smm	archive_string_empty(&(tar->entry_gname));
1206228753Smm	archive_string_empty(&(tar->entry_linkpath));
1207228753Smm	archive_string_empty(&(tar->entry_pathname));
1208228753Smm	archive_string_empty(&(tar->entry_pathname_override));
1209228753Smm	archive_string_empty(&(tar->entry_uname));
1210228753Smm	err = ARCHIVE_OK;
1211228753Smm	while (attr_length > 0) {
1212228753Smm		/* Parse decimal length field at start of line. */
1213228753Smm		line_length = 0;
1214228753Smm		l = attr_length;
1215228753Smm		p = attr; /* Record start of line. */
1216228753Smm		while (l>0) {
1217228753Smm			if (*p == ' ') {
1218228753Smm				p++;
1219228753Smm				l--;
1220228753Smm				break;
1221228753Smm			}
1222228753Smm			if (*p < '0' || *p > '9') {
1223228753Smm				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1224228753Smm				    "Ignoring malformed pax extended attributes");
1225228753Smm				return (ARCHIVE_WARN);
1226228753Smm			}
1227228753Smm			line_length *= 10;
1228228753Smm			line_length += *p - '0';
1229228753Smm			if (line_length > 999999) {
1230228753Smm				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1231228753Smm				    "Rejecting pax extended attribute > 1MB");
1232228753Smm				return (ARCHIVE_WARN);
1233228753Smm			}
1234228753Smm			p++;
1235228753Smm			l--;
1236228753Smm		}
1237228753Smm
1238228753Smm		/*
1239228753Smm		 * Parsed length must be no bigger than available data,
1240228753Smm		 * at least 1, and the last character of the line must
1241228753Smm		 * be '\n'.
1242228753Smm		 */
1243228753Smm		if (line_length > attr_length
1244228753Smm		    || line_length < 1
1245228753Smm		    || attr[line_length - 1] != '\n')
1246228753Smm		{
1247228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1248228753Smm			    "Ignoring malformed pax extended attribute");
1249228753Smm			return (ARCHIVE_WARN);
1250228753Smm		}
1251228753Smm
1252228753Smm		/* Null-terminate the line. */
1253228753Smm		attr[line_length - 1] = '\0';
1254228753Smm
1255228753Smm		/* Find end of key and null terminate it. */
1256228753Smm		key = p;
1257228753Smm		if (key[0] == '=')
1258228753Smm			return (-1);
1259228753Smm		while (*p && *p != '=')
1260228753Smm			++p;
1261228753Smm		if (*p == '\0') {
1262228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1263228753Smm			    "Invalid pax extended attributes");
1264228753Smm			return (ARCHIVE_WARN);
1265228753Smm		}
1266228753Smm		*p = '\0';
1267228753Smm
1268228753Smm		/* Identify null-terminated 'value' portion. */
1269228753Smm		value = p + 1;
1270228753Smm
1271228753Smm		/* Identify this attribute and set it in the entry. */
1272228753Smm		err2 = pax_attribute(tar, entry, key, value);
1273228753Smm		err = err_combine(err, err2);
1274228753Smm
1275228753Smm		/* Skip to next line */
1276228753Smm		attr += line_length;
1277228753Smm		attr_length -= line_length;
1278228753Smm	}
1279228753Smm	if (archive_strlen(&(tar->entry_gname)) > 0) {
1280228753Smm		value = tar->entry_gname.s;
1281228753Smm		if (tar->pax_hdrcharset_binary)
1282228753Smm			archive_entry_copy_gname(entry, value);
1283228753Smm		else {
1284228753Smm			if (!archive_entry_update_gname_utf8(entry, value)) {
1285228753Smm				err = ARCHIVE_WARN;
1286228753Smm				archive_set_error(&a->archive,
1287228753Smm				    ARCHIVE_ERRNO_FILE_FORMAT,
1288228753Smm				    "Gname in pax header can't "
1289228753Smm				    "be converted to current locale.");
1290228753Smm			}
1291228753Smm		}
1292228753Smm	}
1293228753Smm	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1294228753Smm		value = tar->entry_linkpath.s;
1295228753Smm		if (tar->pax_hdrcharset_binary)
1296228753Smm			archive_entry_copy_link(entry, value);
1297228753Smm		else {
1298228753Smm			if (!archive_entry_update_link_utf8(entry, value)) {
1299228753Smm				err = ARCHIVE_WARN;
1300228753Smm				archive_set_error(&a->archive,
1301228753Smm				    ARCHIVE_ERRNO_FILE_FORMAT,
1302228753Smm				    "Linkname in pax header can't "
1303228753Smm				    "be converted to current locale.");
1304228753Smm			}
1305228753Smm		}
1306228753Smm	}
1307228753Smm	/*
1308228753Smm	 * Some extensions (such as the GNU sparse file extensions)
1309228753Smm	 * deliberately store a synthetic name under the regular 'path'
1310228753Smm	 * attribute and the real file name under a different attribute.
1311228753Smm	 * Since we're supposed to not care about the order, we
1312228753Smm	 * have no choice but to store all of the various filenames
1313228753Smm	 * we find and figure it all out afterwards.  This is the
1314228753Smm	 * figuring out part.
1315228753Smm	 */
1316228753Smm	value = NULL;
1317228753Smm	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1318228753Smm		value = tar->entry_pathname_override.s;
1319228753Smm	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1320228753Smm		value = tar->entry_pathname.s;
1321228753Smm	if (value != NULL) {
1322228753Smm		if (tar->pax_hdrcharset_binary)
1323228753Smm			archive_entry_copy_pathname(entry, value);
1324228753Smm		else {
1325228753Smm			if (!archive_entry_update_pathname_utf8(entry, value)) {
1326228753Smm				err = ARCHIVE_WARN;
1327228753Smm				archive_set_error(&a->archive,
1328228753Smm				    ARCHIVE_ERRNO_FILE_FORMAT,
1329228753Smm				    "Pathname in pax header can't be "
1330228753Smm				    "converted to current locale.");
1331228753Smm			}
1332228753Smm		}
1333228753Smm	}
1334228753Smm	if (archive_strlen(&(tar->entry_uname)) > 0) {
1335228753Smm		value = tar->entry_uname.s;
1336228753Smm		if (tar->pax_hdrcharset_binary)
1337228753Smm			archive_entry_copy_uname(entry, value);
1338228753Smm		else {
1339228753Smm			if (!archive_entry_update_uname_utf8(entry, value)) {
1340228753Smm				err = ARCHIVE_WARN;
1341228753Smm				archive_set_error(&a->archive,
1342228753Smm				    ARCHIVE_ERRNO_FILE_FORMAT,
1343228753Smm				    "Uname in pax header can't "
1344228753Smm				    "be converted to current locale.");
1345228753Smm			}
1346228753Smm		}
1347228753Smm	}
1348228753Smm	return (err);
1349228753Smm}
1350228753Smm
/*
 * Store a "LIBARCHIVE.xattr.<name>" pax attribute as an extended
 * attribute of 'entry'.  The part of the key after the prefix is
 * URL-decoded to get the attribute name; the value is base64-decoded.
 *
 * Returns:
 *   0  the attribute was added to the entry,
 *   1  the value could not be base64-decoded,
 *   2  the name could not be URL-decoded,
 *   3  the key is shorter than 18 characters or lacks the prefix.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	char *name, char *value)
{
	char *name_decoded;
	void *value_decoded;
	size_t value_len;
	int status;

	/* Require the 17-character prefix plus at least one more char. */
	if (strlen(name) < 18 || strncmp(name, "LIBARCHIVE.xattr.", 17) != 0)
		return 3;

	/* URL-decode the part of the key that follows the prefix. */
	name_decoded = url_decode(name + 17);
	if (name_decoded == NULL)
		return 2;

	/* Base-64 decode value */
	value_decoded = base64_decode(value, strlen(value), &value_len);
	if (value_decoded != NULL) {
		archive_entry_xattr_add_entry(entry, name_decoded,
			value_decoded, value_len);
		free(value_decoded);
		status = 0;
	} else {
		status = 1;
	}

	free(name_decoded);
	return status;
}
1383228753Smm
1384228753Smm/*
1385228753Smm * Parse a single key=value attribute.  key/value pointers are
1386228753Smm * assumed to point into reasonably long-lived storage.
1387228753Smm *
1388228753Smm * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1389228753Smm * extensions should always have keywords of the form "VENDOR.attribute"
1390228753Smm * In particular, it's quite feasible to support many different
1391228753Smm * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1392228753Smm * unique to this library.
1393228753Smm *
1394228753Smm * Investigate other vendor-specific extensions and see if
1395228753Smm * any of them look useful.
1396228753Smm */
1397228753Smmstatic int
1398228753Smmpax_attribute(struct tar *tar, struct archive_entry *entry,
1399228753Smm    char *key, char *value)
1400228753Smm{
1401228753Smm	int64_t s;
1402228753Smm	long n;
1403228753Smm	wchar_t *wp;
1404228753Smm
1405228753Smm	switch (key[0]) {
1406228753Smm	case 'G':
1407228753Smm		/* GNU "0.0" sparse pax format. */
1408228753Smm		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1409228753Smm			tar->sparse_offset = -1;
1410228753Smm			tar->sparse_numbytes = -1;
1411228753Smm			tar->sparse_gnu_major = 0;
1412228753Smm			tar->sparse_gnu_minor = 0;
1413228753Smm		}
1414228753Smm		if (strcmp(key, "GNU.sparse.offset") == 0) {
1415228753Smm			tar->sparse_offset = tar_atol10(value, strlen(value));
1416228753Smm			if (tar->sparse_numbytes != -1) {
1417228753Smm				gnu_add_sparse_entry(tar,
1418228753Smm				    tar->sparse_offset, tar->sparse_numbytes);
1419228753Smm				tar->sparse_offset = -1;
1420228753Smm				tar->sparse_numbytes = -1;
1421228753Smm			}
1422228753Smm		}
1423228753Smm		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1424228753Smm			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1425228753Smm			if (tar->sparse_numbytes != -1) {
1426228753Smm				gnu_add_sparse_entry(tar,
1427228753Smm				    tar->sparse_offset, tar->sparse_numbytes);
1428228753Smm				tar->sparse_offset = -1;
1429228753Smm				tar->sparse_numbytes = -1;
1430228753Smm			}
1431228753Smm		}
1432228753Smm		if (strcmp(key, "GNU.sparse.size") == 0) {
1433228753Smm			tar->realsize = tar_atol10(value, strlen(value));
1434228753Smm			archive_entry_set_size(entry, tar->realsize);
1435228753Smm		}
1436228753Smm
1437228753Smm		/* GNU "0.1" sparse pax format. */
1438228753Smm		if (strcmp(key, "GNU.sparse.map") == 0) {
1439228753Smm			tar->sparse_gnu_major = 0;
1440228753Smm			tar->sparse_gnu_minor = 1;
1441228753Smm			if (gnu_sparse_01_parse(tar, value) != ARCHIVE_OK)
1442228753Smm				return (ARCHIVE_WARN);
1443228753Smm		}
1444228753Smm
1445228753Smm		/* GNU "1.0" sparse pax format */
1446228753Smm		if (strcmp(key, "GNU.sparse.major") == 0) {
1447228753Smm			tar->sparse_gnu_major = tar_atol10(value, strlen(value));
1448228753Smm			tar->sparse_gnu_pending = 1;
1449228753Smm		}
1450228753Smm		if (strcmp(key, "GNU.sparse.minor") == 0) {
1451228753Smm			tar->sparse_gnu_minor = tar_atol10(value, strlen(value));
1452228753Smm			tar->sparse_gnu_pending = 1;
1453228753Smm		}
1454228753Smm		if (strcmp(key, "GNU.sparse.name") == 0) {
1455228753Smm			/*
1456228753Smm			 * The real filename; when storing sparse
1457228753Smm			 * files, GNU tar puts a synthesized name into
1458228753Smm			 * the regular 'path' attribute in an attempt
1459228753Smm			 * to limit confusion. ;-)
1460228753Smm			 */
1461228753Smm			archive_strcpy(&(tar->entry_pathname_override), value);
1462228753Smm		}
1463228753Smm		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1464228753Smm			tar->realsize = tar_atol10(value, strlen(value));
1465228753Smm			archive_entry_set_size(entry, tar->realsize);
1466228753Smm		}
1467228753Smm		break;
1468228753Smm	case 'L':
1469228753Smm		/* Our extensions */
1470228753Smm/* TODO: Handle arbitrary extended attributes... */
1471228753Smm/*
1472228753Smm		if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0)
1473228753Smm			archive_entry_set_xxxxxx(entry, value);
1474228753Smm*/
1475228753Smm		if (strcmp(key, "LIBARCHIVE.creationtime")==0) {
1476228753Smm			pax_time(value, &s, &n);
1477228753Smm			archive_entry_set_birthtime(entry, s, n);
1478228753Smm		}
1479228753Smm		if (strncmp(key, "LIBARCHIVE.xattr.", 17)==0)
1480228753Smm			pax_attribute_xattr(entry, key, value);
1481228753Smm		break;
1482228753Smm	case 'S':
1483228753Smm		/* We support some keys used by the "star" archiver */
1484228753Smm		if (strcmp(key, "SCHILY.acl.access")==0) {
1485228753Smm			wp = utf8_decode(tar, value, strlen(value));
1486228753Smm			/* TODO: if (wp == NULL) */
1487228753Smm			__archive_entry_acl_parse_w(entry, wp,
1488228753Smm			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
1489228753Smm		} else if (strcmp(key, "SCHILY.acl.default")==0) {
1490228753Smm			wp = utf8_decode(tar, value, strlen(value));
1491228753Smm			/* TODO: if (wp == NULL) */
1492228753Smm			__archive_entry_acl_parse_w(entry, wp,
1493228753Smm			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
1494228753Smm		} else if (strcmp(key, "SCHILY.devmajor")==0) {
1495228753Smm			archive_entry_set_rdevmajor(entry,
1496228753Smm			    tar_atol10(value, strlen(value)));
1497228753Smm		} else if (strcmp(key, "SCHILY.devminor")==0) {
1498228753Smm			archive_entry_set_rdevminor(entry,
1499228753Smm			    tar_atol10(value, strlen(value)));
1500228753Smm		} else if (strcmp(key, "SCHILY.fflags")==0) {
1501228753Smm			archive_entry_copy_fflags_text(entry, value);
1502228753Smm		} else if (strcmp(key, "SCHILY.dev")==0) {
1503228753Smm			archive_entry_set_dev(entry,
1504228753Smm			    tar_atol10(value, strlen(value)));
1505228753Smm		} else if (strcmp(key, "SCHILY.ino")==0) {
1506228753Smm			archive_entry_set_ino(entry,
1507228753Smm			    tar_atol10(value, strlen(value)));
1508228753Smm		} else if (strcmp(key, "SCHILY.nlink")==0) {
1509228753Smm			archive_entry_set_nlink(entry,
1510228753Smm			    tar_atol10(value, strlen(value)));
1511228753Smm		} else if (strcmp(key, "SCHILY.realsize")==0) {
1512228753Smm			tar->realsize = tar_atol10(value, strlen(value));
1513228753Smm			archive_entry_set_size(entry, tar->realsize);
1514228753Smm		}
1515228753Smm		break;
1516228753Smm	case 'a':
1517228753Smm		if (strcmp(key, "atime")==0) {
1518228753Smm			pax_time(value, &s, &n);
1519228753Smm			archive_entry_set_atime(entry, s, n);
1520228753Smm		}
1521228753Smm		break;
1522228753Smm	case 'c':
1523228753Smm		if (strcmp(key, "ctime")==0) {
1524228753Smm			pax_time(value, &s, &n);
1525228753Smm			archive_entry_set_ctime(entry, s, n);
1526228753Smm		} else if (strcmp(key, "charset")==0) {
1527228753Smm			/* TODO: Publish charset information in entry. */
1528228753Smm		} else if (strcmp(key, "comment")==0) {
1529228753Smm			/* TODO: Publish comment in entry. */
1530228753Smm		}
1531228753Smm		break;
1532228753Smm	case 'g':
1533228753Smm		if (strcmp(key, "gid")==0) {
1534228753Smm			archive_entry_set_gid(entry,
1535228753Smm			    tar_atol10(value, strlen(value)));
1536228753Smm		} else if (strcmp(key, "gname")==0) {
1537228753Smm			archive_strcpy(&(tar->entry_gname), value);
1538228753Smm		}
1539228753Smm		break;
1540228753Smm	case 'h':
1541228753Smm		if (strcmp(key, "hdrcharset") == 0) {
1542228753Smm			if (strcmp(value, "BINARY") == 0)
1543228753Smm				tar->pax_hdrcharset_binary = 1;
1544228753Smm			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1545228753Smm				tar->pax_hdrcharset_binary = 0;
1546228753Smm			else {
1547228753Smm				/* TODO: Warn about unsupported hdrcharset */
1548228753Smm			}
1549228753Smm		}
1550228753Smm		break;
1551228753Smm	case 'l':
1552228753Smm		/* pax interchange doesn't distinguish hardlink vs. symlink. */
1553228753Smm		if (strcmp(key, "linkpath")==0) {
1554228753Smm			archive_strcpy(&(tar->entry_linkpath), value);
1555228753Smm		}
1556228753Smm		break;
1557228753Smm	case 'm':
1558228753Smm		if (strcmp(key, "mtime")==0) {
1559228753Smm			pax_time(value, &s, &n);
1560228753Smm			archive_entry_set_mtime(entry, s, n);
1561228753Smm		}
1562228753Smm		break;
1563228753Smm	case 'p':
1564228753Smm		if (strcmp(key, "path")==0) {
1565228753Smm			archive_strcpy(&(tar->entry_pathname), value);
1566228753Smm		}
1567228753Smm		break;
1568228753Smm	case 'r':
1569228753Smm		/* POSIX has reserved 'realtime.*' */
1570228753Smm		break;
1571228753Smm	case 's':
1572228753Smm		/* POSIX has reserved 'security.*' */
1573228753Smm		/* Someday: if (strcmp(key, "security.acl")==0) { ... } */
1574228753Smm		if (strcmp(key, "size")==0) {
1575228753Smm			/* "size" is the size of the data in the entry. */
1576228753Smm			tar->entry_bytes_remaining
1577228753Smm			    = tar_atol10(value, strlen(value));
1578228753Smm			/*
1579228753Smm			 * But, "size" is not necessarily the size of
1580228753Smm			 * the file on disk; if this is a sparse file,
1581228753Smm			 * the disk size may have already been set from
1582228753Smm			 * GNU.sparse.realsize or GNU.sparse.size or
1583228753Smm			 * an old GNU header field or SCHILY.realsize
1584228753Smm			 * or ....
1585228753Smm			 */
1586228753Smm			if (tar->realsize < 0) {
1587228753Smm				archive_entry_set_size(entry,
1588228753Smm				    tar->entry_bytes_remaining);
1589228753Smm				tar->realsize
1590228753Smm				    = tar->entry_bytes_remaining;
1591228753Smm			}
1592228753Smm		}
1593228753Smm		break;
1594228753Smm	case 'u':
1595228753Smm		if (strcmp(key, "uid")==0) {
1596228753Smm			archive_entry_set_uid(entry,
1597228753Smm			    tar_atol10(value, strlen(value)));
1598228753Smm		} else if (strcmp(key, "uname")==0) {
1599228753Smm			archive_strcpy(&(tar->entry_uname), value);
1600228753Smm		}
1601228753Smm		break;
1602228753Smm	}
1603228753Smm	return (0);
1604228753Smm}
1605228753Smm
1606228753Smm
1607228753Smm
/*
 * Parse a decimal pax time value of the form "[-]digits[.digits]".
 *
 * The whole-second part is stored in *ps; it saturates at INT64_MAX
 * (before the sign is applied) if the digits do not fit.  Up to nine
 * fractional digits are stored in *pn as nanoseconds; *pn is 0 when
 * no '.' immediately follows the parsed integer digits.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_prefix = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	unsigned long scale;
	int negative = 0;
	char d;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Accumulate whole seconds, clamping instead of overflowing. */
	for (; *p >= '0' && *p <= '9'; ++p) {
		d = *p - '0';
		if (seconds > max_prefix ||
		    (seconds == max_prefix && d > max_last_digit)) {
			/* Stop here; p is left on the offending digit. */
			seconds = INT64_MAX;
			break;
		}
		seconds = (seconds * 10) + d;
	}
	*ps = negative ? -seconds : seconds;

	/* Nanoseconds default to zero unless a fraction follows. */
	*pn = 0;
	if (*p != '.')
		return;

	/*
	 * Each fractional digit is worth one tenth of the previous
	 * one, starting at 100,000,000 ns; digits beyond the ninth
	 * are ignored.
	 */
	for (scale = 100000000UL; scale != 0; scale /= 10) {
		++p;
		if (*p < '0' || *p > '9')
			break;
		*pn += (*p - '0') * scale;
	}
}
1657228753Smm
1658228753Smm/*
1659228753Smm * Parse GNU tar header
1660228753Smm */
1661228753Smmstatic int
1662228753Smmheader_gnutar(struct archive_read *a, struct tar *tar,
1663228753Smm    struct archive_entry *entry, const void *h)
1664228753Smm{
1665228753Smm	const struct archive_entry_header_gnutar *header;
1666228753Smm
1667228753Smm	(void)a;
1668228753Smm
1669228753Smm	/*
1670228753Smm	 * GNU header is like POSIX ustar, except 'prefix' is
1671228753Smm	 * replaced with some other fields. This also means the
1672228753Smm	 * filename is stored as in old-style archives.
1673228753Smm	 */
1674228753Smm
1675228753Smm	/* Grab fields common to all tar variants. */
1676228753Smm	header_common(a, tar, entry, h);
1677228753Smm
1678228753Smm	/* Copy filename over (to ensure null termination). */
1679228753Smm	header = (const struct archive_entry_header_gnutar *)h;
1680228753Smm	archive_strncpy(&(tar->entry_pathname), header->name,
1681228753Smm	    sizeof(header->name));
1682228753Smm	archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1683228753Smm
1684228753Smm	/* Fields common to ustar and GNU */
1685228753Smm	/* XXX Can the following be factored out since it's common
1686228753Smm	 * to ustar and gnu tar?  Is it okay to move it down into
1687228753Smm	 * header_common, perhaps?  */
1688228753Smm	archive_strncpy(&(tar->entry_uname),
1689228753Smm	    header->uname, sizeof(header->uname));
1690228753Smm	archive_entry_copy_uname(entry, tar->entry_uname.s);
1691228753Smm
1692228753Smm	archive_strncpy(&(tar->entry_gname),
1693228753Smm	    header->gname, sizeof(header->gname));
1694228753Smm	archive_entry_copy_gname(entry, tar->entry_gname.s);
1695228753Smm
1696228753Smm	/* Parse out device numbers only for char and block specials */
1697228753Smm	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1698228753Smm		archive_entry_set_rdevmajor(entry,
1699228753Smm		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1700228753Smm		archive_entry_set_rdevminor(entry,
1701228753Smm		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1702228753Smm	} else
1703228753Smm		archive_entry_set_rdev(entry, 0);
1704228753Smm
1705228753Smm	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1706228753Smm
1707228753Smm	/* Grab GNU-specific fields. */
1708228753Smm	archive_entry_set_atime(entry,
1709228753Smm	    tar_atol(header->atime, sizeof(header->atime)), 0);
1710228753Smm	archive_entry_set_ctime(entry,
1711228753Smm	    tar_atol(header->ctime, sizeof(header->ctime)), 0);
1712228753Smm	if (header->realsize[0] != 0) {
1713228753Smm		tar->realsize
1714228753Smm		    = tar_atol(header->realsize, sizeof(header->realsize));
1715228753Smm		archive_entry_set_size(entry, tar->realsize);
1716228753Smm	}
1717228753Smm
1718228753Smm	if (header->sparse[0].offset[0] != 0) {
1719228753Smm		gnu_sparse_old_read(a, tar, header);
1720228753Smm	} else {
1721228753Smm		if (header->isextended[0] != 0) {
1722228753Smm			/* XXX WTF? XXX */
1723228753Smm		}
1724228753Smm	}
1725228753Smm
1726228753Smm	return (0);
1727228753Smm}
1728228753Smm
1729228753Smmstatic void
1730228753Smmgnu_add_sparse_entry(struct tar *tar, off_t offset, off_t remaining)
1731228753Smm{
1732228753Smm	struct sparse_block *p;
1733228753Smm
1734228753Smm	p = (struct sparse_block *)malloc(sizeof(*p));
1735228753Smm	if (p == NULL)
1736228753Smm		__archive_errx(1, "Out of memory");
1737228753Smm	memset(p, 0, sizeof(*p));
1738228753Smm	if (tar->sparse_last != NULL)
1739228753Smm		tar->sparse_last->next = p;
1740228753Smm	else
1741228753Smm		tar->sparse_list = p;
1742228753Smm	tar->sparse_last = p;
1743228753Smm	p->offset = offset;
1744228753Smm	p->remaining = remaining;
1745228753Smm}
1746228753Smm
1747228753Smmstatic void
1748228753Smmgnu_clear_sparse_list(struct tar *tar)
1749228753Smm{
1750228753Smm	struct sparse_block *p;
1751228753Smm
1752228753Smm	while (tar->sparse_list != NULL) {
1753228753Smm		p = tar->sparse_list;
1754228753Smm		tar->sparse_list = p->next;
1755228753Smm		free(p);
1756228753Smm	}
1757228753Smm	tar->sparse_last = NULL;
1758228753Smm}
1759228753Smm
1760228753Smm/*
1761228753Smm * GNU tar old-format sparse data.
1762228753Smm *
1763228753Smm * GNU old-format sparse data is stored in a fixed-field
1764228753Smm * format.  Offset/size values are 11-byte octal fields (same
1765228753Smm * format as 'size' field in ustart header).  These are
1766228753Smm * stored in the header, allocating subsequent header blocks
1767228753Smm * as needed.  Extending the header in this way is a pretty
1768228753Smm * severe POSIX violation; this design has earned GNU tar a
1769228753Smm * lot of criticism.
1770228753Smm */
1771228753Smm
1772228753Smmstatic int
1773228753Smmgnu_sparse_old_read(struct archive_read *a, struct tar *tar,
1774228753Smm    const struct archive_entry_header_gnutar *header)
1775228753Smm{
1776228753Smm	ssize_t bytes_read;
1777228753Smm	const void *data;
1778228753Smm	struct extended {
1779228753Smm		struct gnu_sparse sparse[21];
1780228753Smm		char	isextended[1];
1781228753Smm		char	padding[7];
1782228753Smm	};
1783228753Smm	const struct extended *ext;
1784228753Smm
1785228753Smm	gnu_sparse_old_parse(tar, header->sparse, 4);
1786228753Smm	if (header->isextended[0] == 0)
1787228753Smm		return (ARCHIVE_OK);
1788228753Smm
1789228753Smm	do {
1790228753Smm		data = __archive_read_ahead(a, 512, &bytes_read);
1791228753Smm		if (bytes_read < 0)
1792228753Smm			return (ARCHIVE_FATAL);
1793228753Smm		if (bytes_read < 512) {
1794228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1795228753Smm			    "Truncated tar archive "
1796228753Smm			    "detected while reading sparse file data");
1797228753Smm			return (ARCHIVE_FATAL);
1798228753Smm		}
1799228753Smm		__archive_read_consume(a, 512);
1800228753Smm		ext = (const struct extended *)data;
1801228753Smm		gnu_sparse_old_parse(tar, ext->sparse, 21);
1802228753Smm	} while (ext->isextended[0] != 0);
1803228753Smm	if (tar->sparse_list != NULL)
1804228753Smm		tar->entry_offset = tar->sparse_list->offset;
1805228753Smm	return (ARCHIVE_OK);
1806228753Smm}
1807228753Smm
1808228753Smmstatic void
1809228753Smmgnu_sparse_old_parse(struct tar *tar,
1810228753Smm    const struct gnu_sparse *sparse, int length)
1811228753Smm{
1812228753Smm	while (length > 0 && sparse->offset[0] != 0) {
1813228753Smm		gnu_add_sparse_entry(tar,
1814228753Smm		    tar_atol(sparse->offset, sizeof(sparse->offset)),
1815228753Smm		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)));
1816228753Smm		sparse++;
1817228753Smm		length--;
1818228753Smm	}
1819228753Smm}
1820228753Smm
/*
 * GNU tar sparse format 0.0
 *
 * Beginning with GNU tar 1.15, sparse files are stored using
 * information in the pax extended header.  The GNU tar maintainers
 * have gone through a number of variations in the process of working
 * out this scheme; fortunately, they're all numbered.
 *
 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
 * store offset/size for each block.  The repeated instances of these
 * latter fields violate the pax specification (which frowns on
 * duplicate keys), so this format was quickly replaced.
 */
1835228753Smm
1836228753Smm/*
1837228753Smm * GNU tar sparse format 0.1
1838228753Smm *
1839228753Smm * This version replaced the offset/numbytes attributes with
1840228753Smm * a single "map" attribute that stored a list of integers.  This
1841228753Smm * format had two problems: First, the "map" attribute could be very
1842228753Smm * long, which caused problems for some implementations.  More
1843228753Smm * importantly, the sparse data was lost when extracted by archivers
1844228753Smm * that didn't recognize this extension.
1845228753Smm */
1846228753Smm
1847228753Smmstatic int
1848228753Smmgnu_sparse_01_parse(struct tar *tar, const char *p)
1849228753Smm{
1850228753Smm	const char *e;
1851228753Smm	off_t offset = -1, size = -1;
1852228753Smm
1853228753Smm	for (;;) {
1854228753Smm		e = p;
1855228753Smm		while (*e != '\0' && *e != ',') {
1856228753Smm			if (*e < '0' || *e > '9')
1857228753Smm				return (ARCHIVE_WARN);
1858228753Smm			e++;
1859228753Smm		}
1860228753Smm		if (offset < 0) {
1861228753Smm			offset = tar_atol10(p, e - p);
1862228753Smm			if (offset < 0)
1863228753Smm				return (ARCHIVE_WARN);
1864228753Smm		} else {
1865228753Smm			size = tar_atol10(p, e - p);
1866228753Smm			if (size < 0)
1867228753Smm				return (ARCHIVE_WARN);
1868228753Smm			gnu_add_sparse_entry(tar, offset, size);
1869228753Smm			offset = -1;
1870228753Smm		}
1871228753Smm		if (*e == '\0')
1872228753Smm			return (ARCHIVE_OK);
1873228753Smm		p = e + 1;
1874228753Smm	}
1875228753Smm}
1876228753Smm
1877228753Smm/*
1878228753Smm * GNU tar sparse format 1.0
1879228753Smm *
1880228753Smm * The idea: The offset/size data is stored as a series of base-10
1881228753Smm * ASCII numbers prepended to the file data, so that dearchivers that
1882228753Smm * don't support this format will extract the block map along with the
1883228753Smm * data and a separate post-process can restore the sparseness.
1884228753Smm *
1885228753Smm * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
1886228753Smm * padding to the body of the file when using this format.  GNU tar
1887228753Smm * 1.17 corrected this bug without bumping the version number, so
1888228753Smm * it's not possible to support both variants.  This code supports
1889228753Smm * the later variant at the expense of not supporting the former.
1890228753Smm *
1891228753Smm * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
1892228753Smm * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
1893228753Smm */
1894228753Smm
1895228753Smm/*
1896228753Smm * Read the next line from the input, and parse it as a decimal
1897228753Smm * integer followed by '\n'.  Returns positive integer value or
1898228753Smm * negative on error.
1899228753Smm */
1900228753Smmstatic int64_t
1901228753Smmgnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
1902228753Smm    ssize_t *remaining)
1903228753Smm{
1904228753Smm	int64_t l, limit, last_digit_limit;
1905228753Smm	const char *p;
1906228753Smm	ssize_t bytes_read;
1907228753Smm	int base, digit;
1908228753Smm
1909228753Smm	base = 10;
1910228753Smm	limit = INT64_MAX / base;
1911228753Smm	last_digit_limit = INT64_MAX % base;
1912228753Smm
1913228753Smm	/*
1914228753Smm	 * Skip any lines starting with '#'; GNU tar specs
1915228753Smm	 * don't require this, but they should.
1916228753Smm	 */
1917228753Smm	do {
1918228753Smm		bytes_read = readline(a, tar, &p, tar_min(*remaining, 100));
1919228753Smm		if (bytes_read <= 0)
1920228753Smm			return (ARCHIVE_FATAL);
1921228753Smm		*remaining -= bytes_read;
1922228753Smm	} while (p[0] == '#');
1923228753Smm
1924228753Smm	l = 0;
1925228753Smm	while (bytes_read > 0) {
1926228753Smm		if (*p == '\n')
1927228753Smm			return (l);
1928228753Smm		if (*p < '0' || *p >= '0' + base)
1929228753Smm			return (ARCHIVE_WARN);
1930228753Smm		digit = *p - '0';
1931228753Smm		if (l > limit || (l == limit && digit > last_digit_limit))
1932228753Smm			l = INT64_MAX; /* Truncate on overflow. */
1933228753Smm		else
1934228753Smm			l = (l * base) + digit;
1935228753Smm		p++;
1936228753Smm		bytes_read--;
1937228753Smm	}
1938228753Smm	/* TODO: Error message. */
1939228753Smm	return (ARCHIVE_WARN);
1940228753Smm}
1941228753Smm
1942228753Smm/*
1943228753Smm * Returns length (in bytes) of the sparse data description
1944228753Smm * that was read.
1945228753Smm */
1946228753Smmstatic ssize_t
1947228753Smmgnu_sparse_10_read(struct archive_read *a, struct tar *tar)
1948228753Smm{
1949228753Smm	ssize_t remaining, bytes_read;
1950228753Smm	int entries;
1951228753Smm	off_t offset, size, to_skip;
1952228753Smm
1953228753Smm	/* Clear out the existing sparse list. */
1954228753Smm	gnu_clear_sparse_list(tar);
1955228753Smm
1956228753Smm	remaining = tar->entry_bytes_remaining;
1957228753Smm
1958228753Smm	/* Parse entries. */
1959228753Smm	entries = gnu_sparse_10_atol(a, tar, &remaining);
1960228753Smm	if (entries < 0)
1961228753Smm		return (ARCHIVE_FATAL);
1962228753Smm	/* Parse the individual entries. */
1963228753Smm	while (entries-- > 0) {
1964228753Smm		/* Parse offset/size */
1965228753Smm		offset = gnu_sparse_10_atol(a, tar, &remaining);
1966228753Smm		if (offset < 0)
1967228753Smm			return (ARCHIVE_FATAL);
1968228753Smm		size = gnu_sparse_10_atol(a, tar, &remaining);
1969228753Smm		if (size < 0)
1970228753Smm			return (ARCHIVE_FATAL);
1971228753Smm		/* Add a new sparse entry. */
1972228753Smm		gnu_add_sparse_entry(tar, offset, size);
1973228753Smm	}
1974228753Smm	/* Skip rest of block... */
1975228753Smm	bytes_read = tar->entry_bytes_remaining - remaining;
1976228753Smm	to_skip = 0x1ff & -bytes_read;
1977228753Smm	if (to_skip != __archive_read_skip(a, to_skip))
1978228753Smm		return (ARCHIVE_FATAL);
1979228753Smm	return (bytes_read + to_skip);
1980228753Smm}
1981228753Smm
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions: a field
 * whose first byte has the high bit set is decoded as base-256,
 * anything else as base-8.
 */
static int64_t
tar_atol(const char *p, unsigned char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80.
	 */
	return ((*p & 0x80) != 0
	    ? tar_atol256(p, char_cnt)
	    : tar_atol8(p, char_cnt));
}
2010228753Smm
/*
 * Parse an octal integer from a field of at most 'char_cnt' bytes.
 *
 * Leading blanks/tabs and an optional '-' are accepted; parsing stops
 * at the first non-octal character or at the end of the field.  No
 * byte beyond p[char_cnt - 1] is ever examined, so fields without a
 * terminator are safe.  Magnitudes that do not fit are clamped to
 * INT64_MAX before the sign is applied.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol8(const char *p, unsigned char_cnt)
{
	const int base = 8;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l = 0;
	int digit, sign = 1;

	/* Leading whitespace counts against the field width. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	/* Check the remaining width before touching each byte. */
	for (; char_cnt > 0; p++, char_cnt--) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
	}
	return (sign < 0) ? -l : l;
}
2046228753Smm
/*
 * Parse a decimal integer from a field of at most 'char_cnt' bytes.
 *
 * Leading blanks/tabs and an optional '-' are accepted; parsing stops
 * at the first non-digit or at the end of the field.  No byte beyond
 * p[char_cnt - 1] is ever examined.  Magnitudes that do not fit are
 * clamped to INT64_MAX before the sign is applied.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol10(const char *p, unsigned char_cnt)
{
	const int base = 10;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l = 0;
	int digit, sign = 1;

	/* Leading whitespace counts against the field width. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	/* Check the remaining width before touching each byte. */
	for (; char_cnt > 0; p++, char_cnt--) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
	}
	return (sign < 0) ? -l : l;
}
2082228753Smm
/*
 * Parse a base-256 integer.  This is just a straight signed binary
 * value in big-endian order, except that the high-order bit is
 * ignored: bit 6 of the first byte is the sign and its low six bits
 * are the most significant payload bits.
 *
 * Values that do not fit are clamped to INT64_MAX / INT64_MIN.
 * An empty field (char_cnt == 0) yields 0 without reading any byte.
 */
static int64_t
tar_atol256(const char *_p, unsigned char_cnt)
{
	int64_t	l, upper_limit, lower_limit;
	const unsigned char *p = (const unsigned char *)_p;

	if (char_cnt == 0)
		return (0);

	upper_limit = INT64_MAX / 256;
	lower_limit = INT64_MIN / 256;

	/*
	 * Sign-extend the 7-bit leading value.  This is done with
	 * plain arithmetic because left-shifting a negative signed
	 * value is undefined behavior in C.
	 */
	l = (int64_t)(0x3f & *p);
	if ((0x40 & *p) == 0x40)
		l -= 64;
	p++;

	while (--char_cnt > 0) {
		if (l > upper_limit) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		} else if (l < lower_limit) {
			l = INT64_MIN;
			break;
		}
		/* Within the limits above, l * 256 + byte cannot overflow. */
		l = (l * 256) + (int64_t)*p++;
	}
	return (l);
}
2115228753Smm
2116228753Smm/*
2117228753Smm * Returns length of line (including trailing newline)
2118228753Smm * or negative on error.  'start' argument is updated to
2119228753Smm * point to first character of line.  This avoids copying
2120228753Smm * when possible.
2121228753Smm */
2122228753Smmstatic ssize_t
2123228753Smmreadline(struct archive_read *a, struct tar *tar, const char **start,
2124228753Smm    ssize_t limit)
2125228753Smm{
2126228753Smm	ssize_t bytes_read;
2127228753Smm	ssize_t total_size = 0;
2128228753Smm	const void *t;
2129228753Smm	const char *s;
2130228753Smm	void *p;
2131228753Smm
2132228753Smm	t = __archive_read_ahead(a, 1, &bytes_read);
2133228753Smm	if (bytes_read <= 0)
2134228753Smm		return (ARCHIVE_FATAL);
2135228753Smm	s = t;  /* Start of line? */
2136228753Smm	p = memchr(t, '\n', bytes_read);
2137228753Smm	/* If we found '\n' in the read buffer, return pointer to that. */
2138228753Smm	if (p != NULL) {
2139228753Smm		bytes_read = 1 + ((const char *)p) - s;
2140228753Smm		if (bytes_read > limit) {
2141228753Smm			archive_set_error(&a->archive,
2142228753Smm			    ARCHIVE_ERRNO_FILE_FORMAT,
2143228753Smm			    "Line too long");
2144228753Smm			return (ARCHIVE_FATAL);
2145228753Smm		}
2146228753Smm		__archive_read_consume(a, bytes_read);
2147228753Smm		*start = s;
2148228753Smm		return (bytes_read);
2149228753Smm	}
2150228753Smm	/* Otherwise, we need to accumulate in a line buffer. */
2151228753Smm	for (;;) {
2152228753Smm		if (total_size + bytes_read > limit) {
2153228753Smm			archive_set_error(&a->archive,
2154228753Smm			    ARCHIVE_ERRNO_FILE_FORMAT,
2155228753Smm			    "Line too long");
2156228753Smm			return (ARCHIVE_FATAL);
2157228753Smm		}
2158228753Smm		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2159228753Smm			archive_set_error(&a->archive, ENOMEM,
2160228753Smm			    "Can't allocate working buffer");
2161228753Smm			return (ARCHIVE_FATAL);
2162228753Smm		}
2163228753Smm		memcpy(tar->line.s + total_size, t, bytes_read);
2164228753Smm		__archive_read_consume(a, bytes_read);
2165228753Smm		total_size += bytes_read;
2166228753Smm		/* If we found '\n', clean up and return. */
2167228753Smm		if (p != NULL) {
2168228753Smm			*start = tar->line.s;
2169228753Smm			return (total_size);
2170228753Smm		}
2171228753Smm		/* Read some more. */
2172228753Smm		t = __archive_read_ahead(a, 1, &bytes_read);
2173228753Smm		if (bytes_read <= 0)
2174228753Smm			return (ARCHIVE_FATAL);
2175228753Smm		s = t;  /* Start of line? */
2176228753Smm		p = memchr(t, '\n', bytes_read);
2177228753Smm		/* If we found '\n', trim the read. */
2178228753Smm		if (p != NULL) {
2179228753Smm			bytes_read = 1 + ((const char *)p) - s;
2180228753Smm		}
2181228753Smm	}
2182228753Smm}
2183228753Smm
2184228753Smmstatic wchar_t *
2185228753Smmutf8_decode(struct tar *tar, const char *src, size_t length)
2186228753Smm{
2187228753Smm	wchar_t *dest;
2188228753Smm	ssize_t n;
2189228753Smm
2190228753Smm	/* Ensure pax_entry buffer is big enough. */
2191228753Smm	if (tar->pax_entry_length <= length) {
2192228753Smm		wchar_t *old_entry;
2193228753Smm
2194228753Smm		if (tar->pax_entry_length <= 0)
2195228753Smm			tar->pax_entry_length = 1024;
2196228753Smm		while (tar->pax_entry_length <= length + 1)
2197228753Smm			tar->pax_entry_length *= 2;
2198228753Smm
2199228753Smm		old_entry = tar->pax_entry;
2200228753Smm		tar->pax_entry = (wchar_t *)realloc(tar->pax_entry,
2201228753Smm		    tar->pax_entry_length * sizeof(wchar_t));
2202228753Smm		if (tar->pax_entry == NULL) {
2203228753Smm			free(old_entry);
2204228753Smm			/* TODO: Handle this error. */
2205228753Smm			return (NULL);
2206228753Smm		}
2207228753Smm	}
2208228753Smm
2209228753Smm	dest = tar->pax_entry;
2210228753Smm	while (length > 0) {
2211228753Smm		n = UTF8_mbrtowc(dest, src, length);
2212228753Smm		if (n < 0)
2213228753Smm			return (NULL);
2214228753Smm		if (n == 0)
2215228753Smm			break;
2216228753Smm		dest++;
2217228753Smm		src += n;
2218228753Smm		length -= n;
2219228753Smm	}
2220228753Smm	*dest = L'\0';
2221228753Smm	return (tar->pax_entry);
2222228753Smm}
2223228753Smm
/*
 * Decode a single UTF-8 sequence of at most 'n' bytes from 's' into
 * *pwc.  Copied and simplified from FreeBSD libc/locale.
 *
 * Returns:
 *    0  if any argument is NULL/zero or the decoded character is NUL,
 *   -1  if the first byte cannot start a sequence,
 *   -2  if the sequence is longer than 'n' bytes,
 *    1  (with *pwc set to '?') if a continuation byte is malformed,
 *   otherwise the number of input bytes consumed.
 */
static ssize_t
UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
{
	unsigned long code;
	int lead, lead_mask, seq_len, k;

	if (s == NULL || n == 0 || pwc == NULL)
		return (0);

	/*
	 * The first octet tells us how long the sequence is and
	 * which of its own bits carry payload.
	 */
	lead = (unsigned char)*s;
	if ((lead & 0x80) == 0) {
		seq_len = 1;
		lead_mask = 0x7f;
	} else if ((lead & 0xe0) == 0xc0) {
		seq_len = 2;
		lead_mask = 0x1f;
	} else if ((lead & 0xf0) == 0xe0) {
		seq_len = 3;
		lead_mask = 0x0f;
	} else if ((lead & 0xf8) == 0xf0) {
		seq_len = 4;
		lead_mask = 0x07;
	} else {
		/* Invalid first byte. */
		return (-1);
	}

	/* Valid first byte but truncated. */
	if (n < (size_t)seq_len)
		return (-2);

	/*
	 * Fold in the continuation octets, six payload bits each,
	 * most significant first.
	 */
	code = (unsigned char)*s++ & lead_mask;
	for (k = 1; k < seq_len; k++) {
		if ((*s & 0xc0) != 0x80) {
			/* Invalid intermediate byte; consume one byte and
			 * emit '?' */
			*pwc = '?';
			return (1);
		}
		code = (code << 6) | (*s++ & 0x3f);
	}

	/* Assign the value to the output; out-of-range values
	 * just get truncated. */
	*pwc = (wchar_t)code;
#ifdef WCHAR_MAX
	/*
	 * If platform has WCHAR_MAX, we can do something
	 * more sensible with out-of-range values.
	 */
	if (code >= WCHAR_MAX)
		*pwc = '?';
#endif
	/* Return number of bytes input consumed: 0 for end-of-string. */
	return (code == L'\0' ? 0 : seq_len);
}
2295228753Smm
2296228753Smm
/*
 * base64_decode - Base64 decode
 *
 * Decodes 'len' bytes at 's' into a freshly malloc'ed buffer, which
 * the caller must free; the number of decoded bytes is stored in
 * *out_len.  On allocation failure NULL is returned and *out_len is 0.
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	static unsigned char decode_table[128];
	const unsigned char *src = (const unsigned char *)s;
	char *out, *d;
	unsigned char c;
	int bits, count;

	/* Build the reverse lookup table on first use. */
	if (decode_table[digits[1]] != 1) {
		unsigned idx;
		memset(decode_table, 0xff, sizeof(decode_table));
		for (idx = 0; idx < sizeof(digits); idx++)
			decode_table[digits[idx]] = idx;
	}

	/* Three output bytes per four input characters, at most. */
	/* Note that we may not use all of this... */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}

	for (d = out; len > 0; d += count * 3 / 4) {
		/* Gather the next group of (up to) four digits. */
		bits = 0;
		for (count = 0; count < 4 && len > 0; src++, len--) {
			c = *src;
			/* '=' or '_' padding indicates final group. */
			if (c == '=' || c == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (c > 127 || c < 32 || decode_table[c] == 0xff)
				continue;
			bits = (bits << 6) | decode_table[c];
			count++;
		}
		/* Left-align a short group within the 24-bit value. */
		bits <<= 6 * (4 - count);
		/* A lone digit (count == 1) is invalid and yields nothing. */
		if (count >= 2)
			d[0] = (bits >> 16) & 0xff;
		if (count >= 3)
			d[1] = (bits >> 8) & 0xff;
		if (count == 4)
			d[2] = bits & 0xff;
	}

	*out_len = d - out;
	return (out);
}
2377228753Smm
/*
 * url_decode - Expand %XX escapes in a NUL-terminated string.
 *
 * Returns a newly malloc()ed, NUL-terminated copy of 'in' in which
 * every '%' followed by two hex digits is replaced by the byte those
 * digits encode.  A '%' that is not followed by two hex digits is
 * copied through unchanged.  Returns NULL if the allocation fails;
 * otherwise the caller owns the result and must free() it.
 */
static char *
url_decode(const char *in)
{
	const char *rd = in;
	char *result, *wr;
	int hi, lo;

	/* Decoding never lengthens the text, so the input size suffices. */
	result = (char *)malloc(strlen(in) + 1);
	if (result == NULL)
		return (NULL);
	wr = result;

	while (*rd != '\0') {
		/* An escape needs '%' plus two more characters. */
		if (rd[0] == '%' && rd[1] != '\0' && rd[2] != '\0') {
			hi = tohex(rd[1]);
			lo = tohex(rd[2]);
			if (hi >= 0 && lo >= 0) {
				/* Valid escape: emit one byte, eat three. */
				*wr++ = ((hi << 4) | lo);
				rd += 3;
				continue;
			}
			/* Not hex after all; copy the '%' literally below. */
		}
		*wr++ = *rd++;
	}
	*wr = '\0';
	return (result);
}
2406228753Smm
/*
 * tohex - Convert one hexadecimal digit character to its value.
 *
 * Accepts '0'-'9', 'A'-'F' and 'a'-'f' and returns 0..15; returns -1
 * for any other input.
 */
static int
tohex(int c)
{
	int value = -1;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('A' <= c && c <= 'F')
		value = 10 + (c - 'A');
	else if ('a' <= c && c <= 'f')
		value = 10 + (c - 'a');

	return (value);
}
2419