1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27#include "archive_platform.h"
28__FBSDID("$FreeBSD: releng/10.3/contrib/libarchive/libarchive/archive_read_support_format_tar.c 306941 2016-10-10 07:18:54Z delphij $");
29
30#ifdef HAVE_ERRNO_H
31#include <errno.h>
32#endif
33#include <stddef.h>
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_acl_private.h" /* For ACL parsing routines. */
43#include "archive_entry.h"
44#include "archive_entry_locale.h"
45#include "archive_private.h"
46#include "archive_read_private.h"
47
/*
 * Yield the smaller of two values.  This is a function-like macro, so
 * either argument may be evaluated twice: do not pass expressions with
 * side effects.
 */
#define tar_min(a,b) ((a) < (b) ? (a) : (b))
49
/*
 * Layout of POSIX 'ustar' tar header.
 *
 * Every member is a fixed-width character array.  The members declared
 * here add up to 500 bytes; the code in this file reads header records
 * 512 bytes at a time.
 */
struct archive_entry_header_ustar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	/* Bytes 148..155 of the record; checksum() counts them as spaces. */
	char	checksum[8];
	/* tar_read_header() dispatches on this byte. */
	char	typeflag[1];
	char	linkname[100];	/* "old format" header ends here */
	char	magic[6];	/* For POSIX: "ustar\0" */
	char	version[2];	/* For POSIX: "00" */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	prefix[155];
};
71
72/*
73 * Structure of GNU tar header
74 */
/*
 * One (offset, numbytes) pair as embedded in the GNU tar header's
 * sparse[] array.  Both values are fixed-width character fields.
 */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};
79
/*
 * GNU tar view of a header record.
 *
 * The members from 'name' through 'linkname' are declared exactly as in
 * struct archive_entry_header_ustar.  'magic' is eight bytes here and
 * covers the span that the ustar layout splits into magic[6] and
 * version[2]; tar_read_header() compares it against "ustar  \0" to
 * recognize this variant.  The members after 'rdevminor' occupy the
 * region where the ustar layout declares 'prefix'.
 */
struct archive_entry_header_gnutar {
	char	name[100];
	char	mode[8];
	char	uid[8];
	char	gid[8];
	char	size[12];
	char	mtime[12];
	char	checksum[8];
	char	typeflag[1];
	char	linkname[100];
	char	magic[8];  /* "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];
	char	gname[32];
	char	rdevmajor[8];
	char	rdevminor[8];
	char	atime[12];
	char	ctime[12];
	char	offset[12];
	char	longnames[4];
	char	unused[1];
	/* Up to four sparse-map pairs stored directly in the header. */
	struct gnu_sparse sparse[4];
	char	isextended[1];
	char	realsize[12];
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
108
109/*
110 * Data specific to this format.
111 */
/*
 * One region of an entry's sparse map, kept in a singly linked list
 * headed by tar->sparse_list.
 */
struct sparse_block {
	/* Next region in the list, or NULL at the end. */
	struct sparse_block	*next;
	/* Offset reported to the caller for data from this region. */
	int64_t	offset;
	/* Bytes of this region not yet handed out; 0 means exhausted. */
	int64_t	remaining;
	/* Non-zero marks a hole: the read-data loop does not return it. */
	int hole;
};
118
/*
 * Per-reader state for the tar format.  One instance is allocated
 * (zero-filled) when the format is registered and released by
 * archive_read_format_tar_cleanup().
 */
struct tar {
	/* Body of the most recent Solaris 'A' (ACL) header. */
	struct archive_string	 acl_text;
	struct archive_string	 entry_pathname;
	/* For "GNU.sparse.name" and other similar path extensions. */
	struct archive_string	 entry_pathname_override;
	struct archive_string	 entry_linkpath;
	struct archive_string	 entry_uname;
	struct archive_string	 entry_gname;
	/* Body of the most recent 'K' (long link name) header. */
	struct archive_string	 longlink;
	/* Body of the most recent 'L' (long file name) header. */
	struct archive_string	 longname;
	struct archive_string	 pax_header;
	struct archive_string	 pax_global;
	struct archive_string	 line;
	int			 pax_hdrcharset_binary;
	/* Nesting depth of tar_read_header(); more than 32 is rejected. */
	int			 header_recursion_depth;
	/* Bytes of the current entry's body still to be read. */
	int64_t			 entry_bytes_remaining;
	int64_t			 entry_offset;
	/* Bytes consumed after the body at end of entry or on skip. */
	int64_t			 entry_padding;
	/* Bytes returned by the last read-data call, consumed on the next. */
	int64_t 		 entry_bytes_unconsumed;
	/* Set to -1 ("unset") per entry; reported as the offset at EOF. */
	int64_t			 realsize;
	int			 sparse_allowed;
	/* Head of the sparse map; exhausted nodes are freed from the front. */
	struct sparse_block	*sparse_list;
	/* Presumably the tail used when appending -- confirm in gnu_add_sparse_entry(). */
	struct sparse_block	*sparse_last;
	int64_t			 sparse_offset;
	int64_t			 sparse_numbytes;
	/* Sparse format version; only 1.0 is handled while a map is pending. */
	int			 sparse_gnu_major;
	int			 sparse_gnu_minor;
	/* Non-zero: tar_read_header() must still read the sparse map. */
	char			 sparse_gnu_pending;

	/* Scratch copy of the ACL text handed to the ACL parser. */
	struct archive_string	 localname;
	/* Conversion chosen with the "hdrcharset" option, if any. */
	struct archive_string_conv *opt_sconv;
	/* Conversion in effect for the entry being read. */
	struct archive_string_conv *sconv;
	/* UTF-8 conversion for ACL text, created on first use. */
	struct archive_string_conv *sconv_acl;
	/* Default read conversion, looked up lazily. */
	struct archive_string_conv *sconv_default;
	/* Non-zero once sconv_default needs no (further) lookup. */
	int			 init_default_conversion;
	/* Set by the "compat-2x" option. */
	int			 compat_2x;
};
156
157static int	archive_block_is_null(const char *p);
158static char	*base64_decode(const char *, size_t, size_t *);
159static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
160		    int64_t offset, int64_t remaining);
161
162static void	gnu_clear_sparse_list(struct tar *);
163static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
164		    const struct archive_entry_header_gnutar *header, size_t *);
165static int	gnu_sparse_old_parse(struct archive_read *, struct tar *,
166		    const struct gnu_sparse *sparse, int length);
167static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
168		    const char *);
169static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *,
170			size_t *);
171static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
172		    struct archive_entry *, const void *, size_t *);
173static int	header_common(struct archive_read *,  struct tar *,
174		    struct archive_entry *, const void *);
175static int	header_old_tar(struct archive_read *, struct tar *,
176		    struct archive_entry *, const void *);
177static int	header_pax_extensions(struct archive_read *, struct tar *,
178		    struct archive_entry *, const void *, size_t *);
179static int	header_pax_global(struct archive_read *, struct tar *,
180		    struct archive_entry *, const void *h, size_t *);
181static int	header_longlink(struct archive_read *, struct tar *,
182		    struct archive_entry *, const void *h, size_t *);
183static int	header_longname(struct archive_read *, struct tar *,
184		    struct archive_entry *, const void *h, size_t *);
185static int	read_mac_metadata_blob(struct archive_read *, struct tar *,
186		    struct archive_entry *, const void *h, size_t *);
187static int	header_volume(struct archive_read *, struct tar *,
188		    struct archive_entry *, const void *h, size_t *);
189static int	header_ustar(struct archive_read *, struct tar *,
190		    struct archive_entry *, const void *h);
191static int	header_gnutar(struct archive_read *, struct tar *,
192		    struct archive_entry *, const void *h, size_t *);
193static int	archive_read_format_tar_bid(struct archive_read *, int);
194static int	archive_read_format_tar_options(struct archive_read *,
195		    const char *, const char *);
196static int	archive_read_format_tar_cleanup(struct archive_read *);
197static int	archive_read_format_tar_read_data(struct archive_read *a,
198		    const void **buff, size_t *size, int64_t *offset);
199static int	archive_read_format_tar_skip(struct archive_read *a);
200static int	archive_read_format_tar_read_header(struct archive_read *,
201		    struct archive_entry *);
202static int	checksum(struct archive_read *, const void *);
203static int 	pax_attribute(struct archive_read *, struct tar *,
204		    struct archive_entry *, char *key, char *value);
205static int 	pax_header(struct archive_read *, struct tar *,
206		    struct archive_entry *, char *attr);
207static void	pax_time(const char *, int64_t *sec, long *nanos);
208static ssize_t	readline(struct archive_read *, struct tar *, const char **,
209		    ssize_t limit, size_t *);
210static int	read_body_to_string(struct archive_read *, struct tar *,
211		    struct archive_string *, const void *h, size_t *);
212static int	solaris_sparse_parse(struct archive_read *, struct tar *,
213		    struct archive_entry *, const char *);
214static int64_t	tar_atol(const char *, size_t);
215static int64_t	tar_atol10(const char *, size_t);
216static int64_t	tar_atol256(const char *, size_t);
217static int64_t	tar_atol8(const char *, size_t);
218static int	tar_read_header(struct archive_read *, struct tar *,
219		    struct archive_entry *, size_t *);
220static int	tohex(int c);
221static char	*url_decode(const char *);
222static void	tar_flush_unconsumed(struct archive_read *, size_t *);
223
224
225int
226archive_read_support_format_gnutar(struct archive *a)
227{
228	archive_check_magic(a, ARCHIVE_READ_MAGIC,
229	    ARCHIVE_STATE_NEW, "archive_read_support_format_gnutar");
230	return (archive_read_support_format_tar(a));
231}
232
233
234int
235archive_read_support_format_tar(struct archive *_a)
236{
237	struct archive_read *a = (struct archive_read *)_a;
238	struct tar *tar;
239	int r;
240
241	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
242	    ARCHIVE_STATE_NEW, "archive_read_support_format_tar");
243
244	tar = (struct tar *)calloc(1, sizeof(*tar));
245	if (tar == NULL) {
246		archive_set_error(&a->archive, ENOMEM,
247		    "Can't allocate tar data");
248		return (ARCHIVE_FATAL);
249	}
250
251	r = __archive_read_register_format(a, tar, "tar",
252	    archive_read_format_tar_bid,
253	    archive_read_format_tar_options,
254	    archive_read_format_tar_read_header,
255	    archive_read_format_tar_read_data,
256	    archive_read_format_tar_skip,
257	    NULL,
258	    archive_read_format_tar_cleanup);
259
260	if (r != ARCHIVE_OK)
261		free(tar);
262	return (ARCHIVE_OK);
263}
264
265static int
266archive_read_format_tar_cleanup(struct archive_read *a)
267{
268	struct tar *tar;
269
270	tar = (struct tar *)(a->format->data);
271	gnu_clear_sparse_list(tar);
272	archive_string_free(&tar->acl_text);
273	archive_string_free(&tar->entry_pathname);
274	archive_string_free(&tar->entry_pathname_override);
275	archive_string_free(&tar->entry_linkpath);
276	archive_string_free(&tar->entry_uname);
277	archive_string_free(&tar->entry_gname);
278	archive_string_free(&tar->line);
279	archive_string_free(&tar->pax_global);
280	archive_string_free(&tar->pax_header);
281	archive_string_free(&tar->longname);
282	archive_string_free(&tar->longlink);
283	archive_string_free(&tar->localname);
284	free(tar);
285	(a->format->data) = NULL;
286	return (ARCHIVE_OK);
287}
288
289
290static int
291archive_read_format_tar_bid(struct archive_read *a, int best_bid)
292{
293	int bid;
294	const char *h;
295	const struct archive_entry_header_ustar *header;
296
297	(void)best_bid; /* UNUSED */
298
299	bid = 0;
300
301	/* Now let's look at the actual header and see if it matches. */
302	h = __archive_read_ahead(a, 512, NULL);
303	if (h == NULL)
304		return (-1);
305
306	/* If it's an end-of-archive mark, we can handle it. */
307	if (h[0] == 0 && archive_block_is_null(h)) {
308		/*
309		 * Usually, I bid the number of bits verified, but
310		 * in this case, 4096 seems excessive so I picked 10 as
311		 * an arbitrary but reasonable-seeming value.
312		 */
313		return (10);
314	}
315
316	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
317	if (!checksum(a, h))
318		return (0);
319	bid += 48;  /* Checksum is usually 6 octal digits. */
320
321	header = (const struct archive_entry_header_ustar *)h;
322
323	/* Recognize POSIX formats. */
324	if ((memcmp(header->magic, "ustar\0", 6) == 0)
325	    && (memcmp(header->version, "00", 2) == 0))
326		bid += 56;
327
328	/* Recognize GNU tar format. */
329	if ((memcmp(header->magic, "ustar ", 6) == 0)
330	    && (memcmp(header->version, " \0", 2) == 0))
331		bid += 56;
332
333	/* Type flag must be null, digit or A-Z, a-z. */
334	if (header->typeflag[0] != 0 &&
335	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
336	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
337	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
338		return (0);
339	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
340
341	/* Sanity check: Look at first byte of mode field. */
342	switch (255 & (unsigned)header->mode[0]) {
343	case 0: case 255:
344		/* Base-256 value: No further verification possible! */
345		break;
346	case ' ': /* Not recommended, but not illegal, either. */
347		break;
348	case '0': case '1': case '2': case '3':
349	case '4': case '5': case '6': case '7':
350		/* Octal Value. */
351		/* TODO: Check format of remainder of this field. */
352		break;
353	default:
354		/* Not a valid mode; bail out here. */
355		return (0);
356	}
357	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
358
359	return (bid);
360}
361
362static int
363archive_read_format_tar_options(struct archive_read *a,
364    const char *key, const char *val)
365{
366	struct tar *tar;
367	int ret = ARCHIVE_FAILED;
368
369	tar = (struct tar *)(a->format->data);
370	if (strcmp(key, "compat-2x")  == 0) {
371		/* Handle UTF-8 filnames as libarchive 2.x */
372		tar->compat_2x = (val != NULL)?1:0;
373		tar->init_default_conversion = tar->compat_2x;
374		return (ARCHIVE_OK);
375	} else if (strcmp(key, "hdrcharset")  == 0) {
376		if (val == NULL || val[0] == 0)
377			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
378			    "tar: hdrcharset option needs a character-set name");
379		else {
380			tar->opt_sconv =
381			    archive_string_conversion_from_charset(
382				&a->archive, val, 0);
383			if (tar->opt_sconv != NULL)
384				ret = ARCHIVE_OK;
385			else
386				ret = ARCHIVE_FATAL;
387		}
388		return (ret);
389	}
390
391	/* Note: The "warn" return is just to inform the options
392	 * supervisor that we didn't handle it.  It will generate
393	 * a suitable error if no one used this option. */
394	return (ARCHIVE_WARN);
395}
396
/*
 * Release header bytes that were read ahead but not yet consumed.
 *
 * Callers keep the number of outstanding bytes in *unconsumed.  This
 * helper centralizes that bookkeeping: it passes the count to
 * __archive_read_consume() and resets it to zero, so that a following
 * read-ahead starts at fresh data.  A zero count is a no-op.
 *
 * Debugging hint from the original author: overwriting the claimed
 * region with 0xff before consuming it exposes code that still uses
 * the old data.  That poisoning is known to break things at present,
 * so it is intentionally not part of the code.
 */
static void
tar_flush_unconsumed(struct archive_read *a, size_t *unconsumed)
{
	size_t pending = *unconsumed;

	if (pending == 0)
		return;

	__archive_read_consume(a, pending);
	*unconsumed = 0;
}
418
419/*
420 * The function invoked by archive_read_next_header().  This
421 * just sets up a few things and then calls the internal
422 * tar_read_header() function below.
423 */
424static int
425archive_read_format_tar_read_header(struct archive_read *a,
426    struct archive_entry *entry)
427{
428	/*
429	 * When converting tar archives to cpio archives, it is
430	 * essential that each distinct file have a distinct inode
431	 * number.  To simplify this, we keep a static count here to
432	 * assign fake dev/inode numbers to each tar entry.  Note that
433	 * pax format archives may overwrite this with something more
434	 * useful.
435	 *
436	 * Ideally, we would track every file read from the archive so
437	 * that we could assign the same dev/ino pair to hardlinks,
438	 * but the memory required to store a complete lookup table is
439	 * probably not worthwhile just to support the relatively
440	 * obscure tar->cpio conversion case.
441	 */
442	static int default_inode;
443	static int default_dev;
444	struct tar *tar;
445	const char *p;
446	int r;
447	size_t l, unconsumed = 0;
448
449	/* Assign default device/inode values. */
450	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
451	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
452	/* Limit generated st_ino number to 16 bits. */
453	if (default_inode >= 0xffff) {
454		++default_dev;
455		default_inode = 0;
456	}
457
458	tar = (struct tar *)(a->format->data);
459	tar->entry_offset = 0;
460	gnu_clear_sparse_list(tar);
461	tar->realsize = -1; /* Mark this as "unset" */
462
463	/* Setup default string conversion. */
464	tar->sconv = tar->opt_sconv;
465	if (tar->sconv == NULL) {
466		if (!tar->init_default_conversion) {
467			tar->sconv_default =
468			    archive_string_default_conversion_for_read(&(a->archive));
469			tar->init_default_conversion = 1;
470		}
471		tar->sconv = tar->sconv_default;
472	}
473
474	r = tar_read_header(a, tar, entry, &unconsumed);
475
476	tar_flush_unconsumed(a, &unconsumed);
477
478	/*
479	 * "non-sparse" files are really just sparse files with
480	 * a single block.
481	 */
482	if (tar->sparse_list == NULL) {
483		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
484		    != ARCHIVE_OK)
485			return (ARCHIVE_FATAL);
486	} else {
487		struct sparse_block *sb;
488
489		for (sb = tar->sparse_list; sb != NULL; sb = sb->next) {
490			if (!sb->hole)
491				archive_entry_sparse_add_entry(entry,
492				    sb->offset, sb->remaining);
493		}
494	}
495
496	if (r == ARCHIVE_OK) {
497		/*
498		 * "Regular" entry with trailing '/' is really
499		 * directory: This is needed for certain old tar
500		 * variants and even for some broken newer ones.
501		 */
502		const wchar_t *wp;
503		wp = archive_entry_pathname_w(entry);
504		if (wp != NULL) {
505			l = wcslen(wp);
506			if (archive_entry_filetype(entry) == AE_IFREG
507			    && wp[l-1] == L'/')
508				archive_entry_set_filetype(entry, AE_IFDIR);
509		} else {
510			p = archive_entry_pathname(entry);
511			if (p == NULL)
512				return (ARCHIVE_FAILED);
513			l = strlen(p);
514			if (archive_entry_filetype(entry) == AE_IFREG
515			    && p[l-1] == '/')
516				archive_entry_set_filetype(entry, AE_IFDIR);
517		}
518	}
519	return (r);
520}
521
522static int
523archive_read_format_tar_read_data(struct archive_read *a,
524    const void **buff, size_t *size, int64_t *offset)
525{
526	ssize_t bytes_read;
527	struct tar *tar;
528	struct sparse_block *p;
529
530	tar = (struct tar *)(a->format->data);
531
532	for (;;) {
533		/* Remove exhausted entries from sparse list. */
534		while (tar->sparse_list != NULL &&
535		    tar->sparse_list->remaining == 0) {
536			p = tar->sparse_list;
537			tar->sparse_list = p->next;
538			free(p);
539		}
540
541		if (tar->entry_bytes_unconsumed) {
542			__archive_read_consume(a, tar->entry_bytes_unconsumed);
543			tar->entry_bytes_unconsumed = 0;
544		}
545
546		/* If we're at end of file, return EOF. */
547		if (tar->sparse_list == NULL ||
548		    tar->entry_bytes_remaining == 0) {
549			if (__archive_read_consume(a, tar->entry_padding) < 0)
550				return (ARCHIVE_FATAL);
551			tar->entry_padding = 0;
552			*buff = NULL;
553			*size = 0;
554			*offset = tar->realsize;
555			return (ARCHIVE_EOF);
556		}
557
558		*buff = __archive_read_ahead(a, 1, &bytes_read);
559		if (bytes_read < 0)
560			return (ARCHIVE_FATAL);
561		if (*buff == NULL) {
562			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
563			    "Truncated tar archive");
564			return (ARCHIVE_FATAL);
565		}
566		if (bytes_read > tar->entry_bytes_remaining)
567			bytes_read = (ssize_t)tar->entry_bytes_remaining;
568		/* Don't read more than is available in the
569		 * current sparse block. */
570		if (tar->sparse_list->remaining < bytes_read)
571			bytes_read = (ssize_t)tar->sparse_list->remaining;
572		*size = bytes_read;
573		*offset = tar->sparse_list->offset;
574		tar->sparse_list->remaining -= bytes_read;
575		tar->sparse_list->offset += bytes_read;
576		tar->entry_bytes_remaining -= bytes_read;
577		tar->entry_bytes_unconsumed = bytes_read;
578
579		if (!tar->sparse_list->hole)
580			return (ARCHIVE_OK);
581		/* Current is hole data and skip this. */
582	}
583}
584
585static int
586archive_read_format_tar_skip(struct archive_read *a)
587{
588	int64_t bytes_skipped;
589	int64_t request;
590	struct sparse_block *p;
591	struct tar* tar;
592
593	tar = (struct tar *)(a->format->data);
594
595	/* Do not consume the hole of a sparse file. */
596	request = 0;
597	for (p = tar->sparse_list; p != NULL; p = p->next) {
598		if (!p->hole)
599			request += p->remaining;
600	}
601	if (request > tar->entry_bytes_remaining)
602		request = tar->entry_bytes_remaining;
603	request += tar->entry_padding + tar->entry_bytes_unconsumed;
604
605	bytes_skipped = __archive_read_consume(a, request);
606	if (bytes_skipped < 0)
607		return (ARCHIVE_FATAL);
608
609	tar->entry_bytes_remaining = 0;
610	tar->entry_bytes_unconsumed = 0;
611	tar->entry_padding = 0;
612
613	/* Free the sparse list. */
614	gnu_clear_sparse_list(tar);
615
616	return (ARCHIVE_OK);
617}
618
619/*
620 * This function recursively interprets all of the headers associated
621 * with a single entry.
622 */
623static int
624tar_read_header(struct archive_read *a, struct tar *tar,
625    struct archive_entry *entry, size_t *unconsumed)
626{
627	ssize_t bytes;
628	int err;
629	const char *h;
630	const struct archive_entry_header_ustar *header;
631	const struct archive_entry_header_gnutar *gnuheader;
632
633	tar_flush_unconsumed(a, unconsumed);
634
635	/* Read 512-byte header record */
636	h = __archive_read_ahead(a, 512, &bytes);
637	if (bytes < 0)
638		return ((int)bytes);
639	if (bytes == 0) { /* EOF at a block boundary. */
640		/* Some writers do omit the block of nulls. <sigh> */
641		return (ARCHIVE_EOF);
642	}
643	if (bytes < 512) {  /* Short block at EOF; this is bad. */
644		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
645		    "Truncated tar archive");
646		return (ARCHIVE_FATAL);
647	}
648	*unconsumed = 512;
649
650	/* Check for end-of-archive mark. */
651	if (h[0] == 0 && archive_block_is_null(h)) {
652		/* Try to consume a second all-null record, as well. */
653		tar_flush_unconsumed(a, unconsumed);
654		h = __archive_read_ahead(a, 512, NULL);
655		if (h != NULL)
656			__archive_read_consume(a, 512);
657		archive_clear_error(&a->archive);
658		if (a->archive.archive_format_name == NULL) {
659			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
660			a->archive.archive_format_name = "tar";
661		}
662		return (ARCHIVE_EOF);
663	}
664
665	/*
666	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
667	 * then the client is likely to just retry.  This is a very
668	 * crude way to search for the next valid header!
669	 *
670	 * TODO: Improve this by implementing a real header scan.
671	 */
672	if (!checksum(a, h)) {
673		tar_flush_unconsumed(a, unconsumed);
674		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
675		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
676	}
677
678	if (++tar->header_recursion_depth > 32) {
679		tar_flush_unconsumed(a, unconsumed);
680		archive_set_error(&a->archive, EINVAL, "Too many special headers");
681		return (ARCHIVE_WARN);
682	}
683
684	/* Determine the format variant. */
685	header = (const struct archive_entry_header_ustar *)h;
686
687	switch(header->typeflag[0]) {
688	case 'A': /* Solaris tar ACL */
689		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
690		a->archive.archive_format_name = "Solaris tar";
691		err = header_Solaris_ACL(a, tar, entry, h, unconsumed);
692		break;
693	case 'g': /* POSIX-standard 'g' header. */
694		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
695		a->archive.archive_format_name = "POSIX pax interchange format";
696		err = header_pax_global(a, tar, entry, h, unconsumed);
697		break;
698	case 'K': /* Long link name (GNU tar, others) */
699		err = header_longlink(a, tar, entry, h, unconsumed);
700		break;
701	case 'L': /* Long filename (GNU tar, others) */
702		err = header_longname(a, tar, entry, h, unconsumed);
703		break;
704	case 'V': /* GNU volume header */
705		err = header_volume(a, tar, entry, h, unconsumed);
706		break;
707	case 'X': /* Used by SUN tar; same as 'x'. */
708		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
709		a->archive.archive_format_name =
710		    "POSIX pax interchange format (Sun variant)";
711		err = header_pax_extensions(a, tar, entry, h, unconsumed);
712		break;
713	case 'x': /* POSIX-standard 'x' header. */
714		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
715		a->archive.archive_format_name = "POSIX pax interchange format";
716		err = header_pax_extensions(a, tar, entry, h, unconsumed);
717		break;
718	default:
719		gnuheader = (const struct archive_entry_header_gnutar *)h;
720		if (memcmp(gnuheader->magic, "ustar  \0", 8) == 0) {
721			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
722			a->archive.archive_format_name = "GNU tar format";
723			err = header_gnutar(a, tar, entry, h, unconsumed);
724		} else if (memcmp(header->magic, "ustar", 5) == 0) {
725			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
726				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
727				a->archive.archive_format_name = "POSIX ustar format";
728			}
729			err = header_ustar(a, tar, entry, h);
730		} else {
731			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
732			a->archive.archive_format_name = "tar (non-POSIX)";
733			err = header_old_tar(a, tar, entry, h);
734		}
735	}
736	if (err == ARCHIVE_FATAL)
737		return (err);
738
739	tar_flush_unconsumed(a, unconsumed);
740
741	h = NULL;
742	header = NULL;
743
744	--tar->header_recursion_depth;
745	/* Yuck.  Apple's design here ends up storing long pathname
746	 * extensions for both the AppleDouble extension entry and the
747	 * regular entry.
748	 */
749	/* TODO: Should this be disabled on non-Mac platforms? */
750	if ((err == ARCHIVE_WARN || err == ARCHIVE_OK) &&
751	    tar->header_recursion_depth == 0) {
752		int err2 = read_mac_metadata_blob(a, tar, entry, h, unconsumed);
753		if (err2 < err)
754			err = err2;
755	}
756
757	/* We return warnings or success as-is.  Anything else is fatal. */
758	if (err == ARCHIVE_WARN || err == ARCHIVE_OK) {
759		if (tar->sparse_gnu_pending) {
760			if (tar->sparse_gnu_major == 1 &&
761			    tar->sparse_gnu_minor == 0) {
762				ssize_t bytes_read;
763
764				tar->sparse_gnu_pending = 0;
765				/* Read initial sparse map. */
766				bytes_read = gnu_sparse_10_read(a, tar, unconsumed);
767				tar->entry_bytes_remaining -= bytes_read;
768				if (bytes_read < 0)
769					return ((int)bytes_read);
770			} else {
771				archive_set_error(&a->archive,
772				    ARCHIVE_ERRNO_MISC,
773				    "Unrecognized GNU sparse file format");
774				return (ARCHIVE_WARN);
775			}
776			tar->sparse_gnu_pending = 0;
777		}
778		return (err);
779	}
780	if (err == ARCHIVE_EOF)
781		/* EOF when recursively reading a header is bad. */
782		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
783	return (ARCHIVE_FATAL);
784}
785
786/*
787 * Return true if block checksum is correct.
788 */
789static int
790checksum(struct archive_read *a, const void *h)
791{
792	const unsigned char *bytes;
793	const struct archive_entry_header_ustar	*header;
794	int check, i, sum;
795
796	(void)a; /* UNUSED */
797	bytes = (const unsigned char *)h;
798	header = (const struct archive_entry_header_ustar *)h;
799
800	/*
801	 * Test the checksum.  Note that POSIX specifies _unsigned_
802	 * bytes for this calculation.
803	 */
804	sum = (int)tar_atol(header->checksum, sizeof(header->checksum));
805	check = 0;
806	for (i = 0; i < 148; i++)
807		check += (unsigned char)bytes[i];
808	for (; i < 156; i++)
809		check += 32;
810	for (; i < 512; i++)
811		check += (unsigned char)bytes[i];
812	if (sum == check)
813		return (1);
814
815	/*
816	 * Repeat test with _signed_ bytes, just in case this archive
817	 * was created by an old BSD, Solaris, or HP-UX tar with a
818	 * broken checksum calculation.
819	 */
820	check = 0;
821	for (i = 0; i < 148; i++)
822		check += (signed char)bytes[i];
823	for (; i < 156; i++)
824		check += 32;
825	for (; i < 512; i++)
826		check += (signed char)bytes[i];
827	if (sum == check)
828		return (1);
829
830	return (0);
831}
832
/*
 * Return true if this block contains only nulls.
 *
 * Examines exactly 512 bytes starting at p; returns 1 when all of them
 * are zero and 0 as soon as a non-zero byte is found.
 */
static int
archive_block_is_null(const char *p)
{
	const char *end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		p++;
	}
	return (1);
}
846
847/*
848 * Interpret 'A' Solaris ACL header
849 */
850static int
851header_Solaris_ACL(struct archive_read *a, struct tar *tar,
852    struct archive_entry *entry, const void *h, size_t *unconsumed)
853{
854	const struct archive_entry_header_ustar *header;
855	size_t size;
856	int err;
857	int64_t type;
858	char *acl, *p;
859
860	/*
861	 * read_body_to_string adds a NUL terminator, but we need a little
862	 * more to make sure that we don't overrun acl_text later.
863	 */
864	header = (const struct archive_entry_header_ustar *)h;
865	size = (size_t)tar_atol(header->size, sizeof(header->size));
866	err = read_body_to_string(a, tar, &(tar->acl_text), h, unconsumed);
867	if (err != ARCHIVE_OK)
868		return (err);
869
870	/* Recursively read next header */
871	err = tar_read_header(a, tar, entry, unconsumed);
872	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
873		return (err);
874
875	/* TODO: Examine the first characters to see if this
876	 * is an AIX ACL descriptor.  We'll likely never support
877	 * them, but it would be polite to recognize and warn when
878	 * we do see them. */
879
880	/* Leading octal number indicates ACL type and number of entries. */
881	p = acl = tar->acl_text.s;
882	type = 0;
883	while (*p != '\0' && p < acl + size) {
884		if (*p < '0' || *p > '7') {
885			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
886			    "Malformed Solaris ACL attribute (invalid digit)");
887			return(ARCHIVE_WARN);
888		}
889		type <<= 3;
890		type += *p - '0';
891		if (type > 077777777) {
892			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
893			    "Malformed Solaris ACL attribute (count too large)");
894			return (ARCHIVE_WARN);
895		}
896		p++;
897	}
898	switch ((int)type & ~0777777) {
899	case 01000000:
900		/* POSIX.1e ACL */
901		break;
902	case 03000000:
903		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
904		    "Solaris NFSv4 ACLs not supported");
905		return (ARCHIVE_WARN);
906	default:
907		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
908		    "Malformed Solaris ACL attribute (unsupported type %o)",
909		    (int)type);
910		return (ARCHIVE_WARN);
911	}
912	p++;
913
914	if (p >= acl + size) {
915		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
916		    "Malformed Solaris ACL attribute (body overflow)");
917		return(ARCHIVE_WARN);
918	}
919
920	/* ACL text is null-terminated; find the end. */
921	size -= (p - acl);
922	acl = p;
923
924	while (*p != '\0' && p < acl + size)
925		p++;
926
927	if (tar->sconv_acl == NULL) {
928		tar->sconv_acl = archive_string_conversion_from_charset(
929		    &(a->archive), "UTF-8", 1);
930		if (tar->sconv_acl == NULL)
931			return (ARCHIVE_FATAL);
932	}
933	archive_strncpy(&(tar->localname), acl, p - acl);
934	err = archive_acl_parse_l(archive_entry_acl(entry),
935	    tar->localname.s, ARCHIVE_ENTRY_ACL_TYPE_ACCESS, tar->sconv_acl);
936	if (err != ARCHIVE_OK) {
937		if (errno == ENOMEM) {
938			archive_set_error(&a->archive, ENOMEM,
939			    "Can't allocate memory for ACL");
940		} else
941			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
942			    "Malformed Solaris ACL attribute (unparsable)");
943	}
944	return (err);
945}
946
947/*
948 * Interpret 'K' long linkname header.
949 */
950static int
951header_longlink(struct archive_read *a, struct tar *tar,
952    struct archive_entry *entry, const void *h, size_t *unconsumed)
953{
954	int err;
955
956	err = read_body_to_string(a, tar, &(tar->longlink), h, unconsumed);
957	if (err != ARCHIVE_OK)
958		return (err);
959	err = tar_read_header(a, tar, entry, unconsumed);
960	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
961		return (err);
962	/* Set symlink if symlink already set, else hardlink. */
963	archive_entry_copy_link(entry, tar->longlink.s);
964	return (ARCHIVE_OK);
965}
966
967static int
968set_conversion_failed_error(struct archive_read *a,
969    struct archive_string_conv *sconv, const char *name)
970{
971	if (errno == ENOMEM) {
972		archive_set_error(&a->archive, ENOMEM,
973		    "Can't allocate memory for %s", name);
974		return (ARCHIVE_FATAL);
975	}
976	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
977	    "%s can't be converted from %s to current locale.",
978	    name, archive_string_conversion_charset_name(sconv));
979	return (ARCHIVE_WARN);
980}
981
982/*
983 * Interpret 'L' long filename header.
984 */
985static int
986header_longname(struct archive_read *a, struct tar *tar,
987    struct archive_entry *entry, const void *h, size_t *unconsumed)
988{
989	int err;
990
991	err = read_body_to_string(a, tar, &(tar->longname), h, unconsumed);
992	if (err != ARCHIVE_OK)
993		return (err);
994	/* Read and parse "real" header, then override name. */
995	err = tar_read_header(a, tar, entry, unconsumed);
996	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
997		return (err);
998	if (archive_entry_copy_pathname_l(entry, tar->longname.s,
999	    archive_strlen(&(tar->longname)), tar->sconv) != 0)
1000		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1001	return (err);
1002}
1003
1004
1005/*
1006 * Interpret 'V' GNU tar volume header.
1007 */
1008static int
1009header_volume(struct archive_read *a, struct tar *tar,
1010    struct archive_entry *entry, const void *h, size_t *unconsumed)
1011{
1012	(void)h;
1013
1014	/* Just skip this and read the next header. */
1015	return (tar_read_header(a, tar, entry, unconsumed));
1016}
1017
1018/*
1019 * Read body of an archive entry into an archive_string object.
1020 */
1021static int
1022read_body_to_string(struct archive_read *a, struct tar *tar,
1023    struct archive_string *as, const void *h, size_t *unconsumed)
1024{
1025	int64_t size;
1026	const struct archive_entry_header_ustar *header;
1027	const void *src;
1028
1029	(void)tar; /* UNUSED */
1030	header = (const struct archive_entry_header_ustar *)h;
1031	size  = tar_atol(header->size, sizeof(header->size));
1032	if ((size > 1048576) || (size < 0)) {
1033		archive_set_error(&a->archive, EINVAL,
1034		    "Special header too large");
1035		return (ARCHIVE_FATAL);
1036	}
1037
1038	/* Fail if we can't make our buffer big enough. */
1039	if (archive_string_ensure(as, (size_t)size+1) == NULL) {
1040		archive_set_error(&a->archive, ENOMEM,
1041		    "No memory");
1042		return (ARCHIVE_FATAL);
1043	}
1044
1045	tar_flush_unconsumed(a, unconsumed);
1046
1047	/* Read the body into the string. */
1048	*unconsumed = (size_t)((size + 511) & ~ 511);
1049	src = __archive_read_ahead(a, *unconsumed, NULL);
1050	if (src == NULL) {
1051		*unconsumed = 0;
1052		return (ARCHIVE_FATAL);
1053	}
1054	memcpy(as->s, src, (size_t)size);
1055	as->s[size] = '\0';
1056	as->length = (size_t)size;
1057	return (ARCHIVE_OK);
1058}
1059
1060/*
1061 * Parse out common header elements.
1062 *
1063 * This would be the same as header_old_tar, except that the
1064 * filename is handled slightly differently for old and POSIX
1065 * entries  (POSIX entries support a 'prefix').  This factoring
1066 * allows header_old_tar and header_ustar
1067 * to handle filenames differently, while still putting most of the
1068 * common parsing into one place.
1069 */
1070static int
1071header_common(struct archive_read *a, struct tar *tar,
1072    struct archive_entry *entry, const void *h)
1073{
1074	const struct archive_entry_header_ustar	*header;
1075	char	tartype;
1076	int     err = ARCHIVE_OK;
1077
1078	header = (const struct archive_entry_header_ustar *)h;
1079	if (header->linkname[0])
1080		archive_strncpy(&(tar->entry_linkpath),
1081		    header->linkname, sizeof(header->linkname));
1082	else
1083		archive_string_empty(&(tar->entry_linkpath));
1084
1085	/* Parse out the numeric fields (all are octal) */
1086	archive_entry_set_mode(entry,
1087		(mode_t)tar_atol(header->mode, sizeof(header->mode)));
1088	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
1089	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
1090	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
1091	if (tar->entry_bytes_remaining < 0) {
1092		tar->entry_bytes_remaining = 0;
1093		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1094		    "Tar entry has negative size?");
1095		err = ARCHIVE_WARN;
1096	}
1097	tar->realsize = tar->entry_bytes_remaining;
1098	archive_entry_set_size(entry, tar->entry_bytes_remaining);
1099	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
1100
1101	/* Handle the tar type flag appropriately. */
1102	tartype = header->typeflag[0];
1103
1104	switch (tartype) {
1105	case '1': /* Hard link */
1106		if (archive_entry_copy_hardlink_l(entry, tar->entry_linkpath.s,
1107		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1108			err = set_conversion_failed_error(a, tar->sconv,
1109			    "Linkname");
1110			if (err == ARCHIVE_FATAL)
1111				return (err);
1112		}
1113		/*
1114		 * The following may seem odd, but: Technically, tar
1115		 * does not store the file type for a "hard link"
1116		 * entry, only the fact that it is a hard link.  So, I
1117		 * leave the type zero normally.  But, pax interchange
1118		 * format allows hard links to have data, which
1119		 * implies that the underlying entry is a regular
1120		 * file.
1121		 */
1122		if (archive_entry_size(entry) > 0)
1123			archive_entry_set_filetype(entry, AE_IFREG);
1124
1125		/*
1126		 * A tricky point: Traditionally, tar readers have
1127		 * ignored the size field when reading hardlink
1128		 * entries, and some writers put non-zero sizes even
1129		 * though the body is empty.  POSIX blessed this
1130		 * convention in the 1988 standard, but broke with
1131		 * this tradition in 2001 by permitting hardlink
1132		 * entries to store valid bodies in pax interchange
1133		 * format, but not in ustar format.  Since there is no
1134		 * hard and fast way to distinguish pax interchange
1135		 * from earlier archives (the 'x' and 'g' entries are
1136		 * optional, after all), we need a heuristic.
1137		 */
1138		if (archive_entry_size(entry) == 0) {
1139			/* If the size is already zero, we're done. */
1140		}  else if (a->archive.archive_format
1141		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
1142			/* Definitely pax extended; must obey hardlink size. */
1143		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
1144		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
1145		{
1146			/* Old-style or GNU tar: we must ignore the size. */
1147			archive_entry_set_size(entry, 0);
1148			tar->entry_bytes_remaining = 0;
1149		} else if (archive_read_format_tar_bid(a, 50) > 50) {
1150			/*
1151			 * We don't know if it's pax: If the bid
1152			 * function sees a valid ustar header
1153			 * immediately following, then let's ignore
1154			 * the hardlink size.
1155			 */
1156			archive_entry_set_size(entry, 0);
1157			tar->entry_bytes_remaining = 0;
1158		}
1159		/*
1160		 * TODO: There are still two cases I'd like to handle:
1161		 *   = a ustar non-pax archive with a hardlink entry at
1162		 *     end-of-archive.  (Look for block of nulls following?)
1163		 *   = a pax archive that has not seen any pax headers
1164		 *     and has an entry which is a hardlink entry storing
1165		 *     a body containing an uncompressed tar archive.
1166		 * The first is worth addressing; I don't see any reliable
1167		 * way to deal with the second possibility.
1168		 */
1169		break;
1170	case '2': /* Symlink */
1171		archive_entry_set_filetype(entry, AE_IFLNK);
1172		archive_entry_set_size(entry, 0);
1173		tar->entry_bytes_remaining = 0;
1174		if (archive_entry_copy_symlink_l(entry, tar->entry_linkpath.s,
1175		    archive_strlen(&(tar->entry_linkpath)), tar->sconv) != 0) {
1176			err = set_conversion_failed_error(a, tar->sconv,
1177			    "Linkname");
1178			if (err == ARCHIVE_FATAL)
1179				return (err);
1180		}
1181		break;
1182	case '3': /* Character device */
1183		archive_entry_set_filetype(entry, AE_IFCHR);
1184		archive_entry_set_size(entry, 0);
1185		tar->entry_bytes_remaining = 0;
1186		break;
1187	case '4': /* Block device */
1188		archive_entry_set_filetype(entry, AE_IFBLK);
1189		archive_entry_set_size(entry, 0);
1190		tar->entry_bytes_remaining = 0;
1191		break;
1192	case '5': /* Dir */
1193		archive_entry_set_filetype(entry, AE_IFDIR);
1194		archive_entry_set_size(entry, 0);
1195		tar->entry_bytes_remaining = 0;
1196		break;
1197	case '6': /* FIFO device */
1198		archive_entry_set_filetype(entry, AE_IFIFO);
1199		archive_entry_set_size(entry, 0);
1200		tar->entry_bytes_remaining = 0;
1201		break;
1202	case 'D': /* GNU incremental directory type */
1203		/*
1204		 * No special handling is actually required here.
1205		 * It might be nice someday to preprocess the file list and
1206		 * provide it to the client, though.
1207		 */
1208		archive_entry_set_filetype(entry, AE_IFDIR);
1209		break;
1210	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1211		/*
1212		 * As far as I can tell, this is just like a regular file
1213		 * entry, except that the contents should be _appended_ to
1214		 * the indicated file at the indicated offset.  This may
1215		 * require some API work to fully support.
1216		 */
1217		break;
1218	case 'N': /* Old GNU "long filename" entry. */
1219		/* The body of this entry is a script for renaming
1220		 * previously-extracted entries.  Ugh.  It will never
1221		 * be supported by libarchive. */
1222		archive_entry_set_filetype(entry, AE_IFREG);
1223		break;
1224	case 'S': /* GNU sparse files */
1225		/*
1226		 * Sparse files are really just regular files with
1227		 * sparse information in the extended area.
1228		 */
1229		/* FALLTHROUGH */
1230	case '0':
1231		/*
1232		 * Enable sparse file "read" support only for regular
1233		 * files and explicit GNU sparse files.  However, we
1234		 * don't allow non-standard file types to be sparse.
1235		 */
1236		tar->sparse_allowed = 1;
1237		/* FALLTHROUGH */
1238	default: /* Regular file  and non-standard types */
1239		/*
1240		 * Per POSIX: non-recognized types should always be
1241		 * treated as regular files.
1242		 */
1243		archive_entry_set_filetype(entry, AE_IFREG);
1244		break;
1245	}
1246	return (err);
1247}
1248
1249/*
1250 * Parse out header elements for "old-style" tar archives.
1251 */
1252static int
1253header_old_tar(struct archive_read *a, struct tar *tar,
1254    struct archive_entry *entry, const void *h)
1255{
1256	const struct archive_entry_header_ustar	*header;
1257	int err = ARCHIVE_OK, err2;
1258
1259	/* Copy filename over (to ensure null termination). */
1260	header = (const struct archive_entry_header_ustar *)h;
1261	if (archive_entry_copy_pathname_l(entry,
1262	    header->name, sizeof(header->name), tar->sconv) != 0) {
1263		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1264		if (err == ARCHIVE_FATAL)
1265			return (err);
1266	}
1267
1268	/* Grab rest of common fields */
1269	err2 = header_common(a, tar, entry, h);
1270	if (err > err2)
1271		err = err2;
1272
1273	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1274	return (err);
1275}
1276
1277/*
1278 * Read a Mac AppleDouble-encoded blob of file metadata,
1279 * if there is one.
1280 */
1281static int
1282read_mac_metadata_blob(struct archive_read *a, struct tar *tar,
1283    struct archive_entry *entry, const void *h, size_t *unconsumed)
1284{
1285	int64_t size;
1286	const void *data;
1287	const char *p, *name;
1288	const wchar_t *wp, *wname;
1289
1290	(void)h; /* UNUSED */
1291
1292	wname = wp = archive_entry_pathname_w(entry);
1293	if (wp != NULL) {
1294		/* Find the last path element. */
1295		for (; *wp != L'\0'; ++wp) {
1296			if (wp[0] == '/' && wp[1] != L'\0')
1297				wname = wp + 1;
1298		}
1299		/*
1300		 * If last path element starts with "._", then
1301		 * this is a Mac extension.
1302		 */
1303		if (wname[0] != L'.' || wname[1] != L'_' || wname[2] == L'\0')
1304			return ARCHIVE_OK;
1305	} else {
1306		/* Find the last path element. */
1307		name = p = archive_entry_pathname(entry);
1308		if (p == NULL)
1309			return (ARCHIVE_FAILED);
1310		for (; *p != '\0'; ++p) {
1311			if (p[0] == '/' && p[1] != '\0')
1312				name = p + 1;
1313		}
1314		/*
1315		 * If last path element starts with "._", then
1316		 * this is a Mac extension.
1317		 */
1318		if (name[0] != '.' || name[1] != '_' || name[2] == '\0')
1319			return ARCHIVE_OK;
1320	}
1321
1322 	/* Read the body as a Mac OS metadata blob. */
1323	size = archive_entry_size(entry);
1324
1325	/*
1326	 * TODO: Look beyond the body here to peek at the next header.
1327	 * If it's a regular header (not an extension header)
1328	 * that has the wrong name, just return the current
1329	 * entry as-is, without consuming the body here.
1330	 * That would reduce the risk of us mis-identifying
1331	 * an ordinary file that just happened to have
1332	 * a name starting with "._".
1333	 *
1334	 * Q: Is the above idea really possible?  Even
1335	 * when there are GNU or pax extension entries?
1336	 */
1337	data = __archive_read_ahead(a, (size_t)size, NULL);
1338	if (data == NULL) {
1339		*unconsumed = 0;
1340		return (ARCHIVE_FATAL);
1341	}
1342	archive_entry_copy_mac_metadata(entry, data, (size_t)size);
1343	*unconsumed = (size_t)((size + 511) & ~ 511);
1344	tar_flush_unconsumed(a, unconsumed);
1345	return (tar_read_header(a, tar, entry, unconsumed));
1346}
1347
1348/*
1349 * Parse a file header for a pax extended archive entry.
1350 */
1351static int
1352header_pax_global(struct archive_read *a, struct tar *tar,
1353    struct archive_entry *entry, const void *h, size_t *unconsumed)
1354{
1355	int err;
1356
1357	err = read_body_to_string(a, tar, &(tar->pax_global), h, unconsumed);
1358	if (err != ARCHIVE_OK)
1359		return (err);
1360	err = tar_read_header(a, tar, entry, unconsumed);
1361	return (err);
1362}
1363
1364static int
1365header_pax_extensions(struct archive_read *a, struct tar *tar,
1366    struct archive_entry *entry, const void *h, size_t *unconsumed)
1367{
1368	int err, err2;
1369
1370	err = read_body_to_string(a, tar, &(tar->pax_header), h, unconsumed);
1371	if (err != ARCHIVE_OK)
1372		return (err);
1373
1374	/* Parse the next header. */
1375	err = tar_read_header(a, tar, entry, unconsumed);
1376	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1377		return (err);
1378
1379	/*
1380	 * TODO: Parse global/default options into 'entry' struct here
1381	 * before handling file-specific options.
1382	 *
1383	 * This design (parse standard header, then overwrite with pax
1384	 * extended attribute data) usually works well, but isn't ideal;
1385	 * it would be better to parse the pax extended attributes first
1386	 * and then skip any fields in the standard header that were
1387	 * defined in the pax header.
1388	 */
1389	err2 = pax_header(a, tar, entry, tar->pax_header.s);
1390	err =  err_combine(err, err2);
1391	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1392	return (err);
1393}
1394
1395
1396/*
1397 * Parse a file header for a Posix "ustar" archive entry.  This also
1398 * handles "pax" or "extended ustar" entries.
1399 */
1400static int
1401header_ustar(struct archive_read *a, struct tar *tar,
1402    struct archive_entry *entry, const void *h)
1403{
1404	const struct archive_entry_header_ustar	*header;
1405	struct archive_string *as;
1406	int err = ARCHIVE_OK, r;
1407
1408	header = (const struct archive_entry_header_ustar *)h;
1409
1410	/* Copy name into an internal buffer to ensure null-termination. */
1411	as = &(tar->entry_pathname);
1412	if (header->prefix[0]) {
1413		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1414		if (as->s[archive_strlen(as) - 1] != '/')
1415			archive_strappend_char(as, '/');
1416		archive_strncat(as, header->name, sizeof(header->name));
1417	} else {
1418		archive_strncpy(as, header->name, sizeof(header->name));
1419	}
1420	if (archive_entry_copy_pathname_l(entry, as->s, archive_strlen(as),
1421	    tar->sconv) != 0) {
1422		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
1423		if (err == ARCHIVE_FATAL)
1424			return (err);
1425	}
1426
1427	/* Handle rest of common fields. */
1428	r = header_common(a, tar, entry, h);
1429	if (r == ARCHIVE_FATAL)
1430		return (r);
1431	if (r < err)
1432		err = r;
1433
1434	/* Handle POSIX ustar fields. */
1435	if (archive_entry_copy_uname_l(entry,
1436	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
1437		err = set_conversion_failed_error(a, tar->sconv, "Uname");
1438		if (err == ARCHIVE_FATAL)
1439			return (err);
1440	}
1441
1442	if (archive_entry_copy_gname_l(entry,
1443	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
1444		err = set_conversion_failed_error(a, tar->sconv, "Gname");
1445		if (err == ARCHIVE_FATAL)
1446			return (err);
1447	}
1448
1449	/* Parse out device numbers only for char and block specials. */
1450	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1451		archive_entry_set_rdevmajor(entry, (dev_t)
1452		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1453		archive_entry_set_rdevminor(entry, (dev_t)
1454		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1455	}
1456
1457	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1458
1459	return (err);
1460}
1461
1462
1463/*
1464 * Parse the pax extended attributes record.
1465 *
1466 * Returns non-zero if there's an error in the data.
1467 */
1468static int
1469pax_header(struct archive_read *a, struct tar *tar,
1470    struct archive_entry *entry, char *attr)
1471{
1472	size_t attr_length, l, line_length;
1473	char *p;
1474	char *key, *value;
1475	struct archive_string *as;
1476	struct archive_string_conv *sconv;
1477	int err, err2;
1478
1479	attr_length = strlen(attr);
1480	tar->pax_hdrcharset_binary = 0;
1481	archive_string_empty(&(tar->entry_gname));
1482	archive_string_empty(&(tar->entry_linkpath));
1483	archive_string_empty(&(tar->entry_pathname));
1484	archive_string_empty(&(tar->entry_pathname_override));
1485	archive_string_empty(&(tar->entry_uname));
1486	err = ARCHIVE_OK;
1487	while (attr_length > 0) {
1488		/* Parse decimal length field at start of line. */
1489		line_length = 0;
1490		l = attr_length;
1491		p = attr; /* Record start of line. */
1492		while (l>0) {
1493			if (*p == ' ') {
1494				p++;
1495				l--;
1496				break;
1497			}
1498			if (*p < '0' || *p > '9') {
1499				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1500				    "Ignoring malformed pax extended attributes");
1501				return (ARCHIVE_WARN);
1502			}
1503			line_length *= 10;
1504			line_length += *p - '0';
1505			if (line_length > 999999) {
1506				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1507				    "Rejecting pax extended attribute > 1MB");
1508				return (ARCHIVE_WARN);
1509			}
1510			p++;
1511			l--;
1512		}
1513
1514		/*
1515		 * Parsed length must be no bigger than available data,
1516		 * at least 1, and the last character of the line must
1517		 * be '\n'.
1518		 */
1519		if (line_length > attr_length
1520		    || line_length < 1
1521		    || attr[line_length - 1] != '\n')
1522		{
1523			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1524			    "Ignoring malformed pax extended attribute");
1525			return (ARCHIVE_WARN);
1526		}
1527
1528		/* Null-terminate the line. */
1529		attr[line_length - 1] = '\0';
1530
1531		/* Find end of key and null terminate it. */
1532		key = p;
1533		if (key[0] == '=')
1534			return (-1);
1535		while (*p && *p != '=')
1536			++p;
1537		if (*p == '\0') {
1538			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1539			    "Invalid pax extended attributes");
1540			return (ARCHIVE_WARN);
1541		}
1542		*p = '\0';
1543
1544		/* Identify null-terminated 'value' portion. */
1545		value = p + 1;
1546
1547		/* Identify this attribute and set it in the entry. */
1548		err2 = pax_attribute(a, tar, entry, key, value);
1549		if (err2 == ARCHIVE_FATAL)
1550			return (err2);
1551		err = err_combine(err, err2);
1552
1553		/* Skip to next line */
1554		attr += line_length;
1555		attr_length -= line_length;
1556	}
1557
1558	/*
1559	 * PAX format uses UTF-8 as default charset for its metadata
1560	 * unless hdrcharset=BINARY is present in its header.
1561	 * We apply the charset specified by the hdrcharset option only
1562	 * when the hdrcharset attribute(in PAX header) is BINARY because
1563	 * we respect the charset described in PAX header and BINARY also
1564	 * means that metadata(filename,uname and gname) character-set
1565	 * is unknown.
1566	 */
1567	if (tar->pax_hdrcharset_binary)
1568		sconv = tar->opt_sconv;
1569	else {
1570		sconv = archive_string_conversion_from_charset(
1571		    &(a->archive), "UTF-8", 1);
1572		if (sconv == NULL)
1573			return (ARCHIVE_FATAL);
1574		if (tar->compat_2x)
1575			archive_string_conversion_set_opt(sconv,
1576			    SCONV_SET_OPT_UTF8_LIBARCHIVE2X);
1577	}
1578
1579	if (archive_strlen(&(tar->entry_gname)) > 0) {
1580		if (archive_entry_copy_gname_l(entry, tar->entry_gname.s,
1581		    archive_strlen(&(tar->entry_gname)), sconv) != 0) {
1582			err = set_conversion_failed_error(a, sconv, "Gname");
1583			if (err == ARCHIVE_FATAL)
1584				return (err);
1585			/* Use a converted an original name. */
1586			archive_entry_copy_gname(entry, tar->entry_gname.s);
1587		}
1588	}
1589	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1590		if (archive_entry_copy_link_l(entry, tar->entry_linkpath.s,
1591		    archive_strlen(&(tar->entry_linkpath)), sconv) != 0) {
1592			err = set_conversion_failed_error(a, sconv, "Linkname");
1593			if (err == ARCHIVE_FATAL)
1594				return (err);
1595			/* Use a converted an original name. */
1596			archive_entry_copy_link(entry, tar->entry_linkpath.s);
1597		}
1598	}
1599	/*
1600	 * Some extensions (such as the GNU sparse file extensions)
1601	 * deliberately store a synthetic name under the regular 'path'
1602	 * attribute and the real file name under a different attribute.
1603	 * Since we're supposed to not care about the order, we
1604	 * have no choice but to store all of the various filenames
1605	 * we find and figure it all out afterwards.  This is the
1606	 * figuring out part.
1607	 */
1608	as = NULL;
1609	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1610		as = &(tar->entry_pathname_override);
1611	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1612		as = &(tar->entry_pathname);
1613	if (as != NULL) {
1614		if (archive_entry_copy_pathname_l(entry, as->s,
1615		    archive_strlen(as), sconv) != 0) {
1616			err = set_conversion_failed_error(a, sconv, "Pathname");
1617			if (err == ARCHIVE_FATAL)
1618				return (err);
1619			/* Use a converted an original name. */
1620			archive_entry_copy_pathname(entry, as->s);
1621		}
1622	}
1623	if (archive_strlen(&(tar->entry_uname)) > 0) {
1624		if (archive_entry_copy_uname_l(entry, tar->entry_uname.s,
1625		    archive_strlen(&(tar->entry_uname)), sconv) != 0) {
1626			err = set_conversion_failed_error(a, sconv, "Uname");
1627			if (err == ARCHIVE_FATAL)
1628				return (err);
1629			/* Use a converted an original name. */
1630			archive_entry_copy_uname(entry, tar->entry_uname.s);
1631		}
1632	}
1633	return (err);
1634}
1635
/*
 * Decode one "LIBARCHIVE.xattr.<name>" attribute and add it to 'entry'.
 *
 * <name> is URL-decoded and 'value' is base64-decoded before both are
 * passed to archive_entry_xattr_add_entry(); the decoded buffers are
 * freed here afterwards.
 *
 * Returns 0 on success, 1 if the value cannot be base64-decoded, 2 if
 * the name cannot be URL-decoded, and 3 if 'name' does not consist of
 * the "LIBARCHIVE.xattr." prefix followed by at least one character.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	char *name, char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *decoded_name;
	void *decoded_value;
	size_t decoded_len;

	/* Require the prefix plus a non-empty attribute name. */
	if (strlen(name) <= prefix_len ||
	    memcmp(name, prefix, prefix_len) != 0)
		return 3;

	/* URL-decode the part of the key after the prefix. */
	decoded_name = url_decode(name + prefix_len);
	if (decoded_name == NULL)
		return 2;

	/* Base-64 decode value */
	decoded_value = base64_decode(value, strlen(value), &decoded_len);
	if (decoded_value == NULL) {
		free(decoded_name);
		return 1;
	}

	archive_entry_xattr_add_entry(entry, decoded_name,
		decoded_value, decoded_len);

	free(decoded_value);
	free(decoded_name);
	return 0;
}
1668
1669/*
1670 * Parse a single key=value attribute.  key/value pointers are
1671 * assumed to point into reasonably long-lived storage.
1672 *
1673 * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1674 * extensions should always have keywords of the form "VENDOR.attribute"
1675 * In particular, it's quite feasible to support many different
1676 * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1677 * unique to this library.
1678 *
1679 * Investigate other vendor-specific extensions and see if
1680 * any of them look useful.
1681 */
1682static int
1683pax_attribute(struct archive_read *a, struct tar *tar,
1684    struct archive_entry *entry, char *key, char *value)
1685{
1686	int64_t s;
1687	long n;
1688	int err = ARCHIVE_OK, r;
1689
1690#ifndef __FreeBSD__
1691	if (value == NULL)
1692		value = "";	/* Disable compiler warning; do not pass
1693				 * NULL pointer to strlen().  */
1694#endif
1695	switch (key[0]) {
1696	case 'G':
1697		/* Reject GNU.sparse.* headers on non-regular files. */
1698		if (strncmp(key, "GNU.sparse", 10) == 0 &&
1699		    !tar->sparse_allowed) {
1700			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1701			    "Non-regular file cannot be sparse");
1702			return (ARCHIVE_FATAL);
1703		}
1704
1705		/* GNU "0.0" sparse pax format. */
1706		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1707			tar->sparse_offset = -1;
1708			tar->sparse_numbytes = -1;
1709			tar->sparse_gnu_major = 0;
1710			tar->sparse_gnu_minor = 0;
1711		}
1712		if (strcmp(key, "GNU.sparse.offset") == 0) {
1713			tar->sparse_offset = tar_atol10(value, strlen(value));
1714			if (tar->sparse_numbytes != -1) {
1715				if (gnu_add_sparse_entry(a, tar,
1716				    tar->sparse_offset, tar->sparse_numbytes)
1717				    != ARCHIVE_OK)
1718					return (ARCHIVE_FATAL);
1719				tar->sparse_offset = -1;
1720				tar->sparse_numbytes = -1;
1721			}
1722		}
1723		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1724			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1725			if (tar->sparse_numbytes != -1) {
1726				if (gnu_add_sparse_entry(a, tar,
1727				    tar->sparse_offset, tar->sparse_numbytes)
1728				    != ARCHIVE_OK)
1729					return (ARCHIVE_FATAL);
1730				tar->sparse_offset = -1;
1731				tar->sparse_numbytes = -1;
1732			}
1733		}
1734		if (strcmp(key, "GNU.sparse.size") == 0) {
1735			tar->realsize = tar_atol10(value, strlen(value));
1736			archive_entry_set_size(entry, tar->realsize);
1737		}
1738
1739		/* GNU "0.1" sparse pax format. */
1740		if (strcmp(key, "GNU.sparse.map") == 0) {
1741			tar->sparse_gnu_major = 0;
1742			tar->sparse_gnu_minor = 1;
1743			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
1744				return (ARCHIVE_WARN);
1745		}
1746
1747		/* GNU "1.0" sparse pax format */
1748		if (strcmp(key, "GNU.sparse.major") == 0) {
1749			tar->sparse_gnu_major = (int)tar_atol10(value, strlen(value));
1750			tar->sparse_gnu_pending = 1;
1751		}
1752		if (strcmp(key, "GNU.sparse.minor") == 0) {
1753			tar->sparse_gnu_minor = (int)tar_atol10(value, strlen(value));
1754			tar->sparse_gnu_pending = 1;
1755		}
1756		if (strcmp(key, "GNU.sparse.name") == 0) {
1757			/*
1758			 * The real filename; when storing sparse
1759			 * files, GNU tar puts a synthesized name into
1760			 * the regular 'path' attribute in an attempt
1761			 * to limit confusion. ;-)
1762			 */
1763			archive_strcpy(&(tar->entry_pathname_override), value);
1764		}
1765		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1766			tar->realsize = tar_atol10(value, strlen(value));
1767			archive_entry_set_size(entry, tar->realsize);
1768		}
1769		break;
1770	case 'L':
1771		/* Our extensions */
1772/* TODO: Handle arbitrary extended attributes... */
1773/*
1774		if (strcmp(key, "LIBARCHIVE.xxxxxxx") == 0)
1775			archive_entry_set_xxxxxx(entry, value);
1776*/
1777		if (strcmp(key, "LIBARCHIVE.creationtime") == 0) {
1778			pax_time(value, &s, &n);
1779			archive_entry_set_birthtime(entry, s, n);
1780		}
1781		if (memcmp(key, "LIBARCHIVE.xattr.", 17) == 0)
1782			pax_attribute_xattr(entry, key, value);
1783		break;
1784	case 'S':
1785		/* We support some keys used by the "star" archiver */
1786		if (strcmp(key, "SCHILY.acl.access") == 0) {
1787			if (tar->sconv_acl == NULL) {
1788				tar->sconv_acl =
1789				    archive_string_conversion_from_charset(
1790					&(a->archive), "UTF-8", 1);
1791				if (tar->sconv_acl == NULL)
1792					return (ARCHIVE_FATAL);
1793			}
1794
1795			r = archive_acl_parse_l(archive_entry_acl(entry),
1796			    value, ARCHIVE_ENTRY_ACL_TYPE_ACCESS,
1797			    tar->sconv_acl);
1798			if (r != ARCHIVE_OK) {
1799				err = r;
1800				if (err == ARCHIVE_FATAL) {
1801					archive_set_error(&a->archive, ENOMEM,
1802					    "Can't allocate memory for "
1803					    "SCHILY.acl.access");
1804					return (err);
1805				}
1806				archive_set_error(&a->archive,
1807				    ARCHIVE_ERRNO_MISC,
1808				    "Parse error: SCHILY.acl.access");
1809			}
1810		} else if (strcmp(key, "SCHILY.acl.default") == 0) {
1811			if (tar->sconv_acl == NULL) {
1812				tar->sconv_acl =
1813				    archive_string_conversion_from_charset(
1814					&(a->archive), "UTF-8", 1);
1815				if (tar->sconv_acl == NULL)
1816					return (ARCHIVE_FATAL);
1817			}
1818
1819			r = archive_acl_parse_l(archive_entry_acl(entry),
1820			    value, ARCHIVE_ENTRY_ACL_TYPE_DEFAULT,
1821			    tar->sconv_acl);
1822			if (r != ARCHIVE_OK) {
1823				err = r;
1824				if (err == ARCHIVE_FATAL) {
1825					archive_set_error(&a->archive, ENOMEM,
1826					    "Can't allocate memory for "
1827					    "SCHILY.acl.default");
1828					return (err);
1829				}
1830				archive_set_error(&a->archive,
1831				    ARCHIVE_ERRNO_MISC,
1832				    "Parse error: SCHILY.acl.default");
1833			}
1834		} else if (strcmp(key, "SCHILY.devmajor") == 0) {
1835			archive_entry_set_rdevmajor(entry,
1836			    (dev_t)tar_atol10(value, strlen(value)));
1837		} else if (strcmp(key, "SCHILY.devminor") == 0) {
1838			archive_entry_set_rdevminor(entry,
1839			    (dev_t)tar_atol10(value, strlen(value)));
1840		} else if (strcmp(key, "SCHILY.fflags") == 0) {
1841			archive_entry_copy_fflags_text(entry, value);
1842		} else if (strcmp(key, "SCHILY.dev") == 0) {
1843			archive_entry_set_dev(entry,
1844			    (dev_t)tar_atol10(value, strlen(value)));
1845		} else if (strcmp(key, "SCHILY.ino") == 0) {
1846			archive_entry_set_ino(entry,
1847			    tar_atol10(value, strlen(value)));
1848		} else if (strcmp(key, "SCHILY.nlink") == 0) {
1849			archive_entry_set_nlink(entry, (unsigned)
1850			    tar_atol10(value, strlen(value)));
1851		} else if (strcmp(key, "SCHILY.realsize") == 0) {
1852			tar->realsize = tar_atol10(value, strlen(value));
1853			archive_entry_set_size(entry, tar->realsize);
1854		} else if (strcmp(key, "SUN.holesdata") == 0) {
1855			/* A Solaris extension for sparse. */
1856			r = solaris_sparse_parse(a, tar, entry, value);
1857			if (r < err) {
1858				if (r == ARCHIVE_FATAL)
1859					return (r);
1860				err = r;
1861				archive_set_error(&a->archive,
1862				    ARCHIVE_ERRNO_MISC,
1863				    "Parse error: SUN.holesdata");
1864			}
1865		}
1866		break;
1867	case 'a':
1868		if (strcmp(key, "atime") == 0) {
1869			pax_time(value, &s, &n);
1870			archive_entry_set_atime(entry, s, n);
1871		}
1872		break;
1873	case 'c':
1874		if (strcmp(key, "ctime") == 0) {
1875			pax_time(value, &s, &n);
1876			archive_entry_set_ctime(entry, s, n);
1877		} else if (strcmp(key, "charset") == 0) {
1878			/* TODO: Publish charset information in entry. */
1879		} else if (strcmp(key, "comment") == 0) {
1880			/* TODO: Publish comment in entry. */
1881		}
1882		break;
1883	case 'g':
1884		if (strcmp(key, "gid") == 0) {
1885			archive_entry_set_gid(entry,
1886			    tar_atol10(value, strlen(value)));
1887		} else if (strcmp(key, "gname") == 0) {
1888			archive_strcpy(&(tar->entry_gname), value);
1889		}
1890		break;
1891	case 'h':
1892		if (strcmp(key, "hdrcharset") == 0) {
1893			if (strcmp(value, "BINARY") == 0)
1894				/* Binary  mode. */
1895				tar->pax_hdrcharset_binary = 1;
1896			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1897				tar->pax_hdrcharset_binary = 0;
1898		}
1899		break;
1900	case 'l':
1901		/* pax interchange doesn't distinguish hardlink vs. symlink. */
1902		if (strcmp(key, "linkpath") == 0) {
1903			archive_strcpy(&(tar->entry_linkpath), value);
1904		}
1905		break;
1906	case 'm':
1907		if (strcmp(key, "mtime") == 0) {
1908			pax_time(value, &s, &n);
1909			archive_entry_set_mtime(entry, s, n);
1910		}
1911		break;
1912	case 'p':
1913		if (strcmp(key, "path") == 0) {
1914			archive_strcpy(&(tar->entry_pathname), value);
1915		}
1916		break;
1917	case 'r':
1918		/* POSIX has reserved 'realtime.*' */
1919		break;
1920	case 's':
1921		/* POSIX has reserved 'security.*' */
1922		/* Someday: if (strcmp(key, "security.acl") == 0) { ... } */
1923		if (strcmp(key, "size") == 0) {
1924			/* "size" is the size of the data in the entry. */
1925			tar->entry_bytes_remaining
1926			    = tar_atol10(value, strlen(value));
1927			/*
1928			 * But, "size" is not necessarily the size of
1929			 * the file on disk; if this is a sparse file,
1930			 * the disk size may have already been set from
1931			 * GNU.sparse.realsize or GNU.sparse.size or
1932			 * an old GNU header field or SCHILY.realsize
1933			 * or ....
1934			 */
1935			if (tar->realsize < 0) {
1936				archive_entry_set_size(entry,
1937				    tar->entry_bytes_remaining);
1938				tar->realsize
1939				    = tar->entry_bytes_remaining;
1940			}
1941		}
1942		break;
1943	case 'u':
1944		if (strcmp(key, "uid") == 0) {
1945			archive_entry_set_uid(entry,
1946			    tar_atol10(value, strlen(value)));
1947		} else if (strcmp(key, "uname") == 0) {
1948			archive_strcpy(&(tar->entry_uname), value);
1949		}
1950		break;
1951	}
1952	return (err);
1953}
1954
1955
1956
1957/*
1958 * parse a decimal time value, which may include a fractional portion
1959 */
1960static void
1961pax_time(const char *p, int64_t *ps, long *pn)
1962{
1963	char digit;
1964	int64_t	s;
1965	unsigned long l;
1966	int sign;
1967	int64_t limit, last_digit_limit;
1968
1969	limit = INT64_MAX / 10;
1970	last_digit_limit = INT64_MAX % 10;
1971
1972	s = 0;
1973	sign = 1;
1974	if (*p == '-') {
1975		sign = -1;
1976		p++;
1977	}
1978	while (*p >= '0' && *p <= '9') {
1979		digit = *p - '0';
1980		if (s > limit ||
1981		    (s == limit && digit > last_digit_limit)) {
1982			s = INT64_MAX;
1983			break;
1984		}
1985		s = (s * 10) + digit;
1986		++p;
1987	}
1988
1989	*ps = s * sign;
1990
1991	/* Calculate nanoseconds. */
1992	*pn = 0;
1993
1994	if (*p != '.')
1995		return;
1996
1997	l = 100000000UL;
1998	do {
1999		++p;
2000		if (*p >= '0' && *p <= '9')
2001			*pn += (*p - '0') * l;
2002		else
2003			break;
2004	} while (l /= 10);
2005}
2006
2007/*
2008 * Parse GNU tar header
2009 */
2010static int
2011header_gnutar(struct archive_read *a, struct tar *tar,
2012    struct archive_entry *entry, const void *h, size_t *unconsumed)
2013{
2014	const struct archive_entry_header_gnutar *header;
2015	int64_t t;
2016	int err = ARCHIVE_OK;
2017
2018	/*
2019	 * GNU header is like POSIX ustar, except 'prefix' is
2020	 * replaced with some other fields. This also means the
2021	 * filename is stored as in old-style archives.
2022	 */
2023
2024	/* Grab fields common to all tar variants. */
2025	err = header_common(a, tar, entry, h);
2026	if (err == ARCHIVE_FATAL)
2027		return (err);
2028
2029	/* Copy filename over (to ensure null termination). */
2030	header = (const struct archive_entry_header_gnutar *)h;
2031	if (archive_entry_copy_pathname_l(entry,
2032	    header->name, sizeof(header->name), tar->sconv) != 0) {
2033		err = set_conversion_failed_error(a, tar->sconv, "Pathname");
2034		if (err == ARCHIVE_FATAL)
2035			return (err);
2036	}
2037
2038	/* Fields common to ustar and GNU */
2039	/* XXX Can the following be factored out since it's common
2040	 * to ustar and gnu tar?  Is it okay to move it down into
2041	 * header_common, perhaps?  */
2042	if (archive_entry_copy_uname_l(entry,
2043	    header->uname, sizeof(header->uname), tar->sconv) != 0) {
2044		err = set_conversion_failed_error(a, tar->sconv, "Uname");
2045		if (err == ARCHIVE_FATAL)
2046			return (err);
2047	}
2048
2049	if (archive_entry_copy_gname_l(entry,
2050	    header->gname, sizeof(header->gname), tar->sconv) != 0) {
2051		err = set_conversion_failed_error(a, tar->sconv, "Gname");
2052		if (err == ARCHIVE_FATAL)
2053			return (err);
2054	}
2055
2056	/* Parse out device numbers only for char and block specials */
2057	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
2058		archive_entry_set_rdevmajor(entry, (dev_t)
2059		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
2060		archive_entry_set_rdevminor(entry, (dev_t)
2061		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
2062	} else
2063		archive_entry_set_rdev(entry, 0);
2064
2065	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
2066
2067	/* Grab GNU-specific fields. */
2068	t = tar_atol(header->atime, sizeof(header->atime));
2069	if (t > 0)
2070		archive_entry_set_atime(entry, t, 0);
2071	t = tar_atol(header->ctime, sizeof(header->ctime));
2072	if (t > 0)
2073		archive_entry_set_ctime(entry, t, 0);
2074
2075	if (header->realsize[0] != 0) {
2076		tar->realsize
2077		    = tar_atol(header->realsize, sizeof(header->realsize));
2078		archive_entry_set_size(entry, tar->realsize);
2079	}
2080
2081	if (header->sparse[0].offset[0] != 0) {
2082		if (gnu_sparse_old_read(a, tar, header, unconsumed)
2083		    != ARCHIVE_OK)
2084			return (ARCHIVE_FATAL);
2085	} else {
2086		if (header->isextended[0] != 0) {
2087			/* XXX WTF? XXX */
2088		}
2089	}
2090
2091	return (err);
2092}
2093
2094static int
2095gnu_add_sparse_entry(struct archive_read *a, struct tar *tar,
2096    int64_t offset, int64_t remaining)
2097{
2098	struct sparse_block *p;
2099
2100	p = (struct sparse_block *)malloc(sizeof(*p));
2101	if (p == NULL) {
2102		archive_set_error(&a->archive, ENOMEM, "Out of memory");
2103		return (ARCHIVE_FATAL);
2104	}
2105	memset(p, 0, sizeof(*p));
2106	if (tar->sparse_last != NULL)
2107		tar->sparse_last->next = p;
2108	else
2109		tar->sparse_list = p;
2110	tar->sparse_last = p;
2111	p->offset = offset;
2112	p->remaining = remaining;
2113	return (ARCHIVE_OK);
2114}
2115
2116static void
2117gnu_clear_sparse_list(struct tar *tar)
2118{
2119	struct sparse_block *p;
2120
2121	while (tar->sparse_list != NULL) {
2122		p = tar->sparse_list;
2123		tar->sparse_list = p->next;
2124		free(p);
2125	}
2126	tar->sparse_last = NULL;
2127}
2128
2129/*
2130 * GNU tar old-format sparse data.
2131 *
2132 * GNU old-format sparse data is stored in a fixed-field
2133 * format.  Offset/size values are 11-byte octal fields (same
 * format as 'size' field in ustar header).  These are
2135 * stored in the header, allocating subsequent header blocks
2136 * as needed.  Extending the header in this way is a pretty
2137 * severe POSIX violation; this design has earned GNU tar a
2138 * lot of criticism.
2139 */
2140
2141static int
2142gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
2143    const struct archive_entry_header_gnutar *header, size_t *unconsumed)
2144{
2145	ssize_t bytes_read;
2146	const void *data;
2147	struct extended {
2148		struct gnu_sparse sparse[21];
2149		char	isextended[1];
2150		char	padding[7];
2151	};
2152	const struct extended *ext;
2153
2154	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
2155		return (ARCHIVE_FATAL);
2156	if (header->isextended[0] == 0)
2157		return (ARCHIVE_OK);
2158
2159	do {
2160		tar_flush_unconsumed(a, unconsumed);
2161		data = __archive_read_ahead(a, 512, &bytes_read);
2162		if (bytes_read < 0)
2163			return (ARCHIVE_FATAL);
2164		if (bytes_read < 512) {
2165			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
2166			    "Truncated tar archive "
2167			    "detected while reading sparse file data");
2168			return (ARCHIVE_FATAL);
2169		}
2170		*unconsumed = 512;
2171		ext = (const struct extended *)data;
2172		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
2173			return (ARCHIVE_FATAL);
2174	} while (ext->isextended[0] != 0);
2175	if (tar->sparse_list != NULL)
2176		tar->entry_offset = tar->sparse_list->offset;
2177	return (ARCHIVE_OK);
2178}
2179
2180static int
2181gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
2182    const struct gnu_sparse *sparse, int length)
2183{
2184	while (length > 0 && sparse->offset[0] != 0) {
2185		if (gnu_add_sparse_entry(a, tar,
2186		    tar_atol(sparse->offset, sizeof(sparse->offset)),
2187		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
2188		    != ARCHIVE_OK)
2189			return (ARCHIVE_FATAL);
2190		sparse++;
2191		length--;
2192	}
2193	return (ARCHIVE_OK);
2194}
2195
2196/*
2197 * GNU tar sparse format 0.0
2198 *
2199 * Beginning with GNU tar 1.15, sparse files are stored using
2200 * information in the pax extended header.  The GNU tar maintainers
2201 * have gone through a number of variations in the process of working
2202 * out this scheme; fortunately, they're all numbered.
2203 *
2204 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
2205 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
2206 * store offset/size for each block.  The repeated instances of these
2207 * latter fields violate the pax specification (which frowns on
2208 * duplicate keys), so this format was quickly replaced.
2209 */
2210
2211/*
2212 * GNU tar sparse format 0.1
2213 *
2214 * This version replaced the offset/numbytes attributes with
2215 * a single "map" attribute that stored a list of integers.  This
2216 * format had two problems: First, the "map" attribute could be very
2217 * long, which caused problems for some implementations.  More
2218 * importantly, the sparse data was lost when extracted by archivers
2219 * that didn't recognize this extension.
2220 */
2221
2222static int
2223gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
2224{
2225	const char *e;
2226	int64_t offset = -1, size = -1;
2227
2228	for (;;) {
2229		e = p;
2230		while (*e != '\0' && *e != ',') {
2231			if (*e < '0' || *e > '9')
2232				return (ARCHIVE_WARN);
2233			e++;
2234		}
2235		if (offset < 0) {
2236			offset = tar_atol10(p, e - p);
2237			if (offset < 0)
2238				return (ARCHIVE_WARN);
2239		} else {
2240			size = tar_atol10(p, e - p);
2241			if (size < 0)
2242				return (ARCHIVE_WARN);
2243			if (gnu_add_sparse_entry(a, tar, offset, size)
2244			    != ARCHIVE_OK)
2245				return (ARCHIVE_FATAL);
2246			offset = -1;
2247		}
2248		if (*e == '\0')
2249			return (ARCHIVE_OK);
2250		p = e + 1;
2251	}
2252}
2253
2254/*
2255 * GNU tar sparse format 1.0
2256 *
2257 * The idea: The offset/size data is stored as a series of base-10
2258 * ASCII numbers prepended to the file data, so that dearchivers that
2259 * don't support this format will extract the block map along with the
2260 * data and a separate post-process can restore the sparseness.
2261 *
2262 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
2263 * padding to the body of the file when using this format.  GNU tar
2264 * 1.17 corrected this bug without bumping the version number, so
2265 * it's not possible to support both variants.  This code supports
2266 * the later variant at the expense of not supporting the former.
2267 *
2268 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
2269 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
2270 */
2271
2272/*
2273 * Read the next line from the input, and parse it as a decimal
2274 * integer followed by '\n'.  Returns positive integer value or
2275 * negative on error.
2276 */
2277static int64_t
2278gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
2279    int64_t *remaining, size_t *unconsumed)
2280{
2281	int64_t l, limit, last_digit_limit;
2282	const char *p;
2283	ssize_t bytes_read;
2284	int base, digit;
2285
2286	base = 10;
2287	limit = INT64_MAX / base;
2288	last_digit_limit = INT64_MAX % base;
2289
2290	/*
2291	 * Skip any lines starting with '#'; GNU tar specs
2292	 * don't require this, but they should.
2293	 */
2294	do {
2295		bytes_read = readline(a, tar, &p,
2296			(ssize_t)tar_min(*remaining, 100), unconsumed);
2297		if (bytes_read <= 0)
2298			return (ARCHIVE_FATAL);
2299		*remaining -= bytes_read;
2300	} while (p[0] == '#');
2301
2302	l = 0;
2303	while (bytes_read > 0) {
2304		if (*p == '\n')
2305			return (l);
2306		if (*p < '0' || *p >= '0' + base)
2307			return (ARCHIVE_WARN);
2308		digit = *p - '0';
2309		if (l > limit || (l == limit && digit > last_digit_limit))
2310			l = INT64_MAX; /* Truncate on overflow. */
2311		else
2312			l = (l * base) + digit;
2313		p++;
2314		bytes_read--;
2315	}
2316	/* TODO: Error message. */
2317	return (ARCHIVE_WARN);
2318}
2319
2320/*
2321 * Returns length (in bytes) of the sparse data description
2322 * that was read.
2323 */
2324static ssize_t
2325gnu_sparse_10_read(struct archive_read *a, struct tar *tar, size_t *unconsumed)
2326{
2327	ssize_t bytes_read;
2328	int entries;
2329	int64_t offset, size, to_skip, remaining;
2330
2331	/* Clear out the existing sparse list. */
2332	gnu_clear_sparse_list(tar);
2333
2334	remaining = tar->entry_bytes_remaining;
2335
2336	/* Parse entries. */
2337	entries = (int)gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2338	if (entries < 0)
2339		return (ARCHIVE_FATAL);
2340	/* Parse the individual entries. */
2341	while (entries-- > 0) {
2342		/* Parse offset/size */
2343		offset = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2344		if (offset < 0)
2345			return (ARCHIVE_FATAL);
2346		size = gnu_sparse_10_atol(a, tar, &remaining, unconsumed);
2347		if (size < 0)
2348			return (ARCHIVE_FATAL);
2349		/* Add a new sparse entry. */
2350		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
2351			return (ARCHIVE_FATAL);
2352	}
2353	/* Skip rest of block... */
2354	tar_flush_unconsumed(a, unconsumed);
2355	bytes_read = (ssize_t)(tar->entry_bytes_remaining - remaining);
2356	to_skip = 0x1ff & -bytes_read;
2357	if (to_skip != __archive_read_consume(a, to_skip))
2358		return (ARCHIVE_FATAL);
2359	return ((ssize_t)(bytes_read + to_skip));
2360}
2361
2362/*
2363 * Solaris pax extension for a sparse file. This is recorded with the
2364 * data and hole pairs. The way recording sparse information by Solaris'
2365 * pax simply indicates where data and sparse are, so the stored contents
2366 * consist of both data and hole.
2367 */
2368static int
2369solaris_sparse_parse(struct archive_read *a, struct tar *tar,
2370    struct archive_entry *entry, const char *p)
2371{
2372	const char *e;
2373	int64_t start, end;
2374	int hole = 1;
2375
2376	(void)entry; /* UNUSED */
2377
2378	end = 0;
2379	if (*p == ' ')
2380		p++;
2381	else
2382		return (ARCHIVE_WARN);
2383	for (;;) {
2384		e = p;
2385		while (*e != '\0' && *e != ' ') {
2386			if (*e < '0' || *e > '9')
2387				return (ARCHIVE_WARN);
2388			e++;
2389		}
2390		start = end;
2391		end = tar_atol10(p, e - p);
2392		if (end < 0)
2393			return (ARCHIVE_WARN);
2394		if (start < end) {
2395			if (gnu_add_sparse_entry(a, tar, start,
2396			    end - start) != ARCHIVE_OK)
2397				return (ARCHIVE_FATAL);
2398			tar->sparse_last->hole = hole;
2399		}
2400		if (*e == '\0')
2401			return (ARCHIVE_OK);
2402		p = e + 1;
2403		hole = hole == 0;
2404	}
2405}
2406
2407/*-
2408 * Convert text->integer.
2409 *
2410 * Traditional tar formats (including POSIX) specify base-8 for
2411 * all of the standard numeric fields.  This is a significant limitation
2412 * in practice:
2413 *   = file size is limited to 8GB
2414 *   = rdevmajor and rdevminor are limited to 21 bits
2415 *   = uid/gid are limited to 21 bits
2416 *
2417 * There are two workarounds for this:
2418 *   = pax extended headers, which use variable-length string fields
2419 *   = GNU tar and STAR both allow either base-8 or base-256 in
2420 *      most fields.  The high bit is set to indicate base-256.
2421 *
2422 * On read, this implementation supports both extensions.
2423 */
2424static int64_t
2425tar_atol(const char *p, size_t char_cnt)
2426{
2427	/*
2428	 * Technically, GNU tar considers a field to be in base-256
2429	 * only if the first byte is 0xff or 0x80.
2430	 */
2431	if (*p & 0x80)
2432		return (tar_atol256(p, char_cnt));
2433	return (tar_atol8(p, char_cnt));
2434}
2435
2436/*
2437 * Note that this implementation does not (and should not!) obey
2438 * locale settings; you cannot simply substitute strtol here, since
2439 * it does obey locale.
2440 */
2441static int64_t
2442tar_atol_base_n(const char *p, size_t char_cnt, int base)
2443{
2444	int64_t	l, limit, last_digit_limit;
2445	int digit, sign;
2446
2447	limit = INT64_MAX / base;
2448	last_digit_limit = INT64_MAX % base;
2449
2450	/* the pointer will not be dereferenced if char_cnt is zero
2451	 * due to the way the && operator is evaulated.
2452	 */
2453	while (char_cnt != 0 && (*p == ' ' || *p == '\t')) {
2454		p++;
2455		char_cnt--;
2456	}
2457
2458	sign = 1;
2459	if (char_cnt != 0 && *p == '-') {
2460		sign = -1;
2461		p++;
2462		char_cnt--;
2463	}
2464
2465	l = 0;
2466	if (char_cnt != 0) {
2467		digit = *p - '0';
2468		while (digit >= 0 && digit < base  && char_cnt != 0) {
2469			if (l>limit || (l == limit && digit > last_digit_limit)) {
2470				l = INT64_MAX; /* Truncate on overflow. */
2471				break;
2472			}
2473			l = (l * base) + digit;
2474			digit = *++p - '0';
2475			char_cnt--;
2476		}
2477	}
2478	return (sign < 0) ? -l : l;
2479}
2480
/* Parse a base-8 (octal) field of at most char_cnt bytes. */
static int64_t
tar_atol8(const char *p, size_t char_cnt)
{
	const int octal = 8;

	return (tar_atol_base_n(p, char_cnt, octal));
}
2486
/* Parse a base-10 (decimal) field of at most char_cnt bytes. */
static int64_t
tar_atol10(const char *p, size_t char_cnt)
{
	const int decimal = 10;

	return (tar_atol_base_n(p, char_cnt, decimal));
}
2492
2493/*
2494 * Parse a base-256 integer.  This is just a straight signed binary
2495 * value in big-endian order, except that the high-order bit is
2496 * ignored.
2497 */
2498static int64_t
2499tar_atol256(const char *_p, size_t char_cnt)
2500{
2501	int64_t	l, upper_limit, lower_limit;
2502	const unsigned char *p = (const unsigned char *)_p;
2503
2504	upper_limit = INT64_MAX / 256;
2505	lower_limit = INT64_MIN / 256;
2506
2507	/* Pad with 1 or 0 bits, depending on sign. */
2508	if ((0x40 & *p) == 0x40)
2509		l = (int64_t)-1;
2510	else
2511		l = 0;
2512	l = (l << 6) | (0x3f & *p++);
2513	while (--char_cnt > 0) {
2514		if (l > upper_limit) {
2515			l = INT64_MAX; /* Truncate on overflow */
2516			break;
2517		} else if (l < lower_limit) {
2518			l = INT64_MIN;
2519			break;
2520		}
2521		l = (l << 8) | (0xff & (int64_t)*p++);
2522	}
2523	return (l);
2524}
2525
2526/*
2527 * Returns length of line (including trailing newline)
2528 * or negative on error.  'start' argument is updated to
2529 * point to first character of line.  This avoids copying
2530 * when possible.
2531 */
2532static ssize_t
2533readline(struct archive_read *a, struct tar *tar, const char **start,
2534    ssize_t limit, size_t *unconsumed)
2535{
2536	ssize_t bytes_read;
2537	ssize_t total_size = 0;
2538	const void *t;
2539	const char *s;
2540	void *p;
2541
2542	tar_flush_unconsumed(a, unconsumed);
2543
2544	t = __archive_read_ahead(a, 1, &bytes_read);
2545	if (bytes_read <= 0)
2546		return (ARCHIVE_FATAL);
2547	s = t;  /* Start of line? */
2548	p = memchr(t, '\n', bytes_read);
2549	/* If we found '\n' in the read buffer, return pointer to that. */
2550	if (p != NULL) {
2551		bytes_read = 1 + ((const char *)p) - s;
2552		if (bytes_read > limit) {
2553			archive_set_error(&a->archive,
2554			    ARCHIVE_ERRNO_FILE_FORMAT,
2555			    "Line too long");
2556			return (ARCHIVE_FATAL);
2557		}
2558		*unconsumed = bytes_read;
2559		*start = s;
2560		return (bytes_read);
2561	}
2562	*unconsumed = bytes_read;
2563	/* Otherwise, we need to accumulate in a line buffer. */
2564	for (;;) {
2565		if (total_size + bytes_read > limit) {
2566			archive_set_error(&a->archive,
2567			    ARCHIVE_ERRNO_FILE_FORMAT,
2568			    "Line too long");
2569			return (ARCHIVE_FATAL);
2570		}
2571		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2572			archive_set_error(&a->archive, ENOMEM,
2573			    "Can't allocate working buffer");
2574			return (ARCHIVE_FATAL);
2575		}
2576		memcpy(tar->line.s + total_size, t, bytes_read);
2577		tar_flush_unconsumed(a, unconsumed);
2578		total_size += bytes_read;
2579		/* If we found '\n', clean up and return. */
2580		if (p != NULL) {
2581			*start = tar->line.s;
2582			return (total_size);
2583		}
2584		/* Read some more. */
2585		t = __archive_read_ahead(a, 1, &bytes_read);
2586		if (bytes_read <= 0)
2587			return (ARCHIVE_FATAL);
2588		s = t;  /* Start of line? */
2589		p = memchr(t, '\n', bytes_read);
2590		/* If we found '\n', trim the read. */
2591		if (p != NULL) {
2592			bytes_read = 1 + ((const char *)p) - s;
2593		}
2594		*unconsumed = bytes_read;
2595	}
2596}
2597
2598/*
2599 * base64_decode - Base64 decode
2600 *
2601 * This accepts most variations of base-64 encoding, including:
2602 *    * with or without line breaks
2603 *    * with or without the final group padded with '=' or '_' characters
2604 * (The most economical Base-64 variant does not pad the last group and
2605 * omits line breaks; RFC1341 used for MIME requires both.)
2606 */
2607static char *
2608base64_decode(const char *s, size_t len, size_t *out_len)
2609{
2610	static const unsigned char digits[64] = {
2611		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
2612		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
2613		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
2614		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
2615		'4','5','6','7','8','9','+','/' };
2616	static unsigned char decode_table[128];
2617	char *out, *d;
2618	const unsigned char *src = (const unsigned char *)s;
2619
2620	/* If the decode table is not yet initialized, prepare it. */
2621	if (decode_table[digits[1]] != 1) {
2622		unsigned i;
2623		memset(decode_table, 0xff, sizeof(decode_table));
2624		for (i = 0; i < sizeof(digits); i++)
2625			decode_table[digits[i]] = i;
2626	}
2627
2628	/* Allocate enough space to hold the entire output. */
2629	/* Note that we may not use all of this... */
2630	out = (char *)malloc(len - len / 4 + 1);
2631	if (out == NULL) {
2632		*out_len = 0;
2633		return (NULL);
2634	}
2635	d = out;
2636
2637	while (len > 0) {
2638		/* Collect the next group of (up to) four characters. */
2639		int v = 0;
2640		int group_size = 0;
2641		while (group_size < 4 && len > 0) {
2642			/* '=' or '_' padding indicates final group. */
2643			if (*src == '=' || *src == '_') {
2644				len = 0;
2645				break;
2646			}
2647			/* Skip illegal characters (including line breaks) */
2648			if (*src > 127 || *src < 32
2649			    || decode_table[*src] == 0xff) {
2650				len--;
2651				src++;
2652				continue;
2653			}
2654			v <<= 6;
2655			v |= decode_table[*src++];
2656			len --;
2657			group_size++;
2658		}
2659		/* Align a short group properly. */
2660		v <<= 6 * (4 - group_size);
2661		/* Unpack the group we just collected. */
2662		switch (group_size) {
2663		case 4: d[2] = v & 0xff;
2664			/* FALLTHROUGH */
2665		case 3: d[1] = (v >> 8) & 0xff;
2666			/* FALLTHROUGH */
2667		case 2: d[0] = (v >> 16) & 0xff;
2668			break;
2669		case 1: /* this is invalid! */
2670			break;
2671		}
2672		d += group_size * 3 / 4;
2673	}
2674
2675	*out_len = d - out;
2676	return (out);
2677}
2678
/*
 * Return a malloc()ed copy of 'in' with every "%XX" escape (two hex
 * digits) replaced by the byte it encodes.  A '%' that is not followed
 * by two hex digits is copied through unchanged.  Returns NULL if the
 * allocation fails.
 */
static char *
url_decode(const char *in)
{
	const char *rd = in;
	char *out, *wr;
	int hi, lo;

	/* The decoded text is never longer than the input. */
	out = (char *)malloc(strlen(in) + 1);
	if (out == NULL)
		return (NULL);
	wr = out;

	while (*rd != '\0') {
		if (rd[0] == '%' && rd[1] != '\0' && rd[2] != '\0') {
			hi = tohex(rd[1]);
			lo = tohex(rd[2]);
			if (hi >= 0 && lo >= 0) {
				/* Valid escape: emit one byte, eat three. */
				*wr++ = ((hi << 4) | lo);
				rd += 3;
				continue;
			}
			/* Not an escape after all; copy '%' literally. */
		}
		*wr++ = *rd++;
	}
	*wr = '\0';
	return (out);
}
2707
/*
 * Return the value (0-15) of the hexadecimal digit 'c', accepting
 * both upper and lower case, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
	if ('0' <= c && c <= '9')
		return (c - '0');
	if ('A' <= c && c <= 'F')
		return (10 + (c - 'A'));
	if ('a' <= c && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}
2720