1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");
28
29#ifdef HAVE_ERRNO_H
30#include <errno.h>
31#endif
32#include <stddef.h>
33/* #include <stdint.h> */ /* See archive_platform.h */
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41/* Obtain suitable wide-character manipulation functions. */
42#ifdef HAVE_WCHAR_H
43#include <wchar.h>
44#else
/*
 * Fallback wcscmp() for platforms without <wchar.h>.  Callers in this
 * file only test the result for zero/non-zero, so the magnitude of a
 * non-zero result carries no guaranteed meaning.
 */
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
	int delta;

	for (;;) {
		delta = (int)*s1 - (int)*s2;
		/* Stop at the first mismatch or at the shared terminator. */
		if (delta != 0 || *s1 == 0)
			return delta;
		s1++;
		s2++;
	}
}
/*
 * Fallback wcsncmp() for platforms without <wchar.h>.
 *
 * Compares at most n wide characters of s1 and s2, stopping early at a
 * terminating null.  Returns zero when the compared prefixes are equal
 * (including when n is zero) and a non-zero difference otherwise.
 * Only the zero/non-zero distinction is relied upon by this file.
 */
static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n)
{
	for (; n > 0; n--, s1++, s2++) {
		int diff = (int)*s1 - (int)*s2;

		if (diff != 0)
			return diff;
		/* Both strings ended together: nothing more to compare. */
		if (*s1 == 0)
			break;
	}
	return 0;
}
/*
 * Fallback wcslen() for platforms without <wchar.h>: number of wide
 * characters that precede the terminating null.
 */
static size_t wcslen(const wchar_t *s)
{
	size_t count = 0;

	while (s[count] != 0)
		count++;
	return count;
}
68#endif
69
70#include "archive.h"
71#include "archive_entry.h"
72#include "archive_private.h"
73#include "archive_read_private.h"
74
/* Smaller of two values.  Each argument may be evaluated more than once. */
#define tar_min(a,b) ((b) > (a) ? (a) : (b))
76
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a plain character array, so the structure maps
 * directly onto the leading 500 bytes of a 512-byte header record.
 * The byte offset of each field is noted alongside it.
 */
struct archive_entry_header_ustar {
	char	name[100];	/*   0 */
	char	mode[8];	/* 100 */
	char	uid[8];		/* 108 */
	char	gid[8];		/* 116 */
	char	size[12];	/* 124 */
	char	mtime[12];	/* 136 */
	char	checksum[8];	/* 148 */
	char	typeflag[1];	/* 156 */
	char	linkname[100];	/* 157; "old format" header ends here */
	char	magic[6];	/* 257; for POSIX: "ustar\0" */
	char	version[2];	/* 263; for POSIX: "00" */
	char	uname[32];	/* 265 */
	char	gname[32];	/* 297 */
	char	rdevmajor[8];	/* 329 */
	char	rdevminor[8];	/* 337 */
	char	prefix[155];	/* 345 */
};
98
/*
 * On-disk layout of a GNU tar header.
 */

/* One entry of the sparse map embedded in a GNU header. */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

/*
 * The fields up to and including 'linkname' line up with the ustar
 * header.  Old GNU format doesn't use the POSIX 'prefix' field; it
 * uses the 'L' (longname) entry instead, which frees the tail of the
 * record for the GNU-specific fields below.
 */
struct archive_entry_header_gnutar {
	char	name[100];	/*   0 */
	char	mode[8];	/* 100 */
	char	uid[8];		/* 108 */
	char	gid[8];		/* 116 */
	char	size[12];	/* 124 */
	char	mtime[12];	/* 136 */
	char	checksum[8];	/* 148 */
	char	typeflag[1];	/* 156 */
	char	linkname[100];	/* 157 */
	char	magic[8];	/* 257; "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];	/* 265 */
	char	gname[32];	/* 297 */
	char	rdevmajor[8];	/* 329 */
	char	rdevminor[8];	/* 337 */
	char	atime[12];	/* 345 */
	char	ctime[12];	/* 357 */
	char	offset[12];	/* 369 */
	char	longnames[4];	/* 381 */
	char	unused[1];	/* 385 */
	struct gnu_sparse sparse[4];	/* 386 */
	char	isextended[1];	/* 482 */
	char	realsize[12];	/* 483 */
};
135
/*
 * Data specific to this format.
 */

/*
 * One node of the singly linked sparse map kept per entry.  The read
 * path advances 'offset' and shrinks 'remaining' as data is handed
 * out, and drops the node once 'remaining' reaches zero.
 */
struct sparse_block {
	struct sparse_block	*next;		/* Following node, or NULL. */
	off_t			 offset;	/* Position reported for the next data. */
	off_t			 remaining;	/* Bytes still to deliver from this node. */
};
144
145struct tar {
146	struct archive_string	 acl_text;
147	struct archive_string	 entry_pathname;
148	/* For "GNU.sparse.name" and other similar path extensions. */
149	struct archive_string	 entry_pathname_override;
150	struct archive_string	 entry_linkpath;
151	struct archive_string	 entry_uname;
152	struct archive_string	 entry_gname;
153	struct archive_string	 longlink;
154	struct archive_string	 longname;
155	struct archive_string	 pax_header;
156	struct archive_string	 pax_global;
157	struct archive_string	 line;
158	int			 pax_hdrcharset_binary;
159	wchar_t 		*pax_entry;
160	size_t			 pax_entry_length;
161	int			 header_recursion_depth;
162	int64_t			 entry_bytes_remaining;
163	int64_t			 entry_offset;
164	int64_t			 entry_padding;
165	int64_t			 realsize;
166	struct sparse_block	*sparse_list;
167	struct sparse_block	*sparse_last;
168	int64_t			 sparse_offset;
169	int64_t			 sparse_numbytes;
170	int			 sparse_gnu_major;
171	int			 sparse_gnu_minor;
172	char			 sparse_gnu_pending;
173};
174
/*
 * Forward declarations for the file-local helpers, grouped by purpose.
 */

/* Callbacks registered with the read core. */
static int	archive_read_format_tar_bid(struct archive_read *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);
static int	archive_read_format_tar_skip(struct archive_read *a);

/* Header record handling. */
static int	archive_block_is_null(const unsigned char *p);
static int	checksum(struct archive_read *, const void *);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h);
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *);

/* One parser per header flavour. */
static int	header_Solaris_ACL(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_common(struct archive_read *,  struct tar *,
		    struct archive_entry *, const void *);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);

/* GNU sparse-file support. */
static int	gnu_add_sparse_entry(struct archive_read *, struct tar *,
		    off_t offset, off_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_01_parse(struct archive_read *, struct tar *,
		    const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *);
static int	gnu_sparse_old_parse(struct archive_read *a, struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header);

/* pax extended attributes. */
static int 	pax_attribute(struct archive_read *, struct tar *,
		    struct archive_entry *, char *key, char *value);
static int 	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const char *, int64_t *sec, long *nanos);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit);

/* Numeric field decoding. */
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol10(const char *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);

/* Text decoding utilities. */
static ssize_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static char	*base64_decode(const char *, size_t, size_t *);
static int	tohex(int c);
static char	*url_decode(const char *);
static wchar_t	*utf8_decode(struct tar *, const char *, size_t length);
234
/*
 * Enable GNU tar support.  GNU tar archives are handled by the general
 * tar reader, so this simply forwards to
 * archive_read_support_format_tar() and returns its result.
 */
int
archive_read_support_format_gnutar(struct archive *a)
{
	int status;

	status = archive_read_support_format_tar(a);
	return (status);
}
240
241
242int
243archive_read_support_format_tar(struct archive *_a)
244{
245	struct archive_read *a = (struct archive_read *)_a;
246	struct tar *tar;
247	int r;
248
249	tar = (struct tar *)malloc(sizeof(*tar));
250	if (tar == NULL) {
251		archive_set_error(&a->archive, ENOMEM,
252		    "Can't allocate tar data");
253		return (ARCHIVE_FATAL);
254	}
255	memset(tar, 0, sizeof(*tar));
256
257	r = __archive_read_register_format(a, tar, "tar",
258	    archive_read_format_tar_bid,
259	    NULL,
260	    archive_read_format_tar_read_header,
261	    archive_read_format_tar_read_data,
262	    archive_read_format_tar_skip,
263	    archive_read_format_tar_cleanup);
264
265	if (r != ARCHIVE_OK)
266		free(tar);
267	return (ARCHIVE_OK);
268}
269
270static int
271archive_read_format_tar_cleanup(struct archive_read *a)
272{
273	struct tar *tar;
274
275	tar = (struct tar *)(a->format->data);
276	gnu_clear_sparse_list(tar);
277	archive_string_free(&tar->acl_text);
278	archive_string_free(&tar->entry_pathname);
279	archive_string_free(&tar->entry_pathname_override);
280	archive_string_free(&tar->entry_linkpath);
281	archive_string_free(&tar->entry_uname);
282	archive_string_free(&tar->entry_gname);
283	archive_string_free(&tar->line);
284	archive_string_free(&tar->pax_global);
285	archive_string_free(&tar->pax_header);
286	archive_string_free(&tar->longname);
287	archive_string_free(&tar->longlink);
288	free(tar->pax_entry);
289	free(tar);
290	(a->format->data) = NULL;
291	return (ARCHIVE_OK);
292}
293
294
295static int
296archive_read_format_tar_bid(struct archive_read *a)
297{
298	int bid;
299	const void *h;
300	const struct archive_entry_header_ustar *header;
301
302	bid = 0;
303
304	/* Now let's look at the actual header and see if it matches. */
305	h = __archive_read_ahead(a, 512, NULL);
306	if (h == NULL)
307		return (-1);
308
309	/* If it's an end-of-archive mark, we can handle it. */
310	if ((*(const char *)h) == 0
311	    && archive_block_is_null((const unsigned char *)h)) {
312		/*
313		 * Usually, I bid the number of bits verified, but
314		 * in this case, 4096 seems excessive so I picked 10 as
315		 * an arbitrary but reasonable-seeming value.
316		 */
317		return (10);
318	}
319
320	/* If it's not an end-of-archive mark, it must have a valid checksum.*/
321	if (!checksum(a, h))
322		return (0);
323	bid += 48;  /* Checksum is usually 6 octal digits. */
324
325	header = (const struct archive_entry_header_ustar *)h;
326
327	/* Recognize POSIX formats. */
328	if ((memcmp(header->magic, "ustar\0", 6) == 0)
329	    &&(memcmp(header->version, "00", 2)==0))
330		bid += 56;
331
332	/* Recognize GNU tar format. */
333	if ((memcmp(header->magic, "ustar ", 6) == 0)
334	    &&(memcmp(header->version, " \0", 2)==0))
335		bid += 56;
336
337	/* Type flag must be null, digit or A-Z, a-z. */
338	if (header->typeflag[0] != 0 &&
339	    !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
340	    !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
341	    !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
342		return (0);
343	bid += 2;  /* 6 bits of variation in an 8-bit field leaves 2 bits. */
344
345	/* Sanity check: Look at first byte of mode field. */
346	switch (255 & (unsigned)header->mode[0]) {
347	case 0: case 255:
348		/* Base-256 value: No further verification possible! */
349		break;
350	case ' ': /* Not recommended, but not illegal, either. */
351		break;
352	case '0': case '1': case '2': case '3':
353	case '4': case '5': case '6': case '7':
354		/* Octal Value. */
355		/* TODO: Check format of remainder of this field. */
356		break;
357	default:
358		/* Not a valid mode; bail out here. */
359		return (0);
360	}
361	/* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
362
363	return (bid);
364}
365
366/*
367 * The function invoked by archive_read_header().  This
368 * just sets up a few things and then calls the internal
369 * tar_read_header() function below.
370 */
371static int
372archive_read_format_tar_read_header(struct archive_read *a,
373    struct archive_entry *entry)
374{
375	/*
376	 * When converting tar archives to cpio archives, it is
377	 * essential that each distinct file have a distinct inode
378	 * number.  To simplify this, we keep a static count here to
379	 * assign fake dev/inode numbers to each tar entry.  Note that
380	 * pax format archives may overwrite this with something more
381	 * useful.
382	 *
383	 * Ideally, we would track every file read from the archive so
384	 * that we could assign the same dev/ino pair to hardlinks,
385	 * but the memory required to store a complete lookup table is
386	 * probably not worthwhile just to support the relatively
387	 * obscure tar->cpio conversion case.
388	 */
389	static int default_inode;
390	static int default_dev;
391	struct tar *tar;
392	struct sparse_block *sp;
393	const char *p;
394	int r;
395	size_t l;
396
397	/* Assign default device/inode values. */
398	archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
399	archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
400	/* Limit generated st_ino number to 16 bits. */
401	if (default_inode >= 0xffff) {
402		++default_dev;
403		default_inode = 0;
404	}
405
406	tar = (struct tar *)(a->format->data);
407	tar->entry_offset = 0;
408	while (tar->sparse_list != NULL) {
409		sp = tar->sparse_list;
410		tar->sparse_list = sp->next;
411		free(sp);
412	}
413	tar->sparse_last = NULL;
414	tar->realsize = -1; /* Mark this as "unset" */
415
416	r = tar_read_header(a, tar, entry);
417
418	/*
419	 * "non-sparse" files are really just sparse files with
420	 * a single block.
421	 */
422	if (tar->sparse_list == NULL)
423		if (gnu_add_sparse_entry(a, tar, 0, tar->entry_bytes_remaining)
424		    != ARCHIVE_OK)
425			return (ARCHIVE_FATAL);
426
427	if (r == ARCHIVE_OK) {
428		/*
429		 * "Regular" entry with trailing '/' is really
430		 * directory: This is needed for certain old tar
431		 * variants and even for some broken newer ones.
432		 */
433		p = archive_entry_pathname(entry);
434		l = strlen(p);
435		if (archive_entry_filetype(entry) == AE_IFREG
436		    && p[l-1] == '/')
437			archive_entry_set_filetype(entry, AE_IFDIR);
438	}
439	return (r);
440}
441
442static int
443archive_read_format_tar_read_data(struct archive_read *a,
444    const void **buff, size_t *size, off_t *offset)
445{
446	ssize_t bytes_read;
447	struct tar *tar;
448	struct sparse_block *p;
449
450	tar = (struct tar *)(a->format->data);
451
452	if (tar->sparse_gnu_pending) {
453		if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) {
454			tar->sparse_gnu_pending = 0;
455			/* Read initial sparse map. */
456			bytes_read = gnu_sparse_10_read(a, tar);
457			tar->entry_bytes_remaining -= bytes_read;
458			if (bytes_read < 0)
459				return (bytes_read);
460		} else {
461			*size = 0;
462			*offset = 0;
463			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
464			    "Unrecognized GNU sparse file format");
465			return (ARCHIVE_WARN);
466		}
467		tar->sparse_gnu_pending = 0;
468	}
469
470	/* Remove exhausted entries from sparse list. */
471	while (tar->sparse_list != NULL &&
472	    tar->sparse_list->remaining == 0) {
473		p = tar->sparse_list;
474		tar->sparse_list = p->next;
475		free(p);
476	}
477
478	/* If we're at end of file, return EOF. */
479	if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) {
480		if (__archive_read_skip(a, tar->entry_padding) < 0)
481			return (ARCHIVE_FATAL);
482		tar->entry_padding = 0;
483		*buff = NULL;
484		*size = 0;
485		*offset = tar->realsize;
486		return (ARCHIVE_EOF);
487	}
488
489	*buff = __archive_read_ahead(a, 1, &bytes_read);
490	if (bytes_read < 0)
491		return (ARCHIVE_FATAL);
492	if (*buff == NULL) {
493		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
494		    "Truncated tar archive");
495		return (ARCHIVE_FATAL);
496	}
497	if (bytes_read > tar->entry_bytes_remaining)
498		bytes_read = tar->entry_bytes_remaining;
499	/* Don't read more than is available in the
500	 * current sparse block. */
501	if (tar->sparse_list->remaining < bytes_read)
502		bytes_read = tar->sparse_list->remaining;
503	*size = bytes_read;
504	*offset = tar->sparse_list->offset;
505	tar->sparse_list->remaining -= bytes_read;
506	tar->sparse_list->offset += bytes_read;
507	tar->entry_bytes_remaining -= bytes_read;
508	__archive_read_consume(a, bytes_read);
509	return (ARCHIVE_OK);
510}
511
512static int
513archive_read_format_tar_skip(struct archive_read *a)
514{
515	int64_t bytes_skipped;
516	struct tar* tar;
517
518	tar = (struct tar *)(a->format->data);
519
520	/*
521	 * Compression layer skip functions are required to either skip the
522	 * length requested or fail, so we can rely upon the entire entry
523	 * plus padding being skipped.
524	 */
525	bytes_skipped = __archive_read_skip(a,
526	    tar->entry_bytes_remaining + tar->entry_padding);
527	if (bytes_skipped < 0)
528		return (ARCHIVE_FATAL);
529
530	tar->entry_bytes_remaining = 0;
531	tar->entry_padding = 0;
532
533	/* Free the sparse list. */
534	gnu_clear_sparse_list(tar);
535
536	return (ARCHIVE_OK);
537}
538
539/*
540 * This function recursively interprets all of the headers associated
541 * with a single entry.
542 */
543static int
544tar_read_header(struct archive_read *a, struct tar *tar,
545    struct archive_entry *entry)
546{
547	ssize_t bytes;
548	int err;
549	const void *h;
550	const struct archive_entry_header_ustar *header;
551
552	/* Read 512-byte header record */
553	h = __archive_read_ahead(a, 512, &bytes);
554	if (bytes < 0)
555		return (bytes);
556	if (bytes < 512) {  /* Short read or EOF. */
557		/* Try requesting just one byte and see what happens. */
558		(void)__archive_read_ahead(a, 1, &bytes);
559		if (bytes == 0) {
560			/*
561			 * The archive ends at a 512-byte boundary but
562			 * without a proper end-of-archive marker.
563			 * Yes, there are tar writers that do this;
564			 * hold our nose and accept it.
565			 */
566			return (ARCHIVE_EOF);
567		}
568		/* Archive ends with a partial block; this is bad. */
569		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
570		    "Truncated tar archive");
571		return (ARCHIVE_FATAL);
572	}
573	__archive_read_consume(a, 512);
574
575
576	/* Check for end-of-archive mark. */
577	if (((*(const char *)h)==0) && archive_block_is_null((const unsigned char *)h)) {
578		/* Try to consume a second all-null record, as well. */
579		h = __archive_read_ahead(a, 512, NULL);
580		if (h != NULL)
581			__archive_read_consume(a, 512);
582		archive_set_error(&a->archive, 0, NULL);
583		if (a->archive.archive_format_name == NULL) {
584			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
585			a->archive.archive_format_name = "tar";
586		}
587		return (ARCHIVE_EOF);
588	}
589
590	/*
591	 * Note: If the checksum fails and we return ARCHIVE_RETRY,
592	 * then the client is likely to just retry.  This is a very
593	 * crude way to search for the next valid header!
594	 *
595	 * TODO: Improve this by implementing a real header scan.
596	 */
597	if (!checksum(a, h)) {
598		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
599		return (ARCHIVE_RETRY); /* Retryable: Invalid header */
600	}
601
602	if (++tar->header_recursion_depth > 32) {
603		archive_set_error(&a->archive, EINVAL, "Too many special headers");
604		return (ARCHIVE_WARN);
605	}
606
607	/* Determine the format variant. */
608	header = (const struct archive_entry_header_ustar *)h;
609	switch(header->typeflag[0]) {
610	case 'A': /* Solaris tar ACL */
611		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
612		a->archive.archive_format_name = "Solaris tar";
613		err = header_Solaris_ACL(a, tar, entry, h);
614		break;
615	case 'g': /* POSIX-standard 'g' header. */
616		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
617		a->archive.archive_format_name = "POSIX pax interchange format";
618		err = header_pax_global(a, tar, entry, h);
619		break;
620	case 'K': /* Long link name (GNU tar, others) */
621		err = header_longlink(a, tar, entry, h);
622		break;
623	case 'L': /* Long filename (GNU tar, others) */
624		err = header_longname(a, tar, entry, h);
625		break;
626	case 'V': /* GNU volume header */
627		err = header_volume(a, tar, entry, h);
628		break;
629	case 'X': /* Used by SUN tar; same as 'x'. */
630		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
631		a->archive.archive_format_name =
632		    "POSIX pax interchange format (Sun variant)";
633		err = header_pax_extensions(a, tar, entry, h);
634		break;
635	case 'x': /* POSIX-standard 'x' header. */
636		a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
637		a->archive.archive_format_name = "POSIX pax interchange format";
638		err = header_pax_extensions(a, tar, entry, h);
639		break;
640	default:
641		if (memcmp(header->magic, "ustar  \0", 8) == 0) {
642			a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
643			a->archive.archive_format_name = "GNU tar format";
644			err = header_gnutar(a, tar, entry, h);
645		} else if (memcmp(header->magic, "ustar", 5) == 0) {
646			if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
647				a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
648				a->archive.archive_format_name = "POSIX ustar format";
649			}
650			err = header_ustar(a, tar, entry, h);
651		} else {
652			a->archive.archive_format = ARCHIVE_FORMAT_TAR;
653			a->archive.archive_format_name = "tar (non-POSIX)";
654			err = header_old_tar(a, tar, entry, h);
655		}
656	}
657	--tar->header_recursion_depth;
658	/* We return warnings or success as-is.  Anything else is fatal. */
659	if (err == ARCHIVE_WARN || err == ARCHIVE_OK)
660		return (err);
661	if (err == ARCHIVE_EOF)
662		/* EOF when recursively reading a header is bad. */
663		archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
664	return (ARCHIVE_FATAL);
665}
666
667/*
668 * Return true if block checksum is correct.
669 */
670static int
671checksum(struct archive_read *a, const void *h)
672{
673	const unsigned char *bytes;
674	const struct archive_entry_header_ustar	*header;
675	int check, i, sum;
676
677	(void)a; /* UNUSED */
678	bytes = (const unsigned char *)h;
679	header = (const struct archive_entry_header_ustar *)h;
680
681	/*
682	 * Test the checksum.  Note that POSIX specifies _unsigned_
683	 * bytes for this calculation.
684	 */
685	sum = tar_atol(header->checksum, sizeof(header->checksum));
686	check = 0;
687	for (i = 0; i < 148; i++)
688		check += (unsigned char)bytes[i];
689	for (; i < 156; i++)
690		check += 32;
691	for (; i < 512; i++)
692		check += (unsigned char)bytes[i];
693	if (sum == check)
694		return (1);
695
696	/*
697	 * Repeat test with _signed_ bytes, just in case this archive
698	 * was created by an old BSD, Solaris, or HP-UX tar with a
699	 * broken checksum calculation.
700	 */
701	check = 0;
702	for (i = 0; i < 148; i++)
703		check += (signed char)bytes[i];
704	for (; i < 156; i++)
705		check += 32;
706	for (; i < 512; i++)
707		check += (signed char)bytes[i];
708	if (sum == check)
709		return (1);
710
711	return (0);
712}
713
/*
 * Return true (1) if the 512-byte block at p contains only nulls,
 * false (0) as soon as a non-null byte is found.
 */
static int
archive_block_is_null(const unsigned char *p)
{
	const unsigned char *const end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		p++;
	}
	return (1);
}
727
728/*
729 * Interpret 'A' Solaris ACL header
730 */
731static int
732header_Solaris_ACL(struct archive_read *a, struct tar *tar,
733    struct archive_entry *entry, const void *h)
734{
735	const struct archive_entry_header_ustar *header;
736	size_t size;
737	int err;
738	int64_t type;
739	char *acl, *p;
740	wchar_t *wp;
741
742	/*
743	 * read_body_to_string adds a NUL terminator, but we need a little
744	 * more to make sure that we don't overrun acl_text later.
745	 */
746	header = (const struct archive_entry_header_ustar *)h;
747	size = tar_atol(header->size, sizeof(header->size));
748	err = read_body_to_string(a, tar, &(tar->acl_text), h);
749	if (err != ARCHIVE_OK)
750		return (err);
751	/* Recursively read next header */
752	err = tar_read_header(a, tar, entry);
753	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
754		return (err);
755
756	/* TODO: Examine the first characters to see if this
757	 * is an AIX ACL descriptor.  We'll likely never support
758	 * them, but it would be polite to recognize and warn when
759	 * we do see them. */
760
761	/* Leading octal number indicates ACL type and number of entries. */
762	p = acl = tar->acl_text.s;
763	type = 0;
764	while (*p != '\0' && p < acl + size) {
765		if (*p < '0' || *p > '7') {
766			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
767			    "Malformed Solaris ACL attribute (invalid digit)");
768			return(ARCHIVE_WARN);
769		}
770		type <<= 3;
771		type += *p - '0';
772		if (type > 077777777) {
773			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
774			    "Malformed Solaris ACL attribute (count too large)");
775			return (ARCHIVE_WARN);
776		}
777		p++;
778	}
779	switch ((int)type & ~0777777) {
780	case 01000000:
781		/* POSIX.1e ACL */
782		break;
783	case 03000000:
784		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
785		    "Solaris NFSv4 ACLs not supported");
786		return (ARCHIVE_WARN);
787	default:
788		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
789		    "Malformed Solaris ACL attribute (unsupported type %o)",
790		    (int)type);
791		return (ARCHIVE_WARN);
792	}
793	p++;
794
795	if (p >= acl + size) {
796		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
797		    "Malformed Solaris ACL attribute (body overflow)");
798		return(ARCHIVE_WARN);
799	}
800
801	/* ACL text is null-terminated; find the end. */
802	size -= (p - acl);
803	acl = p;
804
805	while (*p != '\0' && p < acl + size)
806		p++;
807
808	wp = utf8_decode(tar, acl, p - acl);
809	err = __archive_entry_acl_parse_w(entry, wp,
810	    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
811	if (err != ARCHIVE_OK)
812		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
813		    "Malformed Solaris ACL attribute (unparsable)");
814	return (err);
815}
816
817/*
818 * Interpret 'K' long linkname header.
819 */
820static int
821header_longlink(struct archive_read *a, struct tar *tar,
822    struct archive_entry *entry, const void *h)
823{
824	int err;
825
826	err = read_body_to_string(a, tar, &(tar->longlink), h);
827	if (err != ARCHIVE_OK)
828		return (err);
829	err = tar_read_header(a, tar, entry);
830	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
831		return (err);
832	/* Set symlink if symlink already set, else hardlink. */
833	archive_entry_copy_link(entry, tar->longlink.s);
834	return (ARCHIVE_OK);
835}
836
837/*
838 * Interpret 'L' long filename header.
839 */
840static int
841header_longname(struct archive_read *a, struct tar *tar,
842    struct archive_entry *entry, const void *h)
843{
844	int err;
845
846	err = read_body_to_string(a, tar, &(tar->longname), h);
847	if (err != ARCHIVE_OK)
848		return (err);
849	/* Read and parse "real" header, then override name. */
850	err = tar_read_header(a, tar, entry);
851	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
852		return (err);
853	archive_entry_copy_pathname(entry, tar->longname.s);
854	return (ARCHIVE_OK);
855}
856
857
/*
 * Interpret 'V' GNU tar volume header.
 *
 * Nothing from the volume header itself is used: the header that
 * follows is parsed and its status is returned.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	int status;

	(void)h; /* UNUSED */

	/* Just skip this and read the next header. */
	status = tar_read_header(a, tar, entry);
	return (status);
}
870
871/*
872 * Read body of an archive entry into an archive_string object.
873 */
874static int
875read_body_to_string(struct archive_read *a, struct tar *tar,
876    struct archive_string *as, const void *h)
877{
878	off_t size, padded_size;
879	const struct archive_entry_header_ustar *header;
880	const void *src;
881
882	(void)tar; /* UNUSED */
883	header = (const struct archive_entry_header_ustar *)h;
884	size  = tar_atol(header->size, sizeof(header->size));
885	if ((size > 1048576) || (size < 0)) {
886		archive_set_error(&a->archive, EINVAL,
887		    "Special header too large");
888		return (ARCHIVE_FATAL);
889	}
890
891	/* Fail if we can't make our buffer big enough. */
892	if (archive_string_ensure(as, size+1) == NULL) {
893		archive_set_error(&a->archive, ENOMEM,
894		    "No memory");
895		return (ARCHIVE_FATAL);
896	}
897
898 	/* Read the body into the string. */
899	padded_size = (size + 511) & ~ 511;
900	src = __archive_read_ahead(a, padded_size, NULL);
901	if (src == NULL)
902		return (ARCHIVE_FATAL);
903	memcpy(as->s, src, size);
904	__archive_read_consume(a, padded_size);
905	as->s[size] = '\0';
906	return (ARCHIVE_OK);
907}
908
909/*
910 * Parse out common header elements.
911 *
912 * This would be the same as header_old_tar, except that the
913 * filename is handled slightly differently for old and POSIX
914 * entries  (POSIX entries support a 'prefix').  This factoring
915 * allows header_old_tar and header_ustar
916 * to handle filenames differently, while still putting most of the
917 * common parsing into one place.
918 */
919static int
920header_common(struct archive_read *a, struct tar *tar,
921    struct archive_entry *entry, const void *h)
922{
923	const struct archive_entry_header_ustar	*header;
924	char	tartype;
925
926	(void)a; /* UNUSED */
927
928	header = (const struct archive_entry_header_ustar *)h;
929	if (header->linkname[0])
930		archive_strncpy(&(tar->entry_linkpath), header->linkname,
931		    sizeof(header->linkname));
932	else
933		archive_string_empty(&(tar->entry_linkpath));
934
935	/* Parse out the numeric fields (all are octal) */
936	archive_entry_set_mode(entry, tar_atol(header->mode, sizeof(header->mode)));
937	archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
938	archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
939	tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
940	tar->realsize = tar->entry_bytes_remaining;
941	archive_entry_set_size(entry, tar->entry_bytes_remaining);
942	archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
943
944	/* Handle the tar type flag appropriately. */
945	tartype = header->typeflag[0];
946
947	switch (tartype) {
948	case '1': /* Hard link */
949		archive_entry_copy_hardlink(entry, tar->entry_linkpath.s);
950		/*
951		 * The following may seem odd, but: Technically, tar
952		 * does not store the file type for a "hard link"
953		 * entry, only the fact that it is a hard link.  So, I
954		 * leave the type zero normally.  But, pax interchange
955		 * format allows hard links to have data, which
956		 * implies that the underlying entry is a regular
957		 * file.
958		 */
959		if (archive_entry_size(entry) > 0)
960			archive_entry_set_filetype(entry, AE_IFREG);
961
962		/*
963		 * A tricky point: Traditionally, tar readers have
964		 * ignored the size field when reading hardlink
965		 * entries, and some writers put non-zero sizes even
966		 * though the body is empty.  POSIX blessed this
967		 * convention in the 1988 standard, but broke with
968		 * this tradition in 2001 by permitting hardlink
969		 * entries to store valid bodies in pax interchange
970		 * format, but not in ustar format.  Since there is no
971		 * hard and fast way to distinguish pax interchange
972		 * from earlier archives (the 'x' and 'g' entries are
973		 * optional, after all), we need a heuristic.
974		 */
975		if (archive_entry_size(entry) == 0) {
976			/* If the size is already zero, we're done. */
977		}  else if (a->archive.archive_format
978		    == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
979			/* Definitely pax extended; must obey hardlink size. */
980		} else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
981		    || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
982		{
983			/* Old-style or GNU tar: we must ignore the size. */
984			archive_entry_set_size(entry, 0);
985			tar->entry_bytes_remaining = 0;
986		} else if (archive_read_format_tar_bid(a) > 50) {
987			/*
988			 * We don't know if it's pax: If the bid
989			 * function sees a valid ustar header
990			 * immediately following, then let's ignore
991			 * the hardlink size.
992			 */
993			archive_entry_set_size(entry, 0);
994			tar->entry_bytes_remaining = 0;
995		}
996		/*
997		 * TODO: There are still two cases I'd like to handle:
998		 *   = a ustar non-pax archive with a hardlink entry at
999		 *     end-of-archive.  (Look for block of nulls following?)
1000		 *   = a pax archive that has not seen any pax headers
1001		 *     and has an entry which is a hardlink entry storing
1002		 *     a body containing an uncompressed tar archive.
1003		 * The first is worth addressing; I don't see any reliable
1004		 * way to deal with the second possibility.
1005		 */
1006		break;
1007	case '2': /* Symlink */
1008		archive_entry_set_filetype(entry, AE_IFLNK);
1009		archive_entry_set_size(entry, 0);
1010		tar->entry_bytes_remaining = 0;
1011		archive_entry_copy_symlink(entry, tar->entry_linkpath.s);
1012		break;
1013	case '3': /* Character device */
1014		archive_entry_set_filetype(entry, AE_IFCHR);
1015		archive_entry_set_size(entry, 0);
1016		tar->entry_bytes_remaining = 0;
1017		break;
1018	case '4': /* Block device */
1019		archive_entry_set_filetype(entry, AE_IFBLK);
1020		archive_entry_set_size(entry, 0);
1021		tar->entry_bytes_remaining = 0;
1022		break;
1023	case '5': /* Dir */
1024		archive_entry_set_filetype(entry, AE_IFDIR);
1025		archive_entry_set_size(entry, 0);
1026		tar->entry_bytes_remaining = 0;
1027		break;
1028	case '6': /* FIFO device */
1029		archive_entry_set_filetype(entry, AE_IFIFO);
1030		archive_entry_set_size(entry, 0);
1031		tar->entry_bytes_remaining = 0;
1032		break;
1033	case 'D': /* GNU incremental directory type */
1034		/*
1035		 * No special handling is actually required here.
1036		 * It might be nice someday to preprocess the file list and
1037		 * provide it to the client, though.
1038		 */
1039		archive_entry_set_filetype(entry, AE_IFDIR);
1040		break;
1041	case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1042		/*
1043		 * As far as I can tell, this is just like a regular file
1044		 * entry, except that the contents should be _appended_ to
1045		 * the indicated file at the indicated offset.  This may
1046		 * require some API work to fully support.
1047		 */
1048		break;
1049	case 'N': /* Old GNU "long filename" entry. */
1050		/* The body of this entry is a script for renaming
1051		 * previously-extracted entries.  Ugh.  It will never
1052		 * be supported by libarchive. */
1053		archive_entry_set_filetype(entry, AE_IFREG);
1054		break;
1055	case 'S': /* GNU sparse files */
1056		/*
1057		 * Sparse files are really just regular files with
1058		 * sparse information in the extended area.
1059		 */
1060		/* FALLTHROUGH */
1061	default: /* Regular file  and non-standard types */
1062		/*
1063		 * Per POSIX: non-recognized types should always be
1064		 * treated as regular files.
1065		 */
1066		archive_entry_set_filetype(entry, AE_IFREG);
1067		break;
1068	}
1069	return (0);
1070}
1071
1072/*
1073 * Parse out header elements for "old-style" tar archives.
1074 */
1075static int
1076header_old_tar(struct archive_read *a, struct tar *tar,
1077    struct archive_entry *entry, const void *h)
1078{
1079	const struct archive_entry_header_ustar	*header;
1080
1081	/* Copy filename over (to ensure null termination). */
1082	header = (const struct archive_entry_header_ustar *)h;
1083	archive_strncpy(&(tar->entry_pathname), header->name, sizeof(header->name));
1084	archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1085
1086	/* Grab rest of common fields */
1087	header_common(a, tar, entry, h);
1088
1089	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1090	return (0);
1091}
1092
1093/*
1094 * Parse a file header for a pax extended archive entry.
1095 */
1096static int
1097header_pax_global(struct archive_read *a, struct tar *tar,
1098    struct archive_entry *entry, const void *h)
1099{
1100	int err;
1101
1102	err = read_body_to_string(a, tar, &(tar->pax_global), h);
1103	if (err != ARCHIVE_OK)
1104		return (err);
1105	err = tar_read_header(a, tar, entry);
1106	return (err);
1107}
1108
1109static int
1110header_pax_extensions(struct archive_read *a, struct tar *tar,
1111    struct archive_entry *entry, const void *h)
1112{
1113	int err, err2;
1114
1115	err = read_body_to_string(a, tar, &(tar->pax_header), h);
1116	if (err != ARCHIVE_OK)
1117		return (err);
1118
1119	/* Parse the next header. */
1120	err = tar_read_header(a, tar, entry);
1121	if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1122		return (err);
1123
1124	/*
1125	 * TODO: Parse global/default options into 'entry' struct here
1126	 * before handling file-specific options.
1127	 *
1128	 * This design (parse standard header, then overwrite with pax
1129	 * extended attribute data) usually works well, but isn't ideal;
1130	 * it would be better to parse the pax extended attributes first
1131	 * and then skip any fields in the standard header that were
1132	 * defined in the pax header.
1133	 */
1134	err2 = pax_header(a, tar, entry, tar->pax_header.s);
1135	err =  err_combine(err, err2);
1136	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1137	return (err);
1138}
1139
1140
1141/*
1142 * Parse a file header for a Posix "ustar" archive entry.  This also
1143 * handles "pax" or "extended ustar" entries.
1144 */
1145static int
1146header_ustar(struct archive_read *a, struct tar *tar,
1147    struct archive_entry *entry, const void *h)
1148{
1149	const struct archive_entry_header_ustar	*header;
1150	struct archive_string *as;
1151
1152	header = (const struct archive_entry_header_ustar *)h;
1153
1154	/* Copy name into an internal buffer to ensure null-termination. */
1155	as = &(tar->entry_pathname);
1156	if (header->prefix[0]) {
1157		archive_strncpy(as, header->prefix, sizeof(header->prefix));
1158		if (as->s[archive_strlen(as) - 1] != '/')
1159			archive_strappend_char(as, '/');
1160		archive_strncat(as, header->name, sizeof(header->name));
1161	} else
1162		archive_strncpy(as, header->name, sizeof(header->name));
1163
1164	archive_entry_copy_pathname(entry, as->s);
1165
1166	/* Handle rest of common fields. */
1167	header_common(a, tar, entry, h);
1168
1169	/* Handle POSIX ustar fields. */
1170	archive_strncpy(&(tar->entry_uname), header->uname,
1171	    sizeof(header->uname));
1172	archive_entry_copy_uname(entry, tar->entry_uname.s);
1173
1174	archive_strncpy(&(tar->entry_gname), header->gname,
1175	    sizeof(header->gname));
1176	archive_entry_copy_gname(entry, tar->entry_gname.s);
1177
1178	/* Parse out device numbers only for char and block specials. */
1179	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1180		archive_entry_set_rdevmajor(entry,
1181		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1182		archive_entry_set_rdevminor(entry,
1183		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1184	}
1185
1186	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1187
1188	return (0);
1189}
1190
1191
1192/*
1193 * Parse the pax extended attributes record.
1194 *
1195 * Returns non-zero if there's an error in the data.
1196 */
1197static int
1198pax_header(struct archive_read *a, struct tar *tar,
1199    struct archive_entry *entry, char *attr)
1200{
1201	size_t attr_length, l, line_length;
1202	char *p;
1203	char *key, *value;
1204	int err, err2;
1205
1206	attr_length = strlen(attr);
1207	tar->pax_hdrcharset_binary = 0;
1208	archive_string_empty(&(tar->entry_gname));
1209	archive_string_empty(&(tar->entry_linkpath));
1210	archive_string_empty(&(tar->entry_pathname));
1211	archive_string_empty(&(tar->entry_pathname_override));
1212	archive_string_empty(&(tar->entry_uname));
1213	err = ARCHIVE_OK;
1214	while (attr_length > 0) {
1215		/* Parse decimal length field at start of line. */
1216		line_length = 0;
1217		l = attr_length;
1218		p = attr; /* Record start of line. */
1219		while (l>0) {
1220			if (*p == ' ') {
1221				p++;
1222				l--;
1223				break;
1224			}
1225			if (*p < '0' || *p > '9') {
1226				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1227				    "Ignoring malformed pax extended attributes");
1228				return (ARCHIVE_WARN);
1229			}
1230			line_length *= 10;
1231			line_length += *p - '0';
1232			if (line_length > 999999) {
1233				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1234				    "Rejecting pax extended attribute > 1MB");
1235				return (ARCHIVE_WARN);
1236			}
1237			p++;
1238			l--;
1239		}
1240
1241		/*
1242		 * Parsed length must be no bigger than available data,
1243		 * at least 1, and the last character of the line must
1244		 * be '\n'.
1245		 */
1246		if (line_length > attr_length
1247		    || line_length < 1
1248		    || attr[line_length - 1] != '\n')
1249		{
1250			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1251			    "Ignoring malformed pax extended attribute");
1252			return (ARCHIVE_WARN);
1253		}
1254
1255		/* Null-terminate the line. */
1256		attr[line_length - 1] = '\0';
1257
1258		/* Find end of key and null terminate it. */
1259		key = p;
1260		if (key[0] == '=')
1261			return (-1);
1262		while (*p && *p != '=')
1263			++p;
1264		if (*p == '\0') {
1265			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1266			    "Invalid pax extended attributes");
1267			return (ARCHIVE_WARN);
1268		}
1269		*p = '\0';
1270
1271		/* Identify null-terminated 'value' portion. */
1272		value = p + 1;
1273
1274		/* Identify this attribute and set it in the entry. */
1275		err2 = pax_attribute(a, tar, entry, key, value);
1276		err = err_combine(err, err2);
1277
1278		/* Skip to next line */
1279		attr += line_length;
1280		attr_length -= line_length;
1281	}
1282	if (archive_strlen(&(tar->entry_gname)) > 0) {
1283		value = tar->entry_gname.s;
1284		if (tar->pax_hdrcharset_binary)
1285			archive_entry_copy_gname(entry, value);
1286		else {
1287			if (!archive_entry_update_gname_utf8(entry, value)) {
1288				err = ARCHIVE_WARN;
1289				archive_set_error(&a->archive,
1290				    ARCHIVE_ERRNO_FILE_FORMAT,
1291				    "Gname in pax header can't "
1292				    "be converted to current locale.");
1293			}
1294		}
1295	}
1296	if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1297		value = tar->entry_linkpath.s;
1298		if (tar->pax_hdrcharset_binary)
1299			archive_entry_copy_link(entry, value);
1300		else {
1301			if (!archive_entry_update_link_utf8(entry, value)) {
1302				err = ARCHIVE_WARN;
1303				archive_set_error(&a->archive,
1304				    ARCHIVE_ERRNO_FILE_FORMAT,
1305				    "Linkname in pax header can't "
1306				    "be converted to current locale.");
1307			}
1308		}
1309	}
1310	/*
1311	 * Some extensions (such as the GNU sparse file extensions)
1312	 * deliberately store a synthetic name under the regular 'path'
1313	 * attribute and the real file name under a different attribute.
1314	 * Since we're supposed to not care about the order, we
1315	 * have no choice but to store all of the various filenames
1316	 * we find and figure it all out afterwards.  This is the
1317	 * figuring out part.
1318	 */
1319	value = NULL;
1320	if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1321		value = tar->entry_pathname_override.s;
1322	else if (archive_strlen(&(tar->entry_pathname)) > 0)
1323		value = tar->entry_pathname.s;
1324	if (value != NULL) {
1325		if (tar->pax_hdrcharset_binary)
1326			archive_entry_copy_pathname(entry, value);
1327		else {
1328			if (!archive_entry_update_pathname_utf8(entry, value)) {
1329				err = ARCHIVE_WARN;
1330				archive_set_error(&a->archive,
1331				    ARCHIVE_ERRNO_FILE_FORMAT,
1332				    "Pathname in pax header can't be "
1333				    "converted to current locale.");
1334			}
1335		}
1336	}
1337	if (archive_strlen(&(tar->entry_uname)) > 0) {
1338		value = tar->entry_uname.s;
1339		if (tar->pax_hdrcharset_binary)
1340			archive_entry_copy_uname(entry, value);
1341		else {
1342			if (!archive_entry_update_uname_utf8(entry, value)) {
1343				err = ARCHIVE_WARN;
1344				archive_set_error(&a->archive,
1345				    ARCHIVE_ERRNO_FILE_FORMAT,
1346				    "Uname in pax header can't "
1347				    "be converted to current locale.");
1348			}
1349		}
1350	}
1351	return (err);
1352}
1353
/*
 * Store one "LIBARCHIVE.xattr.<name>" pax attribute as an extended
 * attribute of 'entry'.  The part of the key after the prefix is
 * URL-decoded to give the attribute name, and the value is base-64
 * decoded to give the attribute data.
 *
 * Returns 0 on success, 3 if 'name' is not the prefix followed by at
 * least one character, 2 if the name cannot be decoded and 1 if the
 * value cannot be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
	char *name, char *value)
{
	static const char prefix[] = "LIBARCHIVE.xattr.";
	const size_t prefix_len = sizeof(prefix) - 1;
	char *xattr_name;
	void *xattr_data;
	size_t xattr_size;
	int status;

	if (strlen(name) <= prefix_len ||
	    strncmp(name, prefix, prefix_len) != 0)
		return 3;

	/* URL-decode name */
	xattr_name = url_decode(name + prefix_len);
	if (xattr_name == NULL)
		return 2;

	/* Base-64 decode value */
	status = 1;
	xattr_data = base64_decode(value, strlen(value), &xattr_size);
	if (xattr_data != NULL) {
		archive_entry_xattr_add_entry(entry, xattr_name,
			xattr_data, xattr_size);
		free(xattr_data);
		status = 0;
	}

	/* Both decoded buffers belong to this function. */
	free(xattr_name);
	return status;
}
1386
1387/*
1388 * Parse a single key=value attribute.  key/value pointers are
1389 * assumed to point into reasonably long-lived storage.
1390 *
1391 * Note that POSIX reserves all-lowercase keywords.  Vendor-specific
1392 * extensions should always have keywords of the form "VENDOR.attribute"
1393 * In particular, it's quite feasible to support many different
1394 * vendor extensions here.  I'm using "LIBARCHIVE" for extensions
1395 * unique to this library.
1396 *
1397 * Investigate other vendor-specific extensions and see if
1398 * any of them look useful.
1399 */
1400static int
1401pax_attribute(struct archive_read *a, struct tar *tar,
1402	struct archive_entry *entry, char *key, char *value)
1403{
1404	int64_t s;
1405	long n;
1406	wchar_t *wp;
1407
1408	switch (key[0]) {
1409	case 'G':
1410		/* GNU "0.0" sparse pax format. */
1411		if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1412			tar->sparse_offset = -1;
1413			tar->sparse_numbytes = -1;
1414			tar->sparse_gnu_major = 0;
1415			tar->sparse_gnu_minor = 0;
1416		}
1417		if (strcmp(key, "GNU.sparse.offset") == 0) {
1418			tar->sparse_offset = tar_atol10(value, strlen(value));
1419			if (tar->sparse_numbytes != -1) {
1420				if (gnu_add_sparse_entry(a, tar,
1421				    tar->sparse_offset, tar->sparse_numbytes)
1422				    != ARCHIVE_OK)
1423					return(ARCHIVE_FATAL);
1424				tar->sparse_offset = -1;
1425				tar->sparse_numbytes = -1;
1426			}
1427		}
1428		if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1429			tar->sparse_numbytes = tar_atol10(value, strlen(value));
1430			if (tar->sparse_numbytes != -1) {
1431				if (gnu_add_sparse_entry(a, tar,
1432				    tar->sparse_offset, tar->sparse_numbytes)
1433				    != ARCHIVE_OK)
1434					return (ARCHIVE_FATAL);
1435				tar->sparse_offset = -1;
1436				tar->sparse_numbytes = -1;
1437			}
1438		}
1439		if (strcmp(key, "GNU.sparse.size") == 0) {
1440			tar->realsize = tar_atol10(value, strlen(value));
1441			archive_entry_set_size(entry, tar->realsize);
1442		}
1443
1444		/* GNU "0.1" sparse pax format. */
1445		if (strcmp(key, "GNU.sparse.map") == 0) {
1446			tar->sparse_gnu_major = 0;
1447			tar->sparse_gnu_minor = 1;
1448			if (gnu_sparse_01_parse(a, tar, value) != ARCHIVE_OK)
1449				return (ARCHIVE_WARN);
1450		}
1451
1452		/* GNU "1.0" sparse pax format */
1453		if (strcmp(key, "GNU.sparse.major") == 0) {
1454			tar->sparse_gnu_major = tar_atol10(value, strlen(value));
1455			tar->sparse_gnu_pending = 1;
1456		}
1457		if (strcmp(key, "GNU.sparse.minor") == 0) {
1458			tar->sparse_gnu_minor = tar_atol10(value, strlen(value));
1459			tar->sparse_gnu_pending = 1;
1460		}
1461		if (strcmp(key, "GNU.sparse.name") == 0) {
1462			/*
1463			 * The real filename; when storing sparse
1464			 * files, GNU tar puts a synthesized name into
1465			 * the regular 'path' attribute in an attempt
1466			 * to limit confusion. ;-)
1467			 */
1468			archive_strcpy(&(tar->entry_pathname_override), value);
1469		}
1470		if (strcmp(key, "GNU.sparse.realsize") == 0) {
1471			tar->realsize = tar_atol10(value, strlen(value));
1472			archive_entry_set_size(entry, tar->realsize);
1473		}
1474		break;
1475	case 'L':
1476		/* Our extensions */
1477/* TODO: Handle arbitrary extended attributes... */
1478/*
1479		if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0)
1480			archive_entry_set_xxxxxx(entry, value);
1481*/
1482		if (strcmp(key, "LIBARCHIVE.creationtime")==0) {
1483			pax_time(value, &s, &n);
1484			archive_entry_set_birthtime(entry, s, n);
1485		}
1486		if (strncmp(key, "LIBARCHIVE.xattr.", 17)==0)
1487			pax_attribute_xattr(entry, key, value);
1488		break;
1489	case 'S':
1490		/* We support some keys used by the "star" archiver */
1491		if (strcmp(key, "SCHILY.acl.access")==0) {
1492			wp = utf8_decode(tar, value, strlen(value));
1493			/* TODO: if (wp == NULL) */
1494			__archive_entry_acl_parse_w(entry, wp,
1495			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
1496		} else if (strcmp(key, "SCHILY.acl.default")==0) {
1497			wp = utf8_decode(tar, value, strlen(value));
1498			/* TODO: if (wp == NULL) */
1499			__archive_entry_acl_parse_w(entry, wp,
1500			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
1501		} else if (strcmp(key, "SCHILY.devmajor")==0) {
1502			archive_entry_set_rdevmajor(entry,
1503			    tar_atol10(value, strlen(value)));
1504		} else if (strcmp(key, "SCHILY.devminor")==0) {
1505			archive_entry_set_rdevminor(entry,
1506			    tar_atol10(value, strlen(value)));
1507		} else if (strcmp(key, "SCHILY.fflags")==0) {
1508			archive_entry_copy_fflags_text(entry, value);
1509		} else if (strcmp(key, "SCHILY.dev")==0) {
1510			archive_entry_set_dev(entry,
1511			    tar_atol10(value, strlen(value)));
1512		} else if (strcmp(key, "SCHILY.ino")==0) {
1513			archive_entry_set_ino(entry,
1514			    tar_atol10(value, strlen(value)));
1515		} else if (strcmp(key, "SCHILY.nlink")==0) {
1516			archive_entry_set_nlink(entry,
1517			    tar_atol10(value, strlen(value)));
1518		} else if (strcmp(key, "SCHILY.realsize")==0) {
1519			tar->realsize = tar_atol10(value, strlen(value));
1520			archive_entry_set_size(entry, tar->realsize);
1521		}
1522		break;
1523	case 'a':
1524		if (strcmp(key, "atime")==0) {
1525			pax_time(value, &s, &n);
1526			archive_entry_set_atime(entry, s, n);
1527		}
1528		break;
1529	case 'c':
1530		if (strcmp(key, "ctime")==0) {
1531			pax_time(value, &s, &n);
1532			archive_entry_set_ctime(entry, s, n);
1533		} else if (strcmp(key, "charset")==0) {
1534			/* TODO: Publish charset information in entry. */
1535		} else if (strcmp(key, "comment")==0) {
1536			/* TODO: Publish comment in entry. */
1537		}
1538		break;
1539	case 'g':
1540		if (strcmp(key, "gid")==0) {
1541			archive_entry_set_gid(entry,
1542			    tar_atol10(value, strlen(value)));
1543		} else if (strcmp(key, "gname")==0) {
1544			archive_strcpy(&(tar->entry_gname), value);
1545		}
1546		break;
1547	case 'h':
1548		if (strcmp(key, "hdrcharset") == 0) {
1549			if (strcmp(value, "BINARY") == 0)
1550				tar->pax_hdrcharset_binary = 1;
1551			else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1552				tar->pax_hdrcharset_binary = 0;
1553			else {
1554				/* TODO: Warn about unsupported hdrcharset */
1555			}
1556		}
1557		break;
1558	case 'l':
1559		/* pax interchange doesn't distinguish hardlink vs. symlink. */
1560		if (strcmp(key, "linkpath")==0) {
1561			archive_strcpy(&(tar->entry_linkpath), value);
1562		}
1563		break;
1564	case 'm':
1565		if (strcmp(key, "mtime")==0) {
1566			pax_time(value, &s, &n);
1567			archive_entry_set_mtime(entry, s, n);
1568		}
1569		break;
1570	case 'p':
1571		if (strcmp(key, "path")==0) {
1572			archive_strcpy(&(tar->entry_pathname), value);
1573		}
1574		break;
1575	case 'r':
1576		/* POSIX has reserved 'realtime.*' */
1577		break;
1578	case 's':
1579		/* POSIX has reserved 'security.*' */
1580		/* Someday: if (strcmp(key, "security.acl")==0) { ... } */
1581		if (strcmp(key, "size")==0) {
1582			/* "size" is the size of the data in the entry. */
1583			tar->entry_bytes_remaining
1584			    = tar_atol10(value, strlen(value));
1585			/*
1586			 * But, "size" is not necessarily the size of
1587			 * the file on disk; if this is a sparse file,
1588			 * the disk size may have already been set from
1589			 * GNU.sparse.realsize or GNU.sparse.size or
1590			 * an old GNU header field or SCHILY.realsize
1591			 * or ....
1592			 */
1593			if (tar->realsize < 0) {
1594				archive_entry_set_size(entry,
1595				    tar->entry_bytes_remaining);
1596				tar->realsize
1597				    = tar->entry_bytes_remaining;
1598			}
1599		}
1600		break;
1601	case 'u':
1602		if (strcmp(key, "uid")==0) {
1603			archive_entry_set_uid(entry,
1604			    tar_atol10(value, strlen(value)));
1605		} else if (strcmp(key, "uname")==0) {
1606			archive_strcpy(&(tar->entry_uname), value);
1607		}
1608		break;
1609	}
1610	return (0);
1611}
1612
1613
1614
/*
 * Parse a decimal time value, which may include a fractional portion.
 *
 * Accepts an optional leading '-', a run of decimal digits and, if a
 * '.' follows directly, up to nine fractional digits.  The whole
 * seconds are stored in *ps (with the sign applied) and the fraction,
 * scaled to nanoseconds, in *pn.  If the seconds do not fit in an
 * int64_t they are clamped to INT64_MAX and *pn is left at 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_before_mul = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	long nanos = 0;
	unsigned long scale;
	int negative = 0;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Whole seconds; stop at the first digit that would overflow. */
	for (; *p >= '0' && *p <= '9'; ++p) {
		int d = *p - '0';

		if (seconds > max_before_mul ||
		    (seconds == max_before_mul && d > max_last_digit)) {
			seconds = INT64_MAX;
			break;
		}
		seconds = seconds * 10 + d;
	}
	*ps = negative ? -seconds : seconds;

	/* No fraction unless a '.' follows directly. */
	*pn = 0;
	if (*p != '.')
		return;

	/*
	 * The first fractional digit is worth 10^8 ns; each later one
	 * is worth a tenth of that.  Anything beyond nine digits is
	 * ignored.
	 */
	++p;
	for (scale = 100000000UL;
	    scale != 0 && *p >= '0' && *p <= '9';
	    scale /= 10, ++p)
		nanos += (long)((unsigned long)(*p - '0') * scale);
	*pn = nanos;
}
1664
1665/*
1666 * Parse GNU tar header
1667 */
1668static int
1669header_gnutar(struct archive_read *a, struct tar *tar,
1670    struct archive_entry *entry, const void *h)
1671{
1672	const struct archive_entry_header_gnutar *header;
1673
1674	(void)a;
1675
1676	/*
1677	 * GNU header is like POSIX ustar, except 'prefix' is
1678	 * replaced with some other fields. This also means the
1679	 * filename is stored as in old-style archives.
1680	 */
1681
1682	/* Grab fields common to all tar variants. */
1683	header_common(a, tar, entry, h);
1684
1685	/* Copy filename over (to ensure null termination). */
1686	header = (const struct archive_entry_header_gnutar *)h;
1687	archive_strncpy(&(tar->entry_pathname), header->name,
1688	    sizeof(header->name));
1689	archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1690
1691	/* Fields common to ustar and GNU */
1692	/* XXX Can the following be factored out since it's common
1693	 * to ustar and gnu tar?  Is it okay to move it down into
1694	 * header_common, perhaps?  */
1695	archive_strncpy(&(tar->entry_uname),
1696	    header->uname, sizeof(header->uname));
1697	archive_entry_copy_uname(entry, tar->entry_uname.s);
1698
1699	archive_strncpy(&(tar->entry_gname),
1700	    header->gname, sizeof(header->gname));
1701	archive_entry_copy_gname(entry, tar->entry_gname.s);
1702
1703	/* Parse out device numbers only for char and block specials */
1704	if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1705		archive_entry_set_rdevmajor(entry,
1706		    tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1707		archive_entry_set_rdevminor(entry,
1708		    tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1709	} else
1710		archive_entry_set_rdev(entry, 0);
1711
1712	tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1713
1714	/* Grab GNU-specific fields. */
1715	archive_entry_set_atime(entry,
1716	    tar_atol(header->atime, sizeof(header->atime)), 0);
1717	archive_entry_set_ctime(entry,
1718	    tar_atol(header->ctime, sizeof(header->ctime)), 0);
1719	if (header->realsize[0] != 0) {
1720		tar->realsize
1721		    = tar_atol(header->realsize, sizeof(header->realsize));
1722		archive_entry_set_size(entry, tar->realsize);
1723	}
1724
1725	if (header->sparse[0].offset[0] != 0) {
1726		if (gnu_sparse_old_read(a, tar, header) != ARCHIVE_OK)
1727			return (ARCHIVE_FATAL);
1728	} else {
1729		if (header->isextended[0] != 0) {
1730			/* XXX WTF? XXX */
1731		}
1732	}
1733
1734	return (0);
1735}
1736
1737static int
1738gnu_add_sparse_entry(struct archive_read *a, struct tar *tar, off_t offset,
1739	off_t remaining)
1740{
1741	struct sparse_block *p;
1742
1743	p = (struct sparse_block *)malloc(sizeof(*p));
1744	if (p == NULL) {
1745		archive_set_error(&a->archive, ENOMEM, "Out of memory");
1746		return (ARCHIVE_FATAL);
1747	}
1748	memset(p, 0, sizeof(*p));
1749	if (tar->sparse_last != NULL)
1750		tar->sparse_last->next = p;
1751	else
1752		tar->sparse_list = p;
1753	tar->sparse_last = p;
1754	p->offset = offset;
1755	p->remaining = remaining;
1756	return (ARCHIVE_OK);
1757}
1758
1759static void
1760gnu_clear_sparse_list(struct tar *tar)
1761{
1762	struct sparse_block *p;
1763
1764	while (tar->sparse_list != NULL) {
1765		p = tar->sparse_list;
1766		tar->sparse_list = p->next;
1767		free(p);
1768	}
1769	tar->sparse_last = NULL;
1770}
1771
1772/*
1773 * GNU tar old-format sparse data.
1774 *
1775 * GNU old-format sparse data is stored in a fixed-field
1776 * format.  Offset/size values are 11-byte octal fields (same
 * format as 'size' field in ustar header).  These are
1778 * stored in the header, allocating subsequent header blocks
1779 * as needed.  Extending the header in this way is a pretty
1780 * severe POSIX violation; this design has earned GNU tar a
1781 * lot of criticism.
1782 */
1783
1784static int
1785gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
1786    const struct archive_entry_header_gnutar *header)
1787{
1788	ssize_t bytes_read;
1789	const void *data;
1790	struct extended {
1791		struct gnu_sparse sparse[21];
1792		char	isextended[1];
1793		char	padding[7];
1794	};
1795	const struct extended *ext;
1796
1797	if (gnu_sparse_old_parse(a, tar, header->sparse, 4) != ARCHIVE_OK)
1798		return (ARCHIVE_FATAL);
1799	if (header->isextended[0] == 0)
1800		return (ARCHIVE_OK);
1801
1802	do {
1803		data = __archive_read_ahead(a, 512, &bytes_read);
1804		if (bytes_read < 0)
1805			return (ARCHIVE_FATAL);
1806		if (bytes_read < 512) {
1807			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1808			    "Truncated tar archive "
1809			    "detected while reading sparse file data");
1810			return (ARCHIVE_FATAL);
1811		}
1812		__archive_read_consume(a, 512);
1813		ext = (const struct extended *)data;
1814		if (gnu_sparse_old_parse(a, tar, ext->sparse, 21) != ARCHIVE_OK)
1815			return (ARCHIVE_FATAL);
1816	} while (ext->isextended[0] != 0);
1817	if (tar->sparse_list != NULL)
1818		tar->entry_offset = tar->sparse_list->offset;
1819	return (ARCHIVE_OK);
1820}
1821
1822static int
1823gnu_sparse_old_parse(struct archive_read *a, struct tar *tar,
1824    const struct gnu_sparse *sparse, int length)
1825{
1826	while (length > 0 && sparse->offset[0] != 0) {
1827		if (gnu_add_sparse_entry(a, tar,
1828		    tar_atol(sparse->offset, sizeof(sparse->offset)),
1829		    tar_atol(sparse->numbytes, sizeof(sparse->numbytes)))
1830		    != ARCHIVE_OK)
1831			return (ARCHIVE_FATAL);
1832		sparse++;
1833		length--;
1834	}
1835	return (ARCHIVE_OK);
1836}
1837
1838/*
1839 * GNU tar sparse format 0.0
1840 *
1841 * Beginning with GNU tar 1.15, sparse files are stored using
1842 * information in the pax extended header.  The GNU tar maintainers
1843 * have gone through a number of variations in the process of working
 * out this scheme; fortunately, they're all numbered.
1845 *
1846 * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
1847 * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
1848 * store offset/size for each block.  The repeated instances of these
1849 * latter fields violate the pax specification (which frowns on
1850 * duplicate keys), so this format was quickly replaced.
1851 */
1852
1853/*
1854 * GNU tar sparse format 0.1
1855 *
1856 * This version replaced the offset/numbytes attributes with
1857 * a single "map" attribute that stored a list of integers.  This
1858 * format had two problems: First, the "map" attribute could be very
1859 * long, which caused problems for some implementations.  More
1860 * importantly, the sparse data was lost when extracted by archivers
1861 * that didn't recognize this extension.
1862 */
1863
1864static int
1865gnu_sparse_01_parse(struct archive_read *a, struct tar *tar, const char *p)
1866{
1867	const char *e;
1868	off_t offset = -1, size = -1;
1869
1870	for (;;) {
1871		e = p;
1872		while (*e != '\0' && *e != ',') {
1873			if (*e < '0' || *e > '9')
1874				return (ARCHIVE_WARN);
1875			e++;
1876		}
1877		if (offset < 0) {
1878			offset = tar_atol10(p, e - p);
1879			if (offset < 0)
1880				return (ARCHIVE_WARN);
1881		} else {
1882			size = tar_atol10(p, e - p);
1883			if (size < 0)
1884				return (ARCHIVE_WARN);
1885			if (gnu_add_sparse_entry(a, tar, offset, size)
1886			    != ARCHIVE_OK)
1887				return (ARCHIVE_FATAL);
1888			offset = -1;
1889		}
1890		if (*e == '\0')
1891			return (ARCHIVE_OK);
1892		p = e + 1;
1893	}
1894}
1895
1896/*
1897 * GNU tar sparse format 1.0
1898 *
1899 * The idea: The offset/size data is stored as a series of base-10
1900 * ASCII numbers prepended to the file data, so that dearchivers that
1901 * don't support this format will extract the block map along with the
1902 * data and a separate post-process can restore the sparseness.
1903 *
1904 * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
1905 * padding to the body of the file when using this format.  GNU tar
1906 * 1.17 corrected this bug without bumping the version number, so
1907 * it's not possible to support both variants.  This code supports
1908 * the later variant at the expense of not supporting the former.
1909 *
1910 * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
1911 * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
1912 */
1913
1914/*
1915 * Read the next line from the input, and parse it as a decimal
1916 * integer followed by '\n'.  Returns positive integer value or
1917 * negative on error.
1918 */
1919static int64_t
1920gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
1921    ssize_t *remaining)
1922{
1923	int64_t l, limit, last_digit_limit;
1924	const char *p;
1925	ssize_t bytes_read;
1926	int base, digit;
1927
1928	base = 10;
1929	limit = INT64_MAX / base;
1930	last_digit_limit = INT64_MAX % base;
1931
1932	/*
1933	 * Skip any lines starting with '#'; GNU tar specs
1934	 * don't require this, but they should.
1935	 */
1936	do {
1937		bytes_read = readline(a, tar, &p, tar_min(*remaining, 100));
1938		if (bytes_read <= 0)
1939			return (ARCHIVE_FATAL);
1940		*remaining -= bytes_read;
1941	} while (p[0] == '#');
1942
1943	l = 0;
1944	while (bytes_read > 0) {
1945		if (*p == '\n')
1946			return (l);
1947		if (*p < '0' || *p >= '0' + base)
1948			return (ARCHIVE_WARN);
1949		digit = *p - '0';
1950		if (l > limit || (l == limit && digit > last_digit_limit))
1951			l = INT64_MAX; /* Truncate on overflow. */
1952		else
1953			l = (l * base) + digit;
1954		p++;
1955		bytes_read--;
1956	}
1957	/* TODO: Error message. */
1958	return (ARCHIVE_WARN);
1959}
1960
1961/*
1962 * Returns length (in bytes) of the sparse data description
1963 * that was read.
1964 */
1965static ssize_t
1966gnu_sparse_10_read(struct archive_read *a, struct tar *tar)
1967{
1968	ssize_t remaining, bytes_read;
1969	int entries;
1970	off_t offset, size, to_skip;
1971
1972	/* Clear out the existing sparse list. */
1973	gnu_clear_sparse_list(tar);
1974
1975	remaining = tar->entry_bytes_remaining;
1976
1977	/* Parse entries. */
1978	entries = gnu_sparse_10_atol(a, tar, &remaining);
1979	if (entries < 0)
1980		return (ARCHIVE_FATAL);
1981	/* Parse the individual entries. */
1982	while (entries-- > 0) {
1983		/* Parse offset/size */
1984		offset = gnu_sparse_10_atol(a, tar, &remaining);
1985		if (offset < 0)
1986			return (ARCHIVE_FATAL);
1987		size = gnu_sparse_10_atol(a, tar, &remaining);
1988		if (size < 0)
1989			return (ARCHIVE_FATAL);
1990		/* Add a new sparse entry. */
1991		if (gnu_add_sparse_entry(a, tar, offset, size) != ARCHIVE_OK)
1992			return (ARCHIVE_FATAL);
1993	}
1994	/* Skip rest of block... */
1995	bytes_read = tar->entry_bytes_remaining - remaining;
1996	to_skip = 0x1ff & -bytes_read;
1997	if (to_skip != __archive_read_skip(a, to_skip))
1998		return (ARCHIVE_FATAL);
1999	return (bytes_read + to_skip);
2000}
2001
/*-
 * Convert a tar header numeric field to an integer.
 *
 * Traditional tar formats (including POSIX) store every standard
 * numeric field in base-8, which is a real limitation in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * Two workarounds exist:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions: a field
 * whose first byte has the high bit set is handed to tar_atol256(),
 * everything else goes to tar_atol8().
 */
static int64_t
tar_atol(const char *p, unsigned char_cnt)
{
	/*
	 * Technically, GNU tar considers a field to be in base-256
	 * only if the first byte is 0xff or 0x80; we accept any first
	 * byte with the top bit set.
	 */
	int is_base256 = (*p & 0x80) != 0;

	return (is_base256 ? tar_atol256(p, char_cnt)
	    : tar_atol8(p, char_cnt));
}
2030
/*
 * Parse a base-8 number from a fixed-width field of 'char_cnt' bytes
 * starting at 'p'.  Leading blanks/tabs and an optional '-' are
 * accepted; parsing stops at the first non-octal character or at the
 * end of the field, whichever comes first.  The magnitude saturates
 * at INT64_MAX on overflow.
 *
 * Every byte examined -- blanks and sign included -- is charged
 * against 'char_cnt', and no byte at or beyond p[char_cnt] is ever
 * read.  Header fields are not NUL-terminated, so an all-blank field
 * must yield 0 rather than picking up digits from whatever follows.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol8(const char *p, unsigned char_cnt)
{
	const int base = 8;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l;
	int digit, sign;

	/* Skip leading blanks, but never walk past the field. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0) {
		/* Only dereference while bytes remain in the field. */
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2066
/*
 * Parse a base-10 number from a buffer of 'char_cnt' bytes starting
 * at 'p'.  Leading blanks/tabs and an optional '-' are accepted;
 * parsing stops at the first non-decimal character or at the end of
 * the buffer, whichever comes first.  The magnitude saturates at
 * INT64_MAX on overflow.
 *
 * Every byte examined -- blanks and sign included -- is charged
 * against 'char_cnt', and no byte at or beyond p[char_cnt] is ever
 * read, so the input does not need to be NUL-terminated.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol10(const char *p, unsigned char_cnt)
{
	const int base = 10;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l;
	int digit, sign;

	/* Skip leading blanks, but never walk past the buffer. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	sign = 1;
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	l = 0;
	while (char_cnt > 0) {
		/* Only dereference while bytes remain in the buffer. */
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2102
/*
 * Parse a base-256 integer.  This is just a straight signed binary
 * value in big-endian order, except that the high-order bit of the
 * first byte is ignored (it is the base-256 marker).  Bit 0x40 of the
 * first byte therefore acts as the sign bit and its low six bits are
 * the most significant value bits.
 *
 * Values that do not fit in int64_t saturate at INT64_MAX or
 * INT64_MIN.  A zero-length field yields 0 without reading anything.
 *
 * The accumulation uses multiplication rather than '<<' because
 * left-shifting a negative signed value is undefined behaviour in C;
 * the range checks guarantee the multiplication cannot overflow.
 */
static int64_t
tar_atol256(const char *_p, unsigned char_cnt)
{
	const unsigned char *p = (const unsigned char *)_p;
	const int64_t upper_limit = INT64_MAX / 256;
	const int64_t lower_limit = INT64_MIN / 256;
	int64_t l;

	/* Guard the unsigned '--char_cnt' below against wrap-around. */
	if (char_cnt == 0)
		return (0);

	/* Sign-extend the 7-bit value carried by the first byte. */
	l = (int64_t)(0x3f & *p);
	if ((0x40 & *p) == 0x40)
		l -= 64;
	p++;

	while (--char_cnt > 0) {
		if (l > upper_limit)
			return (INT64_MAX); /* Truncate on overflow */
		if (l < lower_limit)
			return (INT64_MIN);
		/* Same result as (l << 8) | byte, but well-defined. */
		l = (l * 256) + *p++;
	}
	return (l);
}
2135
2136/*
2137 * Returns length of line (including trailing newline)
2138 * or negative on error.  'start' argument is updated to
2139 * point to first character of line.  This avoids copying
2140 * when possible.
2141 */
2142static ssize_t
2143readline(struct archive_read *a, struct tar *tar, const char **start,
2144    ssize_t limit)
2145{
2146	ssize_t bytes_read;
2147	ssize_t total_size = 0;
2148	const void *t;
2149	const char *s;
2150	void *p;
2151
2152	t = __archive_read_ahead(a, 1, &bytes_read);
2153	if (bytes_read <= 0)
2154		return (ARCHIVE_FATAL);
2155	s = t;  /* Start of line? */
2156	p = memchr(t, '\n', bytes_read);
2157	/* If we found '\n' in the read buffer, return pointer to that. */
2158	if (p != NULL) {
2159		bytes_read = 1 + ((const char *)p) - s;
2160		if (bytes_read > limit) {
2161			archive_set_error(&a->archive,
2162			    ARCHIVE_ERRNO_FILE_FORMAT,
2163			    "Line too long");
2164			return (ARCHIVE_FATAL);
2165		}
2166		__archive_read_consume(a, bytes_read);
2167		*start = s;
2168		return (bytes_read);
2169	}
2170	/* Otherwise, we need to accumulate in a line buffer. */
2171	for (;;) {
2172		if (total_size + bytes_read > limit) {
2173			archive_set_error(&a->archive,
2174			    ARCHIVE_ERRNO_FILE_FORMAT,
2175			    "Line too long");
2176			return (ARCHIVE_FATAL);
2177		}
2178		if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2179			archive_set_error(&a->archive, ENOMEM,
2180			    "Can't allocate working buffer");
2181			return (ARCHIVE_FATAL);
2182		}
2183		memcpy(tar->line.s + total_size, t, bytes_read);
2184		__archive_read_consume(a, bytes_read);
2185		total_size += bytes_read;
2186		/* If we found '\n', clean up and return. */
2187		if (p != NULL) {
2188			*start = tar->line.s;
2189			return (total_size);
2190		}
2191		/* Read some more. */
2192		t = __archive_read_ahead(a, 1, &bytes_read);
2193		if (bytes_read <= 0)
2194			return (ARCHIVE_FATAL);
2195		s = t;  /* Start of line? */
2196		p = memchr(t, '\n', bytes_read);
2197		/* If we found '\n', trim the read. */
2198		if (p != NULL) {
2199			bytes_read = 1 + ((const char *)p) - s;
2200		}
2201	}
2202}
2203
2204static wchar_t *
2205utf8_decode(struct tar *tar, const char *src, size_t length)
2206{
2207	wchar_t *dest;
2208	ssize_t n;
2209
2210	/* Ensure pax_entry buffer is big enough. */
2211	if (tar->pax_entry_length <= length) {
2212		wchar_t *old_entry;
2213
2214		if (tar->pax_entry_length <= 0)
2215			tar->pax_entry_length = 1024;
2216		while (tar->pax_entry_length <= length + 1)
2217			tar->pax_entry_length *= 2;
2218
2219		old_entry = tar->pax_entry;
2220		tar->pax_entry = (wchar_t *)realloc(tar->pax_entry,
2221		    tar->pax_entry_length * sizeof(wchar_t));
2222		if (tar->pax_entry == NULL) {
2223			free(old_entry);
2224			/* TODO: Handle this error. */
2225			return (NULL);
2226		}
2227	}
2228
2229	dest = tar->pax_entry;
2230	while (length > 0) {
2231		n = UTF8_mbrtowc(dest, src, length);
2232		if (n < 0)
2233			return (NULL);
2234		if (n == 0)
2235			break;
2236		dest++;
2237		src += n;
2238		length -= n;
2239	}
2240	*dest = L'\0';
2241	return (tar->pax_entry);
2242}
2243
/*
 * Decode a single UTF-8 sequence (1 to 4 bytes) from 's', looking at
 * no more than 'n' bytes, and store the character in '*pwc'.
 * Copied and simplified from FreeBSD libc/locale.
 *
 * Returns:
 *    0  if any argument is NULL/zero or the decoded character is NUL
 *   -1  if the first byte cannot start a sequence
 *   -2  if the first byte is valid but fewer than the required bytes
 *       are available
 *    1  with '*pwc' set to '?' if a continuation byte is malformed
 *   otherwise the number of input bytes consumed.
 */
static ssize_t
UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
{
	const unsigned char *in = (const unsigned char *)s;
	unsigned long codepoint;
	int lead, seq_len, k;

	if (pwc == NULL || s == NULL || n == 0)
		return (0);

	/*
	 * The first octet tells us how long the sequence is and which
	 * of its own bits carry payload.
	 */
	lead = in[0];
	if ((lead & 0x80) == 0) {
		seq_len = 1;
		codepoint = (unsigned long)(lead & 0x7f);
	} else if ((lead & 0xe0) == 0xc0) {
		seq_len = 2;
		codepoint = (unsigned long)(lead & 0x1f);
	} else if ((lead & 0xf0) == 0xe0) {
		seq_len = 3;
		codepoint = (unsigned long)(lead & 0x0f);
	} else if ((lead & 0xf8) == 0xf0) {
		seq_len = 4;
		codepoint = (unsigned long)(lead & 0x07);
	} else {
		/* Invalid first byte. */
		return (-1);
	}

	/* Valid first byte but truncated. */
	if ((size_t)seq_len > n)
		return (-2);

	/* Fold in six payload bits from each continuation octet. */
	for (k = 1; k < seq_len; k++) {
		if ((in[k] & 0xc0) != 0x80) {
			/* Invalid intermediate byte; consume one byte and
			 * emit '?' */
			*pwc = '?';
			return (1);
		}
		codepoint = (codepoint << 6) | (unsigned long)(in[k] & 0x3f);
	}

	/* Store the result; out-of-range values just get truncated. */
	*pwc = (wchar_t)codepoint;
#ifdef WCHAR_MAX
	/*
	 * If platform has WCHAR_MAX, we can do something
	 * more sensible with out-of-range values.
	 */
	if (codepoint >= WCHAR_MAX)
		*pwc = '?';
#endif
	/* Return number of bytes input consumed: 0 for end-of-string. */
	if (codepoint == L'\0')
		return (0);
	return (seq_len);
}
2315
2316
/*
 * base64_decode - Base64 decode
 *
 * Decodes 'len' bytes at 's' into a newly malloc()ed buffer, which
 * the caller must free().  '*out_len' receives the number of decoded
 * bytes; the buffer is not NUL-terminated.  Returns NULL (with
 * '*out_len' set to 0) if the buffer cannot be allocated.
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 *
 * Characters outside the base-64 alphabet are skipped; the first
 * '=' or '_' ends the input.
 *
 * The reverse-lookup table is rebuilt on the stack for every call.
 * A lazily initialised static table would let a second thread pass
 * the "already initialised" probe while the first thread is still
 * filling it, and then mis-decode its input.
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const unsigned char digits[64] = {
		'A','B','C','D','E','F','G','H','I','J','K','L','M','N',
		'O','P','Q','R','S','T','U','V','W','X','Y','Z','a','b',
		'c','d','e','f','g','h','i','j','k','l','m','n','o','p',
		'q','r','s','t','u','v','w','x','y','z','0','1','2','3',
		'4','5','6','7','8','9','+','/' };
	unsigned char decode_table[128];
	char *out, *d;
	const unsigned char *src = (const unsigned char *)s;
	unsigned i;

	/* Build the character -> 6-bit value map; 0xff marks "not a digit". */
	memset(decode_table, 0xff, sizeof(decode_table));
	for (i = 0; i < sizeof(digits); i++)
		decode_table[digits[i]] = (unsigned char)i;

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	out = (char *)malloc(len - len / 4 + 1);
	if (out == NULL) {
		*out_len = 0;
		return (NULL);
	}
	d = out;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int v = 0;
		int group_size = 0;
		while (group_size < 4 && len > 0) {
			/* '=' or '_' padding indicates final group. */
			if (*src == '=' || *src == '_') {
				len = 0;
				break;
			}
			/* Skip illegal characters (including line breaks) */
			if (*src > 127 || *src < 32
			    || decode_table[*src] == 0xff) {
				len--;
				src++;
				continue;
			}
			v <<= 6;
			v |= decode_table[*src++];
			len--;
			group_size++;
		}
		/* Align a short group properly. */
		v <<= 6 * (4 - group_size);
		/* Unpack the group we just collected. */
		switch (group_size) {
		case 4: d[2] = v & 0xff;
			/* FALLTHROUGH */
		case 3: d[1] = (v >> 8) & 0xff;
			/* FALLTHROUGH */
		case 2: d[0] = (v >> 16) & 0xff;
			break;
		case 1: /* this is invalid! */
			break;
		default: /* empty group: nothing to emit */
			break;
		}
		/* 4 digits -> 3 bytes, 3 -> 2, 2 -> 1, fewer -> 0. */
		d += group_size * 3 / 4;
	}

	*out_len = d - out;
	return (out);
}
2397
/*
 * Return a newly malloc()ed copy of 'in' with every "%XY" escape
 * (X and Y being hex digits, as judged by tohex()) replaced by the
 * byte it encodes.  A '%' that is not followed by two hex digits is
 * copied through unchanged.  Returns NULL if allocation fails; the
 * caller frees the result.
 */
static char *
url_decode(const char *in)
{
	const char *src = in;
	char *result, *dst;

	/* The decoded text is never longer than the input. */
	result = (char *)malloc(strlen(in) + 1);
	if (result == NULL)
		return (NULL);

	dst = result;
	while (*src != '\0') {
		if (src[0] == '%' && src[1] != '\0' && src[2] != '\0') {
			/* Try to convert % escape */
			int hi = tohex(src[1]);
			int lo = tohex(src[2]);

			if (hi >= 0 && lo >= 0) {
				/* Looks good: emit one byte, eat three. */
				*dst++ = ((hi << 4) | lo);
				src += 3;
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*dst++ = *src++;
	}
	*dst = '\0';
	return (result);
}
2426
/*
 * Map a hexadecimal digit character ('0'-'9', 'A'-'F', 'a'-'f') to
 * its numeric value 0..15.  Returns -1 for anything else.
 */
static int
tohex(int c)
{
	int value = -1;

	if ('0' <= c && c <= '9')
		value = c - '0';
	else if ('A' <= c && c <= 'F')
		value = 10 + (c - 'A');
	else if ('a' <= c && c <= 'f')
		value = 10 + (c - 'a');
	return (value);
}
2439