1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2010-2012 Michihiro NAKAJIMA
4 * Copyright (c) 2016 Martin Matuska
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_write_set_format_pax.c 358090 2020-02-19 01:51:44Z mm $");
30
31#ifdef HAVE_ERRNO_H
32#include <errno.h>
33#endif
34#ifdef HAVE_STDLIB_H
35#include <stdlib.h>
36#endif
37#ifdef HAVE_STRING_H
38#include <string.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_entry_locale.h"
44#include "archive_private.h"
45#include "archive_write_private.h"
46#include "archive_write_set_format_private.h"
47
/*
 * One node of the singly linked list of sparse-file regions that
 * struct pax keeps in its sparse_list / sparse_tail members.
 *
 * NOTE(review): the code that fills these nodes (sparse_list_add) is
 * defined elsewhere in this file; the per-field notes below are inferred
 * from the field names only and should be confirmed there.
 */
struct sparse_block {
	struct sparse_block	*next;		/* Next node, or NULL at the end. */
	int			 is_hole;	/* Presumably non-zero for a hole. */
	uint64_t		 offset;	/* Presumably the region's start. */
	uint64_t		 remaining;	/* Presumably bytes left in it. */
};
54
55struct pax {
56	uint64_t	entry_bytes_remaining;
57	uint64_t	entry_padding;
58	struct archive_string	l_url_encoded_name;
59	struct archive_string	pax_header;
60	struct archive_string	sparse_map;
61	size_t			sparse_map_padding;
62	struct sparse_block	*sparse_list;
63	struct sparse_block	*sparse_tail;
64	struct archive_string_conv *sconv_utf8;
65	int			 opt_binary;
66
67	unsigned flags;
68#define WRITE_SCHILY_XATTR       (1 << 0)
69#define WRITE_LIBARCHIVE_XATTR   (1 << 1)
70};
71
/* Format callbacks installed by archive_write_set_format_pax(). */
static int		 archive_write_pax_options(struct archive_write *,
			     const char *, const char *);
static int		 archive_write_pax_header(struct archive_write *,
			     struct archive_entry *);
static ssize_t		 archive_write_pax_data(struct archive_write *,
			     const void *, size_t);
static int		 archive_write_pax_finish_entry(struct archive_write *);
static int		 archive_write_pax_close(struct archive_write *);
static int		 archive_write_pax_free(struct archive_write *);

/* Builders for individual pax attribute records. */
static void		 add_pax_attr(struct archive_string *, const char *key,
			     const char *value);
static void		 add_pax_attr_binary(struct archive_string *,
			     const char *key,
			     const char *value, size_t value_len);
static void		 add_pax_attr_int(struct archive_string *,
			     const char *key, int64_t value);
static void		 add_pax_attr_time(struct archive_string *,
			     const char *key, int64_t sec,
			     unsigned long nanos);
static int		 add_pax_acl(struct archive_write *,
			    struct archive_entry *, struct pax *, int);

/* String and name helpers. */
static char		*format_int(char *dest, int64_t);
static int		 has_non_ASCII(const char *);
static char		*base64_encode(const char *src, size_t len);
static char		*url_encode(const char *in);
static char		*build_ustar_entry_name(char *dest, const char *src,
			     size_t src_length, const char *insert);
static char		*build_pax_attribute_name(char *dest, const char *src);
static char		*build_gnu_sparse_name(char *dest, const char *src);

/* Sparse block list maintenance. */
static void		 sparse_list_clear(struct pax *);
static int		 sparse_list_add(struct pax *, int64_t, int64_t);
103
104/*
105 * Set output format to 'restricted pax' format.
106 *
107 * This is the same as normal 'pax', but tries to suppress
108 * the pax header whenever possible.  This is the default for
109 * bsdtar, for instance.
110 */
111int
112archive_write_set_format_pax_restricted(struct archive *_a)
113{
114	struct archive_write *a = (struct archive_write *)_a;
115	int r;
116
117	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
118	    ARCHIVE_STATE_NEW, "archive_write_set_format_pax_restricted");
119
120	r = archive_write_set_format_pax(&a->archive);
121	a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED;
122	a->archive.archive_format_name = "restricted POSIX pax interchange";
123	return (r);
124}
125
126/*
127 * Set output format to 'pax' format.
128 */
129int
130archive_write_set_format_pax(struct archive *_a)
131{
132	struct archive_write *a = (struct archive_write *)_a;
133	struct pax *pax;
134
135	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
136	    ARCHIVE_STATE_NEW, "archive_write_set_format_pax");
137
138	if (a->format_free != NULL)
139		(a->format_free)(a);
140
141	pax = (struct pax *)calloc(1, sizeof(*pax));
142	if (pax == NULL) {
143		archive_set_error(&a->archive, ENOMEM,
144		    "Can't allocate pax data");
145		return (ARCHIVE_FATAL);
146	}
147	pax->flags = WRITE_LIBARCHIVE_XATTR | WRITE_SCHILY_XATTR;
148
149	a->format_data = pax;
150	a->format_name = "pax";
151	a->format_options = archive_write_pax_options;
152	a->format_write_header = archive_write_pax_header;
153	a->format_write_data = archive_write_pax_data;
154	a->format_close = archive_write_pax_close;
155	a->format_free = archive_write_pax_free;
156	a->format_finish_entry = archive_write_pax_finish_entry;
157	a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
158	a->archive.archive_format_name = "POSIX pax interchange";
159	return (ARCHIVE_OK);
160}
161
162static int
163archive_write_pax_options(struct archive_write *a, const char *key,
164    const char *val)
165{
166	struct pax *pax = (struct pax *)a->format_data;
167	int ret = ARCHIVE_FAILED;
168
169	if (strcmp(key, "hdrcharset")  == 0) {
170		/*
171		 * The character-set we can use are defined in
172		 * IEEE Std 1003.1-2001
173		 */
174		if (val == NULL || val[0] == 0)
175			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
176			    "pax: hdrcharset option needs a character-set name");
177		else if (strcmp(val, "BINARY") == 0 ||
178		    strcmp(val, "binary") == 0) {
179			/*
180			 * Specify binary mode. We will not convert
181			 * filenames, uname and gname to any charsets.
182			 */
183			pax->opt_binary = 1;
184			ret = ARCHIVE_OK;
185		} else if (strcmp(val, "UTF-8") == 0) {
186			/*
187			 * Specify UTF-8 character-set to be used for
188			 * filenames. This is almost the test that
189			 * running platform supports the string conversion.
190			 * Especially libarchive_test needs this trick for
191			 * its test.
192			 */
193			pax->sconv_utf8 = archive_string_conversion_to_charset(
194			    &(a->archive), "UTF-8", 0);
195			if (pax->sconv_utf8 == NULL)
196				ret = ARCHIVE_FATAL;
197			else
198				ret = ARCHIVE_OK;
199		} else
200			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
201			    "pax: invalid charset name");
202		return (ret);
203	} else if (strcmp(key, "xattrheader") == 0) {
204		if (val == NULL || val[0] == 0) {
205			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
206			    "pax: xattrheader requires a value");
207		} else if (strcmp(val, "ALL") == 0 ||
208		    strcmp(val, "all") == 0) {
209			pax->flags |= WRITE_LIBARCHIVE_XATTR | WRITE_SCHILY_XATTR;
210			ret = ARCHIVE_OK;
211		} else if (strcmp(val, "SCHILY") == 0 ||
212		    strcmp(val, "schily") == 0) {
213			pax->flags |= WRITE_SCHILY_XATTR;
214			pax->flags &= ~WRITE_LIBARCHIVE_XATTR;
215			ret = ARCHIVE_OK;
216		} else if (strcmp(val, "LIBARCHIVE") == 0 ||
217		    strcmp(val, "libarchive") == 0) {
218			pax->flags |= WRITE_LIBARCHIVE_XATTR;
219			pax->flags &= ~WRITE_SCHILY_XATTR;
220			ret = ARCHIVE_OK;
221		} else
222			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
223			    "pax: invalid xattr header name");
224		return (ret);
225	}
226
227	/* Note: The "warn" return is just to inform the options
228	 * supervisor that we didn't handle it.  It will generate
229	 * a suitable error if no one used this option. */
230	return (ARCHIVE_WARN);
231}
232
/*
 * Append a time attribute "<sec>[.<fraction>]" under 'key' to 'as'.
 *
 * The fraction is taken from the low nine decimal digits of 'nanos',
 * with trailing zeros dropped; it is omitted entirely when those
 * digits are all zero.
 *
 * Note: the fraction digits are simply written after the formatted
 * 'sec', so sec=-1, nanos=200000000 is emitted as "-1.2" (i.e. it is
 * read as -1.2 seconds, not -0.8 seconds).  This is a pedantic point:
 * files dated before Jan 1, 1970 with sub-second timestamps are rare.
 */
static void
add_pax_attr_time(struct archive_string *as, const char *key,
    int64_t sec, unsigned long nanos)
{
	/*
	 * Each byte contributes fewer than 3 base-10 digits, so this
	 * is always large enough for sign, seconds, '.', fraction, NUL.
	 */
	char buf[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)];
	char *cursor = buf + sizeof(buf) - 1;
	int pending = 10;	/* Digit positions not yet consumed. */
	int d = 0;

	*cursor = 0;

	/* Peel off trailing zeros of the fractional part. */
	while (pending > 0 && d == 0) {
		d = nanos % 10;
		nanos /= 10;
		pending--;
	}

	/* Emit the fraction, right to left, only if it is non-zero. */
	if (pending > 0) {
		for (; pending > 0; pending--) {
			*--cursor = "0123456789"[d];
			d = nanos % 10;
			nanos /= 10;
		}
		*--cursor = '.';
	}

	/* Seconds go in front of whatever has been written so far. */
	cursor = format_int(cursor, sec);
	add_pax_attr(as, key, cursor);
}
275
/*
 * Write the decimal representation of 'i' backwards, ending just
 * before 't', and return a pointer to its first character.
 *
 * 't' must point one past the last slot to fill (typically at the NUL
 * terminator of a work buffer); nothing at or after 't' is written.
 * INT64_MIN is handled without negating it in signed arithmetic.
 */
static char *
format_int(char *t, int64_t i)
{
	const int negative = (i < 0);
	uint64_t magnitude;

	if (!negative)
		magnitude = (uint64_t)i;
	else if (i == INT64_MIN)
		magnitude = (uint64_t)(INT64_MAX) + 1;
	else
		magnitude = (uint64_t)(-i);

	/* Always emits at least one digit, so 0 becomes "0". */
	do {
		*--t = (char)('0' + (int)(magnitude % 10));
		magnitude /= 10;
	} while (magnitude != 0);

	if (negative)
		*--t = '-';
	return (t);
}
293
/*
 * Append an attribute whose value is the decimal form of 'value'.
 */
static void
add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
{
	/* Fewer than 3 base-10 digits per byte, plus the terminator. */
	char digits[1 + 3 * sizeof(value)];
	char *end = digits + sizeof(digits) - 1;

	*end = 0;
	add_pax_attr(as, key, format_int(end, value));
}
302
/*
 * Append a key/value attribute with a NUL-terminated value to the pax
 * header.  The length field and the other syntactic requirements are
 * taken care of by add_pax_attr_binary(), which does the actual work.
 */
static void
add_pax_attr(struct archive_string *as, const char *key, const char *value)
{
	const size_t value_len = strlen(value);

	add_pax_attr_binary(as, key, value, value_len);
}
312
/*
 * Add a key/value attribute to the pax header.  This function handles
 * binary values: 'value' need not be NUL-terminated and may contain
 * NUL bytes; exactly 'value_len' bytes are copied.
 *
 * The record appended to 'as' has the layout
 *     <len> <space> <key> <=> <value> <nl>
 * where <len> is the decimal length of the whole record, <len> field
 * included.
 *
 * The length is computed in 64-bit unsigned arithmetic so that a large
 * 'value_len' is neither truncated nor able to overflow a signed int.
 */
static void
add_pax_attr_binary(struct archive_string *as, const char *key,
		    const char *value, size_t value_len)
{
	uint64_t i, len, total;
	int digits, total_digits;
	char tmp[1 + 3 * sizeof(uint64_t)];	/* < 3 base-10 digits per byte */

	/* Everything except the <len> field itself. */
	len = 1 + (uint64_t)strlen(key) + 1 + (uint64_t)value_len + 1;

	/*
	 * The <len> field includes the length of the <len> field, so
	 * computing the correct length is tricky.  Start by counting
	 * the base-10 digits of 'len' and assume that many digits are
	 * enough for the final value.
	 */
	digits = 0;
	for (i = len; i > 0; i /= 10)
		digits++;
	total = len + (uint64_t)digits;

	/*
	 * For example, if the string without the length field is 99
	 * chars, then adding the 2 digit length "99" forces the total
	 * past 100, requiring an extra digit.  Detect that by counting
	 * the digits of the tentative total; one extra byte is always
	 * sufficient because len < 10^digits.
	 */
	total_digits = 0;
	for (i = total; i > 0; i /= 10)
		total_digits++;
	if (total_digits > digits)
		total++;

	/* Now, we have the right length so we can build the line. */
	tmp[sizeof(tmp) - 1] = 0;	/* Null-terminate the work area. */
	archive_strcat(as,
	    format_int(tmp + sizeof(tmp) - 1, (int64_t)total));
	archive_strappend_char(as, ' ');
	archive_strcat(as, key);
	archive_strappend_char(as, '=');
	archive_array_append(as, value, value_len);
	archive_strappend_char(as, '\n');
}
362
363static void
364archive_write_pax_header_xattr(struct pax *pax, const char *encoded_name,
365    const void *value, size_t value_len)
366{
367	struct archive_string s;
368	char *encoded_value;
369
370	if (pax->flags & WRITE_LIBARCHIVE_XATTR) {
371		encoded_value = base64_encode((const char *)value, value_len);
372
373		if (encoded_name != NULL && encoded_value != NULL) {
374			archive_string_init(&s);
375			archive_strcpy(&s, "LIBARCHIVE.xattr.");
376			archive_strcat(&s, encoded_name);
377			add_pax_attr(&(pax->pax_header), s.s, encoded_value);
378			archive_string_free(&s);
379		}
380		free(encoded_value);
381	}
382	if (pax->flags & WRITE_SCHILY_XATTR) {
383		archive_string_init(&s);
384		archive_strcpy(&s, "SCHILY.xattr.");
385		archive_strcat(&s, encoded_name);
386		add_pax_attr_binary(&(pax->pax_header), s.s, value, value_len);
387		archive_string_free(&s);
388	}
389}
390
391static int
392archive_write_pax_header_xattrs(struct archive_write *a,
393    struct pax *pax, struct archive_entry *entry)
394{
395	int i = archive_entry_xattr_reset(entry);
396
397	while (i--) {
398		const char *name;
399		const void *value;
400		char *url_encoded_name = NULL, *encoded_name = NULL;
401		size_t size;
402		int r;
403
404		archive_entry_xattr_next(entry, &name, &value, &size);
405		url_encoded_name = url_encode(name);
406		if (url_encoded_name != NULL) {
407			/* Convert narrow-character to UTF-8. */
408			r = archive_strcpy_l(&(pax->l_url_encoded_name),
409			    url_encoded_name, pax->sconv_utf8);
410			free(url_encoded_name); /* Done with this. */
411			if (r == 0)
412				encoded_name = pax->l_url_encoded_name.s;
413			else if (errno == ENOMEM) {
414				archive_set_error(&a->archive, ENOMEM,
415				    "Can't allocate memory for Linkname");
416				return (ARCHIVE_FATAL);
417			}
418		}
419
420		archive_write_pax_header_xattr(pax, encoded_name,
421		    value, size);
422
423	}
424	return (ARCHIVE_OK);
425}
426
427static int
428get_entry_hardlink(struct archive_write *a, struct archive_entry *entry,
429    const char **name, size_t *length, struct archive_string_conv *sc)
430{
431	int r;
432
433	r = archive_entry_hardlink_l(entry, name, length, sc);
434	if (r != 0) {
435		if (errno == ENOMEM) {
436			archive_set_error(&a->archive, ENOMEM,
437			    "Can't allocate memory for Linkname");
438			return (ARCHIVE_FATAL);
439		}
440		return (ARCHIVE_WARN);
441	}
442	return (ARCHIVE_OK);
443}
444
445static int
446get_entry_pathname(struct archive_write *a, struct archive_entry *entry,
447    const char **name, size_t *length, struct archive_string_conv *sc)
448{
449	int r;
450
451	r = archive_entry_pathname_l(entry, name, length, sc);
452	if (r != 0) {
453		if (errno == ENOMEM) {
454			archive_set_error(&a->archive, ENOMEM,
455			    "Can't allocate memory for Pathname");
456			return (ARCHIVE_FATAL);
457		}
458		return (ARCHIVE_WARN);
459	}
460	return (ARCHIVE_OK);
461}
462
463static int
464get_entry_uname(struct archive_write *a, struct archive_entry *entry,
465    const char **name, size_t *length, struct archive_string_conv *sc)
466{
467	int r;
468
469	r = archive_entry_uname_l(entry, name, length, sc);
470	if (r != 0) {
471		if (errno == ENOMEM) {
472			archive_set_error(&a->archive, ENOMEM,
473			    "Can't allocate memory for Uname");
474			return (ARCHIVE_FATAL);
475		}
476		return (ARCHIVE_WARN);
477	}
478	return (ARCHIVE_OK);
479}
480
481static int
482get_entry_gname(struct archive_write *a, struct archive_entry *entry,
483    const char **name, size_t *length, struct archive_string_conv *sc)
484{
485	int r;
486
487	r = archive_entry_gname_l(entry, name, length, sc);
488	if (r != 0) {
489		if (errno == ENOMEM) {
490			archive_set_error(&a->archive, ENOMEM,
491			    "Can't allocate memory for Gname");
492			return (ARCHIVE_FATAL);
493		}
494		return (ARCHIVE_WARN);
495	}
496	return (ARCHIVE_OK);
497}
498
499static int
500get_entry_symlink(struct archive_write *a, struct archive_entry *entry,
501    const char **name, size_t *length, struct archive_string_conv *sc)
502{
503	int r;
504
505	r = archive_entry_symlink_l(entry, name, length, sc);
506	if (r != 0) {
507		if (errno == ENOMEM) {
508			archive_set_error(&a->archive, ENOMEM,
509			    "Can't allocate memory for Linkname");
510			return (ARCHIVE_FATAL);
511		}
512		return (ARCHIVE_WARN);
513	}
514	return (ARCHIVE_OK);
515}
516
517/* Add ACL to pax header */
518static int
519add_pax_acl(struct archive_write *a,
520    struct archive_entry *entry, struct pax *pax, int flags)
521{
522	char *p;
523	const char *attr;
524	int acl_types;
525
526	acl_types = archive_entry_acl_types(entry);
527
528	if ((acl_types & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0)
529		attr = "SCHILY.acl.ace";
530	else if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)
531		attr = "SCHILY.acl.access";
532	else if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0)
533		attr = "SCHILY.acl.default";
534	else
535		return (ARCHIVE_FATAL);
536
537	p = archive_entry_acl_to_text_l(entry, NULL, flags, pax->sconv_utf8);
538	if (p == NULL) {
539		if (errno == ENOMEM) {
540			archive_set_error(&a->archive, ENOMEM, "%s %s",
541			    "Can't allocate memory for ", attr);
542			return (ARCHIVE_FATAL);
543		}
544		archive_set_error(&a->archive,
545		    ARCHIVE_ERRNO_FILE_FORMAT, "%s %s %s",
546		    "Can't translate ", attr, " to UTF-8");
547		return(ARCHIVE_WARN);
548	} else if (*p != '\0') {
549		add_pax_attr(&(pax->pax_header),
550		    attr, p);
551		free(p);
552	}
553	return(ARCHIVE_OK);
554}
555
556/*
557 * TODO: Consider adding 'comment' and 'charset' fields to
558 * archive_entry so that clients can specify them.  Also, consider
559 * adding generic key/value tags so clients can add arbitrary
560 * key/value data.
561 *
562 * TODO: Break up this 700-line function!!!!  Yowza!
563 */
564static int
565archive_write_pax_header(struct archive_write *a,
566    struct archive_entry *entry_original)
567{
568	struct archive_entry *entry_main;
569	const char *p;
570	const char *suffix;
571	int need_extension, r, ret;
572	int acl_types;
573	int sparse_count;
574	uint64_t sparse_total, real_size;
575	struct pax *pax;
576	const char *hardlink;
577	const char *path = NULL, *linkpath = NULL;
578	const char *uname = NULL, *gname = NULL;
579	const void *mac_metadata;
580	size_t mac_metadata_size;
581	struct archive_string_conv *sconv;
582	size_t hardlink_length, path_length, linkpath_length;
583	size_t uname_length, gname_length;
584
585	char paxbuff[512];
586	char ustarbuff[512];
587	char ustar_entry_name[256];
588	char pax_entry_name[256];
589	char gnu_sparse_name[256];
590	struct archive_string entry_name;
591
592	ret = ARCHIVE_OK;
593	need_extension = 0;
594	pax = (struct pax *)a->format_data;
595
596	/* Sanity check. */
597	if (archive_entry_pathname(entry_original) == NULL) {
598		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
599			  "Can't record entry in tar file without pathname");
600		return (ARCHIVE_FAILED);
601	}
602
603	/*
604	 * Choose a header encoding.
605	 */
606	if (pax->opt_binary)
607		sconv = NULL;/* Binary mode. */
608	else {
609		/* Header encoding is UTF-8. */
610		if (pax->sconv_utf8 == NULL) {
611			/* Initialize the string conversion object
612			 * we must need */
613			pax->sconv_utf8 = archive_string_conversion_to_charset(
614			    &(a->archive), "UTF-8", 1);
615			if (pax->sconv_utf8 == NULL)
616				/* Couldn't allocate memory */
617				return (ARCHIVE_FAILED);
618		}
619		sconv = pax->sconv_utf8;
620	}
621
622	r = get_entry_hardlink(a, entry_original, &hardlink,
623	    &hardlink_length, sconv);
624	if (r == ARCHIVE_FATAL)
625		return (r);
626	else if (r != ARCHIVE_OK) {
627		r = get_entry_hardlink(a, entry_original, &hardlink,
628		    &hardlink_length, NULL);
629		if (r == ARCHIVE_FATAL)
630			return (r);
631		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
632		    "Can't translate linkname '%s' to %s", hardlink,
633		    archive_string_conversion_charset_name(sconv));
634		ret = ARCHIVE_WARN;
635		sconv = NULL;/* The header charset switches to binary mode. */
636	}
637
638	/* Make sure this is a type of entry that we can handle here */
639	if (hardlink == NULL) {
640		switch (archive_entry_filetype(entry_original)) {
641		case AE_IFBLK:
642		case AE_IFCHR:
643		case AE_IFIFO:
644		case AE_IFLNK:
645		case AE_IFREG:
646			break;
647		case AE_IFDIR:
648		{
649			/*
650			 * Ensure a trailing '/'.  Modify the original
651			 * entry so the client sees the change.
652			 */
653#if defined(_WIN32) && !defined(__CYGWIN__)
654			const wchar_t *wp;
655
656			wp = archive_entry_pathname_w(entry_original);
657			if (wp != NULL && wp[wcslen(wp) -1] != L'/') {
658				struct archive_wstring ws;
659
660				archive_string_init(&ws);
661				path_length = wcslen(wp);
662				if (archive_wstring_ensure(&ws,
663				    path_length + 2) == NULL) {
664					archive_set_error(&a->archive, ENOMEM,
665					    "Can't allocate pax data");
666					archive_wstring_free(&ws);
667					return(ARCHIVE_FATAL);
668				}
669				/* Should we keep '\' ? */
670				if (wp[path_length -1] == L'\\')
671					path_length--;
672				archive_wstrncpy(&ws, wp, path_length);
673				archive_wstrappend_wchar(&ws, L'/');
674				archive_entry_copy_pathname_w(
675				    entry_original, ws.s);
676				archive_wstring_free(&ws);
677				p = NULL;
678			} else
679#endif
680				p = archive_entry_pathname(entry_original);
681			/*
682			 * On Windows, this is a backup operation just in
683			 * case getting WCS failed. On POSIX, this is a
684			 * normal operation.
685			 */
686			if (p != NULL && p[0] != '\0' && p[strlen(p) - 1] != '/') {
687				struct archive_string as;
688
689				archive_string_init(&as);
690				path_length = strlen(p);
691				if (archive_string_ensure(&as,
692				    path_length + 2) == NULL) {
693					archive_set_error(&a->archive, ENOMEM,
694					    "Can't allocate pax data");
695					archive_string_free(&as);
696					return(ARCHIVE_FATAL);
697				}
698#if defined(_WIN32) && !defined(__CYGWIN__)
699				/* NOTE: This might break the pathname
700				 * if the current code page is CP932 and
701				 * the pathname includes a character '\'
702				 * as a part of its multibyte pathname. */
703				if (p[strlen(p) -1] == '\\')
704					path_length--;
705				else
706#endif
707				archive_strncpy(&as, p, path_length);
708				archive_strappend_char(&as, '/');
709				archive_entry_copy_pathname(
710				    entry_original, as.s);
711				archive_string_free(&as);
712			}
713			break;
714		}
715		default: /* AE_IFSOCK and unknown */
716			__archive_write_entry_filetype_unsupported(
717			    &a->archive, entry_original, "pax");
718			return (ARCHIVE_FAILED);
719		}
720	}
721
722	/*
723	 * If Mac OS metadata blob is here, recurse to write that
724	 * as a separate entry.  This is really a pretty poor design:
725	 * In particular, it doubles the overhead for long filenames.
726	 * TODO: Help Apple folks design something better and figure
727	 * out how to transition from this legacy format.
728	 *
729	 * Note that this code is present on every platform; clients
730	 * on non-Mac are unlikely to ever provide this data, but
731	 * applications that copy entries from one archive to another
732	 * should not lose data just because the local filesystem
733	 * can't store it.
734	 */
735	mac_metadata =
736	    archive_entry_mac_metadata(entry_original, &mac_metadata_size);
737	if (mac_metadata != NULL) {
738		const char *oname;
739		char *name, *bname;
740		size_t name_length;
741		struct archive_entry *extra = archive_entry_new2(&a->archive);
742
743		oname = archive_entry_pathname(entry_original);
744		name_length = strlen(oname);
745		name = malloc(name_length + 3);
746		if (name == NULL || extra == NULL) {
747			/* XXX error message */
748			archive_entry_free(extra);
749			free(name);
750			return (ARCHIVE_FAILED);
751		}
752		strcpy(name, oname);
753		/* Find last '/'; strip trailing '/' characters */
754		bname = strrchr(name, '/');
755		while (bname != NULL && bname[1] == '\0') {
756			*bname = '\0';
757			bname = strrchr(name, '/');
758		}
759		if (bname == NULL) {
760			memmove(name + 2, name, name_length + 1);
761			memmove(name, "._", 2);
762		} else {
763			bname += 1;
764			memmove(bname + 2, bname, strlen(bname) + 1);
765			memmove(bname, "._", 2);
766		}
767		archive_entry_copy_pathname(extra, name);
768		free(name);
769
770		archive_entry_set_size(extra, mac_metadata_size);
771		archive_entry_set_filetype(extra, AE_IFREG);
772		archive_entry_set_perm(extra,
773		    archive_entry_perm(entry_original));
774		archive_entry_set_mtime(extra,
775		    archive_entry_mtime(entry_original),
776		    archive_entry_mtime_nsec(entry_original));
777		archive_entry_set_gid(extra,
778		    archive_entry_gid(entry_original));
779		archive_entry_set_gname(extra,
780		    archive_entry_gname(entry_original));
781		archive_entry_set_uid(extra,
782		    archive_entry_uid(entry_original));
783		archive_entry_set_uname(extra,
784		    archive_entry_uname(entry_original));
785
786		/* Recurse to write the special copyfile entry. */
787		r = archive_write_pax_header(a, extra);
788		archive_entry_free(extra);
789		if (r < ARCHIVE_WARN)
790			return (r);
791		if (r < ret)
792			ret = r;
793		r = (int)archive_write_pax_data(a, mac_metadata,
794		    mac_metadata_size);
795		if (r < ARCHIVE_WARN)
796			return (r);
797		if (r < ret)
798			ret = r;
799		r = archive_write_pax_finish_entry(a);
800		if (r < ARCHIVE_WARN)
801			return (r);
802		if (r < ret)
803			ret = r;
804	}
805
806	/* Copy entry so we can modify it as needed. */
807#if defined(_WIN32) && !defined(__CYGWIN__)
808	/* Make sure the path separators in pathname, hardlink and symlink
809	 * are all slash '/', not the Windows path separator '\'. */
810	entry_main = __la_win_entry_in_posix_pathseparator(entry_original);
811	if (entry_main == entry_original)
812		entry_main = archive_entry_clone(entry_original);
813#else
814	entry_main = archive_entry_clone(entry_original);
815#endif
816	if (entry_main == NULL) {
817		archive_set_error(&a->archive, ENOMEM,
818		    "Can't allocate pax data");
819		return(ARCHIVE_FATAL);
820	}
821	archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
822	archive_string_empty(&(pax->sparse_map));
823	sparse_total = 0;
824	sparse_list_clear(pax);
825
826	if (hardlink == NULL &&
827	    archive_entry_filetype(entry_main) == AE_IFREG)
828		sparse_count = archive_entry_sparse_reset(entry_main);
829	else
830		sparse_count = 0;
831	if (sparse_count) {
832		int64_t offset, length, last_offset = 0;
833		/* Get the last entry of sparse block. */
834		while (archive_entry_sparse_next(
835		    entry_main, &offset, &length) == ARCHIVE_OK)
836			last_offset = offset + length;
837
838		/* If the last sparse block does not reach the end of file,
839		 * We have to add a empty sparse block as the last entry to
840		 * manage storing file data. */
841		if (last_offset < archive_entry_size(entry_main))
842			archive_entry_sparse_add_entry(entry_main,
843			    archive_entry_size(entry_main), 0);
844		sparse_count = archive_entry_sparse_reset(entry_main);
845	}
846
847	/*
848	 * First, check the name fields and see if any of them
849	 * require binary coding.  If any of them does, then all of
850	 * them do.
851	 */
852	r = get_entry_pathname(a, entry_main, &path, &path_length, sconv);
853	if (r == ARCHIVE_FATAL) {
854		archive_entry_free(entry_main);
855		return (r);
856	} else if (r != ARCHIVE_OK) {
857		r = get_entry_pathname(a, entry_main, &path,
858		    &path_length, NULL);
859		if (r == ARCHIVE_FATAL) {
860			archive_entry_free(entry_main);
861			return (r);
862		}
863		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
864		    "Can't translate pathname '%s' to %s", path,
865		    archive_string_conversion_charset_name(sconv));
866		ret = ARCHIVE_WARN;
867		sconv = NULL;/* The header charset switches to binary mode. */
868	}
869	r = get_entry_uname(a, entry_main, &uname, &uname_length, sconv);
870	if (r == ARCHIVE_FATAL) {
871		archive_entry_free(entry_main);
872		return (r);
873	} else if (r != ARCHIVE_OK) {
874		r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL);
875		if (r == ARCHIVE_FATAL) {
876			archive_entry_free(entry_main);
877			return (r);
878		}
879		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
880		    "Can't translate uname '%s' to %s", uname,
881		    archive_string_conversion_charset_name(sconv));
882		ret = ARCHIVE_WARN;
883		sconv = NULL;/* The header charset switches to binary mode. */
884	}
885	r = get_entry_gname(a, entry_main, &gname, &gname_length, sconv);
886	if (r == ARCHIVE_FATAL) {
887		archive_entry_free(entry_main);
888		return (r);
889	} else if (r != ARCHIVE_OK) {
890		r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL);
891		if (r == ARCHIVE_FATAL) {
892			archive_entry_free(entry_main);
893			return (r);
894		}
895		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
896		    "Can't translate gname '%s' to %s", gname,
897		    archive_string_conversion_charset_name(sconv));
898		ret = ARCHIVE_WARN;
899		sconv = NULL;/* The header charset switches to binary mode. */
900	}
901	linkpath = hardlink;
902	linkpath_length = hardlink_length;
903	if (linkpath == NULL) {
904		r = get_entry_symlink(a, entry_main, &linkpath,
905		    &linkpath_length, sconv);
906		if (r == ARCHIVE_FATAL) {
907			archive_entry_free(entry_main);
908			return (r);
909		} else if (r != ARCHIVE_OK) {
910			r = get_entry_symlink(a, entry_main, &linkpath,
911			    &linkpath_length, NULL);
912			if (r == ARCHIVE_FATAL) {
913				archive_entry_free(entry_main);
914				return (r);
915			}
916			archive_set_error(&a->archive,
917			    ARCHIVE_ERRNO_FILE_FORMAT,
918			    "Can't translate linkname '%s' to %s", linkpath,
919			    archive_string_conversion_charset_name(sconv));
920			ret = ARCHIVE_WARN;
921			sconv = NULL;
922		}
923	}
924
925	/* If any string conversions failed, get all attributes
926	 * in binary-mode. */
927	if (sconv == NULL && !pax->opt_binary) {
928		if (hardlink != NULL) {
929			r = get_entry_hardlink(a, entry_main, &hardlink,
930			    &hardlink_length, NULL);
931			if (r == ARCHIVE_FATAL) {
932				archive_entry_free(entry_main);
933				return (r);
934			}
935			linkpath = hardlink;
936			linkpath_length = hardlink_length;
937		}
938		r = get_entry_pathname(a, entry_main, &path,
939		    &path_length, NULL);
940		if (r == ARCHIVE_FATAL) {
941			archive_entry_free(entry_main);
942			return (r);
943		}
944		r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL);
945		if (r == ARCHIVE_FATAL) {
946			archive_entry_free(entry_main);
947			return (r);
948		}
949		r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL);
950		if (r == ARCHIVE_FATAL) {
951			archive_entry_free(entry_main);
952			return (r);
953		}
954	}
955
956	/* Store the header encoding first, to be nice to readers. */
957	if (sconv == NULL)
958		add_pax_attr(&(pax->pax_header), "hdrcharset", "BINARY");
959
960
961	/*
962	 * If name is too long, or has non-ASCII characters, add
963	 * 'path' to pax extended attrs.  (Note that an unconvertible
964	 * name must have non-ASCII characters.)
965	 */
966	if (has_non_ASCII(path)) {
967		/* We have non-ASCII characters. */
968		add_pax_attr(&(pax->pax_header), "path", path);
969		archive_entry_set_pathname(entry_main,
970		    build_ustar_entry_name(ustar_entry_name,
971			path, path_length, NULL));
972		need_extension = 1;
973	} else {
974		/* We have an all-ASCII path; we'd like to just store
975		 * it in the ustar header if it will fit.  Yes, this
976		 * duplicates some of the logic in
977		 * archive_write_set_format_ustar.c
978		 */
979		if (path_length <= 100) {
980			/* Fits in the old 100-char tar name field. */
981		} else {
982			/* Find largest suffix that will fit. */
983			/* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
984			suffix = strchr(path + path_length - 100 - 1, '/');
985			/* Don't attempt an empty prefix. */
986			if (suffix == path)
987				suffix = strchr(suffix + 1, '/');
988			/* We can put it in the ustar header if it's
989			 * all ASCII and it's either <= 100 characters
990			 * or can be split at a '/' into a prefix <=
991			 * 155 chars and a suffix <= 100 chars.  (Note
992			 * the strchr() above will return NULL exactly
993			 * when the path can't be split.)
994			 */
995			if (suffix == NULL       /* Suffix > 100 chars. */
996			    || suffix[1] == '\0'    /* empty suffix */
997			    || suffix - path > 155)  /* Prefix > 155 chars */
998			{
999				add_pax_attr(&(pax->pax_header), "path", path);
1000				archive_entry_set_pathname(entry_main,
1001				    build_ustar_entry_name(ustar_entry_name,
1002					path, path_length, NULL));
1003				need_extension = 1;
1004			}
1005		}
1006	}
1007
1008	if (linkpath != NULL) {
1009		/* If link name is too long or has non-ASCII characters, add
1010		 * 'linkpath' to pax extended attrs. */
1011		if (linkpath_length > 100 || has_non_ASCII(linkpath)) {
1012			add_pax_attr(&(pax->pax_header), "linkpath", linkpath);
1013			if (linkpath_length > 100) {
1014				if (hardlink != NULL)
1015					archive_entry_set_hardlink(entry_main,
1016					    "././@LongHardLink");
1017				else
1018					archive_entry_set_symlink(entry_main,
1019					    "././@LongSymLink");
1020			}
1021			need_extension = 1;
1022		}
1023	}
1024	/* Save a pathname since it will be renamed if `entry_main` has
1025	 * sparse blocks. */
1026	archive_string_init(&entry_name);
1027	archive_strcpy(&entry_name, archive_entry_pathname(entry_main));
1028
1029	/* If file size is too large, add 'size' to pax extended attrs. */
1030	if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) {
1031		add_pax_attr_int(&(pax->pax_header), "size",
1032		    archive_entry_size(entry_main));
1033		need_extension = 1;
1034	}
1035
1036	/* If numeric GID is too large, add 'gid' to pax extended attrs. */
1037	if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) {
1038		add_pax_attr_int(&(pax->pax_header), "gid",
1039		    archive_entry_gid(entry_main));
1040		need_extension = 1;
1041	}
1042
1043	/* If group name is too large or has non-ASCII characters, add
1044	 * 'gname' to pax extended attrs. */
1045	if (gname != NULL) {
1046		if (gname_length > 31 || has_non_ASCII(gname)) {
1047			add_pax_attr(&(pax->pax_header), "gname", gname);
1048			need_extension = 1;
1049		}
1050	}
1051
1052	/* If numeric UID is too large, add 'uid' to pax extended attrs. */
1053	if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) {
1054		add_pax_attr_int(&(pax->pax_header), "uid",
1055		    archive_entry_uid(entry_main));
1056		need_extension = 1;
1057	}
1058
1059	/* Add 'uname' to pax extended attrs if necessary. */
1060	if (uname != NULL) {
1061		if (uname_length > 31 || has_non_ASCII(uname)) {
1062			add_pax_attr(&(pax->pax_header), "uname", uname);
1063			need_extension = 1;
1064		}
1065	}
1066
1067	/*
1068	 * POSIX/SUSv3 doesn't provide a standard key for large device
1069	 * numbers.  I use the same keys here that Joerg Schilling
1070	 * used for 'star.'  (Which, somewhat confusingly, are called
1071	 * "devXXX" even though they code "rdev" values.)  No doubt,
1072	 * other implementations use other keys.  Note that there's no
1073	 * reason we can't write the same information into a number of
1074	 * different keys.
1075	 *
1076	 * Of course, this is only needed for block or char device entries.
1077	 */
1078	if (archive_entry_filetype(entry_main) == AE_IFBLK
1079	    || archive_entry_filetype(entry_main) == AE_IFCHR) {
1080		/*
1081		 * If rdevmajor is too large, add 'SCHILY.devmajor' to
1082		 * extended attributes.
1083		 */
1084		int rdevmajor, rdevminor;
1085		rdevmajor = archive_entry_rdevmajor(entry_main);
1086		rdevminor = archive_entry_rdevminor(entry_main);
1087		if (rdevmajor >= (1 << 18)) {
1088			add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor",
1089			    rdevmajor);
1090			/*
1091			 * Non-strict formatting below means we don't
1092			 * have to truncate here.  Not truncating improves
1093			 * the chance that some more modern tar archivers
1094			 * (such as GNU tar 1.13) can restore the full
1095			 * value even if they don't understand the pax
1096			 * extended attributes.  See my rant below about
1097			 * file size fields for additional details.
1098			 */
1099			/* archive_entry_set_rdevmajor(entry_main,
1100			   rdevmajor & ((1 << 18) - 1)); */
1101			need_extension = 1;
1102		}
1103
1104		/*
1105		 * If devminor is too large, add 'SCHILY.devminor' to
1106		 * extended attributes.
1107		 */
1108		if (rdevminor >= (1 << 18)) {
1109			add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor",
1110			    rdevminor);
1111			/* Truncation is not necessary here, either. */
1112			/* archive_entry_set_rdevminor(entry_main,
1113			   rdevminor & ((1 << 18) - 1)); */
1114			need_extension = 1;
1115		}
1116	}
1117
1118	/*
1119	 * Technically, the mtime field in the ustar header can
1120	 * support 33 bits, but many platforms use signed 32-bit time
1121	 * values.  The cutoff of 0x7fffffff here is a compromise.
1122	 * Yes, this check is duplicated just below; this helps to
1123	 * avoid writing an mtime attribute just to handle a
1124	 * high-resolution timestamp in "restricted pax" mode.
1125	 */
1126	if (!need_extension &&
1127	    ((archive_entry_mtime(entry_main) < 0)
1128		|| (archive_entry_mtime(entry_main) >= 0x7fffffff)))
1129		need_extension = 1;
1130
1131	/* I use a star-compatible file flag attribute. */
1132	p = archive_entry_fflags_text(entry_main);
1133	if (!need_extension && p != NULL  &&  *p != '\0')
1134		need_extension = 1;
1135
1136	/* If there are extended attributes, we need an extension */
1137	if (!need_extension && archive_entry_xattr_count(entry_original) > 0)
1138		need_extension = 1;
1139
1140	/* If there are sparse info, we need an extension */
1141	if (!need_extension && sparse_count > 0)
1142		need_extension = 1;
1143
1144	acl_types = archive_entry_acl_types(entry_original);
1145
1146	/* If there are any ACL entries, we need an extension */
1147	if (!need_extension && acl_types != 0)
1148		need_extension = 1;
1149
1150	/* If the symlink type is defined, we need an extension */
1151	if (!need_extension && archive_entry_symlink_type(entry_main) > 0)
1152		need_extension = 1;
1153
1154	/*
1155	 * Libarchive used to include these in extended headers for
1156	 * restricted pax format, but that confused people who
1157	 * expected ustar-like time semantics.  So now we only include
1158	 * them in full pax format.
1159	 */
1160	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED) {
1161		if (archive_entry_ctime(entry_main) != 0  ||
1162		    archive_entry_ctime_nsec(entry_main) != 0)
1163			add_pax_attr_time(&(pax->pax_header), "ctime",
1164			    archive_entry_ctime(entry_main),
1165			    archive_entry_ctime_nsec(entry_main));
1166
1167		if (archive_entry_atime(entry_main) != 0 ||
1168		    archive_entry_atime_nsec(entry_main) != 0)
1169			add_pax_attr_time(&(pax->pax_header), "atime",
1170			    archive_entry_atime(entry_main),
1171			    archive_entry_atime_nsec(entry_main));
1172
1173		/* Store birth/creationtime only if it's earlier than mtime */
1174		if (archive_entry_birthtime_is_set(entry_main) &&
1175		    archive_entry_birthtime(entry_main)
1176		    < archive_entry_mtime(entry_main))
1177			add_pax_attr_time(&(pax->pax_header),
1178			    "LIBARCHIVE.creationtime",
1179			    archive_entry_birthtime(entry_main),
1180			    archive_entry_birthtime_nsec(entry_main));
1181	}
1182
1183	/*
1184	 * The following items are handled differently in "pax
1185	 * restricted" format.  In particular, in "pax restricted"
1186	 * format they won't be added unless need_extension is
1187	 * already set (we're already generating an extended header, so
1188	 * may as well include these).
1189	 */
1190	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED ||
1191	    need_extension) {
1192		if (archive_entry_mtime(entry_main) < 0  ||
1193		    archive_entry_mtime(entry_main) >= 0x7fffffff  ||
1194		    archive_entry_mtime_nsec(entry_main) != 0)
1195			add_pax_attr_time(&(pax->pax_header), "mtime",
1196			    archive_entry_mtime(entry_main),
1197			    archive_entry_mtime_nsec(entry_main));
1198
1199		/* I use a star-compatible file flag attribute. */
1200		p = archive_entry_fflags_text(entry_main);
1201		if (p != NULL  &&  *p != '\0')
1202			add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p);
1203
1204		/* I use star-compatible ACL attributes. */
1205		if ((acl_types & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) {
1206			ret = add_pax_acl(a, entry_original, pax,
1207			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1208			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA |
1209			    ARCHIVE_ENTRY_ACL_STYLE_COMPACT);
1210			if (ret == ARCHIVE_FATAL) {
1211				archive_entry_free(entry_main);
1212				archive_string_free(&entry_name);
1213				return (ARCHIVE_FATAL);
1214			}
1215		}
1216		if (acl_types & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) {
1217			ret = add_pax_acl(a, entry_original, pax,
1218			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS |
1219			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1220			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA);
1221			if (ret == ARCHIVE_FATAL) {
1222				archive_entry_free(entry_main);
1223				archive_string_free(&entry_name);
1224				return (ARCHIVE_FATAL);
1225			}
1226		}
1227		if (acl_types & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) {
1228			ret = add_pax_acl(a, entry_original, pax,
1229			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT |
1230			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1231			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA);
1232			if (ret == ARCHIVE_FATAL) {
1233				archive_entry_free(entry_main);
1234				archive_string_free(&entry_name);
1235				return (ARCHIVE_FATAL);
1236			}
1237		}
1238
1239		/* We use GNU-tar-compatible sparse attributes. */
1240		if (sparse_count > 0) {
1241			int64_t soffset, slength;
1242
1243			add_pax_attr_int(&(pax->pax_header),
1244			    "GNU.sparse.major", 1);
1245			add_pax_attr_int(&(pax->pax_header),
1246			    "GNU.sparse.minor", 0);
1247			/*
1248			 * Make sure to store the original path, since
1249			 * truncation to ustar limit happened already.
1250			 */
1251			add_pax_attr(&(pax->pax_header),
1252			    "GNU.sparse.name", path);
1253			add_pax_attr_int(&(pax->pax_header),
1254			    "GNU.sparse.realsize",
1255			    archive_entry_size(entry_main));
1256
1257			/* Rename the file name which will be used for
1258			 * ustar header to a special name, which GNU
1259			 * PAX Format 1.0 requires */
1260			archive_entry_set_pathname(entry_main,
1261			    build_gnu_sparse_name(gnu_sparse_name,
1262			        entry_name.s));
1263
1264			/*
1265			 * - Make a sparse map, which will precede a file data.
1266			 * - Get the total size of available data of sparse.
1267			 */
1268			archive_string_sprintf(&(pax->sparse_map), "%d\n",
1269			    sparse_count);
1270			while (archive_entry_sparse_next(entry_main,
1271			    &soffset, &slength) == ARCHIVE_OK) {
1272				archive_string_sprintf(&(pax->sparse_map),
1273				    "%jd\n%jd\n",
1274				    (intmax_t)soffset,
1275				    (intmax_t)slength);
1276				sparse_total += slength;
1277				if (sparse_list_add(pax, soffset, slength)
1278				    != ARCHIVE_OK) {
1279					archive_set_error(&a->archive,
1280					    ENOMEM,
1281					    "Can't allocate memory");
1282					archive_entry_free(entry_main);
1283					archive_string_free(&entry_name);
1284					return (ARCHIVE_FATAL);
1285				}
1286			}
1287		}
1288
1289		/* Store extended attributes */
1290		if (archive_write_pax_header_xattrs(a, pax, entry_original)
1291		    == ARCHIVE_FATAL) {
1292			archive_entry_free(entry_main);
1293			archive_string_free(&entry_name);
1294			return (ARCHIVE_FATAL);
1295		}
1296
1297		/* Store extended symlink information */
1298		if (archive_entry_symlink_type(entry_main) ==
1299		    AE_SYMLINK_TYPE_FILE) {
1300			add_pax_attr(&(pax->pax_header),
1301			    "LIBARCHIVE.symlinktype", "file");
1302		} else if (archive_entry_symlink_type(entry_main) ==
1303		    AE_SYMLINK_TYPE_DIRECTORY) {
1304			add_pax_attr(&(pax->pax_header),
1305			    "LIBARCHIVE.symlinktype", "dir");
1306		}
1307	}
1308
1309	/* Only regular files have data. */
1310	if (archive_entry_filetype(entry_main) != AE_IFREG)
1311		archive_entry_set_size(entry_main, 0);
1312
1313	/*
1314	 * Pax-restricted does not store data for hardlinks, in order
1315	 * to improve compatibility with ustar.
1316	 */
1317	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE &&
1318	    hardlink != NULL)
1319		archive_entry_set_size(entry_main, 0);
1320
1321	/*
1322	 * XXX Full pax interchange format does permit a hardlink
1323	 * entry to have data associated with it.  I'm not supporting
1324	 * that here because the client expects me to tell them whether
1325	 * or not this format expects data for hardlinks.  If I
1326	 * don't check here, then every pax archive will end up with
1327	 * duplicated data for hardlinks.  Someday, there may be
1328	 * need to select this behavior, in which case the following
1329	 * will need to be revisited. XXX
1330	 */
1331	if (hardlink != NULL)
1332		archive_entry_set_size(entry_main, 0);
1333
1334	/* Save a real file size. */
1335	real_size = archive_entry_size(entry_main);
1336	/*
1337	 * Overwrite a file size by the total size of sparse blocks and
1338	 * the size of sparse map info. That file size is the length of
1339	 * the data, which we will exactly store into an archive file.
1340	 */
1341	if (archive_strlen(&(pax->sparse_map))) {
1342		size_t mapsize = archive_strlen(&(pax->sparse_map));
1343		pax->sparse_map_padding = 0x1ff & (-(ssize_t)mapsize);
1344		archive_entry_set_size(entry_main,
1345		    mapsize + pax->sparse_map_padding + sparse_total);
1346	}
1347
1348	/* Format 'ustar' header for main entry.
1349	 *
1350	 * The trouble with file size: If the reader can't understand
1351	 * the file size, they may not be able to locate the next
1352	 * entry and the rest of the archive is toast.  Pax-compliant
1353	 * readers are supposed to ignore the file size in the main
1354	 * header, so the question becomes how to maximize portability
1355	 * for readers that don't support pax attribute extensions.
1356	 * For maximum compatibility, I permit numeric extensions in
1357	 * the main header so that the file size stored will always be
1358	 * correct, even if it's in a format that only some
1359	 * implementations understand.  The technique used here is:
1360	 *
1361	 *  a) If possible, follow the standard exactly.  This handles
1362	 *  files up to 8 gigabytes minus 1.
1363	 *
1364	 *  b) If that fails, try octal but omit the field terminator.
1365	 *  That handles files up to 64 gigabytes minus 1.
1366	 *
1367	 *  c) Otherwise, use base-256 extensions.  That handles files
1368	 *  up to 2^63 in this implementation, with the potential to
1369	 *  go up to 2^94.  That should hold us for a while. ;-)
1370	 *
1371	 * The non-strict formatter uses similar logic for other
1372	 * numeric fields, though they're less critical.
1373	 */
1374	if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0,
1375	    NULL) == ARCHIVE_FATAL) {
1376		archive_entry_free(entry_main);
1377		archive_string_free(&entry_name);
1378		return (ARCHIVE_FATAL);
1379	}
1380
1381	/* If we built any extended attributes, write that entry first. */
1382	if (archive_strlen(&(pax->pax_header)) > 0) {
1383		struct archive_entry *pax_attr_entry;
1384		time_t s;
1385		int64_t uid, gid;
1386		int mode;
1387
1388		pax_attr_entry = archive_entry_new2(&a->archive);
1389		p = entry_name.s;
1390		archive_entry_set_pathname(pax_attr_entry,
1391		    build_pax_attribute_name(pax_entry_name, p));
1392		archive_entry_set_size(pax_attr_entry,
1393		    archive_strlen(&(pax->pax_header)));
1394		/* Copy uid/gid (but clip to ustar limits). */
1395		uid = archive_entry_uid(entry_main);
1396		if (uid >= 1 << 18)
1397			uid = (1 << 18) - 1;
1398		archive_entry_set_uid(pax_attr_entry, uid);
1399		gid = archive_entry_gid(entry_main);
1400		if (gid >= 1 << 18)
1401			gid = (1 << 18) - 1;
1402		archive_entry_set_gid(pax_attr_entry, gid);
1403		/* Copy mode over (but not setuid/setgid bits) */
1404		mode = archive_entry_mode(entry_main);
1405#ifdef S_ISUID
1406		mode &= ~S_ISUID;
1407#endif
1408#ifdef S_ISGID
1409		mode &= ~S_ISGID;
1410#endif
1411#ifdef S_ISVTX
1412		mode &= ~S_ISVTX;
1413#endif
1414		archive_entry_set_mode(pax_attr_entry, mode);
1415
1416		/* Copy uname/gname. */
1417		archive_entry_set_uname(pax_attr_entry,
1418		    archive_entry_uname(entry_main));
1419		archive_entry_set_gname(pax_attr_entry,
1420		    archive_entry_gname(entry_main));
1421
1422		/* Copy mtime, but clip to ustar limits. */
1423		s = archive_entry_mtime(entry_main);
1424		if (s < 0) { s = 0; }
1425		if (s >= 0x7fffffff) { s = 0x7fffffff; }
1426		archive_entry_set_mtime(pax_attr_entry, s, 0);
1427
1428		/* Standard ustar doesn't support atime. */
1429		archive_entry_set_atime(pax_attr_entry, 0, 0);
1430
1431		/* Standard ustar doesn't support ctime. */
1432		archive_entry_set_ctime(pax_attr_entry, 0, 0);
1433
1434		r = __archive_write_format_header_ustar(a, paxbuff,
1435		    pax_attr_entry, 'x', 1, NULL);
1436
1437		archive_entry_free(pax_attr_entry);
1438
1439		/* Note that the 'x' header shouldn't ever fail to format */
1440		if (r < ARCHIVE_WARN) {
1441			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1442			    "archive_write_pax_header: "
1443			    "'x' header failed?!  This can't happen.\n");
1444			archive_entry_free(entry_main);
1445			archive_string_free(&entry_name);
1446			return (ARCHIVE_FATAL);
1447		} else if (r < ret)
1448			ret = r;
1449		r = __archive_write_output(a, paxbuff, 512);
1450		if (r != ARCHIVE_OK) {
1451			sparse_list_clear(pax);
1452			pax->entry_bytes_remaining = 0;
1453			pax->entry_padding = 0;
1454			archive_entry_free(entry_main);
1455			archive_string_free(&entry_name);
1456			return (ARCHIVE_FATAL);
1457		}
1458
1459		pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header));
1460		pax->entry_padding =
1461		    0x1ff & (-(int64_t)pax->entry_bytes_remaining);
1462
1463		r = __archive_write_output(a, pax->pax_header.s,
1464		    archive_strlen(&(pax->pax_header)));
1465		if (r != ARCHIVE_OK) {
1466			/* If a write fails, we're pretty much toast. */
1467			archive_entry_free(entry_main);
1468			archive_string_free(&entry_name);
1469			return (ARCHIVE_FATAL);
1470		}
1471		/* Pad out the end of the entry. */
1472		r = __archive_write_nulls(a, (size_t)pax->entry_padding);
1473		if (r != ARCHIVE_OK) {
1474			/* If a write fails, we're pretty much toast. */
1475			archive_entry_free(entry_main);
1476			archive_string_free(&entry_name);
1477			return (ARCHIVE_FATAL);
1478		}
1479		pax->entry_bytes_remaining = pax->entry_padding = 0;
1480	}
1481
1482	/* Write the header for main entry. */
1483	r = __archive_write_output(a, ustarbuff, 512);
1484	if (r != ARCHIVE_OK) {
1485		archive_entry_free(entry_main);
1486		archive_string_free(&entry_name);
1487		return (r);
1488	}
1489
1490	/*
1491	 * Inform the client of the on-disk size we're using, so
1492	 * they can avoid unnecessarily writing a body for something
1493	 * that we're just going to ignore.
1494	 */
1495	archive_entry_set_size(entry_original, real_size);
1496	if (pax->sparse_list == NULL && real_size > 0) {
1497		/* This is not a sparse file but we handle its data as
1498		 * a sparse block. */
1499		sparse_list_add(pax, 0, real_size);
1500		sparse_total = real_size;
1501	}
1502	pax->entry_padding = 0x1ff & (-(int64_t)sparse_total);
1503	archive_entry_free(entry_main);
1504	archive_string_free(&entry_name);
1505
1506	return (ret);
1507}
1508
/*
 * We need a valid name for the regular 'ustar' entry.  This routine
 * tries to hack something more-or-less reasonable.
 *
 * The approach here tries to preserve leading dir names.  We do so by
 * working with four sections:
 *   1) "prefix" directory names,
 *   2) "suffix" directory names,
 *   3) inserted dir name (optional),
 *   4) filename.
 *
 * These sections must satisfy the following requirements:
 *   * Parts 1 & 2 together form an initial portion of the dir name.
 *   * Part 3 is specified by the caller.  (It should not contain a leading
 *     or trailing '/'.)
 *   * Part 4 forms an initial portion of the base filename.
 *   * The filename must be <= 99 chars to fit the ustar 'name' field.
 *   * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name'
 *     field.
 *   * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
 *   * If the original name ends in a '/', the new name must also end in a '/'
 *   * Trailing '/.' sequences may be stripped.
 *
 * Note: Recall that the ustar format does not store the '/' separating
 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
 *
 * Arguments:
 *   dest       - output buffer; receives the NUL-terminated result.  No
 *                bounds checking is done here, so the caller must supply
 *                a buffer large enough for prefix plus name.
 *   src        - original pathname; only the first src_length bytes are
 *                examined.
 *   src_length - number of bytes of src to use.
 *   insert     - optional directory element placed just before the
 *                filename, or NULL for none.  It must be short enough
 *                that the insert plus two '/' still fits in the 99-char
 *                name budget (the length arithmetic below is unsigned).
 *
 * Returns dest.
 *
 * If nothing is left of the name once trailing '/' and '/.' have been
 * stripped (src_length == 0, or a name made only of slashes), the
 * filename part is treated as empty; we never step back to the byte in
 * front of src.
 */
static char *
build_ustar_entry_name(char *dest, const char *src, size_t src_length,
    const char *insert)
{
	const char *prefix, *prefix_end;
	const char *suffix, *suffix_end;
	const char *filename, *filename_end;
	char *p;
	int need_slash = 0; /* Was there a trailing slash? */
	size_t suffix_length = 99;
	size_t insert_length;

	/* Length of additional dir element to be added. */
	if (insert == NULL)
		insert_length = 0;
	else
		/* +2 here allows for '/' before and after the insert. */
		insert_length = strlen(insert) + 2;

	/* Step 0: Quick bailout in a common case. */
	if (src_length < 100 && insert == NULL) {
		strncpy(dest, src, src_length);
		dest[src_length] = '\0';
		return (dest);
	}

	/* Step 1: Locate filename and enforce the length restriction. */
	filename_end = src + src_length;
	/* Remove trailing '/' chars and '/.' pairs. */
	for (;;) {
		if (filename_end > src && filename_end[-1] == '/') {
			filename_end --;
			need_slash = 1; /* Remember to restore trailing '/'. */
			continue;
		}
		if (filename_end > src + 1 && filename_end[-1] == '.'
		    && filename_end[-2] == '/') {
			filename_end -= 2;
			need_slash = 1; /* "foo/." will become "foo/" */
			continue;
		}
		break;
	}
	if (need_slash)
		suffix_length--;
	/* Find start of filename. */
	if (filename_end == src) {
		/*
		 * Nothing survived the stripping above.  Use an empty
		 * filename; "filename_end - 1" would point before src.
		 */
		filename = src;
	} else {
		filename = filename_end - 1;
		while ((filename > src) && (*filename != '/'))
			filename --;
		if ((*filename == '/') && (filename < filename_end - 1))
			filename ++;
	}
	/* Adjust filename_end so that filename + insert fits in 99 chars. */
	suffix_length -= insert_length;
	if ((size_t)(filename_end - filename) > suffix_length)
		filename_end = filename + suffix_length;
	/* Calculate max size for "suffix" section (#2 above). */
	suffix_length -= filename_end - filename;

	/* Step 2: Locate the "prefix" section of the dirname, including
	 * trailing '/'. */
	prefix = src;
	/* Compare lengths rather than forming a pointer that may lie
	 * beyond the end of src. */
	if ((size_t)(filename - prefix) > 155)
		prefix_end = prefix + 155;
	else
		prefix_end = filename;
	while (prefix_end > prefix && *prefix_end != '/')
		prefix_end--;
	if ((prefix_end < filename) && (*prefix_end == '/'))
		prefix_end++;

	/* Step 3: Locate the "suffix" section of the dirname,
	 * including trailing '/'. */
	suffix = prefix_end;
	/* Enforce limit, again without leaving the src buffer. */
	if ((size_t)(filename - suffix) > suffix_length)
		suffix_end = suffix + suffix_length;
	else
		suffix_end = filename;
	while (suffix_end > suffix && *suffix_end != '/')
		suffix_end--;
	if ((suffix_end < filename) && (*suffix_end == '/'))
		suffix_end++;

	/* Step 4: Build the new name. */
	/* The OpenBSD strlcpy function is safer, but less portable. */
	/* Rather than maintain two versions, just use the strncpy version. */
	p = dest;
	if (prefix_end > prefix) {
		strncpy(p, prefix, prefix_end - prefix);
		p += prefix_end - prefix;
	}
	if (suffix_end > suffix) {
		strncpy(p, suffix, suffix_end - suffix);
		p += suffix_end - suffix;
	}
	if (insert != NULL) {
		/* Note: assume insert does not have leading or trailing '/' */
		strcpy(p, insert);
		p += strlen(insert);
		*p++ = '/';
	}
	strncpy(p, filename, filename_end - filename);
	p += filename_end - filename;
	if (need_slash)
		*p++ = '/';
	*p = '\0';

	return (dest);
}
1642
/*
 * Build the name stored in the ustar header that carries the pax
 * extended attributes.
 *
 * SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename' where 'pid' is
 * the PID of the archiving process.  Unfortunately, that makes testing
 * a pain since the output varies for each run, so this sticks with the
 * simpler 'dirname'/PaxHeader/'filename' for now.  (Someday this may
 * become settable; then the SUS recommendation can be the default and
 * test harnesses can override it to get predictable results.)
 *
 * Joerg Schilling has argued that this is unnecessary because, in
 * practice, if the pax extended attributes get extracted as regular
 * files, no one is going to bother reading those attributes to
 * manually restore them.  Based on this, 'star' uses
 * /tmp/PaxHeader/'basename' as the ustar header name.  This is a
 * tempting argument, in part because it's simpler than the SUSv3
 * recommendation, but the original author was not entirely convinced,
 * and "/tmp" is a Unix-ism.
 *
 * The work is delegated to build_ustar_entry_name(); this routine only
 * supplies the extra path element and handles a few pathological
 * cases.
 *
 * dest receives the result and is also the return value; src is the
 * entry's pathname and may be NULL or empty.
 */
static char *
build_pax_attribute_name(char *dest, const char *src)
{
	char dirname[64];
	const char *end;

	/* No name at all. */
	if (src == NULL || src[0] == '\0') {
		strcpy(dest, "PaxHeader/blank");
		return (dest);
	}

	/*
	 * Walk back from the end, dropping one character at a time for
	 * every trailing '/' and for the '.' of every trailing "/.".
	 */
	end = src + strlen(src);
	while (end > src) {
		if (end[-1] == '/')
			end--;
		else if (end > src + 1 && end[-1] == '.' && end[-2] == '/')
			end--;
		else
			break;
	}

	/* Everything was pruned away: "/", "/.", "/./.", "/.//./." ... */
	if (end == src) {
		strcpy(dest, "/PaxHeader/rootdir");
		return (dest);
	}

	/* What remains is a lone ".". */
	if (end == src + 1 && src[0] == '.') {
		strcpy(dest, "PaxHeader/currentdir");
		return (dest);
	}

	/*
	 * TODO: Keep this string in the 'pax' structure so it is not
	 * rebuilt on every call; that would also let clients override it.
	 */
#if HAVE_GETPID && 0  /* Disabled for now; see the comment above. */
	sprintf(dirname, "PaxHeader.%d", getpid());
#else
	/* Without a pid, use the bare directory name. */
	strcpy(dirname, "PaxHeader");
#endif

	/* General case: ustar-compatible name with the extra element. */
	return (build_ustar_entry_name(dest, src, end - src, dirname));
}
1725
/*
 * GNU PAX Format 1.0 requires a special name for sparse entries, of
 * the form:
 *   <dir>/GNUSparseFile.<pid>/<original file name>
 *
 * Reproducible archives matter more than the real pid, so 0 is used.
 *
 * This is only used for sparse files, which are regular files.
 *
 * dest receives the result and is also the return value; src is the
 * entry's pathname and may be NULL or empty.
 */
static char *
build_gnu_sparse_name(char *dest, const char *src)
{
	const char *end;

	/* No name at all. */
	if (src == NULL || src[0] == '\0') {
		strcpy(dest, "GNUSparseFile/blank");
		return (dest);
	}

	/*
	 * Walk back from the end, dropping one character at a time for
	 * every trailing '/' and for the '.' of every trailing "/.".
	 */
	end = src + strlen(src);
	while (end > src) {
		if (end[-1] == '/')
			end--;
		else if (end > src + 1 && end[-1] == '.' && end[-2] == '/')
			end--;
		else
			break;
	}

	/* General case: ustar-compatible name with "GNUSparseFile.0"
	 * inserted ahead of the file name.  Note that the pruned length
	 * (possibly zero) is passed through as-is. */
	return (build_ustar_entry_name(dest, src, end - src,
	    "GNUSparseFile.0"));
}
1769
/*
 * Close handler: emit the end-of-archive marker, which is two
 * 512-byte blocks of NUL bytes.  Returns whatever
 * __archive_write_nulls() reports.
 */
static int
archive_write_pax_close(struct archive_write *a)
{
	int status;

	status = __archive_write_nulls(a, 512 * 2);
	return (status);
}
1776
1777static int
1778archive_write_pax_free(struct archive_write *a)
1779{
1780	struct pax *pax;
1781
1782	pax = (struct pax *)a->format_data;
1783	if (pax == NULL)
1784		return (ARCHIVE_OK);
1785
1786	archive_string_free(&pax->pax_header);
1787	archive_string_free(&pax->sparse_map);
1788	archive_string_free(&pax->l_url_encoded_name);
1789	sparse_list_clear(pax);
1790	free(pax);
1791	a->format_data = NULL;
1792	return (ARCHIVE_OK);
1793}
1794
1795static int
1796archive_write_pax_finish_entry(struct archive_write *a)
1797{
1798	struct pax *pax;
1799	uint64_t remaining;
1800	int ret;
1801
1802	pax = (struct pax *)a->format_data;
1803	remaining = pax->entry_bytes_remaining;
1804	if (remaining == 0) {
1805		while (pax->sparse_list) {
1806			struct sparse_block *sb;
1807			if (!pax->sparse_list->is_hole)
1808				remaining += pax->sparse_list->remaining;
1809			sb = pax->sparse_list->next;
1810			free(pax->sparse_list);
1811			pax->sparse_list = sb;
1812		}
1813	}
1814	ret = __archive_write_nulls(a, (size_t)(remaining + pax->entry_padding));
1815	pax->entry_bytes_remaining = pax->entry_padding = 0;
1816	return (ret);
1817}
1818
1819static ssize_t
1820archive_write_pax_data(struct archive_write *a, const void *buff, size_t s)
1821{
1822	struct pax *pax;
1823	size_t ws;
1824	size_t total;
1825	int ret;
1826
1827	pax = (struct pax *)a->format_data;
1828
1829	/*
1830	 * According to GNU PAX format 1.0, write a sparse map
1831	 * before the body.
1832	 */
1833	if (archive_strlen(&(pax->sparse_map))) {
1834		ret = __archive_write_output(a, pax->sparse_map.s,
1835		    archive_strlen(&(pax->sparse_map)));
1836		if (ret != ARCHIVE_OK)
1837			return (ret);
1838		ret = __archive_write_nulls(a, pax->sparse_map_padding);
1839		if (ret != ARCHIVE_OK)
1840			return (ret);
1841		archive_string_empty(&(pax->sparse_map));
1842	}
1843
1844	total = 0;
1845	while (total < s) {
1846		const unsigned char *p;
1847
1848		while (pax->sparse_list != NULL &&
1849		    pax->sparse_list->remaining == 0) {
1850			struct sparse_block *sb = pax->sparse_list->next;
1851			free(pax->sparse_list);
1852			pax->sparse_list = sb;
1853		}
1854
1855		if (pax->sparse_list == NULL)
1856			return (total);
1857
1858		p = ((const unsigned char *)buff) + total;
1859		ws = s - total;
1860		if (ws > pax->sparse_list->remaining)
1861			ws = (size_t)pax->sparse_list->remaining;
1862
1863		if (pax->sparse_list->is_hole) {
1864			/* Current block is hole thus we do not write
1865			 * the body. */
1866			pax->sparse_list->remaining -= ws;
1867			total += ws;
1868			continue;
1869		}
1870
1871		ret = __archive_write_output(a, p, ws);
1872		pax->sparse_list->remaining -= ws;
1873		total += ws;
1874		if (ret != ARCHIVE_OK)
1875			return (ret);
1876	}
1877	return (total);
1878}
1879
/*
 * Report whether a string needs more than 7-bit ASCII.
 *
 * Returns 1 if _p is NULL or contains any byte with a value of 128 or
 * above, and 0 otherwise (including for the empty string).
 */
static int
has_non_ASCII(const char *_p)
{
	const unsigned char *cur;

	if (_p == NULL)
		return (1);

	for (cur = (const unsigned char *)_p; *cur != '\0'; cur++) {
		if (*cur >= 128)
			return (1);
	}
	return (0);
}
1891
/*
 * Used by extended attribute support; encodes the name
 * so that there will be no '=' characters in the result.
 *
 * Every byte that is not a printable, non-space ASCII character
 * (i.e. outside 33..126), as well as '%' and '=', is replaced by
 * '%' followed by two uppercase hex digits.  All other bytes are
 * copied unchanged.
 *
 * 'in' must be a NUL-terminated string.
 *
 * Returns a NUL-terminated string allocated with malloc() that the
 * caller must free, or NULL if the allocation fails or the encoded
 * length cannot be represented.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const unsigned char *s;
	char *d;
	size_t out_len = 0;
	char *out;

	/*
	 * Bytes are examined as unsigned char so that classification
	 * and the nibble shift below do not depend on whether plain
	 * char is signed.
	 */
	for (s = (const unsigned char *)in; *s != '\0'; s++) {
		/* Keep room for a 3-byte escape plus the trailing NUL. */
		if (out_len > (size_t)-1 - 4)
			return (NULL);
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=')
			out_len += 3;
		else
			out_len++;
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	for (s = (const unsigned char *)in, d = out; *s != '\0'; s++) {
		/* encode any non-printable ASCII character or '%' or '=' */
		if (*s < 33 || *s > 126 || *s == '%' || *s == '=') {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[(*s >> 4) & 0x0f];
			*d++ = hex[*s & 0x0f];
		} else {
			*d++ = (char)*s;
		}
	}
	*d = '\0';
	return (out);
}
1929
/*
 * Encode a sequence of bytes into a C string using base-64 encoding.
 *
 * The standard alphabet (A-Z, a-z, 0-9, '+', '/') is used.  No '='
 * padding is emitted: a trailing group of one byte yields two
 * characters and a trailing group of two bytes yields three.
 *
 * 's' points to 'len' bytes of input; it does not need to be
 * NUL-terminated and may contain any byte values.
 *
 * Returns a null-terminated C string allocated with malloc(); caller
 * is responsible for freeing the result.  Returns NULL if the
 * allocation fails or 'len' is too large for the output size to be
 * computed.
 */
static char *
base64_encode(const char *s, size_t len)
{
	static const char digits[64] =
	    { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	      'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
	      'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
	      't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
	      '8','9','+','/' };
	/*
	 * Work on unsigned bytes: shifting a negative (signed char)
	 * value left is undefined behavior.
	 */
	const unsigned char *p = (const unsigned char *)s;
	unsigned int v;
	char *d, *out;

	/* Refuse lengths for which the size computation would wrap. */
	if (len > ((size_t)-1 - 2) / 4)
		return (NULL);

	/* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
	out = (char *)malloc((len * 4 + 2) / 3 + 1);
	if (out == NULL)
		return (NULL);
	d = out;

	/* Convert each group of 3 bytes into 4 characters. */
	while (len >= 3) {
		v = ((unsigned int)p[0] << 16)
		    | ((unsigned int)p[1] << 8)
		    | (unsigned int)p[2];
		p += 3;
		len -= 3;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		*d++ = digits[v & 0x3f];
	}
	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
	switch (len) {
	case 1:
		v = (unsigned int)p[0] << 16;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		break;
	case 2:
		v = ((unsigned int)p[0] << 16)
		    | ((unsigned int)p[1] << 8);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		break;
	default:
		/* len == 0: input was a whole number of 3-byte groups. */
		break;
	}
	/* Add trailing NUL character so output is a valid C string. */
	*d = '\0';
	return (out);
}
1986
1987static void
1988sparse_list_clear(struct pax *pax)
1989{
1990	while (pax->sparse_list != NULL) {
1991		struct sparse_block *sb = pax->sparse_list;
1992		pax->sparse_list = sb->next;
1993		free(sb);
1994	}
1995	pax->sparse_tail = NULL;
1996}
1997
1998static int
1999_sparse_list_add_block(struct pax *pax, int64_t offset, int64_t length,
2000    int is_hole)
2001{
2002	struct sparse_block *sb;
2003
2004	sb = (struct sparse_block *)malloc(sizeof(*sb));
2005	if (sb == NULL)
2006		return (ARCHIVE_FATAL);
2007	sb->next = NULL;
2008	sb->is_hole = is_hole;
2009	sb->offset = offset;
2010	sb->remaining = length;
2011	if (pax->sparse_list == NULL || pax->sparse_tail == NULL)
2012		pax->sparse_list = pax->sparse_tail = sb;
2013	else {
2014		pax->sparse_tail->next = sb;
2015		pax->sparse_tail = sb;
2016	}
2017	return (ARCHIVE_OK);
2018}
2019
2020static int
2021sparse_list_add(struct pax *pax, int64_t offset, int64_t length)
2022{
2023	int64_t last_offset;
2024	int r;
2025
2026	if (pax->sparse_tail == NULL)
2027		last_offset = 0;
2028	else {
2029		last_offset = pax->sparse_tail->offset +
2030		    pax->sparse_tail->remaining;
2031	}
2032	if (last_offset < offset) {
2033		/* Add a hole block. */
2034		r = _sparse_list_add_block(pax, last_offset,
2035		    offset - last_offset, 1);
2036		if (r != ARCHIVE_OK)
2037			return (r);
2038	}
2039	/* Add data block. */
2040	return (_sparse_list_add_block(pax, offset, length, 0));
2041}
2042
2043