1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2010-2012 Michihiro NAKAJIMA
4 * Copyright (c) 2016 Martin Matuska
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29
30#ifdef HAVE_ERRNO_H
31#include <errno.h>
32#endif
33#ifdef HAVE_STDLIB_H
34#include <stdlib.h>
35#endif
36#ifdef HAVE_STRING_H
37#include <string.h>
38#endif
39
40#include "archive.h"
41#include "archive_entry.h"
42#include "archive_entry_locale.h"
43#include "archive_private.h"
44#include "archive_write_private.h"
45#include "archive_write_set_format_private.h"
46
/*
 * One node of the singly linked list that struct pax keeps through its
 * sparse_list / sparse_tail members.
 */
struct sparse_block {
	/* Following node in the list. */
	struct sparse_block	*next;
	/* NOTE(review): presumably non-zero when this region is a hole
	 * rather than data -- confirm against sparse_list_add(). */
	int		is_hole;
	/* NOTE(review): presumably the region's starting position in the
	 * file -- confirm against sparse_list_add(). */
	uint64_t	offset;
	/* NOTE(review): presumably the bytes of this region not yet
	 * consumed by the data writer -- confirm. */
	uint64_t	remaining;
};
53
/*
 * Writer state for the pax format.  An instance is allocated by
 * archive_write_set_format_pax() and stored in a->format_data.
 */
struct pax {
	/* NOTE(review): presumably body bytes still expected for the
	 * current entry -- the code using it is not in view; confirm. */
	uint64_t	entry_bytes_remaining;
	/* NOTE(review): presumably padding owed after the entry body --
	 * confirm. */
	uint64_t	entry_padding;
	/* Receives the charset-converted, URL-encoded xattr name in
	 * archive_write_pax_header_xattrs(). */
	struct archive_string	l_url_encoded_name;
	/* Work area the add_pax_attr*() helpers append records to; it is
	 * emptied at the start of each header. */
	struct archive_string	pax_header;
	/* Emptied together with pax_header for each header. */
	struct archive_string	sparse_map;
	/* NOTE(review): presumably padding that follows sparse_map --
	 * confirm. */
	size_t			sparse_map_padding;
	/* Head and tail of the sparse_block list; the list is reset with
	 * sparse_list_clear() for each header. */
	struct sparse_block	*sparse_list;
	struct sparse_block	*sparse_tail;
	/* Conversion object targeting UTF-8; created by the
	 * "hdrcharset=UTF-8" option or on first use by the header writer. */
	struct archive_string_conv *sconv_utf8;
	/* Set to 1 by the "hdrcharset=BINARY" option. */
	int			 opt_binary;

	/* Bitmask of the WRITE_*_XATTR values below; selects which xattr
	 * key families archive_write_pax_header_xattr() emits. */
	unsigned flags;
#define WRITE_SCHILY_XATTR       (1 << 0)
#define WRITE_LIBARCHIVE_XATTR   (1 << 1)
};
70
/*
 * Forward declarations of the file-local helpers and of the callbacks
 * that archive_write_set_format_pax() installs on the archive_write
 * object.
 */
static void		 add_pax_attr(struct archive_string *, const char *key,
			     const char *value);
static void		 add_pax_attr_binary(struct archive_string *,
			     const char *key,
			     const char *value, size_t value_len);
static void		 add_pax_attr_int(struct archive_string *,
			     const char *key, int64_t value);
static void		 add_pax_attr_time(struct archive_string *,
			     const char *key, int64_t sec,
			     unsigned long nanos);
static int		 add_pax_acl(struct archive_write *,
			    struct archive_entry *, struct pax *, int);
static ssize_t		 archive_write_pax_data(struct archive_write *,
			     const void *, size_t);
static int		 archive_write_pax_close(struct archive_write *);
static int		 archive_write_pax_free(struct archive_write *);
static int		 archive_write_pax_finish_entry(struct archive_write *);
static int		 archive_write_pax_header(struct archive_write *,
			     struct archive_entry *);
static int		 archive_write_pax_options(struct archive_write *,
			     const char *, const char *);
static char		*base64_encode(const char *src, size_t len);
static char		*build_gnu_sparse_name(char *dest, const char *src);
static char		*build_pax_attribute_name(char *dest, const char *src);
static char		*build_ustar_entry_name(char *dest, const char *src,
			     size_t src_length, const char *insert);
static char		*format_int(char *dest, int64_t);
static int		 has_non_ASCII(const char *);
static void		 sparse_list_clear(struct pax *);
static int		 sparse_list_add(struct pax *, int64_t, int64_t);
static char		*url_encode(const char *in);
static time_t		 get_ustar_max_mtime(void);
103
104/*
105 * Set output format to 'restricted pax' format.
106 *
107 * This is the same as normal 'pax', but tries to suppress
108 * the pax header whenever possible.  This is the default for
109 * bsdtar, for instance.
110 */
111int
112archive_write_set_format_pax_restricted(struct archive *_a)
113{
114	struct archive_write *a = (struct archive_write *)_a;
115	int r;
116
117	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
118	    ARCHIVE_STATE_NEW, "archive_write_set_format_pax_restricted");
119
120	r = archive_write_set_format_pax(&a->archive);
121	a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED;
122	a->archive.archive_format_name = "restricted POSIX pax interchange";
123	return (r);
124}
125
126/*
127 * Set output format to 'pax' format.
128 */
129int
130archive_write_set_format_pax(struct archive *_a)
131{
132	struct archive_write *a = (struct archive_write *)_a;
133	struct pax *pax;
134
135	archive_check_magic(_a, ARCHIVE_WRITE_MAGIC,
136	    ARCHIVE_STATE_NEW, "archive_write_set_format_pax");
137
138	if (a->format_free != NULL)
139		(a->format_free)(a);
140
141	pax = (struct pax *)calloc(1, sizeof(*pax));
142	if (pax == NULL) {
143		archive_set_error(&a->archive, ENOMEM,
144		    "Can't allocate pax data");
145		return (ARCHIVE_FATAL);
146	}
147	pax->flags = WRITE_LIBARCHIVE_XATTR | WRITE_SCHILY_XATTR;
148
149	a->format_data = pax;
150	a->format_name = "pax";
151	a->format_options = archive_write_pax_options;
152	a->format_write_header = archive_write_pax_header;
153	a->format_write_data = archive_write_pax_data;
154	a->format_close = archive_write_pax_close;
155	a->format_free = archive_write_pax_free;
156	a->format_finish_entry = archive_write_pax_finish_entry;
157	a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
158	a->archive.archive_format_name = "POSIX pax interchange";
159	return (ARCHIVE_OK);
160}
161
162static int
163archive_write_pax_options(struct archive_write *a, const char *key,
164    const char *val)
165{
166	struct pax *pax = (struct pax *)a->format_data;
167	int ret = ARCHIVE_FAILED;
168
169	if (strcmp(key, "hdrcharset")  == 0) {
170		/*
171		 * The character-set we can use are defined in
172		 * IEEE Std 1003.1-2001
173		 */
174		if (val == NULL || val[0] == 0)
175			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
176			    "pax: hdrcharset option needs a character-set name");
177		else if (strcmp(val, "BINARY") == 0 ||
178		    strcmp(val, "binary") == 0) {
179			/*
180			 * Specify binary mode. We will not convert
181			 * filenames, uname and gname to any charsets.
182			 */
183			pax->opt_binary = 1;
184			ret = ARCHIVE_OK;
185		} else if (strcmp(val, "UTF-8") == 0) {
186			/*
187			 * Specify UTF-8 character-set to be used for
188			 * filenames. This is almost the test that
189			 * running platform supports the string conversion.
190			 * Especially libarchive_test needs this trick for
191			 * its test.
192			 */
193			pax->sconv_utf8 = archive_string_conversion_to_charset(
194			    &(a->archive), "UTF-8", 0);
195			if (pax->sconv_utf8 == NULL)
196				ret = ARCHIVE_FATAL;
197			else
198				ret = ARCHIVE_OK;
199		} else
200			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
201			    "pax: invalid charset name");
202		return (ret);
203	} else if (strcmp(key, "xattrheader") == 0) {
204		if (val == NULL || val[0] == 0) {
205			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
206			    "pax: xattrheader requires a value");
207		} else if (strcmp(val, "ALL") == 0 ||
208		    strcmp(val, "all") == 0) {
209			pax->flags |= WRITE_LIBARCHIVE_XATTR | WRITE_SCHILY_XATTR;
210			ret = ARCHIVE_OK;
211		} else if (strcmp(val, "SCHILY") == 0 ||
212		    strcmp(val, "schily") == 0) {
213			pax->flags |= WRITE_SCHILY_XATTR;
214			pax->flags &= ~WRITE_LIBARCHIVE_XATTR;
215			ret = ARCHIVE_OK;
216		} else if (strcmp(val, "LIBARCHIVE") == 0 ||
217		    strcmp(val, "libarchive") == 0) {
218			pax->flags |= WRITE_LIBARCHIVE_XATTR;
219			pax->flags &= ~WRITE_SCHILY_XATTR;
220			ret = ARCHIVE_OK;
221		} else
222			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
223			    "pax: invalid xattr header name");
224		return (ret);
225	}
226
227	/* Note: The "warn" return is just to inform the options
228	 * supervisor that we didn't handle it.  It will generate
229	 * a suitable error if no one used this option. */
230	return (ARCHIVE_WARN);
231}
232
/*
 * Append a time attribute to 'as' in the form "<sec>[.<fraction>]".
 * The fraction is printed without trailing zeros and is left out
 * entirely when 'nanos' is zero.
 *
 * Note: 'nanos' is assumed to carry the same sign as 'sec', so
 * sec=-1, nanos=200000000 stands for -1.2 seconds and not -0.8
 * seconds.  This is a pedantic point: files dated before Jan 1, 1970
 * with sub-second timestamps are rare.
 */
static void
add_pax_attr_time(struct archive_string *as, const char *key,
    int64_t sec, unsigned long nanos)
{
	static const char decimal[] = "0123456789";
	/*
	 * Each byte contributes fewer than 3 base-10 digits, so this
	 * is always large enough.
	 */
	char buf[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)];
	char *cursor = buf + sizeof(buf) - 1;
	int remaining = 10;
	int d = 0;

	/* The text is built right-to-left, starting at the terminator. */
	*cursor = 0;

	/* Drop low-order digits until a non-zero one turns up. */
	while (remaining > 0 && d == 0) {
		d = nanos % 10;
		nanos /= 10;
		remaining--;
	}

	/* A fraction is emitted only if a non-zero digit was found. */
	if (remaining > 0) {
		for (; remaining > 0; remaining--) {
			*--cursor = decimal[d];
			d = nanos % 10;
			nanos /= 10;
		}
		*--cursor = '.';
	}

	add_pax_attr(as, key, format_int(cursor, sec));
}
275
/*
 * Write the decimal text of 'i' immediately before 't', working
 * backwards, and return a pointer to the first character written.
 * No terminator is stored; the caller places one at 't' beforehand
 * if it wants a C string.
 */
static char *
format_int(char *t, int64_t i)
{
	const int negative = (i < 0);
	uint64_t magnitude;

	if (!negative)
		magnitude = (uint64_t)i;
	else if (i == INT64_MIN)
		/* -INT64_MIN is not representable in int64_t. */
		magnitude = (uint64_t)(INT64_MAX) + 1;
	else
		magnitude = (uint64_t)(-i);

	/* Always emits at least one digit, so zero prints as "0". */
	for (;;) {
		*--t = (char)('0' + (int)(magnitude % 10));
		magnitude /= 10;
		if (magnitude == 0)
			break;
	}
	if (negative)
		*--t = '-';
	return (t);
}
293
/*
 * Append an attribute whose value is the decimal text of 'value'.
 */
static void
add_pax_attr_int(struct archive_string *as, const char *key, int64_t value)
{
	/* Fewer than 3 decimal digits per byte, plus the terminator. */
	char digits[1 + 3 * sizeof(value)];
	char *end = digits + sizeof(digits) - 1;

	*end = 0;
	add_pax_attr(as, key, format_int(end, value));
}
302
/*
 * Append a key/value attribute whose value is a NUL-terminated
 * string.  The record syntax, including the length field, is handled
 * by add_pax_attr_binary().
 */
static void
add_pax_attr(struct archive_string *as, const char *key, const char *value)
{
	const size_t value_len = strlen(value);

	add_pax_attr_binary(as, key, value, value_len);
}
312
/*
 * Add a key/value attribute to the pax header.  This function handles
 * binary values: exactly 'value_len' bytes of 'value' are copied, so
 * the value may contain NUL bytes.
 *
 * All length arithmetic is done in 64-bit unsigned integers so that a
 * very large value is neither truncated nor able to overflow a signed
 * int while the length field is being sized.
 */
static void
add_pax_attr_binary(struct archive_string *as, const char *key,
		    const char *value, size_t value_len)
{
	uint64_t len, n;
	int digits, total_digits;
	char tmp[1 + 3 * sizeof(int64_t)];	/* < 3 base-10 digits per byte */

	/*-
	 * PAX attributes have the following layout:
	 *     <len> <space> <key> <=> <value> <nl>
	 */
	len = (uint64_t)1 + strlen(key) + 1 + value_len + 1;

	/*
	 * The <len> field includes the length of the <len> field, so
	 * computing the correct length is tricky.  Start by counting
	 * the number of base-10 digits in 'len'.
	 */
	digits = 0;
	for (n = len; n > 0; n /= 10)
		digits++;

	/*
	 * For example, if string without the length field is 99
	 * chars, then adding the 2 digit length "99" will force the
	 * total length past 100, requiring an extra digit.  Detect
	 * that by counting the digits of the tentative total.
	 */
	total_digits = 0;
	for (n = len + (uint64_t)digits; n > 0; n /= 10)
		total_digits++;
	if (total_digits > digits)
		digits++;

	/* Now, we have the right length so we can build the line. */
	tmp[sizeof(tmp) - 1] = 0;	/* Null-terminate the work area. */
	archive_strcat(as, format_int(tmp + sizeof(tmp) - 1,
	    (int64_t)(len + (uint64_t)digits)));
	archive_strappend_char(as, ' ');
	archive_strcat(as, key);
	archive_strappend_char(as, '=');
	archive_array_append(as, value, value_len);
	archive_strappend_char(as, '\n');
}
362
363static void
364archive_write_pax_header_xattr(struct pax *pax, const char *encoded_name,
365    const void *value, size_t value_len)
366{
367	struct archive_string s;
368	char *encoded_value;
369
370	if (encoded_name == NULL)
371		return;
372
373	if (pax->flags & WRITE_LIBARCHIVE_XATTR) {
374		encoded_value = base64_encode((const char *)value, value_len);
375		if (encoded_value != NULL) {
376			archive_string_init(&s);
377			archive_strcpy(&s, "LIBARCHIVE.xattr.");
378			archive_strcat(&s, encoded_name);
379			add_pax_attr(&(pax->pax_header), s.s, encoded_value);
380			archive_string_free(&s);
381		}
382		free(encoded_value);
383	}
384	if (pax->flags & WRITE_SCHILY_XATTR) {
385		archive_string_init(&s);
386		archive_strcpy(&s, "SCHILY.xattr.");
387		archive_strcat(&s, encoded_name);
388		add_pax_attr_binary(&(pax->pax_header), s.s, value, value_len);
389		archive_string_free(&s);
390	}
391}
392
393static int
394archive_write_pax_header_xattrs(struct archive_write *a,
395    struct pax *pax, struct archive_entry *entry)
396{
397	int i = archive_entry_xattr_reset(entry);
398
399	while (i--) {
400		const char *name;
401		const void *value;
402		char *url_encoded_name = NULL, *encoded_name = NULL;
403		size_t size;
404		int r;
405
406		archive_entry_xattr_next(entry, &name, &value, &size);
407		url_encoded_name = url_encode(name);
408		if (url_encoded_name == NULL)
409			goto malloc_error;
410		else {
411			/* Convert narrow-character to UTF-8. */
412			r = archive_strcpy_l(&(pax->l_url_encoded_name),
413			    url_encoded_name, pax->sconv_utf8);
414			free(url_encoded_name); /* Done with this. */
415			if (r == 0)
416				encoded_name = pax->l_url_encoded_name.s;
417			else if (r == -1)
418				goto malloc_error;
419			else {
420				archive_set_error(&a->archive,
421				    ARCHIVE_ERRNO_MISC,
422				    "Error encoding pax extended attribute");
423				return (ARCHIVE_FAILED);
424			}
425		}
426
427		archive_write_pax_header_xattr(pax, encoded_name,
428		    value, size);
429
430	}
431	return (ARCHIVE_OK);
432malloc_error:
433	archive_set_error(&a->archive, ENOMEM, "Can't allocate memory");
434	return (ARCHIVE_FATAL);
435}
436
437static int
438get_entry_hardlink(struct archive_write *a, struct archive_entry *entry,
439    const char **name, size_t *length, struct archive_string_conv *sc)
440{
441	int r;
442
443	r = archive_entry_hardlink_l(entry, name, length, sc);
444	if (r != 0) {
445		if (errno == ENOMEM) {
446			archive_set_error(&a->archive, ENOMEM,
447			    "Can't allocate memory for Linkname");
448			return (ARCHIVE_FATAL);
449		}
450		return (ARCHIVE_WARN);
451	}
452	return (ARCHIVE_OK);
453}
454
455static int
456get_entry_pathname(struct archive_write *a, struct archive_entry *entry,
457    const char **name, size_t *length, struct archive_string_conv *sc)
458{
459	int r;
460
461	r = archive_entry_pathname_l(entry, name, length, sc);
462	if (r != 0) {
463		if (errno == ENOMEM) {
464			archive_set_error(&a->archive, ENOMEM,
465			    "Can't allocate memory for Pathname");
466			return (ARCHIVE_FATAL);
467		}
468		return (ARCHIVE_WARN);
469	}
470	return (ARCHIVE_OK);
471}
472
473static int
474get_entry_uname(struct archive_write *a, struct archive_entry *entry,
475    const char **name, size_t *length, struct archive_string_conv *sc)
476{
477	int r;
478
479	r = archive_entry_uname_l(entry, name, length, sc);
480	if (r != 0) {
481		if (errno == ENOMEM) {
482			archive_set_error(&a->archive, ENOMEM,
483			    "Can't allocate memory for Uname");
484			return (ARCHIVE_FATAL);
485		}
486		return (ARCHIVE_WARN);
487	}
488	return (ARCHIVE_OK);
489}
490
491static int
492get_entry_gname(struct archive_write *a, struct archive_entry *entry,
493    const char **name, size_t *length, struct archive_string_conv *sc)
494{
495	int r;
496
497	r = archive_entry_gname_l(entry, name, length, sc);
498	if (r != 0) {
499		if (errno == ENOMEM) {
500			archive_set_error(&a->archive, ENOMEM,
501			    "Can't allocate memory for Gname");
502			return (ARCHIVE_FATAL);
503		}
504		return (ARCHIVE_WARN);
505	}
506	return (ARCHIVE_OK);
507}
508
509static int
510get_entry_symlink(struct archive_write *a, struct archive_entry *entry,
511    const char **name, size_t *length, struct archive_string_conv *sc)
512{
513	int r;
514
515	r = archive_entry_symlink_l(entry, name, length, sc);
516	if (r != 0) {
517		if (errno == ENOMEM) {
518			archive_set_error(&a->archive, ENOMEM,
519			    "Can't allocate memory for Linkname");
520			return (ARCHIVE_FATAL);
521		}
522		return (ARCHIVE_WARN);
523	}
524	return (ARCHIVE_OK);
525}
526
527/* Add ACL to pax header */
528static int
529add_pax_acl(struct archive_write *a,
530    struct archive_entry *entry, struct pax *pax, int flags)
531{
532	char *p;
533	const char *attr;
534	int acl_types;
535
536	acl_types = archive_entry_acl_types(entry);
537
538	if ((acl_types & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0)
539		attr = "SCHILY.acl.ace";
540	else if ((flags & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) != 0)
541		attr = "SCHILY.acl.access";
542	else if ((flags & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) != 0)
543		attr = "SCHILY.acl.default";
544	else
545		return (ARCHIVE_FATAL);
546
547	p = archive_entry_acl_to_text_l(entry, NULL, flags, pax->sconv_utf8);
548	if (p == NULL) {
549		if (errno == ENOMEM) {
550			archive_set_error(&a->archive, ENOMEM, "%s %s",
551			    "Can't allocate memory for ", attr);
552			return (ARCHIVE_FATAL);
553		}
554		archive_set_error(&a->archive,
555		    ARCHIVE_ERRNO_FILE_FORMAT, "%s %s %s",
556		    "Can't translate ", attr, " to UTF-8");
557		return(ARCHIVE_WARN);
558	}
559
560	if (*p != '\0') {
561		add_pax_attr(&(pax->pax_header),
562		    attr, p);
563	}
564	free(p);
565	return(ARCHIVE_OK);
566}
567
568/*
569 * TODO: Consider adding 'comment' and 'charset' fields to
570 * archive_entry so that clients can specify them.  Also, consider
571 * adding generic key/value tags so clients can add arbitrary
572 * key/value data.
573 *
574 * TODO: Break up this 700-line function!!!!  Yowza!
575 */
576static int
577archive_write_pax_header(struct archive_write *a,
578    struct archive_entry *entry_original)
579{
580	struct archive_entry *entry_main;
581	const char *p;
582	const char *suffix;
583	int need_extension, r, ret;
584	int acl_types;
585	int sparse_count;
586	uint64_t sparse_total, real_size;
587	struct pax *pax;
588	const char *hardlink;
589	const char *path = NULL, *linkpath = NULL;
590	const char *uname = NULL, *gname = NULL;
591	const void *mac_metadata;
592	size_t mac_metadata_size;
593	struct archive_string_conv *sconv;
594	size_t hardlink_length, path_length, linkpath_length;
595	size_t uname_length, gname_length;
596
597	char paxbuff[512];
598	char ustarbuff[512];
599	char ustar_entry_name[256];
600	char pax_entry_name[256];
601	char gnu_sparse_name[256];
602	struct archive_string entry_name;
603
604	ret = ARCHIVE_OK;
605	need_extension = 0;
606	pax = (struct pax *)a->format_data;
607
608	const time_t ustar_max_mtime = get_ustar_max_mtime();
609
610	/* Sanity check. */
611	if (archive_entry_pathname(entry_original) == NULL) {
612		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
613			  "Can't record entry in tar file without pathname");
614		return (ARCHIVE_FAILED);
615	}
616
617	/*
618	 * Choose a header encoding.
619	 */
620	if (pax->opt_binary)
621		sconv = NULL;/* Binary mode. */
622	else {
623		/* Header encoding is UTF-8. */
624		if (pax->sconv_utf8 == NULL) {
625			/* Initialize the string conversion object
626			 * we must need */
627			pax->sconv_utf8 = archive_string_conversion_to_charset(
628			    &(a->archive), "UTF-8", 1);
629			if (pax->sconv_utf8 == NULL)
630				/* Couldn't allocate memory */
631				return (ARCHIVE_FAILED);
632		}
633		sconv = pax->sconv_utf8;
634	}
635
636	r = get_entry_hardlink(a, entry_original, &hardlink,
637	    &hardlink_length, sconv);
638	if (r == ARCHIVE_FATAL)
639		return (r);
640	else if (r != ARCHIVE_OK) {
641		r = get_entry_hardlink(a, entry_original, &hardlink,
642		    &hardlink_length, NULL);
643		if (r == ARCHIVE_FATAL)
644			return (r);
645		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
646		    "Can't translate linkname '%s' to %s", hardlink,
647		    archive_string_conversion_charset_name(sconv));
648		ret = ARCHIVE_WARN;
649		sconv = NULL;/* The header charset switches to binary mode. */
650	}
651
652	/* Make sure this is a type of entry that we can handle here */
653	if (hardlink == NULL) {
654		switch (archive_entry_filetype(entry_original)) {
655		case AE_IFBLK:
656		case AE_IFCHR:
657		case AE_IFIFO:
658		case AE_IFLNK:
659		case AE_IFREG:
660			break;
661		case AE_IFDIR:
662		{
663			/*
664			 * Ensure a trailing '/'.  Modify the original
665			 * entry so the client sees the change.
666			 */
667#if defined(_WIN32) && !defined(__CYGWIN__)
668			const wchar_t *wp;
669
670			wp = archive_entry_pathname_w(entry_original);
671			if (wp != NULL && wp[wcslen(wp) -1] != L'/') {
672				struct archive_wstring ws;
673
674				archive_string_init(&ws);
675				path_length = wcslen(wp);
676				if (archive_wstring_ensure(&ws,
677				    path_length + 2) == NULL) {
678					archive_set_error(&a->archive, ENOMEM,
679					    "Can't allocate pax data");
680					archive_wstring_free(&ws);
681					return(ARCHIVE_FATAL);
682				}
683				/* Should we keep '\' ? */
684				if (wp[path_length -1] == L'\\')
685					path_length--;
686				archive_wstrncpy(&ws, wp, path_length);
687				archive_wstrappend_wchar(&ws, L'/');
688				archive_entry_copy_pathname_w(
689				    entry_original, ws.s);
690				archive_wstring_free(&ws);
691				p = NULL;
692			} else
693#endif
694				p = archive_entry_pathname(entry_original);
695			/*
696			 * On Windows, this is a backup operation just in
697			 * case getting WCS failed. On POSIX, this is a
698			 * normal operation.
699			 */
700			if (p != NULL && p[0] != '\0' && p[strlen(p) - 1] != '/') {
701				struct archive_string as;
702
703				archive_string_init(&as);
704				path_length = strlen(p);
705				if (archive_string_ensure(&as,
706				    path_length + 2) == NULL) {
707					archive_set_error(&a->archive, ENOMEM,
708					    "Can't allocate pax data");
709					archive_string_free(&as);
710					return(ARCHIVE_FATAL);
711				}
712#if defined(_WIN32) && !defined(__CYGWIN__)
713				/* NOTE: This might break the pathname
714				 * if the current code page is CP932 and
715				 * the pathname includes a character '\'
716				 * as a part of its multibyte pathname. */
717				if (p[strlen(p) -1] == '\\')
718					path_length--;
719				else
720#endif
721				archive_strncpy(&as, p, path_length);
722				archive_strappend_char(&as, '/');
723				archive_entry_copy_pathname(
724				    entry_original, as.s);
725				archive_string_free(&as);
726			}
727			break;
728		}
729		default: /* AE_IFSOCK and unknown */
730			__archive_write_entry_filetype_unsupported(
731			    &a->archive, entry_original, "pax");
732			return (ARCHIVE_FAILED);
733		}
734	}
735
736	/*
737	 * If Mac OS metadata blob is here, recurse to write that
738	 * as a separate entry.  This is really a pretty poor design:
739	 * In particular, it doubles the overhead for long filenames.
740	 * TODO: Help Apple folks design something better and figure
741	 * out how to transition from this legacy format.
742	 *
743	 * Note that this code is present on every platform; clients
744	 * on non-Mac are unlikely to ever provide this data, but
745	 * applications that copy entries from one archive to another
746	 * should not lose data just because the local filesystem
747	 * can't store it.
748	 */
749	mac_metadata =
750	    archive_entry_mac_metadata(entry_original, &mac_metadata_size);
751	if (mac_metadata != NULL) {
752		const char *oname;
753		char *name, *bname;
754		size_t name_length;
755		struct archive_entry *extra = archive_entry_new2(&a->archive);
756
757		oname = archive_entry_pathname(entry_original);
758		name_length = strlen(oname);
759		name = malloc(name_length + 3);
760		if (name == NULL || extra == NULL) {
761			/* XXX error message */
762			archive_entry_free(extra);
763			free(name);
764			return (ARCHIVE_FAILED);
765		}
766		strcpy(name, oname);
767		/* Find last '/'; strip trailing '/' characters */
768		bname = strrchr(name, '/');
769		while (bname != NULL && bname[1] == '\0') {
770			*bname = '\0';
771			bname = strrchr(name, '/');
772		}
773		if (bname == NULL) {
774			memmove(name + 2, name, name_length + 1);
775			memmove(name, "._", 2);
776		} else {
777			bname += 1;
778			memmove(bname + 2, bname, strlen(bname) + 1);
779			memmove(bname, "._", 2);
780		}
781		archive_entry_copy_pathname(extra, name);
782		free(name);
783
784		archive_entry_set_size(extra, mac_metadata_size);
785		archive_entry_set_filetype(extra, AE_IFREG);
786		archive_entry_set_perm(extra,
787		    archive_entry_perm(entry_original));
788		archive_entry_set_mtime(extra,
789		    archive_entry_mtime(entry_original),
790		    archive_entry_mtime_nsec(entry_original));
791		archive_entry_set_gid(extra,
792		    archive_entry_gid(entry_original));
793		archive_entry_set_gname(extra,
794		    archive_entry_gname(entry_original));
795		archive_entry_set_uid(extra,
796		    archive_entry_uid(entry_original));
797		archive_entry_set_uname(extra,
798		    archive_entry_uname(entry_original));
799
800		/* Recurse to write the special copyfile entry. */
801		r = archive_write_pax_header(a, extra);
802		archive_entry_free(extra);
803		if (r < ARCHIVE_WARN)
804			return (r);
805		if (r < ret)
806			ret = r;
807		r = (int)archive_write_pax_data(a, mac_metadata,
808		    mac_metadata_size);
809		if (r < ARCHIVE_WARN)
810			return (r);
811		if (r < ret)
812			ret = r;
813		r = archive_write_pax_finish_entry(a);
814		if (r < ARCHIVE_WARN)
815			return (r);
816		if (r < ret)
817			ret = r;
818	}
819
820	/* Copy entry so we can modify it as needed. */
821#if defined(_WIN32) && !defined(__CYGWIN__)
822	/* Make sure the path separators in pathname, hardlink and symlink
823	 * are all slash '/', not the Windows path separator '\'. */
824	entry_main = __la_win_entry_in_posix_pathseparator(entry_original);
825	if (entry_main == entry_original)
826		entry_main = archive_entry_clone(entry_original);
827#else
828	entry_main = archive_entry_clone(entry_original);
829#endif
830	if (entry_main == NULL) {
831		archive_set_error(&a->archive, ENOMEM,
832		    "Can't allocate pax data");
833		return(ARCHIVE_FATAL);
834	}
835	archive_string_empty(&(pax->pax_header)); /* Blank our work area. */
836	archive_string_empty(&(pax->sparse_map));
837	sparse_total = 0;
838	sparse_list_clear(pax);
839
840	if (hardlink == NULL &&
841	    archive_entry_filetype(entry_main) == AE_IFREG)
842		sparse_count = archive_entry_sparse_reset(entry_main);
843	else
844		sparse_count = 0;
845	if (sparse_count) {
846		int64_t offset, length, last_offset = 0;
847		/* Get the last entry of sparse block. */
848		while (archive_entry_sparse_next(
849		    entry_main, &offset, &length) == ARCHIVE_OK)
850			last_offset = offset + length;
851
852		/* If the last sparse block does not reach the end of file,
853		 * We have to add a empty sparse block as the last entry to
854		 * manage storing file data. */
855		if (last_offset < archive_entry_size(entry_main))
856			archive_entry_sparse_add_entry(entry_main,
857			    archive_entry_size(entry_main), 0);
858		sparse_count = archive_entry_sparse_reset(entry_main);
859	}
860
861	/*
862	 * First, check the name fields and see if any of them
863	 * require binary coding.  If any of them does, then all of
864	 * them do.
865	 */
866	r = get_entry_pathname(a, entry_main, &path, &path_length, sconv);
867	if (r == ARCHIVE_FATAL) {
868		archive_entry_free(entry_main);
869		return (r);
870	} else if (r != ARCHIVE_OK) {
871		r = get_entry_pathname(a, entry_main, &path,
872		    &path_length, NULL);
873		if (r == ARCHIVE_FATAL) {
874			archive_entry_free(entry_main);
875			return (r);
876		}
877		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
878		    "Can't translate pathname '%s' to %s", path,
879		    archive_string_conversion_charset_name(sconv));
880		ret = ARCHIVE_WARN;
881		sconv = NULL;/* The header charset switches to binary mode. */
882	}
883	r = get_entry_uname(a, entry_main, &uname, &uname_length, sconv);
884	if (r == ARCHIVE_FATAL) {
885		archive_entry_free(entry_main);
886		return (r);
887	} else if (r != ARCHIVE_OK) {
888		r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL);
889		if (r == ARCHIVE_FATAL) {
890			archive_entry_free(entry_main);
891			return (r);
892		}
893		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
894		    "Can't translate uname '%s' to %s", uname,
895		    archive_string_conversion_charset_name(sconv));
896		ret = ARCHIVE_WARN;
897		sconv = NULL;/* The header charset switches to binary mode. */
898	}
899	r = get_entry_gname(a, entry_main, &gname, &gname_length, sconv);
900	if (r == ARCHIVE_FATAL) {
901		archive_entry_free(entry_main);
902		return (r);
903	} else if (r != ARCHIVE_OK) {
904		r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL);
905		if (r == ARCHIVE_FATAL) {
906			archive_entry_free(entry_main);
907			return (r);
908		}
909		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
910		    "Can't translate gname '%s' to %s", gname,
911		    archive_string_conversion_charset_name(sconv));
912		ret = ARCHIVE_WARN;
913		sconv = NULL;/* The header charset switches to binary mode. */
914	}
915	linkpath = hardlink;
916	linkpath_length = hardlink_length;
917	if (linkpath == NULL) {
918		r = get_entry_symlink(a, entry_main, &linkpath,
919		    &linkpath_length, sconv);
920		if (r == ARCHIVE_FATAL) {
921			archive_entry_free(entry_main);
922			return (r);
923		} else if (r != ARCHIVE_OK) {
924			r = get_entry_symlink(a, entry_main, &linkpath,
925			    &linkpath_length, NULL);
926			if (r == ARCHIVE_FATAL) {
927				archive_entry_free(entry_main);
928				return (r);
929			}
930			archive_set_error(&a->archive,
931			    ARCHIVE_ERRNO_FILE_FORMAT,
932			    "Can't translate linkname '%s' to %s", linkpath,
933			    archive_string_conversion_charset_name(sconv));
934			ret = ARCHIVE_WARN;
935			sconv = NULL;
936		}
937	}
938
939	/* If any string conversions failed, get all attributes
940	 * in binary-mode. */
941	if (sconv == NULL && !pax->opt_binary) {
942		if (hardlink != NULL) {
943			r = get_entry_hardlink(a, entry_main, &hardlink,
944			    &hardlink_length, NULL);
945			if (r == ARCHIVE_FATAL) {
946				archive_entry_free(entry_main);
947				return (r);
948			}
949			linkpath = hardlink;
950			linkpath_length = hardlink_length;
951		}
952		r = get_entry_pathname(a, entry_main, &path,
953		    &path_length, NULL);
954		if (r == ARCHIVE_FATAL) {
955			archive_entry_free(entry_main);
956			return (r);
957		}
958		r = get_entry_uname(a, entry_main, &uname, &uname_length, NULL);
959		if (r == ARCHIVE_FATAL) {
960			archive_entry_free(entry_main);
961			return (r);
962		}
963		r = get_entry_gname(a, entry_main, &gname, &gname_length, NULL);
964		if (r == ARCHIVE_FATAL) {
965			archive_entry_free(entry_main);
966			return (r);
967		}
968	}
969
970	/* Store the header encoding first, to be nice to readers. */
971	if (sconv == NULL)
972		add_pax_attr(&(pax->pax_header), "hdrcharset", "BINARY");
973
974
975	/*
976	 * If name is too long, or has non-ASCII characters, add
977	 * 'path' to pax extended attrs.  (Note that an unconvertible
978	 * name must have non-ASCII characters.)
979	 */
980	if (has_non_ASCII(path)) {
981		/* We have non-ASCII characters. */
982		add_pax_attr(&(pax->pax_header), "path", path);
983		archive_entry_set_pathname(entry_main,
984		    build_ustar_entry_name(ustar_entry_name,
985			path, path_length, NULL));
986		need_extension = 1;
987	} else {
988		/* We have an all-ASCII path; we'd like to just store
989		 * it in the ustar header if it will fit.  Yes, this
990		 * duplicates some of the logic in
991		 * archive_write_set_format_ustar.c
992		 */
993		if (path_length <= 100) {
994			/* Fits in the old 100-char tar name field. */
995		} else {
996			/* Find largest suffix that will fit. */
997			/* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */
998			suffix = strchr(path + path_length - 100 - 1, '/');
999			/* Don't attempt an empty prefix. */
1000			if (suffix == path)
1001				suffix = strchr(suffix + 1, '/');
1002			/* We can put it in the ustar header if it's
1003			 * all ASCII and it's either <= 100 characters
1004			 * or can be split at a '/' into a prefix <=
1005			 * 155 chars and a suffix <= 100 chars.  (Note
1006			 * the strchr() above will return NULL exactly
1007			 * when the path can't be split.)
1008			 */
1009			if (suffix == NULL       /* Suffix > 100 chars. */
1010			    || suffix[1] == '\0'    /* empty suffix */
1011			    || suffix - path > 155)  /* Prefix > 155 chars */
1012			{
1013				add_pax_attr(&(pax->pax_header), "path", path);
1014				archive_entry_set_pathname(entry_main,
1015				    build_ustar_entry_name(ustar_entry_name,
1016					path, path_length, NULL));
1017				need_extension = 1;
1018			}
1019		}
1020	}
1021
1022	if (linkpath != NULL) {
1023		/* If link name is too long or has non-ASCII characters, add
1024		 * 'linkpath' to pax extended attrs. */
1025		if (linkpath_length > 100 || has_non_ASCII(linkpath)) {
1026			add_pax_attr(&(pax->pax_header), "linkpath", linkpath);
1027			if (linkpath_length > 100) {
1028				if (hardlink != NULL)
1029					archive_entry_set_hardlink(entry_main,
1030					    "././@LongHardLink");
1031				else
1032					archive_entry_set_symlink(entry_main,
1033					    "././@LongSymLink");
1034			}
1035			need_extension = 1;
1036		}
1037	}
1038	/* Save a pathname since it will be renamed if `entry_main` has
1039	 * sparse blocks. */
1040	archive_string_init(&entry_name);
1041	archive_strcpy(&entry_name, archive_entry_pathname(entry_main));
1042
1043	/* If file size is too large, we need pax extended attrs. */
1044	if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) {
1045		need_extension = 1;
1046	}
1047
1048	/* If numeric GID is too large, add 'gid' to pax extended attrs. */
1049	if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) {
1050		add_pax_attr_int(&(pax->pax_header), "gid",
1051		    archive_entry_gid(entry_main));
1052		need_extension = 1;
1053	}
1054
1055	/* If group name is too large or has non-ASCII characters, add
1056	 * 'gname' to pax extended attrs. */
1057	if (gname != NULL) {
1058		if (gname_length > 31 || has_non_ASCII(gname)) {
1059			add_pax_attr(&(pax->pax_header), "gname", gname);
1060			need_extension = 1;
1061		}
1062	}
1063
1064	/* If numeric UID is too large, add 'uid' to pax extended attrs. */
1065	if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) {
1066		add_pax_attr_int(&(pax->pax_header), "uid",
1067		    archive_entry_uid(entry_main));
1068		need_extension = 1;
1069	}
1070
1071	/* Add 'uname' to pax extended attrs if necessary. */
1072	if (uname != NULL) {
1073		if (uname_length > 31 || has_non_ASCII(uname)) {
1074			add_pax_attr(&(pax->pax_header), "uname", uname);
1075			need_extension = 1;
1076		}
1077	}
1078
1079	/*
1080	 * POSIX/SUSv3 doesn't provide a standard key for large device
1081	 * numbers.  I use the same keys here that Joerg Schilling
1082	 * used for 'star.'  (Which, somewhat confusingly, are called
1083	 * "devXXX" even though they code "rdev" values.)  No doubt,
1084	 * other implementations use other keys.  Note that there's no
1085	 * reason we can't write the same information into a number of
1086	 * different keys.
1087	 *
1088	 * Of course, this is only needed for block or char device entries.
1089	 */
1090	if (archive_entry_filetype(entry_main) == AE_IFBLK
1091	    || archive_entry_filetype(entry_main) == AE_IFCHR) {
1092		/*
1093		 * If rdevmajor is too large, add 'SCHILY.devmajor' to
1094		 * extended attributes.
1095		 */
1096		int rdevmajor, rdevminor;
1097		rdevmajor = archive_entry_rdevmajor(entry_main);
1098		rdevminor = archive_entry_rdevminor(entry_main);
1099		if (rdevmajor >= (1 << 18)) {
1100			add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor",
1101			    rdevmajor);
1102			/*
1103			 * Non-strict formatting below means we don't
1104			 * have to truncate here.  Not truncating improves
1105			 * the chance that some more modern tar archivers
1106			 * (such as GNU tar 1.13) can restore the full
1107			 * value even if they don't understand the pax
1108			 * extended attributes.  See my rant below about
1109			 * file size fields for additional details.
1110			 */
1111			/* archive_entry_set_rdevmajor(entry_main,
1112			   rdevmajor & ((1 << 18) - 1)); */
1113			need_extension = 1;
1114		}
1115
1116		/*
1117		 * If devminor is too large, add 'SCHILY.devminor' to
1118		 * extended attributes.
1119		 */
1120		if (rdevminor >= (1 << 18)) {
1121			add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor",
1122			    rdevminor);
1123			/* Truncation is not necessary here, either. */
1124			/* archive_entry_set_rdevminor(entry_main,
1125			   rdevminor & ((1 << 18) - 1)); */
1126			need_extension = 1;
1127		}
1128	}
1129
1130	/*
1131	 * Yes, this check is duplicated just below; this helps to
1132	 * avoid writing an mtime attribute just to handle a
1133	 * high-resolution timestamp in "restricted pax" mode.
1134	 */
1135	if (!need_extension &&
1136	    ((archive_entry_mtime(entry_main) < 0)
1137		|| (archive_entry_mtime(entry_main) >= ustar_max_mtime)))
1138		need_extension = 1;
1139
1140	/* I use a star-compatible file flag attribute. */
1141	p = archive_entry_fflags_text(entry_main);
1142	if (!need_extension && p != NULL  &&  *p != '\0')
1143		need_extension = 1;
1144
1145	/* If there are extended attributes, we need an extension */
1146	if (!need_extension && archive_entry_xattr_count(entry_original) > 0)
1147		need_extension = 1;
1148
1149	/* If there are sparse info, we need an extension */
1150	if (!need_extension && sparse_count > 0)
1151		need_extension = 1;
1152
1153	acl_types = archive_entry_acl_types(entry_original);
1154
1155	/* If there are any ACL entries, we need an extension */
1156	if (!need_extension && acl_types != 0)
1157		need_extension = 1;
1158
1159	/* If the symlink type is defined, we need an extension */
1160	if (!need_extension && archive_entry_symlink_type(entry_main) > 0)
1161		need_extension = 1;
1162
1163	/*
1164	 * Libarchive used to include these in extended headers for
1165	 * restricted pax format, but that confused people who
1166	 * expected ustar-like time semantics.  So now we only include
1167	 * them in full pax format.
1168	 */
1169	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED) {
1170		if (archive_entry_ctime(entry_main) != 0  ||
1171		    archive_entry_ctime_nsec(entry_main) != 0)
1172			add_pax_attr_time(&(pax->pax_header), "ctime",
1173			    archive_entry_ctime(entry_main),
1174			    archive_entry_ctime_nsec(entry_main));
1175
1176		if (archive_entry_atime(entry_main) != 0 ||
1177		    archive_entry_atime_nsec(entry_main) != 0)
1178			add_pax_attr_time(&(pax->pax_header), "atime",
1179			    archive_entry_atime(entry_main),
1180			    archive_entry_atime_nsec(entry_main));
1181
1182		/* Store birth/creationtime only if it's earlier than mtime */
1183		if (archive_entry_birthtime_is_set(entry_main) &&
1184		    archive_entry_birthtime(entry_main)
1185		    < archive_entry_mtime(entry_main))
1186			add_pax_attr_time(&(pax->pax_header),
1187			    "LIBARCHIVE.creationtime",
1188			    archive_entry_birthtime(entry_main),
1189			    archive_entry_birthtime_nsec(entry_main));
1190	}
1191
1192	/*
1193	 * The following items are handled differently in "pax
1194	 * restricted" format.  In particular, in "pax restricted"
1195	 * format they won't be added unless need_extension is
1196	 * already set (we're already generating an extended header, so
1197	 * may as well include these).
1198	 */
1199	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED ||
1200	    need_extension) {
1201		if (archive_entry_mtime(entry_main) < 0  ||
1202		    archive_entry_mtime(entry_main) >= ustar_max_mtime  ||
1203		    archive_entry_mtime_nsec(entry_main) != 0)
1204			add_pax_attr_time(&(pax->pax_header), "mtime",
1205			    archive_entry_mtime(entry_main),
1206			    archive_entry_mtime_nsec(entry_main));
1207
1208		/* I use a star-compatible file flag attribute. */
1209		p = archive_entry_fflags_text(entry_main);
1210		if (p != NULL  &&  *p != '\0')
1211			add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p);
1212
1213		/* I use star-compatible ACL attributes. */
1214		if ((acl_types & ARCHIVE_ENTRY_ACL_TYPE_NFS4) != 0) {
1215			ret = add_pax_acl(a, entry_original, pax,
1216			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1217			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA |
1218			    ARCHIVE_ENTRY_ACL_STYLE_COMPACT);
1219			if (ret == ARCHIVE_FATAL) {
1220				archive_entry_free(entry_main);
1221				archive_string_free(&entry_name);
1222				return (ARCHIVE_FATAL);
1223			}
1224		}
1225		if (acl_types & ARCHIVE_ENTRY_ACL_TYPE_ACCESS) {
1226			ret = add_pax_acl(a, entry_original, pax,
1227			    ARCHIVE_ENTRY_ACL_TYPE_ACCESS |
1228			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1229			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA);
1230			if (ret == ARCHIVE_FATAL) {
1231				archive_entry_free(entry_main);
1232				archive_string_free(&entry_name);
1233				return (ARCHIVE_FATAL);
1234			}
1235		}
1236		if (acl_types & ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) {
1237			ret = add_pax_acl(a, entry_original, pax,
1238			    ARCHIVE_ENTRY_ACL_TYPE_DEFAULT |
1239			    ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID |
1240			    ARCHIVE_ENTRY_ACL_STYLE_SEPARATOR_COMMA);
1241			if (ret == ARCHIVE_FATAL) {
1242				archive_entry_free(entry_main);
1243				archive_string_free(&entry_name);
1244				return (ARCHIVE_FATAL);
1245			}
1246		}
1247
1248		/* We use GNU-tar-compatible sparse attributes. */
1249		if (sparse_count > 0) {
1250			int64_t soffset, slength;
1251
1252			add_pax_attr_int(&(pax->pax_header),
1253			    "GNU.sparse.major", 1);
1254			add_pax_attr_int(&(pax->pax_header),
1255			    "GNU.sparse.minor", 0);
1256			/*
1257			 * Make sure to store the original path, since
1258			 * truncation to ustar limit happened already.
1259			 */
1260			add_pax_attr(&(pax->pax_header),
1261			    "GNU.sparse.name", path);
1262			add_pax_attr_int(&(pax->pax_header),
1263			    "GNU.sparse.realsize",
1264			    archive_entry_size(entry_main));
1265
1266			/* Rename the file name which will be used for
1267			 * ustar header to a special name, which GNU
1268			 * PAX Format 1.0 requires */
1269			archive_entry_set_pathname(entry_main,
1270			    build_gnu_sparse_name(gnu_sparse_name,
1271			        entry_name.s));
1272
1273			/*
1274			 * - Make a sparse map, which will precede a file data.
1275			 * - Get the total size of available data of sparse.
1276			 */
1277			archive_string_sprintf(&(pax->sparse_map), "%d\n",
1278			    sparse_count);
1279			while (archive_entry_sparse_next(entry_main,
1280			    &soffset, &slength) == ARCHIVE_OK) {
1281				archive_string_sprintf(&(pax->sparse_map),
1282				    "%jd\n%jd\n",
1283				    (intmax_t)soffset,
1284				    (intmax_t)slength);
1285				sparse_total += slength;
1286				if (sparse_list_add(pax, soffset, slength)
1287				    != ARCHIVE_OK) {
1288					archive_set_error(&a->archive,
1289					    ENOMEM,
1290					    "Can't allocate memory");
1291					archive_entry_free(entry_main);
1292					archive_string_free(&entry_name);
1293					return (ARCHIVE_FATAL);
1294				}
1295			}
1296		}
1297
1298		/* Store extended attributes */
1299		if (archive_write_pax_header_xattrs(a, pax, entry_original)
1300		    == ARCHIVE_FATAL) {
1301			archive_entry_free(entry_main);
1302			archive_string_free(&entry_name);
1303			return (ARCHIVE_FATAL);
1304		}
1305
1306		/* Store extended symlink information */
1307		if (archive_entry_symlink_type(entry_main) ==
1308		    AE_SYMLINK_TYPE_FILE) {
1309			add_pax_attr(&(pax->pax_header),
1310			    "LIBARCHIVE.symlinktype", "file");
1311		} else if (archive_entry_symlink_type(entry_main) ==
1312		    AE_SYMLINK_TYPE_DIRECTORY) {
1313			add_pax_attr(&(pax->pax_header),
1314			    "LIBARCHIVE.symlinktype", "dir");
1315		}
1316	}
1317
1318	/* Only regular files have data. */
1319	if (archive_entry_filetype(entry_main) != AE_IFREG)
1320		archive_entry_set_size(entry_main, 0);
1321
1322	/*
1323	 * Pax-restricted does not store data for hardlinks, in order
1324	 * to improve compatibility with ustar.
1325	 */
1326	if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE &&
1327	    hardlink != NULL)
1328		archive_entry_set_size(entry_main, 0);
1329
1330	/*
1331	 * XXX Full pax interchange format does permit a hardlink
1332	 * entry to have data associated with it.  I'm not supporting
1333	 * that here because the client expects me to tell them whether
1334	 * or not this format expects data for hardlinks.  If I
1335	 * don't check here, then every pax archive will end up with
1336	 * duplicated data for hardlinks.  Someday, there may be
1337	 * need to select this behavior, in which case the following
1338	 * will need to be revisited. XXX
1339	 */
1340	if (hardlink != NULL)
1341		archive_entry_set_size(entry_main, 0);
1342
1343	/* Save a real file size. */
1344	real_size = archive_entry_size(entry_main);
1345	/*
1346	 * Overwrite a file size by the total size of sparse blocks and
1347	 * the size of sparse map info. That file size is the length of
1348	 * the data, which we will exactly store into an archive file.
1349	 */
1350	if (archive_strlen(&(pax->sparse_map))) {
1351		size_t mapsize = archive_strlen(&(pax->sparse_map));
1352		pax->sparse_map_padding = 0x1ff & (-(ssize_t)mapsize);
1353		archive_entry_set_size(entry_main,
1354		    mapsize + pax->sparse_map_padding + sparse_total);
1355	}
1356
1357	/* If file size is too large, add 'size' to pax extended attrs. */
1358	if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) {
1359		add_pax_attr_int(&(pax->pax_header), "size",
1360		    archive_entry_size(entry_main));
1361	}
1362
1363	/* Format 'ustar' header for main entry.
1364	 *
1365	 * The trouble with file size: If the reader can't understand
1366	 * the file size, they may not be able to locate the next
1367	 * entry and the rest of the archive is toast.  Pax-compliant
1368	 * readers are supposed to ignore the file size in the main
1369	 * header, so the question becomes how to maximize portability
1370	 * for readers that don't support pax attribute extensions.
1371	 * For maximum compatibility, I permit numeric extensions in
1372	 * the main header so that the file size stored will always be
1373	 * correct, even if it's in a format that only some
1374	 * implementations understand.  The technique used here is:
1375	 *
1376	 *  a) If possible, follow the standard exactly.  This handles
1377	 *  files up to 8 gigabytes minus 1.
1378	 *
1379	 *  b) If that fails, try octal but omit the field terminator.
1380	 *  That handles files up to 64 gigabytes minus 1.
1381	 *
1382	 *  c) Otherwise, use base-256 extensions.  That handles files
1383	 *  up to 2^63 in this implementation, with the potential to
1384	 *  go up to 2^94.  That should hold us for a while. ;-)
1385	 *
1386	 * The non-strict formatter uses similar logic for other
1387	 * numeric fields, though they're less critical.
1388	 */
1389	if (__archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0,
1390	    NULL) == ARCHIVE_FATAL) {
1391		archive_entry_free(entry_main);
1392		archive_string_free(&entry_name);
1393		return (ARCHIVE_FATAL);
1394	}
1395
1396	/* If we built any extended attributes, write that entry first. */
1397	if (archive_strlen(&(pax->pax_header)) > 0) {
1398		struct archive_entry *pax_attr_entry;
1399		time_t s;
1400		int64_t uid, gid;
1401		int mode;
1402
1403		pax_attr_entry = archive_entry_new2(&a->archive);
1404		p = entry_name.s;
1405		archive_entry_set_pathname(pax_attr_entry,
1406		    build_pax_attribute_name(pax_entry_name, p));
1407		archive_entry_set_size(pax_attr_entry,
1408		    archive_strlen(&(pax->pax_header)));
1409		/* Copy uid/gid (but clip to ustar limits). */
1410		uid = archive_entry_uid(entry_main);
1411		if (uid >= 1 << 18)
1412			uid = (1 << 18) - 1;
1413		archive_entry_set_uid(pax_attr_entry, uid);
1414		gid = archive_entry_gid(entry_main);
1415		if (gid >= 1 << 18)
1416			gid = (1 << 18) - 1;
1417		archive_entry_set_gid(pax_attr_entry, gid);
1418		/* Copy mode over (but not setuid/setgid bits) */
1419		mode = archive_entry_mode(entry_main);
1420#ifdef S_ISUID
1421		mode &= ~S_ISUID;
1422#endif
1423#ifdef S_ISGID
1424		mode &= ~S_ISGID;
1425#endif
1426#ifdef S_ISVTX
1427		mode &= ~S_ISVTX;
1428#endif
1429		archive_entry_set_mode(pax_attr_entry, mode);
1430
1431		/* Copy uname/gname. */
1432		archive_entry_set_uname(pax_attr_entry,
1433		    archive_entry_uname(entry_main));
1434		archive_entry_set_gname(pax_attr_entry,
1435		    archive_entry_gname(entry_main));
1436
1437		/* Copy mtime, but clip to ustar limits. */
1438		s = archive_entry_mtime(entry_main);
1439		if (s < 0) { s = 0; }
1440		if (s > ustar_max_mtime) { s = ustar_max_mtime; }
1441		archive_entry_set_mtime(pax_attr_entry, s, 0);
1442
1443		/* Standard ustar doesn't support atime. */
1444		archive_entry_set_atime(pax_attr_entry, 0, 0);
1445
1446		/* Standard ustar doesn't support ctime. */
1447		archive_entry_set_ctime(pax_attr_entry, 0, 0);
1448
1449		r = __archive_write_format_header_ustar(a, paxbuff,
1450		    pax_attr_entry, 'x', 1, NULL);
1451
1452		archive_entry_free(pax_attr_entry);
1453
1454		/* Note that the 'x' header shouldn't ever fail to format */
1455		if (r < ARCHIVE_WARN) {
1456			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1457			    "archive_write_pax_header: "
1458			    "'x' header failed?!  This can't happen.\n");
1459			archive_entry_free(entry_main);
1460			archive_string_free(&entry_name);
1461			return (ARCHIVE_FATAL);
1462		} else if (r < ret)
1463			ret = r;
1464		r = __archive_write_output(a, paxbuff, 512);
1465		if (r != ARCHIVE_OK) {
1466			sparse_list_clear(pax);
1467			pax->entry_bytes_remaining = 0;
1468			pax->entry_padding = 0;
1469			archive_entry_free(entry_main);
1470			archive_string_free(&entry_name);
1471			return (ARCHIVE_FATAL);
1472		}
1473
1474		pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header));
1475		pax->entry_padding =
1476		    0x1ff & (-(int64_t)pax->entry_bytes_remaining);
1477
1478		r = __archive_write_output(a, pax->pax_header.s,
1479		    archive_strlen(&(pax->pax_header)));
1480		if (r != ARCHIVE_OK) {
1481			/* If a write fails, we're pretty much toast. */
1482			archive_entry_free(entry_main);
1483			archive_string_free(&entry_name);
1484			return (ARCHIVE_FATAL);
1485		}
1486		/* Pad out the end of the entry. */
1487		r = __archive_write_nulls(a, (size_t)pax->entry_padding);
1488		if (r != ARCHIVE_OK) {
1489			/* If a write fails, we're pretty much toast. */
1490			archive_entry_free(entry_main);
1491			archive_string_free(&entry_name);
1492			return (ARCHIVE_FATAL);
1493		}
1494		pax->entry_bytes_remaining = pax->entry_padding = 0;
1495	}
1496
1497	/* Write the header for main entry. */
1498	r = __archive_write_output(a, ustarbuff, 512);
1499	if (r != ARCHIVE_OK) {
1500		archive_entry_free(entry_main);
1501		archive_string_free(&entry_name);
1502		return (r);
1503	}
1504
1505	/*
1506	 * Inform the client of the on-disk size we're using, so
1507	 * they can avoid unnecessarily writing a body for something
1508	 * that we're just going to ignore.
1509	 */
1510	archive_entry_set_size(entry_original, real_size);
1511	if (pax->sparse_list == NULL && real_size > 0) {
1512		/* This is not a sparse file but we handle its data as
1513		 * a sparse block. */
1514		sparse_list_add(pax, 0, real_size);
1515		sparse_total = real_size;
1516	}
1517	pax->entry_padding = 0x1ff & (-(int64_t)sparse_total);
1518	archive_entry_free(entry_main);
1519	archive_string_free(&entry_name);
1520
1521	return (ret);
1522}
1523
1524/*
1525 * We need a valid name for the regular 'ustar' entry.  This routine
1526 * tries to hack something more-or-less reasonable.
1527 *
1528 * The approach here tries to preserve leading dir names.  We do so by
1529 * working with four sections:
1530 *   1) "prefix" directory names,
1531 *   2) "suffix" directory names,
1532 *   3) inserted dir name (optional),
1533 *   4) filename.
1534 *
1535 * These sections must satisfy the following requirements:
1536 *   * Parts 1 & 2 together form an initial portion of the dir name.
1537 *   * Part 3 is specified by the caller.  (It should not contain a leading
1538 *     or trailing '/'.)
1539 *   * Part 4 forms an initial portion of the base filename.
1540 *   * The filename must be <= 99 chars to fit the ustar 'name' field.
1541 *   * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld.
1542 *   * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
1543 *   * If the original name ends in a '/', the new name must also end in a '/'
1544 *   * Trailing '/.' sequences may be stripped.
1545 *
1546 * Note: Recall that the ustar format does not store the '/' separating
1547 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
1548 */
static char *
build_ustar_entry_name(char *dest, const char *src, size_t src_length,
    const char *insert)
{
	/*
	 * Parameters:
	 *   dest        Output buffer; receives the NUL-terminated result.
	 *               NOTE(review): the callers' buffers are declared
	 *               outside this function; it writes at most roughly
	 *               155 + 100 + 1 bytes for the inserts used in this
	 *               file -- confirm the buffers are at least that big.
	 *   src         Original pathname; only src[0 .. src_length) is used.
	 *   src_length  Number of bytes of src to consider.
	 *   insert      Optional directory element to place immediately
	 *               before the filename, or NULL for none.
	 * Returns dest.
	 *
	 * All positions below are byte offsets into src rather than
	 * pointers, so that no pointer outside the src buffer is ever
	 * formed or dereferenced.
	 */
	size_t name_start, name_end;	/* Part 4 is [name_start, name_end). */
	size_t prefix_end;		/* Part 1 is [0, prefix_end). */
	size_t suffix_end;		/* Part 2 is [prefix_end, suffix_end). */
	size_t budget = 99;		/* Bytes still free in the 'name' field. */
	size_t insert_length = 0;	/* strlen(insert). */
	size_t insert_cost = 0;		/* insert plus its two '/' separators. */
	int need_slash = 0;		/* Was there a trailing slash? */
	char *p;

	if (insert != NULL) {
		insert_length = strlen(insert);
		/* +2 allows for the '/' before and after the insert. */
		insert_cost = insert_length + 2;
	}

	/* Step 0: Quick bailout in a common case. */
	if (src_length < 100 && insert == NULL) {
		if (src_length > 0)
			memcpy(dest, src, src_length);
		dest[src_length] = '\0';
		return (dest);
	}

	/* Step 1: Locate filename and enforce the length restriction. */
	name_end = src_length;
	/* Remove trailing '/' chars and '/.' pairs. */
	for (;;) {
		if (name_end > 0 && src[name_end - 1] == '/') {
			name_end--;
			need_slash = 1; /* Remember to restore trailing '/'. */
			continue;
		}
		if (name_end > 1 && src[name_end - 1] == '.'
		    && src[name_end - 2] == '/') {
			name_end -= 2;
			need_slash = 1; /* "foo/." will become "foo/" */
			continue;
		}
		break;
	}
	if (need_slash)
		budget--;

	/* Find start of filename. */
	if (name_end == 0) {
		/*
		 * The whole name was made of '/' and '/.' elements, so
		 * there is no filename at all.  Treat it as empty instead
		 * of stepping back to the byte in front of src.
		 */
		name_start = 0;
	} else {
		name_start = name_end - 1;
		while (name_start > 0 && src[name_start] != '/')
			name_start--;
		if (src[name_start] == '/' && name_start < name_end - 1)
			name_start++;
	}

	/*
	 * Adjust name_end so that filename + insert fits in 99 chars.
	 * The budget is clamped at zero so that an over-long insert
	 * cannot wrap the unsigned arithmetic.
	 */
	budget = (insert_cost < budget) ? budget - insert_cost : 0;
	if (name_end - name_start > budget)
		name_end = name_start + budget;
	/* What is left is the max size for the "suffix" section (part 2). */
	budget -= name_end - name_start;

	/* Step 2: Locate the "prefix" section of the dirname, including
	 * trailing '/'. */
	prefix_end = 155;
	if (prefix_end > name_start)
		prefix_end = name_start;
	while (prefix_end > 0 && src[prefix_end] != '/')
		prefix_end--;
	if (prefix_end < name_start && src[prefix_end] == '/')
		prefix_end++;

	/* Step 3: Locate the "suffix" section of the dirname,
	 * including trailing '/'. */
	suffix_end = prefix_end + budget; /* Enforce limit. */
	if (suffix_end > name_start)
		suffix_end = name_start;
	while (suffix_end > prefix_end && src[suffix_end] != '/')
		suffix_end--;
	if (suffix_end < name_start && src[suffix_end] == '/')
		suffix_end++;

	/* Step 4: Build the new name. */
	p = dest;
	/* Parts 1 and 2 are adjacent in src, so one copy covers both. */
	if (suffix_end > 0) {
		memcpy(p, src, suffix_end);
		p += suffix_end;
	}
	if (insert != NULL) {
		/* Note: assume insert does not have leading or trailing '/' */
		memcpy(p, insert, insert_length);
		p += insert_length;
		*p++ = '/';
	}
	if (name_end > name_start) {
		memcpy(p, src + name_start, name_end - name_start);
		p += name_end - name_start;
	}
	/*
	 * Restore the trailing '/'.  When the filename is empty the
	 * output may already end in '/', in which case a second one
	 * would only produce "//".
	 */
	if (need_slash && (p == dest || p[-1] != '/'))
		*p++ = '/';
	*p = '\0';

	return (dest);
}
1657
1658/*
1659 * The ustar header for the pax extended attributes must have a
1660 * reasonable name:  SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename'
1661 * where 'pid' is the PID of the archiving process.  Unfortunately,
1662 * that makes testing a pain since the output varies for each run,
1663 * so I'm sticking with the simpler 'dirname'/PaxHeader/'filename'
1664 * for now.  (Someday, I'll make this settable.  Then I can use the
1665 * SUS recommendation as default and test harnesses can override it
1666 * to get predictable results.)
1667 *
1668 * Joerg Schilling has argued that this is unnecessary because, in
1669 * practice, if the pax extended attributes get extracted as regular
1670 * files, no one is going to bother reading those attributes to
1671 * manually restore them.  Based on this, 'star' uses
1672 * /tmp/PaxHeader/'basename' as the ustar header name.  This is a
1673 * tempting argument, in part because it's simpler than the SUSv3
1674 * recommendation, but I'm not entirely convinced.  I'm also
1675 * uncomfortable with the fact that "/tmp" is a Unix-ism.
1676 *
 * The following routine leverages build_ustar_entry_name() above and
 * so is simpler than you might think.  It just needs to provide the
 * additional path element and handle a few pathological cases.
1680 */
static char *
build_pax_attribute_name(char *dest, const char *src)
{
	char dirname[64];	/* Name of the inserted directory element. */
	size_t keep;		/* Number of leading bytes of src to keep. */

	/* No name at all: use a fixed placeholder. */
	if (src == NULL || src[0] == '\0') {
		strcpy(dest, "PaxHeader/blank");
		return (dest);
	}

	/*
	 * Drop trailing '/' characters, and the '.' of a trailing "/."
	 * (the '/' it exposes is then dropped on the next pass).
	 */
	keep = strlen(src);
	while (keep > 0) {
		if (src[keep - 1] == '/')
			keep--;
		else if (keep > 1 && src[keep - 1] == '.'
		    && src[keep - 2] == '/')
			keep--;
		else
			break;
	}

	/* Nothing survived the pruning: "/", "/.", "/./.", "/.//./." ... */
	if (keep == 0) {
		strcpy(dest, "/PaxHeader/rootdir");
		return (dest);
	}

	/* A bare "." gets a proper filename of its own. */
	if (keep == 1 && src[0] == '.') {
		strcpy(dest, "PaxHeader/currentdir");
		return (dest);
	}

	/*
	 * TODO: Keep this string in the 'pax' structure so it is not
	 * rebuilt for every entry; that would also let clients
	 * override it.
	 */
#if HAVE_GETPID && 0  /* Disable this for now; see above comment. */
	snprintf(dirname, sizeof(dirname), "PaxHeader.%d", getpid());
#else
	/* If the platform can't fetch the pid, don't include it. */
	strcpy(dirname, "PaxHeader");
#endif

	/* Ordinary name: let the ustar name builder add "/PaxHeader/". */
	return (build_ustar_entry_name(dest, src, keep, dirname));
}
1740
1741/*
 * GNU PAX Format 1.0 requires a special name of the form:
 * <dir>/GNUSparseFile.<pid>/<original file name>
 *
 * Since reproducible archives are more important, use 0 as pid.
 *
 * This function is used only for sparse files, which are always
 * regular files.
1749 */
static char *
build_gnu_sparse_name(char *dest, const char *src)
{
	/*
	 * dest: output buffer for the NUL-terminated special name.
	 * src:  pathname of the sparse entry; may be NULL or empty.
	 * Returns dest.
	 */
	size_t len;

	/* Handle the null filename case. */
	if (src == NULL || *src == '\0') {
		strcpy(dest, "GNUSparseFile/blank");
		return (dest);
	}

	/* Prune final '/' and other unwanted final elements. */
	len = strlen(src);
	while (len > 0) {
		if (src[len - 1] == '/') {
			/* Ends in "/", remove the '/' */
			len--;
		} else if (len > 1 && src[len - 1] == '.'
		    && src[len - 2] == '/') {
			/* Ends in "/.", remove the '.' */
			len--;
		} else {
			break;
		}
	}

	/*
	 * Pathological case: nothing is left after pruning ("/", "/.",
	 * "//" ...).  Do not hand a zero-length name to
	 * build_ustar_entry_name(); treat it like a blank name instead.
	 */
	if (len == 0) {
		strcpy(dest, "GNUSparseFile/blank");
		return (dest);
	}

	/* General case: build a ustar-compatible name adding
	 * "/GNUSparseFile.0/". */
	build_ustar_entry_name(dest, src, len, "GNUSparseFile.0");

	return (dest);
}
1784
1785/* Write two null blocks for the end of archive */
static int
archive_write_pax_close(struct archive_write *a)
{
	/* The archive is terminated by two 512-byte blocks of zeros. */
	const size_t trailer_bytes = 2 * 512;

	return (__archive_write_nulls(a, trailer_bytes));
}
1791
1792static int
1793archive_write_pax_free(struct archive_write *a)
1794{
1795	struct pax *pax;
1796
1797	pax = (struct pax *)a->format_data;
1798	if (pax == NULL)
1799		return (ARCHIVE_OK);
1800
1801	archive_string_free(&pax->pax_header);
1802	archive_string_free(&pax->sparse_map);
1803	archive_string_free(&pax->l_url_encoded_name);
1804	sparse_list_clear(pax);
1805	free(pax);
1806	a->format_data = NULL;
1807	return (ARCHIVE_OK);
1808}
1809
1810static int
1811archive_write_pax_finish_entry(struct archive_write *a)
1812{
1813	struct pax *pax;
1814	uint64_t remaining;
1815	int ret;
1816
1817	pax = (struct pax *)a->format_data;
1818	remaining = pax->entry_bytes_remaining;
1819	if (remaining == 0) {
1820		while (pax->sparse_list) {
1821			struct sparse_block *sb;
1822			if (!pax->sparse_list->is_hole)
1823				remaining += pax->sparse_list->remaining;
1824			sb = pax->sparse_list->next;
1825			free(pax->sparse_list);
1826			pax->sparse_list = sb;
1827		}
1828	}
1829	ret = __archive_write_nulls(a, (size_t)(remaining + pax->entry_padding));
1830	pax->entry_bytes_remaining = pax->entry_padding = 0;
1831	return (ret);
1832}
1833
1834static ssize_t
1835archive_write_pax_data(struct archive_write *a, const void *buff, size_t s)
1836{
1837	struct pax *pax;
1838	size_t ws;
1839	size_t total;
1840	int ret;
1841
1842	pax = (struct pax *)a->format_data;
1843
1844	/*
1845	 * According to GNU PAX format 1.0, write a sparse map
1846	 * before the body.
1847	 */
1848	if (archive_strlen(&(pax->sparse_map))) {
1849		ret = __archive_write_output(a, pax->sparse_map.s,
1850		    archive_strlen(&(pax->sparse_map)));
1851		if (ret != ARCHIVE_OK)
1852			return (ret);
1853		ret = __archive_write_nulls(a, pax->sparse_map_padding);
1854		if (ret != ARCHIVE_OK)
1855			return (ret);
1856		archive_string_empty(&(pax->sparse_map));
1857	}
1858
1859	total = 0;
1860	while (total < s) {
1861		const unsigned char *p;
1862
1863		while (pax->sparse_list != NULL &&
1864		    pax->sparse_list->remaining == 0) {
1865			struct sparse_block *sb = pax->sparse_list->next;
1866			free(pax->sparse_list);
1867			pax->sparse_list = sb;
1868		}
1869
1870		if (pax->sparse_list == NULL)
1871			return (total);
1872
1873		p = ((const unsigned char *)buff) + total;
1874		ws = s - total;
1875		if (ws > pax->sparse_list->remaining)
1876			ws = (size_t)pax->sparse_list->remaining;
1877
1878		if (pax->sparse_list->is_hole) {
1879			/* Current block is hole thus we do not write
1880			 * the body. */
1881			pax->sparse_list->remaining -= ws;
1882			total += ws;
1883			continue;
1884		}
1885
1886		ret = __archive_write_output(a, p, ws);
1887		pax->sparse_list->remaining -= ws;
1888		total += ws;
1889		if (ret != ARCHIVE_OK)
1890			return (ret);
1891	}
1892	return (total);
1893}
1894
/*
 * Report whether a string contains any byte outside the 7-bit ASCII
 * range.  A NULL pointer is reported as non-ASCII (returns 1).
 *
 * Returns 1 if a byte >= 128 is found before the terminating NUL,
 * 0 otherwise.
 */
static int
has_non_ASCII(const char *_p)
{
	const unsigned char *cursor;

	if (_p == NULL)
		return (1);

	for (cursor = (const unsigned char *)_p; *cursor != '\0'; cursor++) {
		if (*cursor >= 128)
			return (1);
	}
	return (0);
}
1906
/*
 * Decide whether url_encode() must escape a byte: anything outside
 * the printable, non-space ASCII range 33..126, plus '%' and '='.
 */
static int
url_encode_must_escape(unsigned char c)
{
	return (c < 33 || c > 126 || c == '%' || c == '=');
}

/*
 * Used by extended attribute support; encodes the name
 * so that there will be no '=' characters in the result.
 *
 * Each escaped byte becomes '%' followed by two uppercase hex digits;
 * every other byte is copied unchanged.
 *
 * Returns a NUL-terminated string allocated with malloc() that the
 * caller must free, or NULL if the encoded length would overflow
 * size_t or the allocation fails.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	/*
	 * Inspect the input as unsigned bytes so that the range test and
	 * the nibble extraction do not depend on the signedness of plain
	 * char (right-shifting a negative value is implementation-defined).
	 */
	const unsigned char *s;
	char *d;
	size_t out_len = 0;
	char *out;

	/* First pass: compute the encoded length, guarding overflow. */
	for (s = (const unsigned char *)in; *s != '\0'; s++) {
		if (url_encode_must_escape(*s)) {
			if (SIZE_MAX - out_len < 4)
				return (NULL);
			out_len += 3;
		} else {
			if (SIZE_MAX - out_len < 2)
				return (NULL);
			out_len++;
		}
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	/* Second pass: emit the encoded bytes. */
	for (s = (const unsigned char *)in, d = out; *s != '\0'; s++) {
		if (url_encode_must_escape(*s)) {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[*s >> 4];
			*d++ = hex[*s & 0x0f];
		} else {
			*d++ = (char)*s;
		}
	}
	*d = '\0';
	return (out);
}
1949
/*
 * Encode a sequence of bytes into a C string using base-64 encoding.
 *
 * The output uses the alphabet A-Z, a-z, 0-9, '+', '/' and carries no
 * '=' padding: a trailing group of 1 byte yields 2 characters and a
 * trailing group of 2 bytes yields 3 characters.
 *
 * Returns a null-terminated C string allocated with malloc(); caller
 * is responsible for freeing the result.  Returns NULL if the encoded
 * size would overflow size_t or if the allocation fails.
 */
static char *
base64_encode(const char *s, size_t len)
{
	static const char digits[64] =
	    { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	      'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
	      'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
	      't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
	      '8','9','+','/' };
	/*
	 * Read the input as unsigned bytes: left-shifting a negative
	 * (sign-extended) char value would be undefined behavior.
	 */
	const unsigned char *p = (const unsigned char *)s;
	uint32_t v;
	char *d, *out;

	/* Refuse lengths for which "len * 4 + 2" below would wrap. */
	if (len > (SIZE_MAX - 2) / 4)
		return (NULL);

	/* 3 bytes becomes 4 chars, but round up and allow for trailing NUL */
	out = (char *)malloc((len * 4 + 2) / 3 + 1);
	if (out == NULL)
		return (NULL);
	d = out;

	/* Convert each group of 3 bytes into 4 characters. */
	while (len >= 3) {
		v = ((uint32_t)p[0] << 16)
		    | ((uint32_t)p[1] << 8)
		    | (uint32_t)p[2];
		p += 3;
		len -= 3;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		*d++ = digits[(v) & 0x3f];
	}
	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
	switch (len) {
	case 1:
		v = (uint32_t)p[0] << 16;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		break;
	case 2:
		v = ((uint32_t)p[0] << 16)
		    | ((uint32_t)p[1] << 8);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		break;
	default:
		/* len == 0: the input was a whole number of groups. */
		break;
	}
	/* Add trailing NUL character so output is a valid C string. */
	*d = '\0';
	return (out);
}
2006
2007static void
2008sparse_list_clear(struct pax *pax)
2009{
2010	while (pax->sparse_list != NULL) {
2011		struct sparse_block *sb = pax->sparse_list;
2012		pax->sparse_list = sb->next;
2013		free(sb);
2014	}
2015	pax->sparse_tail = NULL;
2016}
2017
2018static int
2019_sparse_list_add_block(struct pax *pax, int64_t offset, int64_t length,
2020    int is_hole)
2021{
2022	struct sparse_block *sb;
2023
2024	sb = (struct sparse_block *)malloc(sizeof(*sb));
2025	if (sb == NULL)
2026		return (ARCHIVE_FATAL);
2027	sb->next = NULL;
2028	sb->is_hole = is_hole;
2029	sb->offset = offset;
2030	sb->remaining = length;
2031	if (pax->sparse_list == NULL || pax->sparse_tail == NULL)
2032		pax->sparse_list = pax->sparse_tail = sb;
2033	else {
2034		pax->sparse_tail->next = sb;
2035		pax->sparse_tail = sb;
2036	}
2037	return (ARCHIVE_OK);
2038}
2039
2040static int
2041sparse_list_add(struct pax *pax, int64_t offset, int64_t length)
2042{
2043	int64_t last_offset;
2044	int r;
2045
2046	if (pax->sparse_tail == NULL)
2047		last_offset = 0;
2048	else {
2049		last_offset = pax->sparse_tail->offset +
2050		    pax->sparse_tail->remaining;
2051	}
2052	if (last_offset < offset) {
2053		/* Add a hole block. */
2054		r = _sparse_list_add_block(pax, last_offset,
2055		    offset - last_offset, 1);
2056		if (r != ARCHIVE_OK)
2057			return (r);
2058	}
2059	/* Add data block. */
2060	return (_sparse_list_add_block(pax, offset, length, 0));
2061}
2062
/*
 * Largest mtime value that is stored directly in the ustar header.
 *
 * Technically, the mtime field in the ustar header can support
 * 33 bits.  All of them are used so that
 * tar/test/test_option_C_mtree.c stays simple and keeps passing
 * after 2038.  Where time_t is no wider than 32 bits, the signed
 * 32-bit maximum is used instead.
 */
static time_t
get_ustar_max_mtime(void)
{
	return ((sizeof(time_t) > sizeof(int32_t))
	    ? (time_t)0x1ffffffff
	    : (time_t)0x7fffffff);
}
2078