archive_write_set_format_pax.c revision 228761
1185377Ssam/*- 2187831Ssam * Copyright (c) 2003-2007 Tim Kientzle 3185377Ssam * All rights reserved. 4185377Ssam * 5185377Ssam * Redistribution and use in source and binary forms, with or without 6185377Ssam * modification, are permitted provided that the following conditions 7185377Ssam * are met: 8185377Ssam * 1. Redistributions of source code must retain the above copyright 9185377Ssam * notice, this list of conditions and the following disclaimer. 10185377Ssam * 2. Redistributions in binary form must reproduce the above copyright 11185377Ssam * notice, this list of conditions and the following disclaimer in the 12185377Ssam * documentation and/or other materials provided with the distribution. 13185377Ssam * 14185377Ssam * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15185377Ssam * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16185377Ssam * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17186018Ssam * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18185377Ssam * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19185377Ssam * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20185377Ssam * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21185377Ssam * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22185377Ssam * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23185377Ssam * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
24220298Sadrian */ 25185377Ssam 26188968Ssam#include "archive_platform.h" 27239604Sadrian__FBSDID("$FreeBSD: head/lib/libarchive/archive_write_set_format_pax.c 201162 2009-12-29 05:47:46Z kientzle $"); 28188968Ssam 29185406Ssam#ifdef HAVE_ERRNO_H 30185406Ssam#include <errno.h> 31185406Ssam#endif 32185406Ssam#ifdef HAVE_STDLIB_H 33185406Ssam#include <stdlib.h> 34185406Ssam#endif 35185406Ssam#ifdef HAVE_STRING_H 36185406Ssam#include <string.h> 37185406Ssam#endif 38185377Ssam 39186018Ssam#include "archive.h" 40185377Ssam#include "archive_entry.h" 41185417Ssam#include "archive_private.h" 42185406Ssam#include "archive_write_private.h" 43185406Ssam 44185406Ssamstruct pax { 45185377Ssam uint64_t entry_bytes_remaining; 46185377Ssam uint64_t entry_padding; 47185377Ssam struct archive_string pax_header; 48185377Ssam}; 49185377Ssam 50185377Ssamstatic void add_pax_attr(struct archive_string *, const char *key, 51185377Ssam const char *value); 52185377Ssamstatic void add_pax_attr_int(struct archive_string *, 53185377Ssam const char *key, int64_t value); 54185377Ssamstatic void add_pax_attr_time(struct archive_string *, 55185377Ssam const char *key, int64_t sec, 56185377Ssam unsigned long nanos); 57185377Ssamstatic void add_pax_attr_w(struct archive_string *, 58217624Sadrian const char *key, const wchar_t *wvalue); 59185377Ssamstatic ssize_t archive_write_pax_data(struct archive_write *, 60186018Ssam const void *, size_t); 61185377Ssamstatic int archive_write_pax_finish(struct archive_write *); 62185417Ssamstatic int archive_write_pax_destroy(struct archive_write *); 63185406Ssamstatic int archive_write_pax_finish_entry(struct archive_write *); 64185406Ssamstatic int archive_write_pax_header(struct archive_write *, 65185406Ssam struct archive_entry *); 66185406Ssamstatic char *base64_encode(const char *src, size_t len); 67185406Ssamstatic char *build_pax_attribute_name(char *dest, const char *src); 68185406Ssamstatic char *build_ustar_entry_name(char *dest, const char *src, 
69217624Sadrian size_t src_length, const char *insert); 70185406Ssamstatic char *format_int(char *dest, int64_t); 71185406Ssamstatic int has_non_ASCII(const wchar_t *); 72185406Ssamstatic char *url_encode(const char *in); 73185406Ssamstatic int write_nulls(struct archive_write *, size_t); 74185406Ssam 75185406Ssam/* 76185406Ssam * Set output format to 'restricted pax' format. 77185406Ssam * 78185406Ssam * This is the same as normal 'pax', but tries to suppress 79185406Ssam * the pax header whenever possible. This is the default for 80185406Ssam * bsdtar, for instance. 81185377Ssam */ 82185406Ssamint 83185406Ssamarchive_write_set_format_pax_restricted(struct archive *_a) 84185406Ssam{ 85188968Ssam struct archive_write *a = (struct archive_write *)_a; 86188968Ssam int r; 87188968Ssam r = archive_write_set_format_pax(&a->archive); 88188968Ssam a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_RESTRICTED; 89188968Ssam a->archive.archive_format_name = "restricted POSIX pax interchange"; 90188968Ssam return (r); 91188968Ssam} 92188968Ssam 93188968Ssam/* 94188968Ssam * Set output format to 'pax' format. 
95188968Ssam */ 96188968Ssamint 97188968Ssamarchive_write_set_format_pax(struct archive *_a) 98188968Ssam{ 99188968Ssam struct archive_write *a = (struct archive_write *)_a; 100188968Ssam struct pax *pax; 101188968Ssam 102188968Ssam if (a->format_destroy != NULL) 103188968Ssam (a->format_destroy)(a); 104188968Ssam 105239603Sadrian pax = (struct pax *)malloc(sizeof(*pax)); 106188968Ssam if (pax == NULL) { 107239603Sadrian archive_set_error(&a->archive, ENOMEM, "Can't allocate pax data"); 108188968Ssam return (ARCHIVE_FATAL); 109188968Ssam } 110188968Ssam memset(pax, 0, sizeof(*pax)); 111188968Ssam a->format_data = pax; 112188968Ssam 113221163Sadrian a->pad_uncompressed = 1; 114221163Sadrian a->format_name = "pax"; 115188968Ssam a->format_write_header = archive_write_pax_header; 116188968Ssam a->format_write_data = archive_write_pax_data; 117188968Ssam a->format_finish = archive_write_pax_finish; 118227372Sadrian a->format_destroy = archive_write_pax_destroy; 119227372Sadrian a->format_finish_entry = archive_write_pax_finish_entry; 120227372Sadrian a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE; 121188968Ssam a->archive.archive_format_name = "POSIX pax interchange"; 122188968Ssam return (ARCHIVE_OK); 123222305Sadrian} 124227372Sadrian 125227372Sadrian/* 126227372Sadrian * Note: This code assumes that 'nanos' has the same sign as 'sec', 127239604Sadrian * which implies that sec=-1, nanos=200000000 represents -1.2 seconds 128239604Sadrian * and not -0.8 seconds. This is a pretty pedantic point, as we're 129239604Sadrian * unlikely to encounter many real files created before Jan 1, 1970, 130239604Sadrian * much less ones with timestamps recorded to sub-second resolution. 
131239604Sadrian */ 132239604Sadrianstatic void 133239604Sadrianadd_pax_attr_time(struct archive_string *as, const char *key, 134239604Sadrian int64_t sec, unsigned long nanos) 135239604Sadrian{ 136239604Sadrian int digit, i; 137239604Sadrian char *t; 138239604Sadrian /* 139239604Sadrian * Note that each byte contributes fewer than 3 base-10 140239604Sadrian * digits, so this will always be big enough. 141239604Sadrian */ 142250166Sadrian char tmp[1 + 3*sizeof(sec) + 1 + 3*sizeof(nanos)]; 143250166Sadrian 144250166Sadrian tmp[sizeof(tmp) - 1] = 0; 145188968Ssam t = tmp + sizeof(tmp) - 1; 146188968Ssam 147188968Ssam /* Skip trailing zeros in the fractional part. */ 148188968Ssam for (digit = 0, i = 10; i > 0 && digit == 0; i--) { 149187831Ssam digit = nanos % 10; 150187831Ssam nanos /= 10; 151187831Ssam } 152187831Ssam 153187831Ssam /* Only format the fraction if it's non-zero. */ 154187831Ssam if (i > 0) { 155187831Ssam while (i > 0) { 156187831Ssam *--t = "0123456789"[digit]; 157187831Ssam digit = nanos % 10; 158185406Ssam nanos /= 10; 159185406Ssam i--; 160185406Ssam } 161185406Ssam *--t = '.'; 162185406Ssam } 163185406Ssam t = format_int(t, sec); 164185406Ssam 165185406Ssam add_pax_attr(as, key, t); 166185406Ssam} 167185406Ssam 168186018Ssamstatic char * 169185406Ssamformat_int(char *t, int64_t i) 170185417Ssam{ 171185406Ssam int sign; 172185406Ssam 173185406Ssam if (i < 0) { 174185377Ssam sign = -1; 175185406Ssam i = -i; 176185406Ssam } else 177185377Ssam sign = 1; 178185377Ssam 179188968Ssam do { 180188968Ssam *--t = "0123456789"[i % 10]; 181188968Ssam } while (i /= 10); 182188968Ssam if (sign < 0) 183188968Ssam *--t = '-'; 184188968Ssam return (t); 185188968Ssam} 186188968Ssam 187188968Ssamstatic void 188188968Ssamadd_pax_attr_int(struct archive_string *as, const char *key, int64_t value) 189188968Ssam{ 190188968Ssam char tmp[1 + 3 * sizeof(value)]; 191188968Ssam 192188968Ssam tmp[sizeof(tmp) - 1] = 0; 193188968Ssam add_pax_attr(as, key, format_int(tmp + 
sizeof(tmp) - 1, value)); 194188968Ssam} 195188968Ssam 196188968Ssamstatic char * 197188968Ssamutf8_encode(const wchar_t *wval) 198188968Ssam{ 199188968Ssam int utf8len; 200188968Ssam const wchar_t *wp; 201188968Ssam unsigned long wc; 202188968Ssam char *utf8_value, *p; 203188968Ssam 204188968Ssam utf8len = 0; 205188968Ssam for (wp = wval; *wp != L'\0'; ) { 206188968Ssam wc = *wp++; 207188968Ssam 208188968Ssam if (wc >= 0xd800 && wc <= 0xdbff 209188968Ssam && *wp >= 0xdc00 && *wp <= 0xdfff) { 210188968Ssam /* This is a surrogate pair. Combine into a 211188968Ssam * full Unicode value before encoding into 212188968Ssam * UTF-8. */ 213188968Ssam wc = (wc - 0xd800) << 10; /* High 10 bits */ 214188968Ssam wc += (*wp++ - 0xdc00); /* Low 10 bits */ 215188968Ssam wc += 0x10000; /* Skip BMP */ 216188968Ssam } 217188968Ssam if (wc <= 0x7f) 218188968Ssam utf8len++; 219188968Ssam else if (wc <= 0x7ff) 220188968Ssam utf8len += 2; 221185377Ssam else if (wc <= 0xffff) 222185377Ssam utf8len += 3; 223185377Ssam else if (wc <= 0x1fffff) 224185377Ssam utf8len += 4; 225185377Ssam else if (wc <= 0x3ffffff) 226185377Ssam utf8len += 5; 227185377Ssam else if (wc <= 0x7fffffff) 228217622Sadrian utf8len += 6; 229217622Sadrian /* Ignore larger values; UTF-8 can't encode them. */ 230217622Sadrian } 231217622Sadrian 232217622Sadrian utf8_value = (char *)malloc(utf8len + 1); 233217622Sadrian if (utf8_value == NULL) { 234217622Sadrian __archive_errx(1, "Not enough memory for attributes"); 235185377Ssam return (NULL); 236185377Ssam } 237217622Sadrian 238185377Ssam for (wp = wval, p = utf8_value; *wp != L'\0'; ) { 239185377Ssam wc = *wp++; 240185377Ssam if (wc >= 0xd800 && wc <= 0xdbff 241185377Ssam && *wp >= 0xdc00 && *wp <= 0xdfff) { 242185377Ssam /* Combine surrogate pair. 
*/ 243185377Ssam wc = (wc - 0xd800) << 10; 244185377Ssam wc += *wp++ - 0xdc00 + 0x10000; 245185377Ssam } 246185377Ssam if (wc <= 0x7f) { 247185377Ssam *p++ = (char)wc; 248185377Ssam } else if (wc <= 0x7ff) { 249185377Ssam p[0] = 0xc0 | ((wc >> 6) & 0x1f); 250185377Ssam p[1] = 0x80 | (wc & 0x3f); 251185377Ssam p += 2; 252185377Ssam } else if (wc <= 0xffff) { 253185377Ssam p[0] = 0xe0 | ((wc >> 12) & 0x0f); 254185377Ssam p[1] = 0x80 | ((wc >> 6) & 0x3f); 255185377Ssam p[2] = 0x80 | (wc & 0x3f); 256185377Ssam p += 3; 257185377Ssam } else if (wc <= 0x1fffff) { 258185377Ssam p[0] = 0xf0 | ((wc >> 18) & 0x07); 259185377Ssam p[1] = 0x80 | ((wc >> 12) & 0x3f); 260185377Ssam p[2] = 0x80 | ((wc >> 6) & 0x3f); 261185377Ssam p[3] = 0x80 | (wc & 0x3f); 262185377Ssam p += 4; 263185377Ssam } else if (wc <= 0x3ffffff) { 264185377Ssam p[0] = 0xf8 | ((wc >> 24) & 0x03); 265218011Sadrian p[1] = 0x80 | ((wc >> 18) & 0x3f); 266218011Sadrian p[2] = 0x80 | ((wc >> 12) & 0x3f); 267218011Sadrian p[3] = 0x80 | ((wc >> 6) & 0x3f); 268218011Sadrian p[4] = 0x80 | (wc & 0x3f); 269218011Sadrian p += 5; 270218011Sadrian } else if (wc <= 0x7fffffff) { 271218011Sadrian p[0] = 0xfc | ((wc >> 30) & 0x01); 272218011Sadrian p[1] = 0x80 | ((wc >> 24) & 0x3f); 273218011Sadrian p[1] = 0x80 | ((wc >> 18) & 0x3f); 274218011Sadrian p[2] = 0x80 | ((wc >> 12) & 0x3f); 275218011Sadrian p[3] = 0x80 | ((wc >> 6) & 0x3f); 276218011Sadrian p[4] = 0x80 | (wc & 0x3f); 277218011Sadrian p += 6; 278218011Sadrian } 279185377Ssam /* Ignore larger values; UTF-8 can't encode them. 
*/ 280218011Sadrian } 281218011Sadrian *p = '\0'; 282218011Sadrian 283218011Sadrian return (utf8_value); 284218011Sadrian} 285218011Sadrian 286218011Sadrianstatic void 287218011Sadrianadd_pax_attr_w(struct archive_string *as, const char *key, const wchar_t *wval) 288218011Sadrian{ 289218011Sadrian char *utf8_value = utf8_encode(wval); 290218011Sadrian if (utf8_value == NULL) 291218011Sadrian return; 292218011Sadrian add_pax_attr(as, key, utf8_value); 293218011Sadrian free(utf8_value); 294218011Sadrian} 295218011Sadrian 296218011Sadrian/* 297239287Sadrian * Add a key/value attribute to the pax header. This function handles 298239287Sadrian * the length field and various other syntactic requirements. 299239287Sadrian */ 300218011Sadrianstatic void 301218011Sadrianadd_pax_attr(struct archive_string *as, const char *key, const char *value) 302218011Sadrian{ 303218011Sadrian int digits, i, len, next_ten; 304239287Sadrian char tmp[1 + 3 * sizeof(int)]; /* < 3 base-10 digits per byte */ 305239287Sadrian 306239287Sadrian /*- 307239287Sadrian * PAX attributes have the following layout: 308239287Sadrian * <len> <space> <key> <=> <value> <nl> 309239287Sadrian */ 310239287Sadrian len = 1 + (int)strlen(key) + 1 + (int)strlen(value) + 1; 311239287Sadrian 312239287Sadrian /* 313239287Sadrian * The <len> field includes the length of the <len> field, so 314239287Sadrian * computing the correct length is tricky. I start by 315239287Sadrian * counting the number of base-10 digits in 'len' and 316239287Sadrian * computing the next higher power of 10. 
317218011Sadrian */ 318218011Sadrian next_ten = 1; 319218011Sadrian digits = 0; 320218011Sadrian i = len; 321239287Sadrian while (i > 0) { 322239287Sadrian i = i / 10; 323218011Sadrian digits++; 324218011Sadrian next_ten = next_ten * 10; 325218011Sadrian } 326218011Sadrian /* 327239287Sadrian * For example, if string without the length field is 99 328218011Sadrian * chars, then adding the 2 digit length "99" will force the 329218011Sadrian * total length past 100, requiring an extra digit. The next 330250824Sadrian * statement adjusts for this effect. 331218011Sadrian */ 332250824Sadrian if (len + digits >= next_ten) 333218011Sadrian digits++; 334218011Sadrian 335218011Sadrian /* Now, we have the right length so we can build the line. */ 336218011Sadrian tmp[sizeof(tmp) - 1] = 0; /* Null-terminate the work area. */ 337218011Sadrian archive_strcat(as, format_int(tmp + sizeof(tmp) - 1, len + digits)); 338218011Sadrian archive_strappend_char(as, ' '); 339218011Sadrian archive_strcat(as, key); 340218011Sadrian archive_strappend_char(as, '='); 341218011Sadrian archive_strcat(as, value); 342218011Sadrian archive_strappend_char(as, '\n'); 343218011Sadrian} 344185377Ssam 345185377Ssamstatic void 346185377Ssamarchive_write_pax_header_xattrs(struct pax *pax, struct archive_entry *entry) 347185377Ssam{ 348185377Ssam struct archive_string s; 349185377Ssam int i = archive_entry_xattr_reset(entry); 350185377Ssam 351185377Ssam while (i--) { 352185377Ssam const char *name; 353185377Ssam const void *value; 354185377Ssam char *encoded_value; 355218923Sadrian char *url_encoded_name = NULL, *encoded_name = NULL; 356218923Sadrian wchar_t *wcs_name = NULL; 357218923Sadrian size_t size; 358218923Sadrian 359218923Sadrian archive_entry_xattr_next(entry, &name, &value, &size); 360185377Ssam /* Name is URL-encoded, then converted to wchar_t, 361185377Ssam * then UTF-8 encoded. 
*/ 362185377Ssam url_encoded_name = url_encode(name); 363187831Ssam if (url_encoded_name != NULL) { 364185377Ssam /* Convert narrow-character to wide-character. */ 365187831Ssam size_t wcs_length = strlen(url_encoded_name); 366187831Ssam wcs_name = (wchar_t *)malloc((wcs_length + 1) * sizeof(wchar_t)); 367185377Ssam if (wcs_name == NULL) 368185377Ssam __archive_errx(1, "No memory for xattr conversion"); 369185377Ssam mbstowcs(wcs_name, url_encoded_name, wcs_length); 370185377Ssam wcs_name[wcs_length] = 0; 371185377Ssam free(url_encoded_name); /* Done with this. */ 372185377Ssam } 373185377Ssam if (wcs_name != NULL) { 374185377Ssam encoded_name = utf8_encode(wcs_name); 375185377Ssam free(wcs_name); /* Done with wchar_t name. */ 376185377Ssam } 377188773Ssam 378188773Ssam encoded_value = base64_encode((const char *)value, size); 379185377Ssam 380188773Ssam if (encoded_name != NULL && encoded_value != NULL) { 381188773Ssam archive_string_init(&s); 382188773Ssam archive_strcpy(&s, "LIBARCHIVE.xattr."); 383188773Ssam archive_strcat(&s, encoded_name); 384188773Ssam add_pax_attr(&(pax->pax_header), s.s, encoded_value); 385188773Ssam archive_string_free(&s); 386188773Ssam } 387188773Ssam free(encoded_name); 388188773Ssam free(encoded_value); 389185377Ssam } 390188773Ssam} 391188773Ssam 392188773Ssam/* 393188773Ssam * TODO: Consider adding 'comment' and 'charset' fields to 394188773Ssam * archive_entry so that clients can specify them. Also, consider 395188773Ssam * adding generic key/value tags so clients can add arbitrary 396188773Ssam * key/value data. 
397188773Ssam */ 398188773Ssamstatic int 399185377Ssamarchive_write_pax_header(struct archive_write *a, 400188773Ssam struct archive_entry *entry_original) 401188773Ssam{ 402188773Ssam struct archive_entry *entry_main; 403188773Ssam const char *p; 404188773Ssam char *t; 405185377Ssam const wchar_t *wp; 406185377Ssam const char *suffix; 407191022Ssam int need_extension, r, ret; 408185377Ssam struct pax *pax; 409185377Ssam const char *hdrcharset = NULL; 410188773Ssam const char *hardlink; 411188773Ssam const char *path = NULL, *linkpath = NULL; 412188773Ssam const char *uname = NULL, *gname = NULL; 413188773Ssam const wchar_t *path_w = NULL, *linkpath_w = NULL; 414188773Ssam const wchar_t *uname_w = NULL, *gname_w = NULL; 415185377Ssam 416185377Ssam char paxbuff[512]; 417185377Ssam char ustarbuff[512]; 418185377Ssam char ustar_entry_name[256]; 419185377Ssam char pax_entry_name[256]; 420185377Ssam 421185377Ssam ret = ARCHIVE_OK; 422185377Ssam need_extension = 0; 423185377Ssam pax = (struct pax *)a->format_data; 424185377Ssam 425185377Ssam hardlink = archive_entry_hardlink(entry_original); 426239634Sadrian 427239634Sadrian /* Make sure this is a type of entry that we can handle here */ 428239634Sadrian if (hardlink == NULL) { 429239634Sadrian switch (archive_entry_filetype(entry_original)) { 430239634Sadrian case AE_IFBLK: 431239634Sadrian case AE_IFCHR: 432239634Sadrian case AE_IFIFO: 433239634Sadrian case AE_IFLNK: 434239634Sadrian case AE_IFREG: 435239634Sadrian break; 436239634Sadrian case AE_IFDIR: 437239634Sadrian /* 438239634Sadrian * Ensure a trailing '/'. Modify the original 439239634Sadrian * entry so the client sees the change. 
440239634Sadrian */ 441239634Sadrian p = archive_entry_pathname(entry_original); 442239634Sadrian if (p[strlen(p) - 1] != '/') { 443239634Sadrian t = (char *)malloc(strlen(p) + 2); 444239634Sadrian if (t == NULL) { 445239634Sadrian archive_set_error(&a->archive, ENOMEM, 446239634Sadrian "Can't allocate pax data"); 447239634Sadrian return(ARCHIVE_FATAL); 448239634Sadrian } 449239634Sadrian strcpy(t, p); 450239634Sadrian strcat(t, "/"); 451239634Sadrian archive_entry_copy_pathname(entry_original, t); 452239634Sadrian free(t); 453239634Sadrian } 454239634Sadrian break; 455239634Sadrian case AE_IFSOCK: 456239634Sadrian archive_set_error(&a->archive, 457239634Sadrian ARCHIVE_ERRNO_FILE_FORMAT, 458239634Sadrian "tar format cannot archive socket"); 459239634Sadrian return (ARCHIVE_WARN); 460239634Sadrian default: 461239634Sadrian archive_set_error(&a->archive, 462239634Sadrian ARCHIVE_ERRNO_FILE_FORMAT, 463239634Sadrian "tar format cannot archive this (type=0%lo)", 464239634Sadrian (unsigned long)archive_entry_filetype(entry_original)); 465239634Sadrian return (ARCHIVE_WARN); 466239634Sadrian } 467239634Sadrian } 468239634Sadrian 469239634Sadrian /* Copy entry so we can modify it as needed. */ 470185377Ssam entry_main = archive_entry_clone(entry_original); 471185377Ssam archive_string_empty(&(pax->pax_header)); /* Blank our work area. */ 472185377Ssam 473185377Ssam /* 474185377Ssam * First, check the name fields and see if any of them 475185377Ssam * require binary coding. If any of them does, then all of 476185377Ssam * them do. 
477185377Ssam */ 478185377Ssam hdrcharset = NULL; 479185377Ssam path = archive_entry_pathname(entry_main); 480185377Ssam path_w = archive_entry_pathname_w(entry_main); 481187831Ssam if (path != NULL && path_w == NULL) { 482185377Ssam archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 483187831Ssam "Can't translate pathname '%s' to UTF-8", path); 484185377Ssam ret = ARCHIVE_WARN; 485187831Ssam hdrcharset = "BINARY"; 486185377Ssam } 487187831Ssam uname = archive_entry_uname(entry_main); 488185377Ssam uname_w = archive_entry_uname_w(entry_main); 489187831Ssam if (uname != NULL && uname_w == NULL) { 490185377Ssam archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 491185377Ssam "Can't translate uname '%s' to UTF-8", uname); 492185377Ssam ret = ARCHIVE_WARN; 493185377Ssam hdrcharset = "BINARY"; 494185377Ssam } 495185377Ssam gname = archive_entry_gname(entry_main); 496185377Ssam gname_w = archive_entry_gname_w(entry_main); 497185377Ssam if (gname != NULL && gname_w == NULL) { 498185377Ssam archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 499185377Ssam "Can't translate gname '%s' to UTF-8", gname); 500220298Sadrian ret = ARCHIVE_WARN; 501220298Sadrian hdrcharset = "BINARY"; 502185377Ssam } 503185377Ssam linkpath = hardlink; 504185377Ssam if (linkpath != NULL) { 505187831Ssam linkpath_w = archive_entry_hardlink_w(entry_main); 506185377Ssam } else { 507185377Ssam linkpath = archive_entry_symlink(entry_main); 508185377Ssam if (linkpath != NULL) 509220298Sadrian linkpath_w = archive_entry_symlink_w(entry_main); 510220298Sadrian } 511220298Sadrian if (linkpath != NULL && linkpath_w == NULL) { 512220298Sadrian archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 513220298Sadrian "Can't translate linkpath '%s' to UTF-8", linkpath); 514220298Sadrian ret = ARCHIVE_WARN; 515185377Ssam hdrcharset = "BINARY"; 516187831Ssam } 517185377Ssam 518185377Ssam /* Store the header encoding first, to be nice to readers. 
*/ 519185377Ssam if (hdrcharset != NULL) 520240444Sadrian add_pax_attr(&(pax->pax_header), "hdrcharset", hdrcharset); 521240444Sadrian 522240444Sadrian 523240444Sadrian /* 524240444Sadrian * If name is too long, or has non-ASCII characters, add 525240444Sadrian * 'path' to pax extended attrs. (Note that an unconvertible 526240444Sadrian * name must have non-ASCII characters.) 527185377Ssam */ 528185377Ssam if (path == NULL) { 529185377Ssam /* We don't have a narrow version, so we have to store 530185377Ssam * the wide version. */ 531185377Ssam add_pax_attr_w(&(pax->pax_header), "path", path_w); 532185377Ssam archive_entry_set_pathname(entry_main, "@WidePath"); 533187831Ssam need_extension = 1; 534185377Ssam } else if (has_non_ASCII(path_w)) { 535185377Ssam /* We have non-ASCII characters. */ 536185377Ssam if (path_w == NULL || hdrcharset != NULL) { 537220298Sadrian /* Can't do UTF-8, so store it raw. */ 538220298Sadrian add_pax_attr(&(pax->pax_header), "path", path); 539220298Sadrian } else { 540220298Sadrian /* Store UTF-8 */ 541220298Sadrian add_pax_attr_w(&(pax->pax_header), 542220298Sadrian "path", path_w); 543185377Ssam } 544187831Ssam archive_entry_set_pathname(entry_main, 545185377Ssam build_ustar_entry_name(ustar_entry_name, 546185377Ssam path, strlen(path), NULL)); 547185377Ssam need_extension = 1; 548185377Ssam } else { 549185377Ssam /* We have an all-ASCII path; we'd like to just store 550185377Ssam * it in the ustar header if it will fit. Yes, this 551185377Ssam * duplicates some of the logic in 552185377Ssam * write_set_format_ustar.c 553185377Ssam */ 554185377Ssam if (strlen(path) <= 100) { 555185377Ssam /* Fits in the old 100-char tar name field. */ 556185377Ssam } else { 557185377Ssam /* Find largest suffix that will fit. */ 558185377Ssam /* Note: strlen() > 100, so strlen() - 100 - 1 >= 0 */ 559185377Ssam suffix = strchr(path + strlen(path) - 100 - 1, '/'); 560185377Ssam /* Don't attempt an empty prefix. 
*/ 561185377Ssam if (suffix == path) 562185377Ssam suffix = strchr(suffix + 1, '/'); 563185377Ssam /* We can put it in the ustar header if it's 564185377Ssam * all ASCII and it's either <= 100 characters 565185377Ssam * or can be split at a '/' into a prefix <= 566185377Ssam * 155 chars and a suffix <= 100 chars. (Note 567185377Ssam * the strchr() above will return NULL exactly 568185377Ssam * when the path can't be split.) 569185377Ssam */ 570185377Ssam if (suffix == NULL /* Suffix > 100 chars. */ 571185377Ssam || suffix[1] == '\0' /* empty suffix */ 572185377Ssam || suffix - path > 155) /* Prefix > 155 chars */ 573185377Ssam { 574185377Ssam if (path_w == NULL || hdrcharset != NULL) { 575185377Ssam /* Can't do UTF-8, so store it raw. */ 576185377Ssam add_pax_attr(&(pax->pax_header), 577185377Ssam "path", path); 578185377Ssam } else { 579185377Ssam /* Store UTF-8 */ 580185377Ssam add_pax_attr_w(&(pax->pax_header), 581185377Ssam "path", path_w); 582185377Ssam } 583185377Ssam archive_entry_set_pathname(entry_main, 584185377Ssam build_ustar_entry_name(ustar_entry_name, 585185377Ssam path, strlen(path), NULL)); 586185377Ssam need_extension = 1; 587185377Ssam } 588185377Ssam } 589185377Ssam } 590185377Ssam 591185377Ssam if (linkpath != NULL) { 592185377Ssam /* If link name is too long or has non-ASCII characters, add 593185377Ssam * 'linkpath' to pax extended attrs. */ 594185377Ssam if (strlen(linkpath) > 100 || linkpath_w == NULL 595185377Ssam || linkpath_w == NULL || has_non_ASCII(linkpath_w)) { 596185377Ssam if (linkpath_w == NULL || hdrcharset != NULL) 597185377Ssam /* If the linkpath is not convertible 598185377Ssam * to wide, or we're encoding in 599185377Ssam * binary anyway, store it raw. */ 600185377Ssam add_pax_attr(&(pax->pax_header), 601185377Ssam "linkpath", linkpath); 602185377Ssam else 603185377Ssam /* If the link is long or has a 604185377Ssam * non-ASCII character, store it as a 605224716Sadrian * pax extended attribute. 
*/ 606224716Sadrian add_pax_attr_w(&(pax->pax_header), 607224716Sadrian "linkpath", linkpath_w); 608185377Ssam if (strlen(linkpath) > 100) { 609185377Ssam if (hardlink != NULL) 610185377Ssam archive_entry_set_hardlink(entry_main, 611185377Ssam "././@LongHardLink"); 612185377Ssam else 613185377Ssam archive_entry_set_symlink(entry_main, 614185377Ssam "././@LongSymLink"); 615185377Ssam } 616185377Ssam need_extension = 1; 617185377Ssam } 618185377Ssam } 619185377Ssam 620185377Ssam /* If file size is too large, add 'size' to pax extended attrs. */ 621185377Ssam if (archive_entry_size(entry_main) >= (((int64_t)1) << 33)) { 622185377Ssam add_pax_attr_int(&(pax->pax_header), "size", 623185377Ssam archive_entry_size(entry_main)); 624185377Ssam need_extension = 1; 625185377Ssam } 626185377Ssam 627185377Ssam /* If numeric GID is too large, add 'gid' to pax extended attrs. */ 628185377Ssam if ((unsigned int)archive_entry_gid(entry_main) >= (1 << 18)) { 629185377Ssam add_pax_attr_int(&(pax->pax_header), "gid", 630185377Ssam archive_entry_gid(entry_main)); 631185377Ssam need_extension = 1; 632185377Ssam } 633185377Ssam 634185377Ssam /* If group name is too large or has non-ASCII characters, add 635185377Ssam * 'gname' to pax extended attrs. */ 636185377Ssam if (gname != NULL) { 637185377Ssam if (strlen(gname) > 31 638185377Ssam || gname_w == NULL 639185377Ssam || has_non_ASCII(gname_w)) 640185377Ssam { 641185377Ssam if (gname_w == NULL || hdrcharset != NULL) { 642185377Ssam add_pax_attr(&(pax->pax_header), 643185377Ssam "gname", gname); 644185377Ssam } else { 645185377Ssam add_pax_attr_w(&(pax->pax_header), 646185377Ssam "gname", gname_w); 647185377Ssam } 648185377Ssam need_extension = 1; 649185377Ssam } 650185377Ssam } 651185377Ssam 652185377Ssam /* If numeric UID is too large, add 'uid' to pax extended attrs. 
*/ 653185377Ssam if ((unsigned int)archive_entry_uid(entry_main) >= (1 << 18)) { 654185377Ssam add_pax_attr_int(&(pax->pax_header), "uid", 655185377Ssam archive_entry_uid(entry_main)); 656185377Ssam need_extension = 1; 657185377Ssam } 658185377Ssam 659185377Ssam /* Add 'uname' to pax extended attrs if necessary. */ 660185377Ssam if (uname != NULL) { 661185377Ssam if (strlen(uname) > 31 662185377Ssam || uname_w == NULL 663185377Ssam || has_non_ASCII(uname_w)) 664185377Ssam { 665185377Ssam if (uname_w == NULL || hdrcharset != NULL) { 666185377Ssam add_pax_attr(&(pax->pax_header), 667185377Ssam "uname", uname); 668185377Ssam } else { 669185377Ssam add_pax_attr_w(&(pax->pax_header), 670185377Ssam "uname", uname_w); 671185377Ssam } 672185377Ssam need_extension = 1; 673221603Sadrian } 674185377Ssam } 675185377Ssam 676221603Sadrian /* 677221603Sadrian * POSIX/SUSv3 doesn't provide a standard key for large device 678221603Sadrian * numbers. I use the same keys here that Joerg Schilling 679221603Sadrian * used for 'star.' (Which, somewhat confusingly, are called 680185377Ssam * "devXXX" even though they code "rdev" values.) No doubt, 681185377Ssam * other implementations use other keys. Note that there's no 682185377Ssam * reason we can't write the same information into a number of 683185377Ssam * different keys. 684185377Ssam * 685185377Ssam * Of course, this is only needed for block or char device entries. 686221603Sadrian */ 687221603Sadrian if (archive_entry_filetype(entry_main) == AE_IFBLK 688221603Sadrian || archive_entry_filetype(entry_main) == AE_IFCHR) { 689221603Sadrian /* 690221603Sadrian * If rdevmajor is too large, add 'SCHILY.devmajor' to 691221603Sadrian * extended attributes. 
692221603Sadrian */ 693221603Sadrian dev_t rdevmajor, rdevminor; 694221603Sadrian rdevmajor = archive_entry_rdevmajor(entry_main); 695221603Sadrian rdevminor = archive_entry_rdevminor(entry_main); 696222584Sadrian if (rdevmajor >= (1 << 18)) { 697222584Sadrian add_pax_attr_int(&(pax->pax_header), "SCHILY.devmajor", 698247366Sadrian rdevmajor); 699247366Sadrian /* 700247366Sadrian * Non-strict formatting below means we don't 701247366Sadrian * have to truncate here. Not truncating improves 702238333Sadrian * the chance that some more modern tar archivers 703238333Sadrian * (such as GNU tar 1.13) can restore the full 704238333Sadrian * value even if they don't understand the pax 705238333Sadrian * extended attributes. See my rant below about 706238333Sadrian * file size fields for additional details. 707238333Sadrian */ 708238333Sadrian /* archive_entry_set_rdevmajor(entry_main, 709238333Sadrian rdevmajor & ((1 << 18) - 1)); */ 710238333Sadrian need_extension = 1; 711238333Sadrian } 712238333Sadrian 713238333Sadrian /* 714238333Sadrian * If devminor is too large, add 'SCHILY.devminor' to 715238280Sadrian * extended attributes. 716238280Sadrian */ 717238280Sadrian if (rdevminor >= (1 << 18)) { 718238280Sadrian add_pax_attr_int(&(pax->pax_header), "SCHILY.devminor", 719238280Sadrian rdevminor); 720238280Sadrian /* Truncation is not necessary here, either. */ 721238280Sadrian /* archive_entry_set_rdevminor(entry_main, 722238280Sadrian rdevminor & ((1 << 18) - 1)); */ 723238280Sadrian need_extension = 1; 724238280Sadrian } 725238280Sadrian } 726238280Sadrian 727238280Sadrian /* 728238280Sadrian * Technically, the mtime field in the ustar header can 729238280Sadrian * support 33 bits, but many platforms use signed 32-bit time 730238280Sadrian * values. The cutoff of 0x7fffffff here is a compromise. 
731238280Sadrian * Yes, this check is duplicated just below; this helps to 732238280Sadrian * avoid writing an mtime attribute just to handle a 733238280Sadrian * high-resolution timestamp in "restricted pax" mode. 734238280Sadrian */ 735238280Sadrian if (!need_extension && 736238280Sadrian ((archive_entry_mtime(entry_main) < 0) 737238280Sadrian || (archive_entry_mtime(entry_main) >= 0x7fffffff))) 738238280Sadrian need_extension = 1; 739238280Sadrian 740238858Sadrian /* I use a star-compatible file flag attribute. */ 741238858Sadrian p = archive_entry_fflags_text(entry_main); 742221603Sadrian if (!need_extension && p != NULL && *p != '\0') 743221603Sadrian need_extension = 1; 744244854Sadrian 745244854Sadrian /* If there are non-trivial ACL entries, we need an extension. */ 746221603Sadrian if (!need_extension && archive_entry_acl_count(entry_original, 747221603Sadrian ARCHIVE_ENTRY_ACL_TYPE_ACCESS) > 0) 748185377Ssam need_extension = 1; 749185377Ssam 750185377Ssam /* If there are non-trivial ACL entries, we need an extension. */ 751251360Sadrian if (!need_extension && archive_entry_acl_count(entry_original, 752251360Sadrian ARCHIVE_ENTRY_ACL_TYPE_DEFAULT) > 0) 753251360Sadrian need_extension = 1; 754222584Sadrian 755222584Sadrian /* If there are extended attributes, we need an extension */ 756221603Sadrian if (!need_extension && archive_entry_xattr_count(entry_original) > 0) 757221603Sadrian need_extension = 1; 758221603Sadrian 759221603Sadrian /* 760192396Ssam * The following items are handled differently in "pax 761192396Ssam * restricted" format. In particular, in "pax restricted" 762192396Ssam * format they won't be added unless need_extension is 763195114Ssam * already set (we're already generating an extended header, so 764195114Ssam * may as well include these). 
765218150Sadrian */ 766218150Sadrian if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_RESTRICTED || 767218150Sadrian need_extension) { 768218150Sadrian 769218150Sadrian if (archive_entry_mtime(entry_main) < 0 || 770218150Sadrian archive_entry_mtime(entry_main) >= 0x7fffffff || 771218150Sadrian archive_entry_mtime_nsec(entry_main) != 0) 772218150Sadrian add_pax_attr_time(&(pax->pax_header), "mtime", 773218150Sadrian archive_entry_mtime(entry_main), 774218150Sadrian archive_entry_mtime_nsec(entry_main)); 775218150Sadrian 776220324Sadrian if (archive_entry_ctime(entry_main) != 0 || 777220324Sadrian archive_entry_ctime_nsec(entry_main) != 0) 778225444Sadrian add_pax_attr_time(&(pax->pax_header), "ctime", 779225444Sadrian archive_entry_ctime(entry_main), 780226488Sadrian archive_entry_ctime_nsec(entry_main)); 781226488Sadrian 782227410Sadrian if (archive_entry_atime(entry_main) != 0 || 783227410Sadrian archive_entry_atime_nsec(entry_main) != 0) 784239630Sadrian add_pax_attr_time(&(pax->pax_header), "atime", 785239630Sadrian archive_entry_atime(entry_main), 786239630Sadrian archive_entry_atime_nsec(entry_main)); 787251400Sadrian 788251400Sadrian /* Store birth/creationtime only if it's earlier than mtime */ 789185377Ssam if (archive_entry_birthtime_is_set(entry_main) && 790185377Ssam archive_entry_birthtime(entry_main) 791185377Ssam < archive_entry_mtime(entry_main)) 792185377Ssam add_pax_attr_time(&(pax->pax_header), 793185377Ssam "LIBARCHIVE.creationtime", 794185377Ssam archive_entry_birthtime(entry_main), 795185377Ssam archive_entry_birthtime_nsec(entry_main)); 796185377Ssam 797185377Ssam /* I use a star-compatible file flag attribute. */ 798185377Ssam p = archive_entry_fflags_text(entry_main); 799185377Ssam if (p != NULL && *p != '\0') 800185377Ssam add_pax_attr(&(pax->pax_header), "SCHILY.fflags", p); 801185377Ssam 802185377Ssam /* I use star-compatible ACL attributes. 
*/ 803185377Ssam wp = archive_entry_acl_text_w(entry_original, 804185377Ssam ARCHIVE_ENTRY_ACL_TYPE_ACCESS | 805185377Ssam ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); 806185377Ssam if (wp != NULL && *wp != L'\0') 807185377Ssam add_pax_attr_w(&(pax->pax_header), 808185377Ssam "SCHILY.acl.access", wp); 809185377Ssam wp = archive_entry_acl_text_w(entry_original, 810185377Ssam ARCHIVE_ENTRY_ACL_TYPE_DEFAULT | 811185377Ssam ARCHIVE_ENTRY_ACL_STYLE_EXTRA_ID); 812185377Ssam if (wp != NULL && *wp != L'\0') 813185377Ssam add_pax_attr_w(&(pax->pax_header), 814185377Ssam "SCHILY.acl.default", wp); 815185377Ssam 816185377Ssam /* Include star-compatible metadata info. */ 817185377Ssam /* Note: "SCHILY.dev{major,minor}" are NOT the 818185377Ssam * major/minor portions of "SCHILY.dev". */ 819185377Ssam add_pax_attr_int(&(pax->pax_header), "SCHILY.dev", 820185377Ssam archive_entry_dev(entry_main)); 821185377Ssam add_pax_attr_int(&(pax->pax_header), "SCHILY.ino", 822185377Ssam archive_entry_ino64(entry_main)); 823185377Ssam add_pax_attr_int(&(pax->pax_header), "SCHILY.nlink", 824185377Ssam archive_entry_nlink(entry_main)); 825185377Ssam 826185377Ssam /* Store extended attributes */ 827185377Ssam archive_write_pax_header_xattrs(pax, entry_original); 828185377Ssam } 829185377Ssam 830185377Ssam /* Only regular files have data. */ 831185377Ssam if (archive_entry_filetype(entry_main) != AE_IFREG) 832185377Ssam archive_entry_set_size(entry_main, 0); 833185377Ssam 834185377Ssam /* 835185377Ssam * Pax-restricted does not store data for hardlinks, in order 836185377Ssam * to improve compatibility with ustar. 837185377Ssam */ 838185377Ssam if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE && 839185377Ssam hardlink != NULL) 840185377Ssam archive_entry_set_size(entry_main, 0); 841185377Ssam 842185377Ssam /* 843185377Ssam * XXX Full pax interchange format does permit a hardlink 844185377Ssam * entry to have data associated with it. 
I'm not supporting 845185377Ssam * that here because the client expects me to tell them whether 846185377Ssam * or not this format expects data for hardlinks. If I 847185377Ssam * don't check here, then every pax archive will end up with 848185377Ssam * duplicated data for hardlinks. Someday, there may be 849185377Ssam * need to select this behavior, in which case the following 850185377Ssam * will need to be revisited. XXX 851185377Ssam */ 852185377Ssam if (hardlink != NULL) 853185377Ssam archive_entry_set_size(entry_main, 0); 854185377Ssam 855185377Ssam /* Format 'ustar' header for main entry. 856185377Ssam * 857185377Ssam * The trouble with file size: If the reader can't understand 858185377Ssam * the file size, they may not be able to locate the next 859185377Ssam * entry and the rest of the archive is toast. Pax-compliant 860185377Ssam * readers are supposed to ignore the file size in the main 861185377Ssam * header, so the question becomes how to maximize portability 862185377Ssam * for readers that don't support pax attribute extensions. 863188771Ssam * For maximum compatibility, I permit numeric extensions in 864188771Ssam * the main header so that the file size stored will always be 865188771Ssam * correct, even if it's in a format that only some 866188771Ssam * implementations understand. The technique used here is: 867188771Ssam * 868188771Ssam * a) If possible, follow the standard exactly. This handles 869188771Ssam * files up to 8 gigabytes minus 1. 870188771Ssam * 871188771Ssam * b) If that fails, try octal but omit the field terminator. 872185377Ssam * That handles files up to 64 gigabytes minus 1. 873185377Ssam * 874185377Ssam * c) Otherwise, use base-256 extensions. That handles files 875185377Ssam * up to 2^63 in this implementation, with the potential to 876185377Ssam * go up to 2^94. That should hold us for a while. 
;-) 877185377Ssam * 878185377Ssam * The non-strict formatter uses similar logic for other 879185377Ssam * numeric fields, though they're less critical. 880185377Ssam */ 881185377Ssam __archive_write_format_header_ustar(a, ustarbuff, entry_main, -1, 0); 882185377Ssam 883185377Ssam /* If we built any extended attributes, write that entry first. */ 884185377Ssam if (archive_strlen(&(pax->pax_header)) > 0) { 885185377Ssam struct archive_entry *pax_attr_entry; 886188771Ssam time_t s; 887188771Ssam uid_t uid; 888188771Ssam gid_t gid; 889188771Ssam mode_t mode; 890185377Ssam 891185377Ssam pax_attr_entry = archive_entry_new(); 892185377Ssam p = archive_entry_pathname(entry_main); 893185377Ssam archive_entry_set_pathname(pax_attr_entry, 894185377Ssam build_pax_attribute_name(pax_entry_name, p)); 895185377Ssam archive_entry_set_size(pax_attr_entry, 896185377Ssam archive_strlen(&(pax->pax_header))); 897185377Ssam /* Copy uid/gid (but clip to ustar limits). */ 898185377Ssam uid = archive_entry_uid(entry_main); 899185377Ssam if ((unsigned int)uid >= 1 << 18) 900185377Ssam uid = (uid_t)(1 << 18) - 1; 901185377Ssam archive_entry_set_uid(pax_attr_entry, uid); 902185377Ssam gid = archive_entry_gid(entry_main); 903185377Ssam if ((unsigned int)gid >= 1 << 18) 904185377Ssam gid = (gid_t)(1 << 18) - 1; 905185377Ssam archive_entry_set_gid(pax_attr_entry, gid); 906185377Ssam /* Copy mode over (but not setuid/setgid bits) */ 907185377Ssam mode = archive_entry_mode(entry_main); 908185377Ssam#ifdef S_ISUID 909185377Ssam mode &= ~S_ISUID; 910185377Ssam#endif 911185377Ssam#ifdef S_ISGID 912185377Ssam mode &= ~S_ISGID; 913185377Ssam#endif 914185377Ssam#ifdef S_ISVTX 915185380Ssam mode &= ~S_ISVTX; 916185380Ssam#endif 917185380Ssam archive_entry_set_mode(pax_attr_entry, mode); 918185380Ssam 919185380Ssam /* Copy uname/gname. 
*/ 920185380Ssam archive_entry_set_uname(pax_attr_entry, 921185380Ssam archive_entry_uname(entry_main)); 922185380Ssam archive_entry_set_gname(pax_attr_entry, 923185380Ssam archive_entry_gname(entry_main)); 924185377Ssam 925185377Ssam /* Copy mtime, but clip to ustar limits. */ 926185377Ssam s = archive_entry_mtime(entry_main); 927185377Ssam if (s < 0) { s = 0; } 928185377Ssam if (s >= 0x7fffffff) { s = 0x7fffffff; } 929185377Ssam archive_entry_set_mtime(pax_attr_entry, s, 0); 930185377Ssam 931185377Ssam /* Standard ustar doesn't support atime. */ 932185377Ssam archive_entry_set_atime(pax_attr_entry, 0, 0); 933185377Ssam 934185377Ssam /* Standard ustar doesn't support ctime. */ 935185377Ssam archive_entry_set_ctime(pax_attr_entry, 0, 0); 936185377Ssam 937185377Ssam r = __archive_write_format_header_ustar(a, paxbuff, 938185377Ssam pax_attr_entry, 'x', 1); 939185377Ssam 940185377Ssam archive_entry_free(pax_attr_entry); 941185377Ssam 942185377Ssam /* Note that the 'x' header shouldn't ever fail to format */ 943185377Ssam if (r != 0) { 944185377Ssam const char *msg = "archive_write_pax_header: " 945185377Ssam "'x' header failed?! This can't happen.\n"; 946185377Ssam size_t u = write(2, msg, strlen(msg)); 947185377Ssam (void)u; /* UNUSED */ 948185377Ssam exit(1); 949185377Ssam } 950185377Ssam r = (a->compressor.write)(a, paxbuff, 512); 951185377Ssam if (r != ARCHIVE_OK) { 952185377Ssam pax->entry_bytes_remaining = 0; 953185377Ssam pax->entry_padding = 0; 954185377Ssam return (ARCHIVE_FATAL); 955185377Ssam } 956185377Ssam 957185377Ssam pax->entry_bytes_remaining = archive_strlen(&(pax->pax_header)); 958185377Ssam pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); 959185377Ssam 960185377Ssam r = (a->compressor.write)(a, pax->pax_header.s, 961185377Ssam archive_strlen(&(pax->pax_header))); 962185377Ssam if (r != ARCHIVE_OK) { 963185377Ssam /* If a write fails, we're pretty much toast. 
*/ 964185377Ssam return (ARCHIVE_FATAL); 965185377Ssam } 966185377Ssam /* Pad out the end of the entry. */ 967185377Ssam r = write_nulls(a, pax->entry_padding); 968185377Ssam if (r != ARCHIVE_OK) { 969185377Ssam /* If a write fails, we're pretty much toast. */ 970185377Ssam return (ARCHIVE_FATAL); 971185377Ssam } 972185377Ssam pax->entry_bytes_remaining = pax->entry_padding = 0; 973185377Ssam } 974185377Ssam 975185377Ssam /* Write the header for main entry. */ 976185377Ssam r = (a->compressor.write)(a, ustarbuff, 512); 977185377Ssam if (r != ARCHIVE_OK) 978185377Ssam return (r); 979185377Ssam 980185377Ssam /* 981185377Ssam * Inform the client of the on-disk size we're using, so 982185377Ssam * they can avoid unnecessarily writing a body for something 983185377Ssam * that we're just going to ignore. 984185377Ssam */ 985185377Ssam archive_entry_set_size(entry_original, archive_entry_size(entry_main)); 986185377Ssam pax->entry_bytes_remaining = archive_entry_size(entry_main); 987185377Ssam pax->entry_padding = 0x1ff & (-(int64_t)pax->entry_bytes_remaining); 988185377Ssam archive_entry_free(entry_main); 989185377Ssam 990185377Ssam return (ret); 991185377Ssam} 992185377Ssam 993185377Ssam/* 994185377Ssam * We need a valid name for the regular 'ustar' entry. This routine 995185377Ssam * tries to hack something more-or-less reasonable. 996185377Ssam * 997185377Ssam * The approach here tries to preserve leading dir names. We do so by 998185377Ssam * working with four sections: 999185377Ssam * 1) "prefix" directory names, 1000185377Ssam * 2) "suffix" directory names, 1001185377Ssam * 3) inserted dir name (optional), 1002185377Ssam * 4) filename. 1003185377Ssam * 1004185377Ssam * These sections must satisfy the following requirements: 1005185377Ssam * * Parts 1 & 2 together form an initial portion of the dir name. 1006185377Ssam * * Part 3 is specified by the caller. (It should not contain a leading 1007185377Ssam * or trailing '/'.) 
/*
 * build_ustar_entry_name: squeeze a pathname into something that fits
 * the ustar 'prefix' and 'name' header fields, optionally inserting one
 * extra directory element immediately in front of the filename.
 *
 * The result is assembled from four sections of the original name:
 *   1) "prefix" directory names,
 *   2) "suffix" directory names,
 *   3) inserted dir name (optional),
 *   4) filename.
 *
 * These sections must satisfy the following requirements:
 *   * Parts 1 & 2 together form an initial portion of the dir name.
 *   * Part 3 is specified by the caller.  (It should not contain a
 *     leading or trailing '/'.)
 *   * Part 4 forms an initial portion of the base filename.
 *   * The filename must be <= 99 chars to fit the ustar 'name' field.
 *   * Parts 2, 3, 4 together must be <= 99 chars to fit the ustar 'name' fld.
 *   * Part 1 must be <= 155 chars to fit the ustar 'prefix' field.
 *   * If the original name ends in a '/', the new name must also end in a '/'
 *   * Trailing '/.' sequences may be stripped.
 *
 * Note: Recall that the ustar format does not store the '/' separating
 * parts 1 & 2, but does store the '/' separating parts 2 & 3.
 *
 * Parameters:
 *   dest       - output buffer.  No size is passed in and no bounds check
 *                is done, so the caller must supply a buffer big enough
 *                for all four sections plus the terminating NUL.
 *   src        - original pathname; only the first src_length bytes are
 *                considered.
 *   src_length - number of bytes of src to use.
 *   insert     - extra directory element to place before the filename,
 *                or NULL for none.  strlen(insert) + 2 is subtracted from
 *                the 99-byte budget, so it must be short (the caller in
 *                this file passes "PaxHeader").
 *
 * Returns dest.
 */
static char *
build_ustar_entry_name(char *dest, const char *src, size_t src_length,
    const char *insert)
{
	const char *prefix, *prefix_end;
	const char *suffix, *suffix_end;
	const char *filename, *filename_end;
	char *p;
	int need_slash = 0; /* Was there a trailing slash? */
	size_t suffix_length = 99;
	size_t insert_length;

	/* Length of additional dir element to be added. */
	if (insert == NULL)
		insert_length = 0;
	else
		/* +2 here allows for '/' before and after the insert. */
		insert_length = strlen(insert) + 2;

	/* Step 0: Quick bailout in a common case. */
	if (src_length < 100 && insert == NULL) {
		strncpy(dest, src, src_length);
		dest[src_length] = '\0';
		return (dest);
	}

	/* Step 1: Locate filename and enforce the length restriction. */
	filename_end = src + src_length;
	/* Remove trailing '/' chars and '/.' pairs. */
	for (;;) {
		if (filename_end > src && filename_end[-1] == '/') {
			filename_end--;
			need_slash = 1; /* Remember to restore trailing '/'. */
			continue;
		}
		if (filename_end > src + 1 && filename_end[-1] == '.'
		    && filename_end[-2] == '/') {
			filename_end -= 2;
			need_slash = 1; /* "foo/." will become "foo/" */
			continue;
		}
		break;
	}
	if (need_slash)
		suffix_length--;
	/* Find start of filename. */
	if (filename_end == src) {
		/*
		 * Everything was stripped (the name was nothing but '/'
		 * and "/." pairs).  Use an empty filename rather than
		 * stepping back to src[-1], which lies outside the buffer.
		 */
		filename = src;
	} else {
		filename = filename_end - 1;
		while ((filename > src) && (*filename != '/'))
			filename--;
		if ((*filename == '/') && (filename < filename_end - 1))
			filename++;
	}
	/* Adjust filename_end so that filename + insert fits in 99 chars. */
	suffix_length -= insert_length;
	/* Compare lengths, not pointers, so no out-of-range pointer is formed. */
	if ((size_t)(filename_end - filename) > suffix_length)
		filename_end = filename + suffix_length;
	/* Calculate max size for "suffix" section (#2 above). */
	suffix_length -= (size_t)(filename_end - filename);

	/* Step 2: Locate the "prefix" section of the dirname, including
	 * trailing '/'. */
	prefix = src;
	if ((size_t)(filename - prefix) > 155)
		prefix_end = prefix + 155;
	else
		prefix_end = filename;
	/* Back up to a directory boundary. */
	while (prefix_end > prefix && *prefix_end != '/')
		prefix_end--;
	if ((prefix_end < filename) && (*prefix_end == '/'))
		prefix_end++;

	/* Step 3: Locate the "suffix" section of the dirname,
	 * including trailing '/'. */
	suffix = prefix_end;
	if ((size_t)(filename - suffix) > suffix_length)
		suffix_end = suffix + suffix_length; /* Enforce limit. */
	else
		suffix_end = filename;
	while (suffix_end > suffix && *suffix_end != '/')
		suffix_end--;
	if ((suffix_end < filename) && (*suffix_end == '/'))
		suffix_end++;

	/* Step 4: Build the new name. */
	/* The OpenBSD strlcpy function is safer, but less portable. */
	/* Rather than maintain two versions, just use the strncpy version. */
	p = dest;
	if (prefix_end > prefix) {
		strncpy(p, prefix, (size_t)(prefix_end - prefix));
		p += prefix_end - prefix;
	}
	if (suffix_end > suffix) {
		strncpy(p, suffix, (size_t)(suffix_end - suffix));
		p += suffix_end - suffix;
	}
	if (insert != NULL) {
		/* Note: assume insert does not have leading or trailing '/' */
		strcpy(p, insert);
		p += strlen(insert);
		*p++ = '/';
	}
	strncpy(p, filename, (size_t)(filename_end - filename));
	p += filename_end - filename;
	if (need_slash)
		*p++ = '/';
	*p = '\0';

	return (dest);
}

/*
 * The ustar header for the pax extended attributes must have a
 * reasonable name: SUSv3 requires 'dirname'/PaxHeader.'pid'/'filename'
 * where 'pid' is the PID of the archiving process.
 */
Unfortunately, 1131220443Sadrian * that makes testing a pain since the output varies for each run, 1132221019Sadrian * so I'm sticking with the simpler 'dirname'/PaxHeader/'filename' 1133221019Sadrian * for now. (Someday, I'll make this settable. Then I can use the 1134221019Sadrian * SUS recommendation as default and test harnesses can override it 1135220443Sadrian * to get predictable results.) 1136220443Sadrian * 1137220443Sadrian * Joerg Schilling has argued that this is unnecessary because, in 1138185377Ssam * practice, if the pax extended attributes get extracted as regular 1139185377Ssam * files, noone is going to bother reading those attributes to 1140185377Ssam * manually restore them. Based on this, 'star' uses 1141185377Ssam * /tmp/PaxHeader/'basename' as the ustar header name. This is a 1142185377Ssam * tempting argument, in part because it's simpler than the SUSv3 1143185377Ssam * recommendation, but I'm not entirely convinced. I'm also 1144185377Ssam * uncomfortable with the fact that "/tmp" is a Unix-ism. 1145185377Ssam * 1146185377Ssam * The following routine leverages build_ustar_entry_name() above and 1147185377Ssam * so is simpler than you might think. It just needs to provide the 1148185377Ssam * additional path element and handle a few pathological cases). 1149185377Ssam */ 1150185377Ssamstatic char * 1151185377Ssambuild_pax_attribute_name(char *dest, const char *src) 1152185377Ssam{ 1153185377Ssam char buff[64]; 1154185377Ssam const char *p; 1155185377Ssam 1156185377Ssam /* Handle the null filename case. */ 1157185377Ssam if (src == NULL || *src == '\0') { 1158185377Ssam strcpy(dest, "PaxHeader/blank"); 1159185377Ssam return (dest); 1160185377Ssam } 1161185377Ssam 1162185377Ssam /* Prune final '/' and other unwanted final elements. 
*/ 1163185377Ssam p = src + strlen(src); 1164187831Ssam for (;;) { 1165187831Ssam /* Ends in "/", remove the '/' */ 1166185377Ssam if (p > src && p[-1] == '/') { 1167185377Ssam --p; 1168185377Ssam continue; 1169185377Ssam } 1170185377Ssam /* Ends in "/.", remove the '.' */ 1171185377Ssam if (p > src + 1 && p[-1] == '.' 1172185377Ssam && p[-2] == '/') { 1173185377Ssam --p; 1174185377Ssam continue; 1175185377Ssam } 1176185377Ssam break; 1177185377Ssam } 1178185377Ssam 1179185377Ssam /* Pathological case: After above, there was nothing left. 1180185377Ssam * This includes "/." "/./." "/.//./." etc. */ 1181185377Ssam if (p == src) { 1182185377Ssam strcpy(dest, "/PaxHeader/rootdir"); 1183185377Ssam return (dest); 1184185377Ssam } 1185185377Ssam 1186185377Ssam /* Convert unadorned "." into a suitable filename. */ 1187185377Ssam if (*src == '.' && p == src + 1) { 1188185377Ssam strcpy(dest, "PaxHeader/currentdir"); 1189185377Ssam return (dest); 1190185377Ssam } 1191187831Ssam 1192187831Ssam /* 1193185377Ssam * TODO: Push this string into the 'pax' structure to avoid 1194185377Ssam * recomputing it every time. That will also open the door 1195185377Ssam * to having clients override it. 1196185377Ssam */ 1197185377Ssam#if HAVE_GETPID && 0 /* Disable this for now; see above comment. */ 1198185377Ssam sprintf(buff, "PaxHeader.%d", getpid()); 1199185377Ssam#else 1200185377Ssam /* If the platform can't fetch the pid, don't include it. */ 1201185377Ssam strcpy(buff, "PaxHeader"); 1202185377Ssam#endif 1203185377Ssam /* General case: build a ustar-compatible name adding "/PaxHeader/". 
*/ 1204185377Ssam build_ustar_entry_name(dest, src, p - src, buff); 1205185377Ssam 1206189713Ssam return (dest); 1207185377Ssam} 1208185377Ssam 1209185377Ssam/* Write two null blocks for the end of archive */ 1210217921Sadrianstatic int 1211217921Sadrianarchive_write_pax_finish(struct archive_write *a) 1212222157Sadrian{ 1213217921Sadrian int r; 1214217921Sadrian 1215185377Ssam if (a->compressor.write == NULL) 1216185377Ssam return (ARCHIVE_OK); 1217185377Ssam 1218185377Ssam r = write_nulls(a, 512 * 2); 1219185377Ssam return (r); 1220185377Ssam} 1221185377Ssam 1222185377Ssamstatic int 1223185377Ssamarchive_write_pax_destroy(struct archive_write *a) 1224185377Ssam{ 1225189713Ssam struct pax *pax; 1226185377Ssam 1227185377Ssam pax = (struct pax *)a->format_data; 1228185377Ssam if (pax == NULL) 1229185377Ssam return (ARCHIVE_OK); 1230185377Ssam 1231185377Ssam archive_string_free(&pax->pax_header); 1232185377Ssam free(pax); 1233185377Ssam a->format_data = NULL; 1234185377Ssam return (ARCHIVE_OK); 1235185377Ssam} 1236185377Ssam 1237185377Ssamstatic int 1238185377Ssamarchive_write_pax_finish_entry(struct archive_write *a) 1239185377Ssam{ 1240185377Ssam struct pax *pax; 1241185377Ssam int ret; 1242219586Sadrian 1243219586Sadrian pax = (struct pax *)a->format_data; 1244219586Sadrian ret = write_nulls(a, pax->entry_bytes_remaining + pax->entry_padding); 1245219586Sadrian pax->entry_bytes_remaining = pax->entry_padding = 0; 1246219586Sadrian return (ret); 1247219586Sadrian} 1248219586Sadrian 1249219586Sadrianstatic int 1250219586Sadrianwrite_nulls(struct archive_write *a, size_t padding) 1251219586Sadrian{ 1252219586Sadrian int ret; 1253219586Sadrian size_t to_write; 1254219586Sadrian 1255219586Sadrian while (padding > 0) { 1256219586Sadrian to_write = padding < a->null_length ? 
padding : a->null_length; 1257219586Sadrian ret = (a->compressor.write)(a, a->nulls, to_write); 1258219586Sadrian if (ret != ARCHIVE_OK) 1259219586Sadrian return (ret); 1260219586Sadrian padding -= to_write; 1261219586Sadrian } 1262219586Sadrian return (ARCHIVE_OK); 1263219586Sadrian} 1264219586Sadrian 1265219586Sadrianstatic ssize_t 1266219586Sadrianarchive_write_pax_data(struct archive_write *a, const void *buff, size_t s) 1267219586Sadrian{ 1268219586Sadrian struct pax *pax; 1269219586Sadrian int ret; 1270219586Sadrian 1271219586Sadrian pax = (struct pax *)a->format_data; 1272219586Sadrian if (s > pax->entry_bytes_remaining) 1273219586Sadrian s = pax->entry_bytes_remaining; 1274219586Sadrian 1275219586Sadrian ret = (a->compressor.write)(a, buff, s); 1276219586Sadrian pax->entry_bytes_remaining -= s; 1277219586Sadrian if (ret == ARCHIVE_OK) 1278219586Sadrian return (s); 1279219586Sadrian else 1280219586Sadrian return (ret); 1281219586Sadrian} 1282219586Sadrian 1283219586Sadrianstatic int 1284219586Sadrianhas_non_ASCII(const wchar_t *wp) 1285219586Sadrian{ 1286219586Sadrian if (wp == NULL) 1287219586Sadrian return (1); 1288219586Sadrian while (*wp != L'\0' && *wp < 128) 1289219586Sadrian wp++; 1290219586Sadrian return (*wp != L'\0'); 1291219586Sadrian} 1292219586Sadrian 1293219586Sadrian/* 1294219586Sadrian * Used by extended attribute support; encodes the name 1295219586Sadrian * so that there will be no '=' characters in the result. 
/*
 * url_encode: produce a copy of 'in' that contains no '=' characters.
 *
 * Every byte outside the range 33..126, and also '%' and '=', is
 * written as '%' followed by two uppercase hex digits; all other bytes
 * are copied unchanged.
 *
 * Returns a NUL-terminated string allocated with malloc() that the
 * caller must free, or NULL if 'in' is NULL or the allocation fails.
 */
static char *
url_encode(const char *in)
{
	static const char hex[] = "0123456789ABCDEF";
	const char *s;
	char *d;
	char *out;
	size_t out_len = 0;	/* size_t: a signed int could overflow. */
	unsigned char c;

	if (in == NULL)
		return (NULL);

	/* First pass: work out how much space the encoded form needs. */
	for (s = in; *s != '\0'; s++) {
		/* Classify as unsigned so the result does not depend on
		 * whether plain char is signed on this platform. */
		c = (unsigned char)*s;
		if (c < 33 || c > 126 || c == '%' || c == '=')
			out_len += 3;
		else
			out_len++;
	}

	out = (char *)malloc(out_len + 1);
	if (out == NULL)
		return (NULL);

	/* Second pass: emit the encoded bytes. */
	for (s = in, d = out; *s != '\0'; s++) {
		c = (unsigned char)*s;
		/* encode any non-printable ASCII character or '%' or '=' */
		if (c < 33 || c > 126 || c == '%' || c == '=') {
			/* URL encoding is '%' followed by two hex digits */
			*d++ = '%';
			*d++ = hex[0x0f & (c >> 4)];
			*d++ = hex[0x0f & c];
		} else {
			*d++ = (char)c;
		}
	}
	*d = '\0';
	return (out);
}

/*
 * Encode a sequence of bytes into a C string using base-64 encoding.
 *
 * Returns a null-terminated C string allocated with malloc(); caller
 * is responsible for freeing the result.
 */
/*
 * base64_encode: convert 'len' bytes starting at 's' to base-64 text
 * using the alphabet A-Z, a-z, 0-9, '+', '/'.
 *
 * No '=' padding is emitted: a final group of one byte yields two
 * characters and a final group of two bytes yields three.
 *
 * Returns a NUL-terminated string obtained from malloc() that the
 * caller must free, or NULL if the output size cannot be represented
 * or the allocation fails.
 */
static char *
base64_encode(const char *s, size_t len)
{
	static const char digits[64] =
	    { 'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O',
	      'P','Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d',
	      'e','f','g','h','i','j','k','l','m','n','o','p','q','r','s',
	      't','u','v','w','x','y','z','0','1','2','3','4','5','6','7',
	      '8','9','+','/' };
	/* Read the input as unsigned bytes: shifting a negative (signed
	 * char) value left is undefined behaviour. */
	const unsigned char *in = (const unsigned char *)s;
	unsigned long v;
	char *d, *out;

	/*
	 * Each full 3-byte group becomes 4 chars; a partial group adds at
	 * most 3 more, plus 1 for the trailing NUL.  Dividing first keeps
	 * the size computation from wrapping for very large len.
	 */
	if (len / 3 > (((size_t)-1) - 4) / 4)
		return (NULL);
	out = (char *)malloc((len / 3) * 4 + 4);
	if (out == NULL)
		return (NULL);
	d = out;

	/* Convert each group of 3 bytes into 4 characters. */
	while (len >= 3) {
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8)
		    | ((unsigned long)in[2]);
		in += 3;
		len -= 3;
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		*d++ = digits[(v) & 0x3f];
	}
	/* Handle final group of 1 byte (2 chars) or 2 bytes (3 chars). */
	switch (len) {
	case 1:
		v = ((unsigned long)in[0] << 16);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		break;
	case 2:
		v = ((unsigned long)in[0] << 16)
		    | ((unsigned long)in[1] << 8);
		*d++ = digits[(v >> 18) & 0x3f];
		*d++ = digits[(v >> 12) & 0x3f];
		*d++ = digits[(v >> 6) & 0x3f];
		break;
	default:
		/* len == 0: nothing left over. */
		break;
	}
	/* Add trailing NUL character so output is a valid C string. */
	*d = '\0';
	return (out);
}