archive_read_support_format_ar.c revision 248616
1/*- 2 * Copyright (c) 2007 Kai Wang 3 * Copyright (c) 2007 Tim Kientzle 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer 11 * in this position and unchanged. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28#include "archive_platform.h" 29__FBSDID("$FreeBSD: head/contrib/libarchive/libarchive/archive_read_support_format_ar.c 248616 2013-03-22 13:36:03Z mm $"); 30 31#ifdef HAVE_SYS_STAT_H 32#include <sys/stat.h> 33#endif 34#ifdef HAVE_ERRNO_H 35#include <errno.h> 36#endif 37#ifdef HAVE_STDLIB_H 38#include <stdlib.h> 39#endif 40#ifdef HAVE_STRING_H 41#include <string.h> 42#endif 43#ifdef HAVE_LIMITS_H 44#include <limits.h> 45#endif 46 47#include "archive.h" 48#include "archive_entry.h" 49#include "archive_private.h" 50#include "archive_read_private.h" 51 52struct ar { 53 int64_t entry_bytes_remaining; 54 /* unconsumed is purely to track data we've gotten from readahead, 55 * but haven't yet marked as consumed. Must be paired with 56 * entry_bytes_remaining usage/modification. 57 */ 58 size_t entry_bytes_unconsumed; 59 int64_t entry_offset; 60 int64_t entry_padding; 61 char *strtab; 62 size_t strtab_size; 63 char read_global_header; 64}; 65 66/* 67 * Define structure of the "ar" header. 68 */ 69#define AR_name_offset 0 70#define AR_name_size 16 71#define AR_date_offset 16 72#define AR_date_size 12 73#define AR_uid_offset 28 74#define AR_uid_size 6 75#define AR_gid_offset 34 76#define AR_gid_size 6 77#define AR_mode_offset 40 78#define AR_mode_size 8 79#define AR_size_offset 48 80#define AR_size_size 10 81#define AR_fmag_offset 58 82#define AR_fmag_size 2 83 84static int archive_read_format_ar_bid(struct archive_read *a, int); 85static int archive_read_format_ar_cleanup(struct archive_read *a); 86static int archive_read_format_ar_read_data(struct archive_read *a, 87 const void **buff, size_t *size, int64_t *offset); 88static int archive_read_format_ar_skip(struct archive_read *a); 89static int archive_read_format_ar_read_header(struct archive_read *a, 90 struct archive_entry *e); 91static uint64_t ar_atol8(const char *p, unsigned char_cnt); 92static uint64_t ar_atol10(const char *p, unsigned char_cnt); 93static int ar_parse_gnu_filename_table(struct archive_read *a); 94static int ar_parse_common_header(struct ar *ar, struct archive_entry *, 95 const char *h); 96 97int 98archive_read_support_format_ar(struct archive *_a) 99{ 100 struct archive_read *a = (struct archive_read *)_a; 101 struct ar *ar; 102 int r; 103 104 archive_check_magic(_a, ARCHIVE_READ_MAGIC, 105 ARCHIVE_STATE_NEW, "archive_read_support_format_ar"); 106 107 ar = (struct ar *)malloc(sizeof(*ar)); 108 if (ar == NULL) { 109 archive_set_error(&a->archive, ENOMEM, 110 "Can't allocate ar data"); 111 return (ARCHIVE_FATAL); 112 } 113 memset(ar, 0, sizeof(*ar)); 114 ar->strtab = NULL; 115 116 r = __archive_read_register_format(a, 117 ar, 118 "ar", 119 archive_read_format_ar_bid, 120 NULL, 121 archive_read_format_ar_read_header, 122 archive_read_format_ar_read_data, 123 archive_read_format_ar_skip, 124 NULL, 125 archive_read_format_ar_cleanup); 126 127 if (r != ARCHIVE_OK) { 128 free(ar); 129 return (r); 130 } 131 return (ARCHIVE_OK); 132} 133 134static int 135archive_read_format_ar_cleanup(struct archive_read *a) 136{ 137 struct ar *ar; 138 139 ar = (struct ar *)(a->format->data); 140 if (ar->strtab) 141 free(ar->strtab); 142 free(ar); 143 (a->format->data) = NULL; 144 return (ARCHIVE_OK); 145} 146 147static int 148archive_read_format_ar_bid(struct archive_read *a, int best_bid) 149{ 150 const void *h; 151 152 (void)best_bid; /* UNUSED */ 153 154 /* 155 * Verify the 8-byte file signature. 156 * TODO: Do we need to check more than this? 157 */ 158 if ((h = __archive_read_ahead(a, 8, NULL)) == NULL) 159 return (-1); 160 if (memcmp(h, "!<arch>\n", 8) == 0) { 161 return (64); 162 } 163 return (-1); 164} 165 166static int 167_ar_read_header(struct archive_read *a, struct archive_entry *entry, 168 struct ar *ar, const char *h, size_t *unconsumed) 169{ 170 char filename[AR_name_size + 1]; 171 uint64_t number; /* Used to hold parsed numbers before validation. */ 172 size_t bsd_name_length, entry_size; 173 char *p, *st; 174 const void *b; 175 int r; 176 177 /* Verify the magic signature on the file header. */ 178 if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) { 179 archive_set_error(&a->archive, EINVAL, 180 "Incorrect file header signature"); 181 return (ARCHIVE_WARN); 182 } 183 184 /* Copy filename into work buffer. */ 185 strncpy(filename, h + AR_name_offset, AR_name_size); 186 filename[AR_name_size] = '\0'; 187 188 /* 189 * Guess the format variant based on the filename. 190 */ 191 if (a->archive.archive_format == ARCHIVE_FORMAT_AR) { 192 /* We don't already know the variant, so let's guess. */ 193 /* 194 * Biggest clue is presence of '/': GNU starts special 195 * filenames with '/', appends '/' as terminator to 196 * non-special names, so anything with '/' should be 197 * GNU except for BSD long filenames. 198 */ 199 if (strncmp(filename, "#1/", 3) == 0) 200 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 201 else if (strchr(filename, '/') != NULL) 202 a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU; 203 else if (strncmp(filename, "__.SYMDEF", 9) == 0) 204 a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD; 205 /* 206 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/' 207 * if name exactly fills 16-byte field? If so, we 208 * can't assume entries without '/' are BSD. XXX 209 */ 210 } 211 212 /* Update format name from the code. */ 213 if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU) 214 a->archive.archive_format_name = "ar (GNU/SVR4)"; 215 else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD) 216 a->archive.archive_format_name = "ar (BSD)"; 217 else 218 a->archive.archive_format_name = "ar"; 219 220 /* 221 * Remove trailing spaces from the filename. GNU and BSD 222 * variants both pad filename area out with spaces. 223 * This will only be wrong if GNU/SVR4 'ar' implementations 224 * omit trailing '/' for 16-char filenames and we have 225 * a 16-char filename that ends in ' '. 226 */ 227 p = filename + AR_name_size - 1; 228 while (p >= filename && *p == ' ') { 229 *p = '\0'; 230 p--; 231 } 232 233 /* 234 * Remove trailing slash unless first character is '/'. 235 * (BSD entries never end in '/', so this will only trim 236 * GNU-format entries. GNU special entries start with '/' 237 * and are not terminated in '/', so we don't trim anything 238 * that starts with '/'.) 239 */ 240 if (filename[0] != '/' && *p == '/') 241 *p = '\0'; 242 243 /* 244 * '//' is the GNU filename table. 245 * Later entries can refer to names in this table. 246 */ 247 if (strcmp(filename, "//") == 0) { 248 /* This must come before any call to _read_ahead. */ 249 ar_parse_common_header(ar, entry, h); 250 archive_entry_copy_pathname(entry, filename); 251 archive_entry_set_filetype(entry, AE_IFREG); 252 /* Get the size of the filename table. */ 253 number = ar_atol10(h + AR_size_offset, AR_size_size); 254 if (number > SIZE_MAX) { 255 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 256 "Filename table too large"); 257 return (ARCHIVE_FATAL); 258 } 259 entry_size = (size_t)number; 260 if (entry_size == 0) { 261 archive_set_error(&a->archive, EINVAL, 262 "Invalid string table"); 263 return (ARCHIVE_WARN); 264 } 265 if (ar->strtab != NULL) { 266 archive_set_error(&a->archive, EINVAL, 267 "More than one string tables exist"); 268 return (ARCHIVE_WARN); 269 } 270 271 /* Read the filename table into memory. */ 272 st = malloc(entry_size); 273 if (st == NULL) { 274 archive_set_error(&a->archive, ENOMEM, 275 "Can't allocate filename table buffer"); 276 return (ARCHIVE_FATAL); 277 } 278 ar->strtab = st; 279 ar->strtab_size = entry_size; 280 281 if (*unconsumed) { 282 __archive_read_consume(a, *unconsumed); 283 *unconsumed = 0; 284 } 285 286 if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL) 287 return (ARCHIVE_FATAL); 288 memcpy(st, b, entry_size); 289 __archive_read_consume(a, entry_size); 290 /* All contents are consumed. */ 291 ar->entry_bytes_remaining = 0; 292 archive_entry_set_size(entry, ar->entry_bytes_remaining); 293 294 /* Parse the filename table. */ 295 return (ar_parse_gnu_filename_table(a)); 296 } 297 298 /* 299 * GNU variant handles long filenames by storing /<number> 300 * to indicate a name stored in the filename table. 301 * XXX TODO: Verify that it's all digits... Don't be fooled 302 * by "/9xyz" XXX 303 */ 304 if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') { 305 number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1); 306 /* 307 * If we can't look up the real name, warn and return 308 * the entry with the wrong name. 309 */ 310 if (ar->strtab == NULL || number > ar->strtab_size) { 311 archive_set_error(&a->archive, EINVAL, 312 "Can't find long filename for entry"); 313 archive_entry_copy_pathname(entry, filename); 314 /* Parse the time, owner, mode, size fields. */ 315 ar_parse_common_header(ar, entry, h); 316 return (ARCHIVE_WARN); 317 } 318 319 archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]); 320 /* Parse the time, owner, mode, size fields. */ 321 return (ar_parse_common_header(ar, entry, h)); 322 } 323 324 /* 325 * BSD handles long filenames by storing "#1/" followed by the 326 * length of filename as a decimal number, then prepends the 327 * the filename to the file contents. 328 */ 329 if (strncmp(filename, "#1/", 3) == 0) { 330 /* Parse the time, owner, mode, size fields. */ 331 /* This must occur before _read_ahead is called again. */ 332 ar_parse_common_header(ar, entry, h); 333 334 /* Parse the size of the name, adjust the file size. */ 335 number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3); 336 bsd_name_length = (size_t)number; 337 /* Guard against the filename + trailing NUL 338 * overflowing a size_t and against the filename size 339 * being larger than the entire entry. */ 340 if (number > (uint64_t)(bsd_name_length + 1) 341 || (int64_t)bsd_name_length > ar->entry_bytes_remaining) { 342 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 343 "Bad input file size"); 344 return (ARCHIVE_FATAL); 345 } 346 ar->entry_bytes_remaining -= bsd_name_length; 347 /* Adjust file size reported to client. */ 348 archive_entry_set_size(entry, ar->entry_bytes_remaining); 349 350 if (*unconsumed) { 351 __archive_read_consume(a, *unconsumed); 352 *unconsumed = 0; 353 } 354 355 /* Read the long name into memory. */ 356 if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) { 357 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 358 "Truncated input file"); 359 return (ARCHIVE_FATAL); 360 } 361 /* Store it in the entry. */ 362 p = (char *)malloc(bsd_name_length + 1); 363 if (p == NULL) { 364 archive_set_error(&a->archive, ENOMEM, 365 "Can't allocate fname buffer"); 366 return (ARCHIVE_FATAL); 367 } 368 strncpy(p, b, bsd_name_length); 369 p[bsd_name_length] = '\0'; 370 371 __archive_read_consume(a, bsd_name_length); 372 373 archive_entry_copy_pathname(entry, p); 374 free(p); 375 return (ARCHIVE_OK); 376 } 377 378 /* 379 * "/" is the SVR4/GNU archive symbol table. 380 */ 381 if (strcmp(filename, "/") == 0) { 382 archive_entry_copy_pathname(entry, "/"); 383 /* Parse the time, owner, mode, size fields. */ 384 r = ar_parse_common_header(ar, entry, h); 385 /* Force the file type to a regular file. */ 386 archive_entry_set_filetype(entry, AE_IFREG); 387 return (r); 388 } 389 390 /* 391 * "__.SYMDEF" is a BSD archive symbol table. 392 */ 393 if (strcmp(filename, "__.SYMDEF") == 0) { 394 archive_entry_copy_pathname(entry, filename); 395 /* Parse the time, owner, mode, size fields. */ 396 return (ar_parse_common_header(ar, entry, h)); 397 } 398 399 /* 400 * Otherwise, this is a standard entry. The filename 401 * has already been trimmed as much as possible, based 402 * on our current knowledge of the format. 403 */ 404 archive_entry_copy_pathname(entry, filename); 405 return (ar_parse_common_header(ar, entry, h)); 406} 407 408static int 409archive_read_format_ar_read_header(struct archive_read *a, 410 struct archive_entry *entry) 411{ 412 struct ar *ar = (struct ar*)(a->format->data); 413 size_t unconsumed; 414 const void *header_data; 415 int ret; 416 417 if (!ar->read_global_header) { 418 /* 419 * We are now at the beginning of the archive, 420 * so we need first consume the ar global header. 421 */ 422 __archive_read_consume(a, 8); 423 ar->read_global_header = 1; 424 /* Set a default format code for now. */ 425 a->archive.archive_format = ARCHIVE_FORMAT_AR; 426 } 427 428 /* Read the header for the next file entry. */ 429 if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL) 430 /* Broken header. */ 431 return (ARCHIVE_EOF); 432 433 unconsumed = 60; 434 435 ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed); 436 437 if (unconsumed) 438 __archive_read_consume(a, unconsumed); 439 440 return ret; 441} 442 443 444static int 445ar_parse_common_header(struct ar *ar, struct archive_entry *entry, 446 const char *h) 447{ 448 uint64_t n; 449 450 /* Copy remaining header */ 451 archive_entry_set_mtime(entry, 452 (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L); 453 archive_entry_set_uid(entry, 454 (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size)); 455 archive_entry_set_gid(entry, 456 (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size)); 457 archive_entry_set_mode(entry, 458 (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size)); 459 n = ar_atol10(h + AR_size_offset, AR_size_size); 460 461 ar->entry_offset = 0; 462 ar->entry_padding = n % 2; 463 archive_entry_set_size(entry, n); 464 ar->entry_bytes_remaining = n; 465 return (ARCHIVE_OK); 466} 467 468static int 469archive_read_format_ar_read_data(struct archive_read *a, 470 const void **buff, size_t *size, int64_t *offset) 471{ 472 ssize_t bytes_read; 473 struct ar *ar; 474 475 ar = (struct ar *)(a->format->data); 476 477 if (ar->entry_bytes_unconsumed) { 478 __archive_read_consume(a, ar->entry_bytes_unconsumed); 479 ar->entry_bytes_unconsumed = 0; 480 } 481 482 if (ar->entry_bytes_remaining > 0) { 483 *buff = __archive_read_ahead(a, 1, &bytes_read); 484 if (bytes_read == 0) { 485 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 486 "Truncated ar archive"); 487 return (ARCHIVE_FATAL); 488 } 489 if (bytes_read < 0) 490 return (ARCHIVE_FATAL); 491 if (bytes_read > ar->entry_bytes_remaining) 492 bytes_read = (ssize_t)ar->entry_bytes_remaining; 493 *size = bytes_read; 494 ar->entry_bytes_unconsumed = bytes_read; 495 *offset = ar->entry_offset; 496 ar->entry_offset += bytes_read; 497 ar->entry_bytes_remaining -= bytes_read; 498 return (ARCHIVE_OK); 499 } else { 500 int64_t skipped = __archive_read_consume(a, ar->entry_padding); 501 if (skipped >= 0) { 502 ar->entry_padding -= skipped; 503 } 504 if (ar->entry_padding) { 505 if (skipped >= 0) { 506 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 507 "Truncated ar archive- failed consuming padding"); 508 } 509 return (ARCHIVE_FATAL); 510 } 511 *buff = NULL; 512 *size = 0; 513 *offset = ar->entry_offset; 514 return (ARCHIVE_EOF); 515 } 516} 517 518static int 519archive_read_format_ar_skip(struct archive_read *a) 520{ 521 int64_t bytes_skipped; 522 struct ar* ar; 523 524 ar = (struct ar *)(a->format->data); 525 526 bytes_skipped = __archive_read_consume(a, 527 ar->entry_bytes_remaining + ar->entry_padding 528 + ar->entry_bytes_unconsumed); 529 if (bytes_skipped < 0) 530 return (ARCHIVE_FATAL); 531 532 ar->entry_bytes_remaining = 0; 533 ar->entry_bytes_unconsumed = 0; 534 ar->entry_padding = 0; 535 536 return (ARCHIVE_OK); 537} 538 539static int 540ar_parse_gnu_filename_table(struct archive_read *a) 541{ 542 struct ar *ar; 543 char *p; 544 size_t size; 545 546 ar = (struct ar*)(a->format->data); 547 size = ar->strtab_size; 548 549 for (p = ar->strtab; p < ar->strtab + size - 1; ++p) { 550 if (*p == '/') { 551 *p++ = '\0'; 552 if (*p != '\n') 553 goto bad_string_table; 554 *p = '\0'; 555 } 556 } 557 /* 558 * GNU ar always pads the table to an even size. 559 * The pad character is either '\n' or '`'. 560 */ 561 if (p != ar->strtab + size && *p != '\n' && *p != '`') 562 goto bad_string_table; 563 564 /* Enforce zero termination. */ 565 ar->strtab[size - 1] = '\0'; 566 567 return (ARCHIVE_OK); 568 569bad_string_table: 570 archive_set_error(&a->archive, EINVAL, 571 "Invalid string table"); 572 free(ar->strtab); 573 ar->strtab = NULL; 574 return (ARCHIVE_WARN); 575} 576 577static uint64_t 578ar_atol8(const char *p, unsigned char_cnt) 579{ 580 uint64_t l, limit, last_digit_limit; 581 unsigned int digit, base; 582 583 base = 8; 584 limit = UINT64_MAX / base; 585 last_digit_limit = UINT64_MAX % base; 586 587 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 588 p++; 589 590 l = 0; 591 digit = *p - '0'; 592 while (*p >= '0' && digit < base && char_cnt-- > 0) { 593 if (l>limit || (l == limit && digit > last_digit_limit)) { 594 l = UINT64_MAX; /* Truncate on overflow. */ 595 break; 596 } 597 l = (l * base) + digit; 598 digit = *++p - '0'; 599 } 600 return (l); 601} 602 603static uint64_t 604ar_atol10(const char *p, unsigned char_cnt) 605{ 606 uint64_t l, limit, last_digit_limit; 607 unsigned int base, digit; 608 609 base = 10; 610 limit = UINT64_MAX / base; 611 last_digit_limit = UINT64_MAX % base; 612 613 while ((*p == ' ' || *p == '\t') && char_cnt-- > 0) 614 p++; 615 l = 0; 616 digit = *p - '0'; 617 while (*p >= '0' && digit < base && char_cnt-- > 0) { 618 if (l > limit || (l == limit && digit > last_digit_limit)) { 619 l = UINT64_MAX; /* Truncate on overflow. */ 620 break; 621 } 622 l = (l * base) + digit; 623 digit = *++p - '0'; 624 } 625 return (l); 626} 627