1/*- 2 * Copyright (c) 2004 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27__FBSDID("$FreeBSD$"); 28 29#ifdef HAVE_ERRNO_H 30#include <errno.h> 31#endif 32#include <stdio.h> 33#ifdef HAVE_STDLIB_H 34#include <stdlib.h> 35#endif 36#include <time.h> 37#ifdef HAVE_ZLIB_H 38#include <zlib.h> 39#endif 40 41#include "archive.h" 42#include "archive_entry.h" 43#include "archive_private.h" 44#include "archive_read_private.h" 45#include "archive_endian.h" 46 47#ifndef HAVE_ZLIB_H 48#include "archive_crc32.h" 49#endif 50 51struct zip { 52 /* entry_bytes_remaining is the number of bytes we expect. */ 53 int64_t entry_bytes_remaining; 54 int64_t entry_offset; 55 56 /* These count the number of bytes actually read for the entry. */ 57 int64_t entry_compressed_bytes_read; 58 int64_t entry_uncompressed_bytes_read; 59 60 /* Running CRC32 of the decompressed data */ 61 unsigned long entry_crc32; 62 63 unsigned version; 64 unsigned system; 65 unsigned flags; 66 unsigned compression; 67 const char * compression_name; 68 time_t mtime; 69 time_t ctime; 70 time_t atime; 71 mode_t mode; 72 uid_t uid; 73 gid_t gid; 74 75 /* Flags to mark progress of decompression. */ 76 char decompress_init; 77 char end_of_entry; 78 79 unsigned long crc32; 80 ssize_t filename_length; 81 ssize_t extra_length; 82 int64_t uncompressed_size; 83 int64_t compressed_size; 84 85 unsigned char *uncompressed_buffer; 86 size_t uncompressed_buffer_size; 87#ifdef HAVE_ZLIB_H 88 z_stream stream; 89 char stream_valid; 90#endif 91 92 struct archive_string pathname; 93 struct archive_string extra; 94 char format_name[64]; 95}; 96 97#define ZIP_LENGTH_AT_END 8 98 99struct zip_file_header { 100 char signature[4]; 101 char version[2]; 102 char flags[2]; 103 char compression[2]; 104 char timedate[4]; 105 char crc32[4]; 106 char compressed_size[4]; 107 char uncompressed_size[4]; 108 char filename_length[2]; 109 char extra_length[2]; 110}; 111 112static const char *compression_names[] = { 113 "uncompressed", 114 "shrinking", 115 "reduced-1", 116 "reduced-2", 117 "reduced-3", 118 "reduced-4", 119 "imploded", 120 "reserved", 121 "deflation" 122}; 123 124static int archive_read_format_zip_bid(struct archive_read *); 125static int archive_read_format_zip_cleanup(struct archive_read *); 126static int archive_read_format_zip_read_data(struct archive_read *, 127 const void **, size_t *, off_t *); 128static int archive_read_format_zip_read_data_skip(struct archive_read *a); 129static int archive_read_format_zip_read_header(struct archive_read *, 130 struct archive_entry *); 131static int search_next_signature(struct archive_read *); 132static int zip_read_data_deflate(struct archive_read *a, const void **buff, 133 size_t *size, off_t *offset); 134static int zip_read_data_none(struct archive_read *a, const void **buff, 135 size_t *size, off_t *offset); 136static int zip_read_file_header(struct archive_read *a, 137 struct archive_entry *entry, struct zip *zip); 138static time_t zip_time(const char *); 139static void process_extra(const void* extra, struct zip* zip); 140 141int 142archive_read_support_format_zip(struct archive *_a) 143{ 144 struct archive_read *a = (struct archive_read *)_a; 145 struct zip *zip; 146 int r; 147 148 zip = (struct zip *)malloc(sizeof(*zip)); 149 if (zip == NULL) { 150 archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data"); 151 return (ARCHIVE_FATAL); 152 } 153 memset(zip, 0, sizeof(*zip)); 154 155 r = __archive_read_register_format(a, 156 zip, 157 "zip", 158 archive_read_format_zip_bid, 159 NULL, 160 archive_read_format_zip_read_header, 161 archive_read_format_zip_read_data, 162 archive_read_format_zip_read_data_skip, 163 archive_read_format_zip_cleanup); 164 165 if (r != ARCHIVE_OK) 166 free(zip); 167 return (ARCHIVE_OK); 168} 169 170 171static int 172archive_read_format_zip_bid(struct archive_read *a) 173{ 174 const char *p; 175 const void *buff; 176 ssize_t bytes_avail, offset; 177 178 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) 179 return (-1); 180 181 /* 182 * Bid of 30 here is: 16 bits for "PK", 183 * next 16-bit field has four options (-2 bits). 184 * 16 + 16-2 = 30. 185 */ 186 if (p[0] == 'P' && p[1] == 'K') { 187 if ((p[2] == '\001' && p[3] == '\002') 188 || (p[2] == '\003' && p[3] == '\004') 189 || (p[2] == '\005' && p[3] == '\006') 190 || (p[2] == '\007' && p[3] == '\010') 191 || (p[2] == '0' && p[3] == '0')) 192 return (30); 193 } 194 195 /* 196 * Attempt to handle self-extracting archives 197 * by noting a PE header and searching forward 198 * up to 128k for a 'PK\003\004' marker. 199 */ 200 if (p[0] == 'M' && p[1] == 'Z') { 201 /* 202 * TODO: Optimize by initializing 'offset' to an 203 * estimate of the likely start of the archive data 204 * based on values in the PE header. Note that we 205 * don't need to be exact, but we mustn't skip too 206 * far. The search below will compensate if we 207 * undershoot. 208 */ 209 offset = 0; 210 while (offset < 124000) { 211 /* Get 4k of data beyond where we stopped. */ 212 buff = __archive_read_ahead(a, offset + 4096, 213 &bytes_avail); 214 if (buff == NULL) 215 break; 216 p = (const char *)buff + offset; 217 while (p + 9 < (const char *)buff + bytes_avail) { 218 if (p[0] == 'P' && p[1] == 'K' /* signature */ 219 && p[2] == 3 && p[3] == 4 /* File entry */ 220 && p[8] == 8 /* compression == deflate */ 221 && p[9] == 0 /* High byte of compression */ 222 ) 223 { 224 return (30); 225 } 226 ++p; 227 } 228 offset = p - (const char *)buff; 229 } 230 } 231 232 return (0); 233} 234 235/* 236 * Search forward for a "PK\003\004" file header. This handles the 237 * case of self-extracting archives, where there is an executable 238 * prepended to the ZIP archive. 239 */ 240static int 241skip_sfx(struct archive_read *a) 242{ 243 const void *h; 244 const char *p, *q; 245 size_t skip; 246 ssize_t bytes; 247 248 /* 249 * TODO: We should be able to skip forward by a bunch 250 * by lifting some values from the PE header. We don't 251 * need to be exact (we're still going to search forward 252 * to find the header), but it will speed things up and 253 * reduce the chance of a false positive. 254 */ 255 for (;;) { 256 h = __archive_read_ahead(a, 4, &bytes); 257 if (bytes < 4) 258 return (ARCHIVE_FATAL); 259 p = h; 260 q = p + bytes; 261 262 /* 263 * Scan ahead until we find something that looks 264 * like the zip header. 265 */ 266 while (p + 4 < q) { 267 switch (p[3]) { 268 case '\004': 269 /* TODO: Additional verification here. */ 270 if (memcmp("PK\003\004", p, 4) == 0) { 271 skip = p - (const char *)h; 272 __archive_read_consume(a, skip); 273 return (ARCHIVE_OK); 274 } 275 p += 4; 276 break; 277 case '\003': p += 1; break; 278 case 'K': p += 2; break; 279 case 'P': p += 3; break; 280 default: p += 4; break; 281 } 282 } 283 skip = p - (const char *)h; 284 __archive_read_consume(a, skip); 285 } 286} 287 288static int 289archive_read_format_zip_read_header(struct archive_read *a, 290 struct archive_entry *entry) 291{ 292 const void *h; 293 const char *signature; 294 struct zip *zip; 295 int r = ARCHIVE_OK, r1; 296 297 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 298 if (a->archive.archive_format_name == NULL) 299 a->archive.archive_format_name = "ZIP"; 300 301 zip = (struct zip *)(a->format->data); 302 zip->decompress_init = 0; 303 zip->end_of_entry = 0; 304 zip->entry_uncompressed_bytes_read = 0; 305 zip->entry_compressed_bytes_read = 0; 306 zip->entry_crc32 = crc32(0, NULL, 0); 307 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 308 return (ARCHIVE_FATAL); 309 310 signature = (const char *)h; 311 if (signature[0] == 'M' && signature[1] == 'Z') { 312 /* This is an executable? Must be self-extracting... */ 313 r = skip_sfx(a); 314 if (r < ARCHIVE_WARN) 315 return (r); 316 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 317 return (ARCHIVE_FATAL); 318 signature = (const char *)h; 319 } 320 321 /* If we don't see a PK signature here, scan forward. */ 322 if (signature[0] != 'P' || signature[1] != 'K') { 323 r = search_next_signature(a); 324 if (r != ARCHIVE_OK) { 325 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 326 "Bad ZIP file"); 327 return (ARCHIVE_FATAL); 328 } 329 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 330 return (ARCHIVE_FATAL); 331 signature = (const char *)h; 332 } 333 334 /* 335 * "PK00" signature is used for "split" archives that 336 * only have a single segment. This means we can just 337 * skip the PK00; the first real file header should follow. 338 */ 339 if (signature[2] == '0' && signature[3] == '0') { 340 __archive_read_consume(a, 4); 341 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 342 return (ARCHIVE_FATAL); 343 signature = (const char *)h; 344 if (signature[0] != 'P' || signature[1] != 'K') { 345 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 346 "Bad ZIP file"); 347 return (ARCHIVE_FATAL); 348 } 349 } 350 351 if (signature[2] == '\001' && signature[3] == '\002') { 352 /* Beginning of central directory. */ 353 return (ARCHIVE_EOF); 354 } 355 356 if (signature[2] == '\003' && signature[3] == '\004') { 357 /* Regular file entry. */ 358 r1 = zip_read_file_header(a, entry, zip); 359 if (r1 != ARCHIVE_OK) 360 return (r1); 361 return (r); 362 } 363 364 if (signature[2] == '\005' && signature[3] == '\006') { 365 /* End-of-archive record. */ 366 return (ARCHIVE_EOF); 367 } 368 369 if (signature[2] == '\007' && signature[3] == '\010') { 370 /* 371 * We should never encounter this record here; 372 * see ZIP_LENGTH_AT_END handling below for details. 373 */ 374 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 375 "Bad ZIP file: Unexpected end-of-entry record"); 376 return (ARCHIVE_FATAL); 377 } 378 379 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 380 "Damaged ZIP file or unsupported format variant (%d,%d)", 381 signature[2], signature[3]); 382 return (ARCHIVE_FATAL); 383} 384 385static int 386search_next_signature(struct archive_read *a) 387{ 388 const void *h; 389 const char *p, *q; 390 size_t skip; 391 ssize_t bytes; 392 int64_t skipped = 0; 393 394 for (;;) { 395 h = __archive_read_ahead(a, 4, &bytes); 396 if (h == NULL) 397 return (ARCHIVE_FATAL); 398 p = h; 399 q = p + bytes; 400 401 while (p + 4 <= q) { 402 if (p[0] == 'P' && p[1] == 'K') { 403 if ((p[2] == '\001' && p[3] == '\002') 404 || (p[2] == '\003' && p[3] == '\004') 405 || (p[2] == '\005' && p[3] == '\006') 406 || (p[2] == '\007' && p[3] == '\010') 407 || (p[2] == '0' && p[3] == '0')) { 408 skip = p - (const char *)h; 409 __archive_read_consume(a, skip); 410 return (ARCHIVE_OK); 411 } 412 } 413 ++p; 414 } 415 skip = p - (const char *)h; 416 __archive_read_consume(a, skip); 417 skipped += skip; 418 } 419} 420 421static int 422zip_read_file_header(struct archive_read *a, struct archive_entry *entry, 423 struct zip *zip) 424{ 425 const struct zip_file_header *p; 426 const void *h; 427 428 if ((p = __archive_read_ahead(a, sizeof *p, NULL)) == NULL) { 429 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 430 "Truncated ZIP file header"); 431 return (ARCHIVE_FATAL); 432 } 433 434 zip->version = p->version[0]; 435 zip->system = p->version[1]; 436 zip->flags = archive_le16dec(p->flags); 437 zip->compression = archive_le16dec(p->compression); 438 if (zip->compression < 439 sizeof(compression_names)/sizeof(compression_names[0])) 440 zip->compression_name = compression_names[zip->compression]; 441 else 442 zip->compression_name = "??"; 443 zip->mtime = zip_time(p->timedate); 444 zip->ctime = 0; 445 zip->atime = 0; 446 zip->mode = 0; 447 zip->uid = 0; 448 zip->gid = 0; 449 zip->crc32 = archive_le32dec(p->crc32); 450 zip->filename_length = archive_le16dec(p->filename_length); 451 zip->extra_length = archive_le16dec(p->extra_length); 452 zip->uncompressed_size = archive_le32dec(p->uncompressed_size); 453 zip->compressed_size = archive_le32dec(p->compressed_size); 454 455 __archive_read_consume(a, sizeof(struct zip_file_header)); 456 457 458 /* Read the filename. */ 459 if ((h = __archive_read_ahead(a, zip->filename_length, NULL)) == NULL) { 460 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 461 "Truncated ZIP file header"); 462 return (ARCHIVE_FATAL); 463 } 464 if (archive_string_ensure(&zip->pathname, zip->filename_length) == NULL) 465 __archive_errx(1, "Out of memory"); 466 archive_strncpy(&zip->pathname, h, zip->filename_length); 467 __archive_read_consume(a, zip->filename_length); 468 archive_entry_set_pathname(entry, zip->pathname.s); 469 470 if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') 471 zip->mode = AE_IFDIR | 0777; 472 else 473 zip->mode = AE_IFREG | 0777; 474 475 /* Read the extra data. */ 476 if ((h = __archive_read_ahead(a, zip->extra_length, NULL)) == NULL) { 477 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 478 "Truncated ZIP file header"); 479 return (ARCHIVE_FATAL); 480 } 481 process_extra(h, zip); 482 __archive_read_consume(a, zip->extra_length); 483 484 /* Populate some additional entry fields: */ 485 archive_entry_set_mode(entry, zip->mode); 486 archive_entry_set_uid(entry, zip->uid); 487 archive_entry_set_gid(entry, zip->gid); 488 archive_entry_set_mtime(entry, zip->mtime, 0); 489 archive_entry_set_ctime(entry, zip->ctime, 0); 490 archive_entry_set_atime(entry, zip->atime, 0); 491 /* Set the size only if it's meaningful. */ 492 if (0 == (zip->flags & ZIP_LENGTH_AT_END)) 493 archive_entry_set_size(entry, zip->uncompressed_size); 494 495 zip->entry_bytes_remaining = zip->compressed_size; 496 zip->entry_offset = 0; 497 498 /* If there's no body, force read_data() to return EOF immediately. */ 499 if (0 == (zip->flags & ZIP_LENGTH_AT_END) 500 && zip->entry_bytes_remaining < 1) 501 zip->end_of_entry = 1; 502 503 /* Set up a more descriptive format name. */ 504 sprintf(zip->format_name, "ZIP %d.%d (%s)", 505 zip->version / 10, zip->version % 10, 506 zip->compression_name); 507 a->archive.archive_format_name = zip->format_name; 508 509 return (ARCHIVE_OK); 510} 511 512/* Convert an MSDOS-style date/time into Unix-style time. */ 513static time_t 514zip_time(const char *p) 515{ 516 int msTime, msDate; 517 struct tm ts; 518 519 msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); 520 msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); 521 522 memset(&ts, 0, sizeof(ts)); 523 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 524 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 525 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 526 ts.tm_hour = (msTime >> 11) & 0x1f; 527 ts.tm_min = (msTime >> 5) & 0x3f; 528 ts.tm_sec = (msTime << 1) & 0x3e; 529 ts.tm_isdst = -1; 530 return mktime(&ts); 531} 532 533static int 534archive_read_format_zip_read_data(struct archive_read *a, 535 const void **buff, size_t *size, off_t *offset) 536{ 537 int r; 538 struct zip *zip; 539 540 zip = (struct zip *)(a->format->data); 541 542 /* 543 * If we hit end-of-entry last time, clean up and return 544 * ARCHIVE_EOF this time. 545 */ 546 if (zip->end_of_entry) { 547 *offset = zip->entry_uncompressed_bytes_read; 548 *size = 0; 549 *buff = NULL; 550 return (ARCHIVE_EOF); 551 } 552 553 switch(zip->compression) { 554 case 0: /* No compression. */ 555 r = zip_read_data_none(a, buff, size, offset); 556 break; 557 case 8: /* Deflate compression. */ 558 r = zip_read_data_deflate(a, buff, size, offset); 559 break; 560 default: /* Unsupported compression. */ 561 *buff = NULL; 562 *size = 0; 563 *offset = 0; 564 /* Return a warning. */ 565 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 566 "Unsupported ZIP compression method (%s)", 567 zip->compression_name); 568 if (zip->flags & ZIP_LENGTH_AT_END) { 569 /* 570 * ZIP_LENGTH_AT_END requires us to 571 * decompress the entry in order to 572 * skip it, but we don't know this 573 * compression method, so we give up. 574 */ 575 r = ARCHIVE_FATAL; 576 } else { 577 /* We can't decompress this entry, but we will 578 * be able to skip() it and try the next entry. */ 579 r = ARCHIVE_WARN; 580 } 581 break; 582 } 583 if (r != ARCHIVE_OK) 584 return (r); 585 /* Update checksum */ 586 if (*size) 587 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size); 588 /* If we hit the end, swallow any end-of-data marker. */ 589 if (zip->end_of_entry) { 590 if (zip->flags & ZIP_LENGTH_AT_END) { 591 const char *p; 592 593 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) { 594 archive_set_error(&a->archive, 595 ARCHIVE_ERRNO_FILE_FORMAT, 596 "Truncated ZIP end-of-file record"); 597 return (ARCHIVE_FATAL); 598 } 599 zip->crc32 = archive_le32dec(p + 4); 600 zip->compressed_size = archive_le32dec(p + 8); 601 zip->uncompressed_size = archive_le32dec(p + 12); 602 __archive_read_consume(a, 16); 603 } 604 /* Check file size, CRC against these values. */ 605 if (zip->compressed_size != zip->entry_compressed_bytes_read) { 606 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 607 "ZIP compressed data is wrong size"); 608 return (ARCHIVE_WARN); 609 } 610 /* Size field only stores the lower 32 bits of the actual size. */ 611 if ((zip->uncompressed_size & UINT32_MAX) 612 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { 613 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 614 "ZIP uncompressed data is wrong size"); 615 return (ARCHIVE_WARN); 616 } 617 /* Check computed CRC against header */ 618 if (zip->crc32 != zip->entry_crc32) { 619 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 620 "ZIP bad CRC: 0x%lx should be 0x%lx", 621 zip->entry_crc32, zip->crc32); 622 return (ARCHIVE_WARN); 623 } 624 } 625 626 /* Return EOF immediately if this is a non-regular file. */ 627 if (AE_IFREG != (zip->mode & AE_IFMT)) 628 return (ARCHIVE_EOF); 629 return (ARCHIVE_OK); 630} 631 632/* 633 * Read "uncompressed" data. According to the current specification, 634 * if ZIP_LENGTH_AT_END is specified, then the size fields in the 635 * initial file header are supposed to be set to zero. This would, of 636 * course, make it impossible for us to read the archive, since we 637 * couldn't determine the end of the file data. Info-ZIP seems to 638 * include the real size fields both before and after the data in this 639 * case (the CRC only appears afterwards), so this works as you would 640 * expect. 641 * 642 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 643 * zip->end_of_entry if it consumes all of the data. 644 */ 645static int 646zip_read_data_none(struct archive_read *a, const void **buff, 647 size_t *size, off_t *offset) 648{ 649 struct zip *zip; 650 ssize_t bytes_avail; 651 652 zip = (struct zip *)(a->format->data); 653 654 if (zip->entry_bytes_remaining == 0) { 655 *buff = NULL; 656 *size = 0; 657 *offset = zip->entry_offset; 658 zip->end_of_entry = 1; 659 return (ARCHIVE_OK); 660 } 661 /* 662 * Note: '1' here is a performance optimization. 663 * Recall that the decompression layer returns a count of 664 * available bytes; asking for more than that forces the 665 * decompressor to combine reads by copying data. 666 */ 667 *buff = __archive_read_ahead(a, 1, &bytes_avail); 668 if (bytes_avail <= 0) { 669 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 670 "Truncated ZIP file data"); 671 return (ARCHIVE_FATAL); 672 } 673 if (bytes_avail > zip->entry_bytes_remaining) 674 bytes_avail = zip->entry_bytes_remaining; 675 __archive_read_consume(a, bytes_avail); 676 *size = bytes_avail; 677 *offset = zip->entry_offset; 678 zip->entry_offset += *size; 679 zip->entry_bytes_remaining -= *size; 680 zip->entry_uncompressed_bytes_read += *size; 681 zip->entry_compressed_bytes_read += *size; 682 return (ARCHIVE_OK); 683} 684 685#ifdef HAVE_ZLIB_H 686static int 687zip_read_data_deflate(struct archive_read *a, const void **buff, 688 size_t *size, off_t *offset) 689{ 690 struct zip *zip; 691 ssize_t bytes_avail; 692 const void *compressed_buff; 693 int r; 694 695 zip = (struct zip *)(a->format->data); 696 697 /* If the buffer hasn't been allocated, allocate it now. */ 698 if (zip->uncompressed_buffer == NULL) { 699 zip->uncompressed_buffer_size = 32 * 1024; 700 zip->uncompressed_buffer 701 = (unsigned char *)malloc(zip->uncompressed_buffer_size); 702 if (zip->uncompressed_buffer == NULL) { 703 archive_set_error(&a->archive, ENOMEM, 704 "No memory for ZIP decompression"); 705 return (ARCHIVE_FATAL); 706 } 707 } 708 709 /* If we haven't yet read any data, initialize the decompressor. */ 710 if (!zip->decompress_init) { 711 if (zip->stream_valid) 712 r = inflateReset(&zip->stream); 713 else 714 r = inflateInit2(&zip->stream, 715 -15 /* Don't check for zlib header */); 716 if (r != Z_OK) { 717 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 718 "Can't initialize ZIP decompression."); 719 return (ARCHIVE_FATAL); 720 } 721 /* Stream structure has been set up. */ 722 zip->stream_valid = 1; 723 /* We've initialized decompression for this stream. */ 724 zip->decompress_init = 1; 725 } 726 727 /* 728 * Note: '1' here is a performance optimization. 729 * Recall that the decompression layer returns a count of 730 * available bytes; asking for more than that forces the 731 * decompressor to combine reads by copying data. 732 */ 733 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); 734 if (bytes_avail <= 0) { 735 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 736 "Truncated ZIP file body"); 737 return (ARCHIVE_FATAL); 738 } 739 740 /* 741 * A bug in zlib.h: stream.next_in should be marked 'const' 742 * but isn't (the library never alters data through the 743 * next_in pointer, only reads it). The result: this ugly 744 * cast to remove 'const'. 745 */ 746 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; 747 zip->stream.avail_in = bytes_avail; 748 zip->stream.total_in = 0; 749 zip->stream.next_out = zip->uncompressed_buffer; 750 zip->stream.avail_out = zip->uncompressed_buffer_size; 751 zip->stream.total_out = 0; 752 753 r = inflate(&zip->stream, 0); 754 switch (r) { 755 case Z_OK: 756 break; 757 case Z_STREAM_END: 758 zip->end_of_entry = 1; 759 break; 760 case Z_MEM_ERROR: 761 archive_set_error(&a->archive, ENOMEM, 762 "Out of memory for ZIP decompression"); 763 return (ARCHIVE_FATAL); 764 default: 765 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 766 "ZIP decompression failed (%d)", r); 767 return (ARCHIVE_FATAL); 768 } 769 770 /* Consume as much as the compressor actually used. */ 771 bytes_avail = zip->stream.total_in; 772 __archive_read_consume(a, bytes_avail); 773 zip->entry_bytes_remaining -= bytes_avail; 774 zip->entry_compressed_bytes_read += bytes_avail; 775 776 *offset = zip->entry_offset; 777 *size = zip->stream.total_out; 778 zip->entry_uncompressed_bytes_read += *size; 779 *buff = zip->uncompressed_buffer; 780 zip->entry_offset += *size; 781 return (ARCHIVE_OK); 782} 783#else 784static int 785zip_read_data_deflate(struct archive_read *a, const void **buff, 786 size_t *size, off_t *offset) 787{ 788 *buff = NULL; 789 *size = 0; 790 *offset = 0; 791 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 792 "libarchive compiled without deflate support (no libz)"); 793 return (ARCHIVE_FATAL); 794} 795#endif 796 797static int 798archive_read_format_zip_read_data_skip(struct archive_read *a) 799{ 800 struct zip *zip; 801 const void *buff = NULL; 802 off_t bytes_skipped; 803 804 zip = (struct zip *)(a->format->data); 805 806 /* If we've already read to end of data, we're done. */ 807 if (zip->end_of_entry) 808 return (ARCHIVE_OK); 809 810 /* 811 * If the length is at the end, we have no choice but 812 * to decompress all the data to find the end marker. 813 */ 814 if (zip->flags & ZIP_LENGTH_AT_END) { 815 size_t size; 816 off_t offset; 817 int r; 818 do { 819 r = archive_read_format_zip_read_data(a, &buff, 820 &size, &offset); 821 } while (r == ARCHIVE_OK); 822 return (r); 823 } 824 825 /* 826 * If the length is at the beginning, we can skip the 827 * compressed data much more quickly. 828 */ 829 bytes_skipped = __archive_read_skip(a, zip->entry_bytes_remaining); 830 if (bytes_skipped < 0) 831 return (ARCHIVE_FATAL); 832 833 /* This entry is finished and done. */ 834 zip->end_of_entry = 1; 835 return (ARCHIVE_OK); 836} 837 838static int 839archive_read_format_zip_cleanup(struct archive_read *a) 840{ 841 struct zip *zip; 842 843 zip = (struct zip *)(a->format->data); 844#ifdef HAVE_ZLIB_H 845 if (zip->stream_valid) 846 inflateEnd(&zip->stream); 847#endif 848 free(zip->uncompressed_buffer); 849 archive_string_free(&(zip->pathname)); 850 archive_string_free(&(zip->extra)); 851 free(zip); 852 (a->format->data) = NULL; 853 return (ARCHIVE_OK); 854} 855 856/* 857 * The extra data is stored as a list of 858 * id1+size1+data1 + id2+size2+data2 ... 859 * triplets. id and size are 2 bytes each. 860 */ 861static void 862process_extra(const void* extra, struct zip* zip) 863{ 864 int offset = 0; 865 const char *p = (const char *)extra; 866 while (offset < zip->extra_length - 4) 867 { 868 unsigned short headerid = archive_le16dec(p + offset); 869 unsigned short datasize = archive_le16dec(p + offset + 2); 870 offset += 4; 871 if (offset + datasize > zip->extra_length) 872 break; 873#ifdef DEBUG 874 fprintf(stderr, "Header id 0x%04x, length %d\n", 875 headerid, datasize); 876#endif 877 switch (headerid) { 878 case 0x0001: 879 /* Zip64 extended information extra field. */ 880 if (datasize >= 8) 881 zip->uncompressed_size = archive_le64dec(p + offset); 882 if (datasize >= 16) 883 zip->compressed_size = archive_le64dec(p + offset + 8); 884 break; 885 case 0x5455: 886 { 887 /* Extended time field "UT". */ 888 int flags = p[offset]; 889 offset++; 890 datasize--; 891 /* Flag bits indicate which dates are present. */ 892 if (flags & 0x01) 893 { 894#ifdef DEBUG 895 fprintf(stderr, "mtime: %lld -> %d\n", 896 (long long)zip->mtime, 897 archive_le32dec(p + offset)); 898#endif 899 if (datasize < 4) 900 break; 901 zip->mtime = archive_le32dec(p + offset); 902 offset += 4; 903 datasize -= 4; 904 } 905 if (flags & 0x02) 906 { 907 if (datasize < 4) 908 break; 909 zip->atime = archive_le32dec(p + offset); 910 offset += 4; 911 datasize -= 4; 912 } 913 if (flags & 0x04) 914 { 915 if (datasize < 4) 916 break; 917 zip->ctime = archive_le32dec(p + offset); 918 offset += 4; 919 datasize -= 4; 920 } 921 break; 922 } 923 case 0x7855: 924 /* Info-ZIP Unix Extra Field (type 2) "Ux". */ 925#ifdef DEBUG 926 fprintf(stderr, "uid %d gid %d\n", 927 archive_le16dec(p + offset), 928 archive_le16dec(p + offset + 2)); 929#endif 930 if (datasize >= 2) 931 zip->uid = archive_le16dec(p + offset); 932 if (datasize >= 4) 933 zip->gid = archive_le16dec(p + offset + 2); 934 break; 935 case 0x7875: 936 /* Info-Zip Unix Extra Field (type 3) "ux". */ 937 break; 938 default: 939 break; 940 } 941 offset += datasize; 942 } 943#ifdef DEBUG 944 if (offset != zip->extra_length) 945 { 946 fprintf(stderr, 947 "Extra data field contents do not match reported size!"); 948 } 949#endif 950} 951