1/*- 2 * Copyright (c) 2004 Tim Kientzle 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26#include "archive_platform.h" 27__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_zip.c 201102 2009-12-28 03:11:36Z kientzle $"); 28 29#ifdef HAVE_ERRNO_H 30#include <errno.h> 31#endif 32#include <stdio.h> 33#ifdef HAVE_STDLIB_H 34#include <stdlib.h> 35#endif 36#include <time.h> 37#ifdef HAVE_ZLIB_H 38#include <zlib.h> 39#endif 40 41#include "archive.h" 42#include "archive_entry.h" 43#include "archive_private.h" 44#include "archive_read_private.h" 45#include "archive_endian.h" 46 47#ifndef HAVE_ZLIB_H 48#include "archive_crc32.h" 49#endif 50 51struct zip { 52 /* entry_bytes_remaining is the number of bytes we expect. */ 53 int64_t entry_bytes_remaining; 54 int64_t entry_offset; 55 56 /* These count the number of bytes actually read for the entry. */ 57 int64_t entry_compressed_bytes_read; 58 int64_t entry_uncompressed_bytes_read; 59 60 /* Running CRC32 of the decompressed data */ 61 unsigned long entry_crc32; 62 63 unsigned version; 64 unsigned system; 65 unsigned flags; 66 unsigned compression; 67 const char * compression_name; 68 time_t mtime; 69 time_t ctime; 70 time_t atime; 71 mode_t mode; 72 uid_t uid; 73 gid_t gid; 74 75 /* Flags to mark progress of decompression. */ 76 char decompress_init; 77 char end_of_entry; 78 79 unsigned long crc32; 80 ssize_t filename_length; 81 ssize_t extra_length; 82 int64_t uncompressed_size; 83 int64_t compressed_size; 84 85 unsigned char *uncompressed_buffer; 86 size_t uncompressed_buffer_size; 87#ifdef HAVE_ZLIB_H 88 z_stream stream; 89 char stream_valid; 90#endif 91 92 struct archive_string pathname; 93 struct archive_string extra; 94 char format_name[64]; 95}; 96 97#define ZIP_LENGTH_AT_END 8 98 99struct zip_file_header { 100 char signature[4]; 101 char version[2]; 102 char flags[2]; 103 char compression[2]; 104 char timedate[4]; 105 char crc32[4]; 106 char compressed_size[4]; 107 char uncompressed_size[4]; 108 char filename_length[2]; 109 char extra_length[2]; 110}; 111 112static const char *compression_names[] = { 113 "uncompressed", 114 "shrinking", 115 "reduced-1", 116 "reduced-2", 117 "reduced-3", 118 "reduced-4", 119 "imploded", 120 "reserved", 121 "deflation" 122}; 123 124static int archive_read_format_zip_bid(struct archive_read *); 125static int archive_read_format_zip_cleanup(struct archive_read *); 126static int archive_read_format_zip_read_data(struct archive_read *, 127 const void **, size_t *, off_t *); 128static int archive_read_format_zip_read_data_skip(struct archive_read *a); 129static int archive_read_format_zip_read_header(struct archive_read *, 130 struct archive_entry *); 131static int zip_read_data_deflate(struct archive_read *a, const void **buff, 132 size_t *size, off_t *offset); 133static int zip_read_data_none(struct archive_read *a, const void **buff, 134 size_t *size, off_t *offset); 135static int zip_read_file_header(struct archive_read *a, 136 struct archive_entry *entry, struct zip *zip); 137static time_t zip_time(const char *); 138static void process_extra(const void* extra, struct zip* zip); 139 140int 141archive_read_support_format_zip(struct archive *_a) 142{ 143 struct archive_read *a = (struct archive_read *)_a; 144 struct zip *zip; 145 int r; 146 147 zip = (struct zip *)malloc(sizeof(*zip)); 148 if (zip == NULL) { 149 archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data"); 150 return (ARCHIVE_FATAL); 151 } 152 memset(zip, 0, sizeof(*zip)); 153 154 r = __archive_read_register_format(a, 155 zip, 156 "zip", 157 archive_read_format_zip_bid, 158 NULL, 159 archive_read_format_zip_read_header, 160 archive_read_format_zip_read_data, 161 archive_read_format_zip_read_data_skip, 162 archive_read_format_zip_cleanup); 163 164 if (r != ARCHIVE_OK) 165 free(zip); 166 return (ARCHIVE_OK); 167} 168 169 170static int 171archive_read_format_zip_bid(struct archive_read *a) 172{ 173 const char *p; 174 const void *buff; 175 ssize_t bytes_avail, offset; 176 177 if ((p = __archive_read_ahead(a, 4, NULL)) == NULL) 178 return (-1); 179 180 /* 181 * Bid of 30 here is: 16 bits for "PK", 182 * next 16-bit field has four options (-2 bits). 183 * 16 + 16-2 = 30. 184 */ 185 if (p[0] == 'P' && p[1] == 'K') { 186 if ((p[2] == '\001' && p[3] == '\002') 187 || (p[2] == '\003' && p[3] == '\004') 188 || (p[2] == '\005' && p[3] == '\006') 189 || (p[2] == '\007' && p[3] == '\010') 190 || (p[2] == '0' && p[3] == '0')) 191 return (30); 192 } 193 194 /* 195 * Attempt to handle self-extracting archives 196 * by noting a PE header and searching forward 197 * up to 128k for a 'PK\003\004' marker. 198 */ 199 if (p[0] == 'M' && p[1] == 'Z') { 200 /* 201 * TODO: Optimize by initializing 'offset' to an 202 * estimate of the likely start of the archive data 203 * based on values in the PE header. Note that we 204 * don't need to be exact, but we mustn't skip too 205 * far. The search below will compensate if we 206 * undershoot. 207 */ 208 offset = 0; 209 while (offset < 124000) { 210 /* Get 4k of data beyond where we stopped. */ 211 buff = __archive_read_ahead(a, offset + 4096, 212 &bytes_avail); 213 if (buff == NULL) 214 break; 215 p = (const char *)buff + offset; 216 while (p + 9 < (const char *)buff + bytes_avail) { 217 if (p[0] == 'P' && p[1] == 'K' /* signature */ 218 && p[2] == 3 && p[3] == 4 /* File entry */ 219 && p[8] == 8 /* compression == deflate */ 220 && p[9] == 0 /* High byte of compression */ 221 ) 222 { 223 return (30); 224 } 225 ++p; 226 } 227 offset = p - (const char *)buff; 228 } 229 } 230 231 return (0); 232} 233 234/* 235 * Search forward for a "PK\003\004" file header. This handles the 236 * case of self-extracting archives, where there is an executable 237 * prepended to the ZIP archive. 238 */ 239static int 240skip_sfx(struct archive_read *a) 241{ 242 const void *h; 243 const char *p, *q; 244 size_t skip; 245 ssize_t bytes; 246 247 /* 248 * TODO: We should be able to skip forward by a bunch 249 * by lifting some values from the PE header. We don't 250 * need to be exact (we're still going to search forward 251 * to find the header), but it will speed things up and 252 * reduce the chance of a false positive. 253 */ 254 for (;;) { 255 h = __archive_read_ahead(a, 4, &bytes); 256 if (bytes < 4) 257 return (ARCHIVE_FATAL); 258 p = h; 259 q = p + bytes; 260 261 /* 262 * Scan ahead until we find something that looks 263 * like the zip header. 264 */ 265 while (p + 4 < q) { 266 switch (p[3]) { 267 case '\004': 268 /* TODO: Additional verification here. */ 269 if (memcmp("PK\003\004", p, 4) == 0) { 270 skip = p - (const char *)h; 271 __archive_read_consume(a, skip); 272 return (ARCHIVE_OK); 273 } 274 p += 4; 275 break; 276 case '\003': p += 1; break; 277 case 'K': p += 2; break; 278 case 'P': p += 3; break; 279 default: p += 4; break; 280 } 281 } 282 skip = p - (const char *)h; 283 __archive_read_consume(a, skip); 284 } 285} 286 287static int 288archive_read_format_zip_read_header(struct archive_read *a, 289 struct archive_entry *entry) 290{ 291 const void *h; 292 const char *signature; 293 struct zip *zip; 294 int r = ARCHIVE_OK, r1; 295 296 a->archive.archive_format = ARCHIVE_FORMAT_ZIP; 297 if (a->archive.archive_format_name == NULL) 298 a->archive.archive_format_name = "ZIP"; 299 300 zip = (struct zip *)(a->format->data); 301 zip->decompress_init = 0; 302 zip->end_of_entry = 0; 303 zip->entry_uncompressed_bytes_read = 0; 304 zip->entry_compressed_bytes_read = 0; 305 zip->entry_crc32 = crc32(0, NULL, 0); 306 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 307 return (ARCHIVE_FATAL); 308 309 signature = (const char *)h; 310 if (signature[0] == 'M' && signature[1] == 'Z') { 311 /* This is an executable? Must be self-extracting... */ 312 r = skip_sfx(a); 313 if (r < ARCHIVE_WARN) 314 return (r); 315 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 316 return (ARCHIVE_FATAL); 317 signature = (const char *)h; 318 } 319 320 if (signature[0] != 'P' || signature[1] != 'K') { 321 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 322 "Bad ZIP file"); 323 return (ARCHIVE_FATAL); 324 } 325 326 /* 327 * "PK00" signature is used for "split" archives that 328 * only have a single segment. This means we can just 329 * skip the PK00; the first real file header should follow. 330 */ 331 if (signature[2] == '0' && signature[3] == '0') { 332 __archive_read_consume(a, 4); 333 if ((h = __archive_read_ahead(a, 4, NULL)) == NULL) 334 return (ARCHIVE_FATAL); 335 signature = (const char *)h; 336 if (signature[0] != 'P' || signature[1] != 'K') { 337 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 338 "Bad ZIP file"); 339 return (ARCHIVE_FATAL); 340 } 341 } 342 343 if (signature[2] == '\001' && signature[3] == '\002') { 344 /* Beginning of central directory. */ 345 return (ARCHIVE_EOF); 346 } 347 348 if (signature[2] == '\003' && signature[3] == '\004') { 349 /* Regular file entry. */ 350 r1 = zip_read_file_header(a, entry, zip); 351 if (r1 != ARCHIVE_OK) 352 return (r1); 353 return (r); 354 } 355 356 if (signature[2] == '\005' && signature[3] == '\006') { 357 /* End-of-archive record. */ 358 return (ARCHIVE_EOF); 359 } 360 361 if (signature[2] == '\007' && signature[3] == '\010') { 362 /* 363 * We should never encounter this record here; 364 * see ZIP_LENGTH_AT_END handling below for details. 365 */ 366 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 367 "Bad ZIP file: Unexpected end-of-entry record"); 368 return (ARCHIVE_FATAL); 369 } 370 371 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 372 "Damaged ZIP file or unsupported format variant (%d,%d)", 373 signature[2], signature[3]); 374 return (ARCHIVE_FATAL); 375} 376 377static int 378zip_read_file_header(struct archive_read *a, struct archive_entry *entry, 379 struct zip *zip) 380{ 381 const struct zip_file_header *p; 382 const void *h; 383 384 if ((p = __archive_read_ahead(a, sizeof *p, NULL)) == NULL) { 385 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 386 "Truncated ZIP file header"); 387 return (ARCHIVE_FATAL); 388 } 389 390 zip->version = p->version[0]; 391 zip->system = p->version[1]; 392 zip->flags = archive_le16dec(p->flags); 393 zip->compression = archive_le16dec(p->compression); 394 if (zip->compression < 395 sizeof(compression_names)/sizeof(compression_names[0])) 396 zip->compression_name = compression_names[zip->compression]; 397 else 398 zip->compression_name = "??"; 399 zip->mtime = zip_time(p->timedate); 400 zip->ctime = 0; 401 zip->atime = 0; 402 zip->mode = 0; 403 zip->uid = 0; 404 zip->gid = 0; 405 zip->crc32 = archive_le32dec(p->crc32); 406 zip->filename_length = archive_le16dec(p->filename_length); 407 zip->extra_length = archive_le16dec(p->extra_length); 408 zip->uncompressed_size = archive_le32dec(p->uncompressed_size); 409 zip->compressed_size = archive_le32dec(p->compressed_size); 410 411 __archive_read_consume(a, sizeof(struct zip_file_header)); 412 413 414 /* Read the filename. */ 415 if ((h = __archive_read_ahead(a, zip->filename_length, NULL)) == NULL) { 416 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 417 "Truncated ZIP file header"); 418 return (ARCHIVE_FATAL); 419 } 420 if (archive_string_ensure(&zip->pathname, zip->filename_length) == NULL) 421 __archive_errx(1, "Out of memory"); 422 archive_strncpy(&zip->pathname, h, zip->filename_length); 423 __archive_read_consume(a, zip->filename_length); 424 archive_entry_set_pathname(entry, zip->pathname.s); 425 426 if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/') 427 zip->mode = AE_IFDIR | 0777; 428 else 429 zip->mode = AE_IFREG | 0777; 430 431 /* Read the extra data. */ 432 if ((h = __archive_read_ahead(a, zip->extra_length, NULL)) == NULL) { 433 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 434 "Truncated ZIP file header"); 435 return (ARCHIVE_FATAL); 436 } 437 process_extra(h, zip); 438 __archive_read_consume(a, zip->extra_length); 439 440 /* Populate some additional entry fields: */ 441 archive_entry_set_mode(entry, zip->mode); 442 archive_entry_set_uid(entry, zip->uid); 443 archive_entry_set_gid(entry, zip->gid); 444 archive_entry_set_mtime(entry, zip->mtime, 0); 445 archive_entry_set_ctime(entry, zip->ctime, 0); 446 archive_entry_set_atime(entry, zip->atime, 0); 447 /* Set the size only if it's meaningful. */ 448 if (0 == (zip->flags & ZIP_LENGTH_AT_END)) 449 archive_entry_set_size(entry, zip->uncompressed_size); 450 451 zip->entry_bytes_remaining = zip->compressed_size; 452 zip->entry_offset = 0; 453 454 /* If there's no body, force read_data() to return EOF immediately. */ 455 if (0 == (zip->flags & ZIP_LENGTH_AT_END) 456 && zip->entry_bytes_remaining < 1) 457 zip->end_of_entry = 1; 458 459 /* Set up a more descriptive format name. */ 460 sprintf(zip->format_name, "ZIP %d.%d (%s)", 461 zip->version / 10, zip->version % 10, 462 zip->compression_name); 463 a->archive.archive_format_name = zip->format_name; 464 465 return (ARCHIVE_OK); 466} 467 468/* Convert an MSDOS-style date/time into Unix-style time. */ 469static time_t 470zip_time(const char *p) 471{ 472 int msTime, msDate; 473 struct tm ts; 474 475 msTime = (0xff & (unsigned)p[0]) + 256 * (0xff & (unsigned)p[1]); 476 msDate = (0xff & (unsigned)p[2]) + 256 * (0xff & (unsigned)p[3]); 477 478 memset(&ts, 0, sizeof(ts)); 479 ts.tm_year = ((msDate >> 9) & 0x7f) + 80; /* Years since 1900. */ 480 ts.tm_mon = ((msDate >> 5) & 0x0f) - 1; /* Month number. */ 481 ts.tm_mday = msDate & 0x1f; /* Day of month. */ 482 ts.tm_hour = (msTime >> 11) & 0x1f; 483 ts.tm_min = (msTime >> 5) & 0x3f; 484 ts.tm_sec = (msTime << 1) & 0x3e; 485 ts.tm_isdst = -1; 486 return mktime(&ts); 487} 488 489static int 490archive_read_format_zip_read_data(struct archive_read *a, 491 const void **buff, size_t *size, off_t *offset) 492{ 493 int r; 494 struct zip *zip; 495 496 zip = (struct zip *)(a->format->data); 497 498 /* 499 * If we hit end-of-entry last time, clean up and return 500 * ARCHIVE_EOF this time. 501 */ 502 if (zip->end_of_entry) { 503 *offset = zip->entry_uncompressed_bytes_read; 504 *size = 0; 505 *buff = NULL; 506 return (ARCHIVE_EOF); 507 } 508 509 switch(zip->compression) { 510 case 0: /* No compression. */ 511 r = zip_read_data_none(a, buff, size, offset); 512 break; 513 case 8: /* Deflate compression. */ 514 r = zip_read_data_deflate(a, buff, size, offset); 515 break; 516 default: /* Unsupported compression. */ 517 *buff = NULL; 518 *size = 0; 519 *offset = 0; 520 /* Return a warning. */ 521 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 522 "Unsupported ZIP compression method (%s)", 523 zip->compression_name); 524 if (zip->flags & ZIP_LENGTH_AT_END) { 525 /* 526 * ZIP_LENGTH_AT_END requires us to 527 * decompress the entry in order to 528 * skip it, but we don't know this 529 * compression method, so we give up. 530 */ 531 r = ARCHIVE_FATAL; 532 } else { 533 /* We can't decompress this entry, but we will 534 * be able to skip() it and try the next entry. */ 535 r = ARCHIVE_WARN; 536 } 537 break; 538 } 539 if (r != ARCHIVE_OK) 540 return (r); 541 /* Update checksum */ 542 if (*size) 543 zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size); 544 /* If we hit the end, swallow any end-of-data marker. */ 545 if (zip->end_of_entry) { 546 if (zip->flags & ZIP_LENGTH_AT_END) { 547 const char *p; 548 549 if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) { 550 archive_set_error(&a->archive, 551 ARCHIVE_ERRNO_FILE_FORMAT, 552 "Truncated ZIP end-of-file record"); 553 return (ARCHIVE_FATAL); 554 } 555 zip->crc32 = archive_le32dec(p + 4); 556 zip->compressed_size = archive_le32dec(p + 8); 557 zip->uncompressed_size = archive_le32dec(p + 12); 558 __archive_read_consume(a, 16); 559 } 560 /* Check file size, CRC against these values. */ 561 if (zip->compressed_size != zip->entry_compressed_bytes_read) { 562 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 563 "ZIP compressed data is wrong size"); 564 return (ARCHIVE_WARN); 565 } 566 /* Size field only stores the lower 32 bits of the actual size. */ 567 if ((zip->uncompressed_size & UINT32_MAX) 568 != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) { 569 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 570 "ZIP uncompressed data is wrong size"); 571 return (ARCHIVE_WARN); 572 } 573 /* Check computed CRC against header */ 574 if (zip->crc32 != zip->entry_crc32) { 575 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 576 "ZIP bad CRC: 0x%lx should be 0x%lx", 577 zip->entry_crc32, zip->crc32); 578 return (ARCHIVE_WARN); 579 } 580 } 581 582 /* Return EOF immediately if this is a non-regular file. */ 583 if (AE_IFREG != (zip->mode & AE_IFMT)) 584 return (ARCHIVE_EOF); 585 return (ARCHIVE_OK); 586} 587 588/* 589 * Read "uncompressed" data. According to the current specification, 590 * if ZIP_LENGTH_AT_END is specified, then the size fields in the 591 * initial file header are supposed to be set to zero. This would, of 592 * course, make it impossible for us to read the archive, since we 593 * couldn't determine the end of the file data. Info-ZIP seems to 594 * include the real size fields both before and after the data in this 595 * case (the CRC only appears afterwards), so this works as you would 596 * expect. 597 * 598 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets 599 * zip->end_of_entry if it consumes all of the data. 600 */ 601static int 602zip_read_data_none(struct archive_read *a, const void **buff, 603 size_t *size, off_t *offset) 604{ 605 struct zip *zip; 606 ssize_t bytes_avail; 607 608 zip = (struct zip *)(a->format->data); 609 610 if (zip->entry_bytes_remaining == 0) { 611 *buff = NULL; 612 *size = 0; 613 *offset = zip->entry_offset; 614 zip->end_of_entry = 1; 615 return (ARCHIVE_OK); 616 } 617 /* 618 * Note: '1' here is a performance optimization. 619 * Recall that the decompression layer returns a count of 620 * available bytes; asking for more than that forces the 621 * decompressor to combine reads by copying data. 622 */ 623 *buff = __archive_read_ahead(a, 1, &bytes_avail); 624 if (bytes_avail <= 0) { 625 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 626 "Truncated ZIP file data"); 627 return (ARCHIVE_FATAL); 628 } 629 if (bytes_avail > zip->entry_bytes_remaining) 630 bytes_avail = zip->entry_bytes_remaining; 631 __archive_read_consume(a, bytes_avail); 632 *size = bytes_avail; 633 *offset = zip->entry_offset; 634 zip->entry_offset += *size; 635 zip->entry_bytes_remaining -= *size; 636 zip->entry_uncompressed_bytes_read += *size; 637 zip->entry_compressed_bytes_read += *size; 638 return (ARCHIVE_OK); 639} 640 641#ifdef HAVE_ZLIB_H 642static int 643zip_read_data_deflate(struct archive_read *a, const void **buff, 644 size_t *size, off_t *offset) 645{ 646 struct zip *zip; 647 ssize_t bytes_avail; 648 const void *compressed_buff; 649 int r; 650 651 zip = (struct zip *)(a->format->data); 652 653 /* If the buffer hasn't been allocated, allocate it now. */ 654 if (zip->uncompressed_buffer == NULL) { 655 zip->uncompressed_buffer_size = 32 * 1024; 656 zip->uncompressed_buffer 657 = (unsigned char *)malloc(zip->uncompressed_buffer_size); 658 if (zip->uncompressed_buffer == NULL) { 659 archive_set_error(&a->archive, ENOMEM, 660 "No memory for ZIP decompression"); 661 return (ARCHIVE_FATAL); 662 } 663 } 664 665 /* If we haven't yet read any data, initialize the decompressor. */ 666 if (!zip->decompress_init) { 667 if (zip->stream_valid) 668 r = inflateReset(&zip->stream); 669 else 670 r = inflateInit2(&zip->stream, 671 -15 /* Don't check for zlib header */); 672 if (r != Z_OK) { 673 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 674 "Can't initialize ZIP decompression."); 675 return (ARCHIVE_FATAL); 676 } 677 /* Stream structure has been set up. */ 678 zip->stream_valid = 1; 679 /* We've initialized decompression for this stream. */ 680 zip->decompress_init = 1; 681 } 682 683 /* 684 * Note: '1' here is a performance optimization. 685 * Recall that the decompression layer returns a count of 686 * available bytes; asking for more than that forces the 687 * decompressor to combine reads by copying data. 688 */ 689 compressed_buff = __archive_read_ahead(a, 1, &bytes_avail); 690 if (bytes_avail <= 0) { 691 archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT, 692 "Truncated ZIP file body"); 693 return (ARCHIVE_FATAL); 694 } 695 696 /* 697 * A bug in zlib.h: stream.next_in should be marked 'const' 698 * but isn't (the library never alters data through the 699 * next_in pointer, only reads it). The result: this ugly 700 * cast to remove 'const'. 701 */ 702 zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff; 703 zip->stream.avail_in = bytes_avail; 704 zip->stream.total_in = 0; 705 zip->stream.next_out = zip->uncompressed_buffer; 706 zip->stream.avail_out = zip->uncompressed_buffer_size; 707 zip->stream.total_out = 0; 708 709 r = inflate(&zip->stream, 0); 710 switch (r) { 711 case Z_OK: 712 break; 713 case Z_STREAM_END: 714 zip->end_of_entry = 1; 715 break; 716 case Z_MEM_ERROR: 717 archive_set_error(&a->archive, ENOMEM, 718 "Out of memory for ZIP decompression"); 719 return (ARCHIVE_FATAL); 720 default: 721 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 722 "ZIP decompression failed (%d)", r); 723 return (ARCHIVE_FATAL); 724 } 725 726 /* Consume as much as the compressor actually used. */ 727 bytes_avail = zip->stream.total_in; 728 __archive_read_consume(a, bytes_avail); 729 zip->entry_bytes_remaining -= bytes_avail; 730 zip->entry_compressed_bytes_read += bytes_avail; 731 732 *offset = zip->entry_offset; 733 *size = zip->stream.total_out; 734 zip->entry_uncompressed_bytes_read += *size; 735 *buff = zip->uncompressed_buffer; 736 zip->entry_offset += *size; 737 return (ARCHIVE_OK); 738} 739#else 740static int 741zip_read_data_deflate(struct archive_read *a, const void **buff, 742 size_t *size, off_t *offset) 743{ 744 *buff = NULL; 745 *size = 0; 746 *offset = 0; 747 archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 748 "libarchive compiled without deflate support (no libz)"); 749 return (ARCHIVE_FATAL); 750} 751#endif 752 753static int 754archive_read_format_zip_read_data_skip(struct archive_read *a) 755{ 756 struct zip *zip; 757 const void *buff = NULL; 758 off_t bytes_skipped; 759 760 zip = (struct zip *)(a->format->data); 761 762 /* If we've already read to end of data, we're done. */ 763 if (zip->end_of_entry) 764 return (ARCHIVE_OK); 765 766 /* 767 * If the length is at the end, we have no choice but 768 * to decompress all the data to find the end marker. 769 */ 770 if (zip->flags & ZIP_LENGTH_AT_END) { 771 size_t size; 772 off_t offset; 773 int r; 774 do { 775 r = archive_read_format_zip_read_data(a, &buff, 776 &size, &offset); 777 } while (r == ARCHIVE_OK); 778 return (r); 779 } 780 781 /* 782 * If the length is at the beginning, we can skip the 783 * compressed data much more quickly. 784 */ 785 bytes_skipped = __archive_read_skip(a, zip->entry_bytes_remaining); 786 if (bytes_skipped < 0) 787 return (ARCHIVE_FATAL); 788 789 /* This entry is finished and done. */ 790 zip->end_of_entry = 1; 791 return (ARCHIVE_OK); 792} 793 794static int 795archive_read_format_zip_cleanup(struct archive_read *a) 796{ 797 struct zip *zip; 798 799 zip = (struct zip *)(a->format->data); 800#ifdef HAVE_ZLIB_H 801 if (zip->stream_valid) 802 inflateEnd(&zip->stream); 803#endif 804 free(zip->uncompressed_buffer); 805 archive_string_free(&(zip->pathname)); 806 archive_string_free(&(zip->extra)); 807 free(zip); 808 (a->format->data) = NULL; 809 return (ARCHIVE_OK); 810} 811 812/* 813 * The extra data is stored as a list of 814 * id1+size1+data1 + id2+size2+data2 ... 815 * triplets. id and size are 2 bytes each. 816 */ 817static void 818process_extra(const void* extra, struct zip* zip) 819{ 820 int offset = 0; 821 const char *p = (const char *)extra; 822 while (offset < zip->extra_length - 4) 823 { 824 unsigned short headerid = archive_le16dec(p + offset); 825 unsigned short datasize = archive_le16dec(p + offset + 2); 826 offset += 4; 827 if (offset + datasize > zip->extra_length) 828 break; 829#ifdef DEBUG 830 fprintf(stderr, "Header id 0x%04x, length %d\n", 831 headerid, datasize); 832#endif 833 switch (headerid) { 834 case 0x0001: 835 /* Zip64 extended information extra field. */ 836 if (datasize >= 8) 837 zip->uncompressed_size = archive_le64dec(p + offset); 838 if (datasize >= 16) 839 zip->compressed_size = archive_le64dec(p + offset + 8); 840 break; 841 case 0x5455: 842 { 843 /* Extended time field "UT". */ 844 int flags = p[offset]; 845 offset++; 846 datasize--; 847 /* Flag bits indicate which dates are present. */ 848 if (flags & 0x01) 849 { 850#ifdef DEBUG 851 fprintf(stderr, "mtime: %lld -> %d\n", 852 (long long)zip->mtime, 853 archive_le32dec(p + offset)); 854#endif 855 if (datasize < 4) 856 break; 857 zip->mtime = archive_le32dec(p + offset); 858 offset += 4; 859 datasize -= 4; 860 } 861 if (flags & 0x02) 862 { 863 if (datasize < 4) 864 break; 865 zip->atime = archive_le32dec(p + offset); 866 offset += 4; 867 datasize -= 4; 868 } 869 if (flags & 0x04) 870 { 871 if (datasize < 4) 872 break; 873 zip->ctime = archive_le32dec(p + offset); 874 offset += 4; 875 datasize -= 4; 876 } 877 break; 878 } 879 case 0x7855: 880 /* Info-ZIP Unix Extra Field (type 2) "Ux". */ 881#ifdef DEBUG 882 fprintf(stderr, "uid %d gid %d\n", 883 archive_le16dec(p + offset), 884 archive_le16dec(p + offset + 2)); 885#endif 886 if (datasize >= 2) 887 zip->uid = archive_le16dec(p + offset); 888 if (datasize >= 4) 889 zip->gid = archive_le16dec(p + offset + 2); 890 break; 891 default: 892 break; 893 } 894 offset += datasize; 895 } 896#ifdef DEBUG 897 if (offset != zip->extra_length) 898 { 899 fprintf(stderr, 900 "Extra data field contents do not match reported size!"); 901 } 902#endif 903} 904