1/*-
2 * Copyright (c) 2004 Tim Kientzle
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "archive_platform.h"
27__FBSDID("$FreeBSD$");
28
29#ifdef HAVE_ERRNO_H
30#include <errno.h>
31#endif
32#include <stdio.h>
33#ifdef HAVE_STDLIB_H
34#include <stdlib.h>
35#endif
36#include <time.h>
37#ifdef HAVE_ZLIB_H
38#include <zlib.h>
39#endif
40
41#include "archive.h"
42#include "archive_entry.h"
43#include "archive_private.h"
44#include "archive_read_private.h"
45#include "archive_endian.h"
46
47#ifndef HAVE_ZLIB_H
48#include "archive_crc32.h"
49#endif
50
51struct zip {
52	/* entry_bytes_remaining is the number of bytes we expect. */
53	int64_t			entry_bytes_remaining;
54	int64_t			entry_offset;
55
56	/* These count the number of bytes actually read for the entry. */
57	int64_t			entry_compressed_bytes_read;
58	int64_t			entry_uncompressed_bytes_read;
59
60	/* Running CRC32 of the decompressed data */
61	unsigned long		entry_crc32;
62
63	unsigned		version;
64	unsigned		system;
65	unsigned		flags;
66	unsigned		compression;
67	const char *		compression_name;
68	time_t			mtime;
69	time_t			ctime;
70	time_t			atime;
71	mode_t			mode;
72	uid_t			uid;
73	gid_t			gid;
74
75	/* Flags to mark progress of decompression. */
76	char			decompress_init;
77	char			end_of_entry;
78
79	unsigned long		crc32;
80	ssize_t			filename_length;
81	ssize_t			extra_length;
82	int64_t			uncompressed_size;
83	int64_t			compressed_size;
84
85	unsigned char 		*uncompressed_buffer;
86	size_t 			uncompressed_buffer_size;
87#ifdef HAVE_ZLIB_H
88	z_stream		stream;
89	char			stream_valid;
90#endif
91
92	struct archive_string	pathname;
93	struct archive_string	extra;
94	char	format_name[64];
95};
96
/*
 * Bit 3 of the header flags: when set, the reader takes the CRC and the
 * sizes from a record that follows the entry data.
 */
#define ZIP_LENGTH_AT_END	(1 << 3)
98
/*
 * On-disk layout of a ZIP local file header.  Every field is a raw
 * byte array so the structure has no padding (30 bytes in total);
 * multi-byte values are decoded with the archive_le*dec() helpers.
 */
struct zip_file_header {
	char	signature[4];		/* "PK\003\004" */
	char	version[2];		/* [0] = version, [1] = system */
	char	flags[2];		/* 16-bit little-endian flag word */
	char	compression[2];		/* 16-bit compression method code */
	char	timedate[4];		/* MS-DOS time, then MS-DOS date */
	char	crc32[4];		/* expected CRC32 of the entry data */
	char	compressed_size[4];	/* 32-bit size of the stored data */
	char	uncompressed_size[4];	/* 32-bit size of the original data */
	char	filename_length[2];	/* length of the name that follows */
	char	extra_length[2];	/* length of the extra data after it */
};
111
/*
 * Human-readable names of the compression methods, indexed by the
 * 16-bit method code from the file header.  Codes beyond the end of
 * this table are reported as "??" by the header parser.  Both the
 * strings and the pointer table itself are read-only.
 */
static const char * const compression_names[] = {
	"uncompressed",		/* 0 */
	"shrinking",		/* 1 */
	"reduced-1",		/* 2 */
	"reduced-2",		/* 3 */
	"reduced-3",		/* 4 */
	"reduced-4",		/* 5 */
	"imploded",		/* 6 */
	"reserved",		/* 7 */
	"deflation"		/* 8 */
};
123
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_zip_bid(struct archive_read *a);
static int	archive_read_format_zip_cleanup(struct archive_read *a);
static int	archive_read_format_zip_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_zip_read_data_skip(struct archive_read *a);
static int	archive_read_format_zip_read_header(struct archive_read *a,
		    struct archive_entry *entry);

/* Helpers private to this file. */
static void	process_extra(const void *extra, struct zip *zip);
static int	search_next_signature(struct archive_read *a);
static int	zip_read_data_deflate(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	zip_read_data_none(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	zip_read_file_header(struct archive_read *a,
		    struct archive_entry *entry, struct zip *zip);
static time_t	zip_time(const char *p);
140
141int
142archive_read_support_format_zip(struct archive *_a)
143{
144	struct archive_read *a = (struct archive_read *)_a;
145	struct zip *zip;
146	int r;
147
148	zip = (struct zip *)malloc(sizeof(*zip));
149	if (zip == NULL) {
150		archive_set_error(&a->archive, ENOMEM, "Can't allocate zip data");
151		return (ARCHIVE_FATAL);
152	}
153	memset(zip, 0, sizeof(*zip));
154
155	r = __archive_read_register_format(a,
156	    zip,
157	    "zip",
158	    archive_read_format_zip_bid,
159	    NULL,
160	    archive_read_format_zip_read_header,
161	    archive_read_format_zip_read_data,
162	    archive_read_format_zip_read_data_skip,
163	    archive_read_format_zip_cleanup);
164
165	if (r != ARCHIVE_OK)
166		free(zip);
167	return (ARCHIVE_OK);
168}
169
170
/*
 * Bid on the input stream.
 *
 * Returns 30 when the data looks like a ZIP archive, 0 when it does
 * not, and -1 when the first four bytes cannot be read.
 */
static int
archive_read_format_zip_bid(struct archive_read *a)
{
	const char *sig;
	const char *base, *scan, *limit;
	const void *window;
	ssize_t avail, pos;

	sig = __archive_read_ahead(a, 4, NULL);
	if (sig == NULL)
		return (-1);

	/*
	 * Bid of 30 here is: 16 bits for "PK", and the following
	 * 16-bit field is one of a handful of known values
	 * (costing about 2 bits): 16 + 16 - 2 = 30.
	 */
	if (sig[0] == 'P' && sig[1] == 'K') {
		switch (sig[2]) {
		case '\001':
			if (sig[3] == '\002')
				return (30);
			break;
		case '\003':
			if (sig[3] == '\004')
				return (30);
			break;
		case '\005':
			if (sig[3] == '\006')
				return (30);
			break;
		case '\007':
			if (sig[3] == '\010')
				return (30);
			break;
		case '0':
			if (sig[3] == '0')
				return (30);
			break;
		default:
			break;
		}
	}

	/* Anything that does not start with "MZ" is not ours. */
	if (sig[0] != 'M' || sig[1] != 'Z')
		return (0);

	/*
	 * An "MZ" prefix suggests a self-extracting archive: look
	 * through roughly the first 124000 bytes for the header of a
	 * deflate-compressed file entry.
	 *
	 * TODO: Optimize by starting at an estimate of the archive
	 * position derived from the PE header.  The estimate need not
	 * be exact as long as it never overshoots; the scan below
	 * makes up for any undershoot.
	 */
	pos = 0;
	while (pos < 124000) {
		/* Ask for 4k of data beyond where the last pass stopped. */
		window = __archive_read_ahead(a, pos + 4096, &avail);
		if (window == NULL)
			break;
		base = (const char *)window;
		limit = base + avail;
		for (scan = base + pos; scan + 9 < limit; ++scan) {
			if (scan[0] == 'P' && scan[1] == 'K' /* signature */
			    && scan[2] == 3 && scan[3] == 4 /* File entry */
			    && scan[8] == 8 /* compression == deflate */
			    && scan[9] == 0 /* High byte of compression */
			    )
				return (30);
		}
		pos = scan - base;
	}

	return (0);
}
234
235/*
236 * Search forward for a "PK\003\004" file header.  This handles the
237 * case of self-extracting archives, where there is an executable
238 * prepended to the ZIP archive.
239 */
240static int
241skip_sfx(struct archive_read *a)
242{
243	const void *h;
244	const char *p, *q;
245	size_t skip;
246	ssize_t bytes;
247
248	/*
249	 * TODO: We should be able to skip forward by a bunch
250	 * by lifting some values from the PE header.  We don't
251	 * need to be exact (we're still going to search forward
252	 * to find the header), but it will speed things up and
253	 * reduce the chance of a false positive.
254	 */
255	for (;;) {
256		h = __archive_read_ahead(a, 4, &bytes);
257		if (bytes < 4)
258			return (ARCHIVE_FATAL);
259		p = h;
260		q = p + bytes;
261
262		/*
263		 * Scan ahead until we find something that looks
264		 * like the zip header.
265		 */
266		while (p + 4 < q) {
267			switch (p[3]) {
268			case '\004':
269				/* TODO: Additional verification here. */
270				if (memcmp("PK\003\004", p, 4) == 0) {
271					skip = p - (const char *)h;
272					__archive_read_consume(a, skip);
273					return (ARCHIVE_OK);
274				}
275				p += 4;
276				break;
277			case '\003': p += 1; break;
278			case 'K': p += 2; break;
279			case 'P': p += 3; break;
280			default: p += 4; break;
281			}
282		}
283		skip = p - (const char *)h;
284		__archive_read_consume(a, skip);
285	}
286}
287
288static int
289archive_read_format_zip_read_header(struct archive_read *a,
290    struct archive_entry *entry)
291{
292	const void *h;
293	const char *signature;
294	struct zip *zip;
295	int r = ARCHIVE_OK, r1;
296
297	a->archive.archive_format = ARCHIVE_FORMAT_ZIP;
298	if (a->archive.archive_format_name == NULL)
299		a->archive.archive_format_name = "ZIP";
300
301	zip = (struct zip *)(a->format->data);
302	zip->decompress_init = 0;
303	zip->end_of_entry = 0;
304	zip->entry_uncompressed_bytes_read = 0;
305	zip->entry_compressed_bytes_read = 0;
306	zip->entry_crc32 = crc32(0, NULL, 0);
307	if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
308		return (ARCHIVE_FATAL);
309
310	signature = (const char *)h;
311	if (signature[0] == 'M' && signature[1] == 'Z') {
312		/* This is an executable?  Must be self-extracting... */
313		r = skip_sfx(a);
314		if (r < ARCHIVE_WARN)
315			return (r);
316		if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
317			return (ARCHIVE_FATAL);
318		signature = (const char *)h;
319	}
320
321	/* If we don't see a PK signature here, scan forward. */
322	if (signature[0] != 'P' || signature[1] != 'K') {
323		r = search_next_signature(a);
324		if (r != ARCHIVE_OK) {
325			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
326			    "Bad ZIP file");
327			return (ARCHIVE_FATAL);
328		}
329		if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
330			return (ARCHIVE_FATAL);
331		signature = (const char *)h;
332	}
333
334	/*
335	 * "PK00" signature is used for "split" archives that
336	 * only have a single segment.  This means we can just
337	 * skip the PK00; the first real file header should follow.
338	 */
339	if (signature[2] == '0' && signature[3] == '0') {
340		__archive_read_consume(a, 4);
341		if ((h = __archive_read_ahead(a, 4, NULL)) == NULL)
342			return (ARCHIVE_FATAL);
343		signature = (const char *)h;
344		if (signature[0] != 'P' || signature[1] != 'K') {
345			archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
346			    "Bad ZIP file");
347			return (ARCHIVE_FATAL);
348		}
349	}
350
351	if (signature[2] == '\001' && signature[3] == '\002') {
352		/* Beginning of central directory. */
353		return (ARCHIVE_EOF);
354	}
355
356	if (signature[2] == '\003' && signature[3] == '\004') {
357		/* Regular file entry. */
358		r1 = zip_read_file_header(a, entry, zip);
359		if (r1 != ARCHIVE_OK)
360			return (r1);
361		return (r);
362	}
363
364	if (signature[2] == '\005' && signature[3] == '\006') {
365		/* End-of-archive record. */
366		return (ARCHIVE_EOF);
367	}
368
369	if (signature[2] == '\007' && signature[3] == '\010') {
370		/*
371		 * We should never encounter this record here;
372		 * see ZIP_LENGTH_AT_END handling below for details.
373		 */
374		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
375		    "Bad ZIP file: Unexpected end-of-entry record");
376		return (ARCHIVE_FATAL);
377	}
378
379	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
380	    "Damaged ZIP file or unsupported format variant (%d,%d)",
381	    signature[2], signature[3]);
382	return (ARCHIVE_FATAL);
383}
384
385static int
386search_next_signature(struct archive_read *a)
387{
388	const void *h;
389	const char *p, *q;
390	size_t skip;
391	ssize_t bytes;
392	int64_t skipped = 0;
393
394	for (;;) {
395		h = __archive_read_ahead(a, 4, &bytes);
396		if (h == NULL)
397			return (ARCHIVE_FATAL);
398		p = h;
399		q = p + bytes;
400
401		while (p + 4 <= q) {
402			if (p[0] == 'P' && p[1] == 'K') {
403				if ((p[2] == '\001' && p[3] == '\002')
404				    || (p[2] == '\003' && p[3] == '\004')
405				    || (p[2] == '\005' && p[3] == '\006')
406				    || (p[2] == '\007' && p[3] == '\010')
407				    || (p[2] == '0' && p[3] == '0')) {
408					skip = p - (const char *)h;
409					__archive_read_consume(a, skip);
410					return (ARCHIVE_OK);
411				}
412			}
413			++p;
414		}
415		skip = p - (const char *)h;
416		__archive_read_consume(a, skip);
417		skipped += skip;
418	}
419}
420
421static int
422zip_read_file_header(struct archive_read *a, struct archive_entry *entry,
423    struct zip *zip)
424{
425	const struct zip_file_header *p;
426	const void *h;
427
428	if ((p = __archive_read_ahead(a, sizeof *p, NULL)) == NULL) {
429		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
430		    "Truncated ZIP file header");
431		return (ARCHIVE_FATAL);
432	}
433
434	zip->version = p->version[0];
435	zip->system = p->version[1];
436	zip->flags = archive_le16dec(p->flags);
437	zip->compression = archive_le16dec(p->compression);
438	if (zip->compression <
439	    sizeof(compression_names)/sizeof(compression_names[0]))
440		zip->compression_name = compression_names[zip->compression];
441	else
442		zip->compression_name = "??";
443	zip->mtime = zip_time(p->timedate);
444	zip->ctime = 0;
445	zip->atime = 0;
446	zip->mode = 0;
447	zip->uid = 0;
448	zip->gid = 0;
449	zip->crc32 = archive_le32dec(p->crc32);
450	zip->filename_length = archive_le16dec(p->filename_length);
451	zip->extra_length = archive_le16dec(p->extra_length);
452	zip->uncompressed_size = archive_le32dec(p->uncompressed_size);
453	zip->compressed_size = archive_le32dec(p->compressed_size);
454
455	__archive_read_consume(a, sizeof(struct zip_file_header));
456
457
458	/* Read the filename. */
459	if ((h = __archive_read_ahead(a, zip->filename_length, NULL)) == NULL) {
460		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
461		    "Truncated ZIP file header");
462		return (ARCHIVE_FATAL);
463	}
464	if (archive_string_ensure(&zip->pathname, zip->filename_length) == NULL)
465		__archive_errx(1, "Out of memory");
466	archive_strncpy(&zip->pathname, h, zip->filename_length);
467	__archive_read_consume(a, zip->filename_length);
468	archive_entry_set_pathname(entry, zip->pathname.s);
469
470	if (zip->pathname.s[archive_strlen(&zip->pathname) - 1] == '/')
471		zip->mode = AE_IFDIR | 0777;
472	else
473		zip->mode = AE_IFREG | 0777;
474
475	/* Read the extra data. */
476	if ((h = __archive_read_ahead(a, zip->extra_length, NULL)) == NULL) {
477		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
478		    "Truncated ZIP file header");
479		return (ARCHIVE_FATAL);
480	}
481	process_extra(h, zip);
482	__archive_read_consume(a, zip->extra_length);
483
484	/* Populate some additional entry fields: */
485	archive_entry_set_mode(entry, zip->mode);
486	archive_entry_set_uid(entry, zip->uid);
487	archive_entry_set_gid(entry, zip->gid);
488	archive_entry_set_mtime(entry, zip->mtime, 0);
489	archive_entry_set_ctime(entry, zip->ctime, 0);
490	archive_entry_set_atime(entry, zip->atime, 0);
491	/* Set the size only if it's meaningful. */
492	if (0 == (zip->flags & ZIP_LENGTH_AT_END))
493		archive_entry_set_size(entry, zip->uncompressed_size);
494
495	zip->entry_bytes_remaining = zip->compressed_size;
496	zip->entry_offset = 0;
497
498	/* If there's no body, force read_data() to return EOF immediately. */
499	if (0 == (zip->flags & ZIP_LENGTH_AT_END)
500	    && zip->entry_bytes_remaining < 1)
501		zip->end_of_entry = 1;
502
503	/* Set up a more descriptive format name. */
504	sprintf(zip->format_name, "ZIP %d.%d (%s)",
505	    zip->version / 10, zip->version % 10,
506	    zip->compression_name);
507	a->archive.archive_format_name = zip->format_name;
508
509	return (ARCHIVE_OK);
510}
511
/*
 * Convert an MS-DOS date/time stamp into a time_t.
 *
 * 'p' points at four bytes: a little-endian 16-bit time followed by a
 * little-endian 16-bit date.  The broken-down fields are passed to
 * mktime(), so they are interpreted as local time with the DST state
 * left for mktime() to work out.
 */
static time_t
zip_time(const char *p)
{
	const unsigned char *raw = (const unsigned char *)p;
	unsigned dos_time, dos_date;
	struct tm when;

	dos_time = (unsigned)raw[0] | ((unsigned)raw[1] << 8);
	dos_date = (unsigned)raw[2] | ((unsigned)raw[3] << 8);

	memset(&when, 0, sizeof(when));

	/* Time word: hhhhh mmmmmm sssss, seconds stored in 2s units. */
	when.tm_sec = (int)((dos_time & 0x1f) << 1);
	when.tm_min = (int)((dos_time >> 5) & 0x3f);
	when.tm_hour = (int)((dos_time >> 11) & 0x1f);

	/* Date word: yyyyyyy mmmm ddddd, year counted from 1980. */
	when.tm_mday = (int)(dos_date & 0x1f);
	when.tm_mon = (int)((dos_date >> 5) & 0x0f) - 1;
	when.tm_year = (int)((dos_date >> 9) & 0x7f) + 80;

	when.tm_isdst = -1;
	return mktime(&when);
}
532
533static int
534archive_read_format_zip_read_data(struct archive_read *a,
535    const void **buff, size_t *size, off_t *offset)
536{
537	int r;
538	struct zip *zip;
539
540	zip = (struct zip *)(a->format->data);
541
542	/*
543	 * If we hit end-of-entry last time, clean up and return
544	 * ARCHIVE_EOF this time.
545	 */
546	if (zip->end_of_entry) {
547		*offset = zip->entry_uncompressed_bytes_read;
548		*size = 0;
549		*buff = NULL;
550		return (ARCHIVE_EOF);
551	}
552
553	switch(zip->compression) {
554	case 0:  /* No compression. */
555		r =  zip_read_data_none(a, buff, size, offset);
556		break;
557	case 8: /* Deflate compression. */
558		r =  zip_read_data_deflate(a, buff, size, offset);
559		break;
560	default: /* Unsupported compression. */
561		*buff = NULL;
562		*size = 0;
563		*offset = 0;
564		/* Return a warning. */
565		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
566		    "Unsupported ZIP compression method (%s)",
567		    zip->compression_name);
568		if (zip->flags & ZIP_LENGTH_AT_END) {
569			/*
570			 * ZIP_LENGTH_AT_END requires us to
571			 * decompress the entry in order to
572			 * skip it, but we don't know this
573			 * compression method, so we give up.
574			 */
575			r = ARCHIVE_FATAL;
576		} else {
577			/* We can't decompress this entry, but we will
578			 * be able to skip() it and try the next entry. */
579			r = ARCHIVE_WARN;
580		}
581		break;
582	}
583	if (r != ARCHIVE_OK)
584		return (r);
585	/* Update checksum */
586	if (*size)
587		zip->entry_crc32 = crc32(zip->entry_crc32, *buff, *size);
588	/* If we hit the end, swallow any end-of-data marker. */
589	if (zip->end_of_entry) {
590		if (zip->flags & ZIP_LENGTH_AT_END) {
591			const char *p;
592
593			if ((p = __archive_read_ahead(a, 16, NULL)) == NULL) {
594				archive_set_error(&a->archive,
595				    ARCHIVE_ERRNO_FILE_FORMAT,
596				    "Truncated ZIP end-of-file record");
597				return (ARCHIVE_FATAL);
598			}
599			zip->crc32 = archive_le32dec(p + 4);
600			zip->compressed_size = archive_le32dec(p + 8);
601			zip->uncompressed_size = archive_le32dec(p + 12);
602			__archive_read_consume(a, 16);
603		}
604		/* Check file size, CRC against these values. */
605		if (zip->compressed_size != zip->entry_compressed_bytes_read) {
606			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
607			    "ZIP compressed data is wrong size");
608			return (ARCHIVE_WARN);
609		}
610		/* Size field only stores the lower 32 bits of the actual size. */
611		if ((zip->uncompressed_size & UINT32_MAX)
612		    != (zip->entry_uncompressed_bytes_read & UINT32_MAX)) {
613			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
614			    "ZIP uncompressed data is wrong size");
615			return (ARCHIVE_WARN);
616		}
617		/* Check computed CRC against header */
618		if (zip->crc32 != zip->entry_crc32) {
619			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
620			    "ZIP bad CRC: 0x%lx should be 0x%lx",
621			    zip->entry_crc32, zip->crc32);
622			return (ARCHIVE_WARN);
623		}
624	}
625
626	/* Return EOF immediately if this is a non-regular file. */
627	if (AE_IFREG != (zip->mode & AE_IFMT))
628		return (ARCHIVE_EOF);
629	return (ARCHIVE_OK);
630}
631
632/*
633 * Read "uncompressed" data.  According to the current specification,
634 * if ZIP_LENGTH_AT_END is specified, then the size fields in the
635 * initial file header are supposed to be set to zero.  This would, of
636 * course, make it impossible for us to read the archive, since we
637 * couldn't determine the end of the file data.  Info-ZIP seems to
638 * include the real size fields both before and after the data in this
639 * case (the CRC only appears afterwards), so this works as you would
640 * expect.
641 *
642 * Returns ARCHIVE_OK if successful, ARCHIVE_FATAL otherwise, sets
643 * zip->end_of_entry if it consumes all of the data.
644 */
645static int
646zip_read_data_none(struct archive_read *a, const void **buff,
647    size_t *size, off_t *offset)
648{
649	struct zip *zip;
650	ssize_t bytes_avail;
651
652	zip = (struct zip *)(a->format->data);
653
654	if (zip->entry_bytes_remaining == 0) {
655		*buff = NULL;
656		*size = 0;
657		*offset = zip->entry_offset;
658		zip->end_of_entry = 1;
659		return (ARCHIVE_OK);
660	}
661	/*
662	 * Note: '1' here is a performance optimization.
663	 * Recall that the decompression layer returns a count of
664	 * available bytes; asking for more than that forces the
665	 * decompressor to combine reads by copying data.
666	 */
667	*buff = __archive_read_ahead(a, 1, &bytes_avail);
668	if (bytes_avail <= 0) {
669		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
670		    "Truncated ZIP file data");
671		return (ARCHIVE_FATAL);
672	}
673	if (bytes_avail > zip->entry_bytes_remaining)
674		bytes_avail = zip->entry_bytes_remaining;
675	__archive_read_consume(a, bytes_avail);
676	*size = bytes_avail;
677	*offset = zip->entry_offset;
678	zip->entry_offset += *size;
679	zip->entry_bytes_remaining -= *size;
680	zip->entry_uncompressed_bytes_read += *size;
681	zip->entry_compressed_bytes_read += *size;
682	return (ARCHIVE_OK);
683}
684
685#ifdef HAVE_ZLIB_H
686static int
687zip_read_data_deflate(struct archive_read *a, const void **buff,
688    size_t *size, off_t *offset)
689{
690	struct zip *zip;
691	ssize_t bytes_avail;
692	const void *compressed_buff;
693	int r;
694
695	zip = (struct zip *)(a->format->data);
696
697	/* If the buffer hasn't been allocated, allocate it now. */
698	if (zip->uncompressed_buffer == NULL) {
699		zip->uncompressed_buffer_size = 32 * 1024;
700		zip->uncompressed_buffer
701		    = (unsigned char *)malloc(zip->uncompressed_buffer_size);
702		if (zip->uncompressed_buffer == NULL) {
703			archive_set_error(&a->archive, ENOMEM,
704			    "No memory for ZIP decompression");
705			return (ARCHIVE_FATAL);
706		}
707	}
708
709	/* If we haven't yet read any data, initialize the decompressor. */
710	if (!zip->decompress_init) {
711		if (zip->stream_valid)
712			r = inflateReset(&zip->stream);
713		else
714			r = inflateInit2(&zip->stream,
715			    -15 /* Don't check for zlib header */);
716		if (r != Z_OK) {
717			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
718			    "Can't initialize ZIP decompression.");
719			return (ARCHIVE_FATAL);
720		}
721		/* Stream structure has been set up. */
722		zip->stream_valid = 1;
723		/* We've initialized decompression for this stream. */
724		zip->decompress_init = 1;
725	}
726
727	/*
728	 * Note: '1' here is a performance optimization.
729	 * Recall that the decompression layer returns a count of
730	 * available bytes; asking for more than that forces the
731	 * decompressor to combine reads by copying data.
732	 */
733	compressed_buff = __archive_read_ahead(a, 1, &bytes_avail);
734	if (bytes_avail <= 0) {
735		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
736		    "Truncated ZIP file body");
737		return (ARCHIVE_FATAL);
738	}
739
740	/*
741	 * A bug in zlib.h: stream.next_in should be marked 'const'
742	 * but isn't (the library never alters data through the
743	 * next_in pointer, only reads it).  The result: this ugly
744	 * cast to remove 'const'.
745	 */
746	zip->stream.next_in = (Bytef *)(uintptr_t)(const void *)compressed_buff;
747	zip->stream.avail_in = bytes_avail;
748	zip->stream.total_in = 0;
749	zip->stream.next_out = zip->uncompressed_buffer;
750	zip->stream.avail_out = zip->uncompressed_buffer_size;
751	zip->stream.total_out = 0;
752
753	r = inflate(&zip->stream, 0);
754	switch (r) {
755	case Z_OK:
756		break;
757	case Z_STREAM_END:
758		zip->end_of_entry = 1;
759		break;
760	case Z_MEM_ERROR:
761		archive_set_error(&a->archive, ENOMEM,
762		    "Out of memory for ZIP decompression");
763		return (ARCHIVE_FATAL);
764	default:
765		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
766		    "ZIP decompression failed (%d)", r);
767		return (ARCHIVE_FATAL);
768	}
769
770	/* Consume as much as the compressor actually used. */
771	bytes_avail = zip->stream.total_in;
772	__archive_read_consume(a, bytes_avail);
773	zip->entry_bytes_remaining -= bytes_avail;
774	zip->entry_compressed_bytes_read += bytes_avail;
775
776	*offset = zip->entry_offset;
777	*size = zip->stream.total_out;
778	zip->entry_uncompressed_bytes_read += *size;
779	*buff = zip->uncompressed_buffer;
780	zip->entry_offset += *size;
781	return (ARCHIVE_OK);
782}
783#else
784static int
785zip_read_data_deflate(struct archive_read *a, const void **buff,
786    size_t *size, off_t *offset)
787{
788	*buff = NULL;
789	*size = 0;
790	*offset = 0;
791	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
792	    "libarchive compiled without deflate support (no libz)");
793	return (ARCHIVE_FATAL);
794}
795#endif
796
797static int
798archive_read_format_zip_read_data_skip(struct archive_read *a)
799{
800	struct zip *zip;
801	const void *buff = NULL;
802	off_t bytes_skipped;
803
804	zip = (struct zip *)(a->format->data);
805
806	/* If we've already read to end of data, we're done. */
807	if (zip->end_of_entry)
808		return (ARCHIVE_OK);
809
810	/*
811	 * If the length is at the end, we have no choice but
812	 * to decompress all the data to find the end marker.
813	 */
814	if (zip->flags & ZIP_LENGTH_AT_END) {
815		size_t size;
816		off_t offset;
817		int r;
818		do {
819			r = archive_read_format_zip_read_data(a, &buff,
820			    &size, &offset);
821		} while (r == ARCHIVE_OK);
822		return (r);
823	}
824
825	/*
826	 * If the length is at the beginning, we can skip the
827	 * compressed data much more quickly.
828	 */
829	bytes_skipped = __archive_read_skip(a, zip->entry_bytes_remaining);
830	if (bytes_skipped < 0)
831		return (ARCHIVE_FATAL);
832
833	/* This entry is finished and done. */
834	zip->end_of_entry = 1;
835	return (ARCHIVE_OK);
836}
837
838static int
839archive_read_format_zip_cleanup(struct archive_read *a)
840{
841	struct zip *zip;
842
843	zip = (struct zip *)(a->format->data);
844#ifdef HAVE_ZLIB_H
845	if (zip->stream_valid)
846		inflateEnd(&zip->stream);
847#endif
848	free(zip->uncompressed_buffer);
849	archive_string_free(&(zip->pathname));
850	archive_string_free(&(zip->extra));
851	free(zip);
852	(a->format->data) = NULL;
853	return (ARCHIVE_OK);
854}
855
856/*
857 * The extra data is stored as a list of
858 *	id1+size1+data1 + id2+size2+data2 ...
859 *  triplets.  id and size are 2 bytes each.
860 */
861static void
862process_extra(const void* extra, struct zip* zip)
863{
864	int offset = 0;
865	const char *p = (const char *)extra;
866	while (offset < zip->extra_length - 4)
867	{
868		unsigned short headerid = archive_le16dec(p + offset);
869		unsigned short datasize = archive_le16dec(p + offset + 2);
870		offset += 4;
871		if (offset + datasize > zip->extra_length)
872			break;
873#ifdef DEBUG
874		fprintf(stderr, "Header id 0x%04x, length %d\n",
875		    headerid, datasize);
876#endif
877		switch (headerid) {
878		case 0x0001:
879			/* Zip64 extended information extra field. */
880			if (datasize >= 8)
881				zip->uncompressed_size = archive_le64dec(p + offset);
882			if (datasize >= 16)
883				zip->compressed_size = archive_le64dec(p + offset + 8);
884			break;
885		case 0x5455:
886		{
887			/* Extended time field "UT". */
888			int flags = p[offset];
889			offset++;
890			datasize--;
891			/* Flag bits indicate which dates are present. */
892			if (flags & 0x01)
893			{
894#ifdef DEBUG
895				fprintf(stderr, "mtime: %lld -> %d\n",
896				    (long long)zip->mtime,
897				    archive_le32dec(p + offset));
898#endif
899				if (datasize < 4)
900					break;
901				zip->mtime = archive_le32dec(p + offset);
902				offset += 4;
903				datasize -= 4;
904			}
905			if (flags & 0x02)
906			{
907				if (datasize < 4)
908					break;
909				zip->atime = archive_le32dec(p + offset);
910				offset += 4;
911				datasize -= 4;
912			}
913			if (flags & 0x04)
914			{
915				if (datasize < 4)
916					break;
917				zip->ctime = archive_le32dec(p + offset);
918				offset += 4;
919				datasize -= 4;
920			}
921			break;
922		}
923		case 0x7855:
924			/* Info-ZIP Unix Extra Field (type 2) "Ux". */
925#ifdef DEBUG
926			fprintf(stderr, "uid %d gid %d\n",
927			    archive_le16dec(p + offset),
928			    archive_le16dec(p + offset + 2));
929#endif
930			if (datasize >= 2)
931				zip->uid = archive_le16dec(p + offset);
932			if (datasize >= 4)
933				zip->gid = archive_le16dec(p + offset + 2);
934			break;
935		case 0x7875:
936			/* Info-Zip Unix Extra Field (type 3) "ux". */
937			break;
938		default:
939			break;
940		}
941		offset += datasize;
942	}
943#ifdef DEBUG
944	if (offset != zip->extra_length)
945	{
946		fprintf(stderr,
947		    "Extra data field contents do not match reported size!");
948	}
949#endif
950}
951