1// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2/*
3 * Routines for dealing with .zip archives.
4 *
5 * Copyright (c) Meta Platforms, Inc. and affiliates.
6 */
7
8#include <errno.h>
9#include <fcntl.h>
10#include <stdint.h>
11#include <stdlib.h>
12#include <string.h>
13#include <sys/mman.h>
14#include <unistd.h>
15
16#include "libbpf_internal.h"
17#include "zip.h"
18
19#pragma GCC diagnostic push
20#pragma GCC diagnostic ignored "-Wpacked"
21#pragma GCC diagnostic ignored "-Wattributes"
22
23/* Specification of ZIP file format can be found here:
24 * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
25 * For a high level overview of the structure of a ZIP file see
26 * sections 4.3.1 - 4.3.6.
27 *
28 * Data structures appearing in ZIP files do not contain any
29 * padding and they might be misaligned. To allow us to safely
30 * operate on pointers to such structures and their members, we
31 * declare the types as packed.
32 */
33
34#define END_OF_CD_RECORD_MAGIC 0x06054b50
35
36/* See section 4.3.16 of the spec. */
37struct end_of_cd_record {
38	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
39	__u32 magic;
40
41	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
42	 * Zip archive might span multiple files (disks).
43	 */
44	__u16 this_disk;
45
46	/* Number of the file containing the beginning of the central directory or
47	 * 0xFFFF if ZIP64 archive.
48	 */
49	__u16 cd_disk;
50
51	/* Number of central directory records on this disk or 0xFFFF if ZIP64
52	 * archive.
53	 */
54	__u16 cd_records;
55
56	/* Number of central directory records on all disks or 0xFFFF if ZIP64
57	 * archive.
58	 */
59	__u16 cd_records_total;
60
61	/* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
62	__u32 cd_size;
63
64	/* Offset of the central directory from the beginning of the archive or
65	 * 0xFFFFFFFF if ZIP64 archive.
66	 */
67	__u32 cd_offset;
68
69	/* Length of comment data following end of central directory record. */
70	__u16 comment_length;
71
72	/* Up to 64k of arbitrary bytes. */
73	/* uint8_t comment[comment_length] */
74} __attribute__((packed));
75
76#define CD_FILE_HEADER_MAGIC 0x02014b50
77#define FLAG_ENCRYPTED (1 << 0)
78#define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
79
80/* See section 4.3.12 of the spec. */
81struct cd_file_header {
82	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
83	__u32 magic;
84	__u16 version;
85	/* Minimum zip version needed to extract the file. */
86	__u16 min_version;
87	__u16 flags;
88	__u16 compression;
89	__u16 last_modified_time;
90	__u16 last_modified_date;
91	__u32 crc;
92	__u32 compressed_size;
93	__u32 uncompressed_size;
94	__u16 file_name_length;
95	__u16 extra_field_length;
96	__u16 file_comment_length;
97	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
98	__u16 disk;
99	__u16 internal_attributes;
100	__u32 external_attributes;
101	/* Offset from the start of the disk containing the local file header to the
102	 * start of the local file header.
103	 */
104	__u32 offset;
105} __attribute__((packed));
106
107#define LOCAL_FILE_HEADER_MAGIC 0x04034b50
108
109/* See section 4.3.7 of the spec. */
110struct local_file_header {
111	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
112	__u32 magic;
113	/* Minimum zip version needed to extract the file. */
114	__u16 min_version;
115	__u16 flags;
116	__u16 compression;
117	__u16 last_modified_time;
118	__u16 last_modified_date;
119	__u32 crc;
120	__u32 compressed_size;
121	__u32 uncompressed_size;
122	__u16 file_name_length;
123	__u16 extra_field_length;
124} __attribute__((packed));
125
126#pragma GCC diagnostic pop
127
128struct zip_archive {
129	void *data;
130	__u32 size;
131	__u32 cd_offset;
132	__u32 cd_records;
133};
134
135static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
136{
137	if (offset + size > archive->size || offset > offset + size)
138		return NULL;
139
140	return archive->data + offset;
141}
142
143/* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
144 * archive uses features which are not supported.
145 */
146static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
147{
148	__u16 comment_length, cd_records;
149	struct end_of_cd_record *eocd;
150	__u32 cd_offset, cd_size;
151
152	eocd = check_access(archive, offset, sizeof(*eocd));
153	if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
154		return -EINVAL;
155
156	comment_length = eocd->comment_length;
157	if (offset + sizeof(*eocd) + comment_length != archive->size)
158		return -EINVAL;
159
160	cd_records = eocd->cd_records;
161	if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
162		/* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
163		return -ENOTSUP;
164
165	cd_offset = eocd->cd_offset;
166	cd_size = eocd->cd_size;
167	if (!check_access(archive, cd_offset, cd_size))
168		return -EINVAL;
169
170	archive->cd_offset = cd_offset;
171	archive->cd_records = cd_records;
172	return 0;
173}
174
175static int find_cd(struct zip_archive *archive)
176{
177	int64_t limit, offset;
178	int rc = -EINVAL;
179
180	if (archive->size <= sizeof(struct end_of_cd_record))
181		return -EINVAL;
182
183	/* Because the end of central directory ends with a variable length array of
184	 * up to 0xFFFF bytes we can't know exactly where it starts and need to
185	 * search for it at the end of the file, scanning the (limit, offset] range.
186	 */
187	offset = archive->size - sizeof(struct end_of_cd_record);
188	limit = (int64_t)offset - (1 << 16);
189
190	for (; offset >= 0 && offset > limit && rc != 0; offset--) {
191		rc = try_parse_end_of_cd(archive, offset);
192		if (rc == -ENOTSUP)
193			break;
194	}
195	return rc;
196}
197
198struct zip_archive *zip_archive_open(const char *path)
199{
200	struct zip_archive *archive;
201	int err, fd;
202	off_t size;
203	void *data;
204
205	fd = open(path, O_RDONLY | O_CLOEXEC);
206	if (fd < 0)
207		return ERR_PTR(-errno);
208
209	size = lseek(fd, 0, SEEK_END);
210	if (size == (off_t)-1 || size > UINT32_MAX) {
211		close(fd);
212		return ERR_PTR(-EINVAL);
213	}
214
215	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
216	err = -errno;
217	close(fd);
218
219	if (data == MAP_FAILED)
220		return ERR_PTR(err);
221
222	archive = malloc(sizeof(*archive));
223	if (!archive) {
224		munmap(data, size);
225		return ERR_PTR(-ENOMEM);
226	};
227
228	archive->data = data;
229	archive->size = size;
230
231	err = find_cd(archive);
232	if (err) {
233		munmap(data, size);
234		free(archive);
235		return ERR_PTR(err);
236	}
237
238	return archive;
239}
240
241void zip_archive_close(struct zip_archive *archive)
242{
243	munmap(archive->data, archive->size);
244	free(archive);
245}
246
247static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
248							     __u32 offset)
249{
250	struct local_file_header *lfh;
251
252	lfh = check_access(archive, offset, sizeof(*lfh));
253	if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
254		return NULL;
255
256	return lfh;
257}
258
259static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
260{
261	struct local_file_header *lfh;
262	__u32 compressed_size;
263	const char *name;
264	void *data;
265
266	lfh = local_file_header_at_offset(archive, offset);
267	if (!lfh)
268		return -EINVAL;
269
270	offset += sizeof(*lfh);
271	if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
272		return -EINVAL;
273
274	name = check_access(archive, offset, lfh->file_name_length);
275	if (!name)
276		return -EINVAL;
277
278	offset += lfh->file_name_length;
279	if (!check_access(archive, offset, lfh->extra_field_length))
280		return -EINVAL;
281
282	offset += lfh->extra_field_length;
283	compressed_size = lfh->compressed_size;
284	data = check_access(archive, offset, compressed_size);
285	if (!data)
286		return -EINVAL;
287
288	out->compression = lfh->compression;
289	out->name_length = lfh->file_name_length;
290	out->name = name;
291	out->data = data;
292	out->data_length = compressed_size;
293	out->data_offset = offset;
294
295	return 0;
296}
297
298int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
299			   struct zip_entry *out)
300{
301	size_t file_name_length = strlen(file_name);
302	__u32 i, offset = archive->cd_offset;
303
304	for (i = 0; i < archive->cd_records; ++i) {
305		__u16 cdfh_name_length, cdfh_flags;
306		struct cd_file_header *cdfh;
307		const char *cdfh_name;
308
309		cdfh = check_access(archive, offset, sizeof(*cdfh));
310		if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
311			return -EINVAL;
312
313		offset += sizeof(*cdfh);
314		cdfh_name_length = cdfh->file_name_length;
315		cdfh_name = check_access(archive, offset, cdfh_name_length);
316		if (!cdfh_name)
317			return -EINVAL;
318
319		cdfh_flags = cdfh->flags;
320		if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
321		    (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
322		    file_name_length == cdfh_name_length &&
323		    memcmp(file_name, archive->data + offset, file_name_length) == 0) {
324			return get_entry_at_offset(archive, cdfh->offset, out);
325		}
326
327		offset += cdfh_name_length;
328		offset += cdfh->extra_field_length;
329		offset += cdfh->file_comment_length;
330	}
331
332	return -ENOENT;
333}
334