archive_read_support_format_ar.c revision 302408
1/*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/archive_read_support_format_ar.c 299529 2016-05-12 10:16:16Z mm $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_STDLIB_H
38#include <stdlib.h>
39#endif
40#ifdef HAVE_STRING_H
41#include <string.h>
42#endif
43#ifdef HAVE_LIMITS_H
44#include <limits.h>
45#endif
46
47#include "archive.h"
48#include "archive_entry.h"
49#include "archive_private.h"
50#include "archive_read_private.h"
51
/*
 * Private per-archive state of the "ar" reader.
 */
struct ar {
	/* Body bytes of the current entry not yet returned to the client. */
	int64_t	 entry_bytes_remaining;

	/*
	 * Bytes obtained through read-ahead and handed to the client but
	 * not yet marked as consumed.  Always updated together with
	 * entry_bytes_remaining.
	 */
	size_t	 entry_bytes_unconsumed;

	/* Offset, within the current entry, of the next block returned. */
	int64_t	 entry_offset;

	/* Pad bytes following the entry body (entry size modulo 2). */
	int64_t	 entry_padding;

	/* Copy of the GNU/SVR4 "//" filename table; NULL if none was read. */
	char	*strtab;

	/* Size of strtab in bytes. */
	size_t	 strtab_size;

	/* Nonzero once the 8-byte global archive header has been consumed. */
	char	 read_global_header;
};
65
/*
 * Layout of the fixed-size "ar" member header.  The fields are packed
 * back to back, so each offset is the end of the preceding field; the
 * whole header is AR_fmag_offset + AR_fmag_size (60) bytes long.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset (AR_name_offset + AR_name_size)
#define AR_date_size 12
#define AR_uid_offset (AR_date_offset + AR_date_size)
#define AR_uid_size 6
#define AR_gid_offset (AR_uid_offset + AR_uid_size)
#define AR_gid_size 6
#define AR_mode_offset (AR_gid_offset + AR_gid_size)
#define AR_mode_size 8
#define AR_size_offset (AR_mode_offset + AR_mode_size)
#define AR_size_size 10
#define AR_fmag_offset (AR_size_offset + AR_size_size)
#define AR_fmag_size 2
83
/* Format callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_ar_bid(struct archive_read *a,
		    int best_bid);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Internal helpers for decoding header fields and the GNU name table. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
96
97int
98archive_read_support_format_ar(struct archive *_a)
99{
100	struct archive_read *a = (struct archive_read *)_a;
101	struct ar *ar;
102	int r;
103
104	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
105	    ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
106
107	ar = (struct ar *)malloc(sizeof(*ar));
108	if (ar == NULL) {
109		archive_set_error(&a->archive, ENOMEM,
110		    "Can't allocate ar data");
111		return (ARCHIVE_FATAL);
112	}
113	memset(ar, 0, sizeof(*ar));
114	ar->strtab = NULL;
115
116	r = __archive_read_register_format(a,
117	    ar,
118	    "ar",
119	    archive_read_format_ar_bid,
120	    NULL,
121	    archive_read_format_ar_read_header,
122	    archive_read_format_ar_read_data,
123	    archive_read_format_ar_skip,
124	    NULL,
125	    archive_read_format_ar_cleanup,
126	    NULL,
127	    NULL);
128
129	if (r != ARCHIVE_OK) {
130		free(ar);
131		return (r);
132	}
133	return (ARCHIVE_OK);
134}
135
136static int
137archive_read_format_ar_cleanup(struct archive_read *a)
138{
139	struct ar *ar;
140
141	ar = (struct ar *)(a->format->data);
142	if (ar->strtab)
143		free(ar->strtab);
144	free(ar);
145	(a->format->data) = NULL;
146	return (ARCHIVE_OK);
147}
148
/*
 * Bid callback: claim the stream when it starts with the 8-byte global
 * ar signature.  Returns 64 on a match and -1 when the signature is
 * absent or fewer than 8 bytes can be read ahead.
 *
 * TODO: Do we need to check more than the signature?
 */
static int
archive_read_format_ar_bid(struct archive_read *a, int best_bid)
{
	const void *sig;

	(void)best_bid; /* UNUSED */

	sig = __archive_read_ahead(a, 8, NULL);
	if (sig != NULL && memcmp(sig, "!<arch>\n", 8) == 0)
		return (64);

	return (-1);
}
167
168static int
169_ar_read_header(struct archive_read *a, struct archive_entry *entry,
170	struct ar *ar, const char *h, size_t *unconsumed)
171{
172	char filename[AR_name_size + 1];
173	uint64_t number; /* Used to hold parsed numbers before validation. */
174	size_t bsd_name_length, entry_size;
175	char *p, *st;
176	const void *b;
177	int r;
178
179	/* Verify the magic signature on the file header. */
180	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
181		archive_set_error(&a->archive, EINVAL,
182		    "Incorrect file header signature");
183		return (ARCHIVE_FATAL);
184	}
185
186	/* Copy filename into work buffer. */
187	strncpy(filename, h + AR_name_offset, AR_name_size);
188	filename[AR_name_size] = '\0';
189
190	/*
191	 * Guess the format variant based on the filename.
192	 */
193	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
194		/* We don't already know the variant, so let's guess. */
195		/*
196		 * Biggest clue is presence of '/': GNU starts special
197		 * filenames with '/', appends '/' as terminator to
198		 * non-special names, so anything with '/' should be
199		 * GNU except for BSD long filenames.
200		 */
201		if (strncmp(filename, "#1/", 3) == 0)
202			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
203		else if (strchr(filename, '/') != NULL)
204			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
205		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
206			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
207		/*
208		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
209		 * if name exactly fills 16-byte field?  If so, we
210		 * can't assume entries without '/' are BSD. XXX
211		 */
212	}
213
214	/* Update format name from the code. */
215	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
216		a->archive.archive_format_name = "ar (GNU/SVR4)";
217	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
218		a->archive.archive_format_name = "ar (BSD)";
219	else
220		a->archive.archive_format_name = "ar";
221
222	/*
223	 * Remove trailing spaces from the filename.  GNU and BSD
224	 * variants both pad filename area out with spaces.
225	 * This will only be wrong if GNU/SVR4 'ar' implementations
226	 * omit trailing '/' for 16-char filenames and we have
227	 * a 16-char filename that ends in ' '.
228	 */
229	p = filename + AR_name_size - 1;
230	while (p >= filename && *p == ' ') {
231		*p = '\0';
232		p--;
233	}
234
235	/*
236	 * Remove trailing slash unless first character is '/'.
237	 * (BSD entries never end in '/', so this will only trim
238	 * GNU-format entries.  GNU special entries start with '/'
239	 * and are not terminated in '/', so we don't trim anything
240	 * that starts with '/'.)
241	 */
242	if (filename[0] != '/' && p > filename && *p == '/') {
243		*p = '\0';
244	}
245
246	if (p < filename) {
247		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
248		    "Found entry with empty filename");
249		return (ARCHIVE_FATAL);
250	}
251
252	/*
253	 * '//' is the GNU filename table.
254	 * Later entries can refer to names in this table.
255	 */
256	if (strcmp(filename, "//") == 0) {
257		/* This must come before any call to _read_ahead. */
258		ar_parse_common_header(ar, entry, h);
259		archive_entry_copy_pathname(entry, filename);
260		archive_entry_set_filetype(entry, AE_IFREG);
261		/* Get the size of the filename table. */
262		number = ar_atol10(h + AR_size_offset, AR_size_size);
263		if (number > SIZE_MAX) {
264			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
265			    "Filename table too large");
266			return (ARCHIVE_FATAL);
267		}
268		entry_size = (size_t)number;
269		if (entry_size == 0) {
270			archive_set_error(&a->archive, EINVAL,
271			    "Invalid string table");
272			return (ARCHIVE_FATAL);
273		}
274		if (ar->strtab != NULL) {
275			archive_set_error(&a->archive, EINVAL,
276			    "More than one string tables exist");
277			return (ARCHIVE_FATAL);
278		}
279
280		/* Read the filename table into memory. */
281		st = malloc(entry_size);
282		if (st == NULL) {
283			archive_set_error(&a->archive, ENOMEM,
284			    "Can't allocate filename table buffer");
285			return (ARCHIVE_FATAL);
286		}
287		ar->strtab = st;
288		ar->strtab_size = entry_size;
289
290		if (*unconsumed) {
291			__archive_read_consume(a, *unconsumed);
292			*unconsumed = 0;
293		}
294
295		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
296			return (ARCHIVE_FATAL);
297		memcpy(st, b, entry_size);
298		__archive_read_consume(a, entry_size);
299		/* All contents are consumed. */
300		ar->entry_bytes_remaining = 0;
301		archive_entry_set_size(entry, ar->entry_bytes_remaining);
302
303		/* Parse the filename table. */
304		return (ar_parse_gnu_filename_table(a));
305	}
306
307	/*
308	 * GNU variant handles long filenames by storing /<number>
309	 * to indicate a name stored in the filename table.
310	 * XXX TODO: Verify that it's all digits... Don't be fooled
311	 * by "/9xyz" XXX
312	 */
313	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
314		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
315		/*
316		 * If we can't look up the real name, warn and return
317		 * the entry with the wrong name.
318		 */
319		if (ar->strtab == NULL || number > ar->strtab_size) {
320			archive_set_error(&a->archive, EINVAL,
321			    "Can't find long filename for GNU/SVR4 archive entry");
322			archive_entry_copy_pathname(entry, filename);
323			/* Parse the time, owner, mode, size fields. */
324			ar_parse_common_header(ar, entry, h);
325			return (ARCHIVE_FATAL);
326		}
327
328		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
329		/* Parse the time, owner, mode, size fields. */
330		return (ar_parse_common_header(ar, entry, h));
331	}
332
333	/*
334	 * BSD handles long filenames by storing "#1/" followed by the
335	 * length of filename as a decimal number, then prepends the
336	 * the filename to the file contents.
337	 */
338	if (strncmp(filename, "#1/", 3) == 0) {
339		/* Parse the time, owner, mode, size fields. */
340		/* This must occur before _read_ahead is called again. */
341		ar_parse_common_header(ar, entry, h);
342
343		/* Parse the size of the name, adjust the file size. */
344		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
345		bsd_name_length = (size_t)number;
346		/* Guard against the filename + trailing NUL
347		 * overflowing a size_t and against the filename size
348		 * being larger than the entire entry. */
349		if (number > (uint64_t)(bsd_name_length + 1)
350		    || (int64_t)bsd_name_length > ar->entry_bytes_remaining) {
351			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
352			    "Bad input file size");
353			return (ARCHIVE_FATAL);
354		}
355		ar->entry_bytes_remaining -= bsd_name_length;
356		/* Adjust file size reported to client. */
357		archive_entry_set_size(entry, ar->entry_bytes_remaining);
358
359		if (*unconsumed) {
360			__archive_read_consume(a, *unconsumed);
361			*unconsumed = 0;
362		}
363
364		/* Read the long name into memory. */
365		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
366			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
367			    "Truncated input file");
368			return (ARCHIVE_FATAL);
369		}
370		/* Store it in the entry. */
371		p = (char *)malloc(bsd_name_length + 1);
372		if (p == NULL) {
373			archive_set_error(&a->archive, ENOMEM,
374			    "Can't allocate fname buffer");
375			return (ARCHIVE_FATAL);
376		}
377		strncpy(p, b, bsd_name_length);
378		p[bsd_name_length] = '\0';
379
380		__archive_read_consume(a, bsd_name_length);
381
382		archive_entry_copy_pathname(entry, p);
383		free(p);
384		return (ARCHIVE_OK);
385	}
386
387	/*
388	 * "/" is the SVR4/GNU archive symbol table.
389	 */
390	if (strcmp(filename, "/") == 0) {
391		archive_entry_copy_pathname(entry, "/");
392		/* Parse the time, owner, mode, size fields. */
393		r = ar_parse_common_header(ar, entry, h);
394		/* Force the file type to a regular file. */
395		archive_entry_set_filetype(entry, AE_IFREG);
396		return (r);
397	}
398
399	/*
400	 * "__.SYMDEF" is a BSD archive symbol table.
401	 */
402	if (strcmp(filename, "__.SYMDEF") == 0) {
403		archive_entry_copy_pathname(entry, filename);
404		/* Parse the time, owner, mode, size fields. */
405		return (ar_parse_common_header(ar, entry, h));
406	}
407
408	/*
409	 * Otherwise, this is a standard entry.  The filename
410	 * has already been trimmed as much as possible, based
411	 * on our current knowledge of the format.
412	 */
413	archive_entry_copy_pathname(entry, filename);
414	return (ar_parse_common_header(ar, entry, h));
415}
416
417static int
418archive_read_format_ar_read_header(struct archive_read *a,
419    struct archive_entry *entry)
420{
421	struct ar *ar = (struct ar*)(a->format->data);
422	size_t unconsumed;
423	const void *header_data;
424	int ret;
425
426	if (!ar->read_global_header) {
427		/*
428		 * We are now at the beginning of the archive,
429		 * so we need first consume the ar global header.
430		 */
431		__archive_read_consume(a, 8);
432		ar->read_global_header = 1;
433		/* Set a default format code for now. */
434		a->archive.archive_format = ARCHIVE_FORMAT_AR;
435	}
436
437	/* Read the header for the next file entry. */
438	if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
439		/* Broken header. */
440		return (ARCHIVE_EOF);
441
442	unconsumed = 60;
443
444	ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
445
446	if (unconsumed)
447		__archive_read_consume(a, unconsumed);
448
449	return ret;
450}
451
452
453static int
454ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
455    const char *h)
456{
457	uint64_t n;
458
459	/* Copy remaining header */
460	archive_entry_set_mtime(entry,
461	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
462	archive_entry_set_uid(entry,
463	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
464	archive_entry_set_gid(entry,
465	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
466	archive_entry_set_mode(entry,
467	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
468	n = ar_atol10(h + AR_size_offset, AR_size_size);
469
470	ar->entry_offset = 0;
471	ar->entry_padding = n % 2;
472	archive_entry_set_size(entry, n);
473	ar->entry_bytes_remaining = n;
474	return (ARCHIVE_OK);
475}
476
477static int
478archive_read_format_ar_read_data(struct archive_read *a,
479    const void **buff, size_t *size, int64_t *offset)
480{
481	ssize_t bytes_read;
482	struct ar *ar;
483
484	ar = (struct ar *)(a->format->data);
485
486	if (ar->entry_bytes_unconsumed) {
487		__archive_read_consume(a, ar->entry_bytes_unconsumed);
488		ar->entry_bytes_unconsumed = 0;
489	}
490
491	if (ar->entry_bytes_remaining > 0) {
492		*buff = __archive_read_ahead(a, 1, &bytes_read);
493		if (bytes_read == 0) {
494			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
495			    "Truncated ar archive");
496			return (ARCHIVE_FATAL);
497		}
498		if (bytes_read < 0)
499			return (ARCHIVE_FATAL);
500		if (bytes_read > ar->entry_bytes_remaining)
501			bytes_read = (ssize_t)ar->entry_bytes_remaining;
502		*size = bytes_read;
503		ar->entry_bytes_unconsumed = bytes_read;
504		*offset = ar->entry_offset;
505		ar->entry_offset += bytes_read;
506		ar->entry_bytes_remaining -= bytes_read;
507		return (ARCHIVE_OK);
508	} else {
509		int64_t skipped = __archive_read_consume(a, ar->entry_padding);
510		if (skipped >= 0) {
511			ar->entry_padding -= skipped;
512		}
513		if (ar->entry_padding) {
514			if (skipped >= 0) {
515				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
516					"Truncated ar archive- failed consuming padding");
517			}
518			return (ARCHIVE_FATAL);
519		}
520		*buff = NULL;
521		*size = 0;
522		*offset = ar->entry_offset;
523		return (ARCHIVE_EOF);
524	}
525}
526
527static int
528archive_read_format_ar_skip(struct archive_read *a)
529{
530	int64_t bytes_skipped;
531	struct ar* ar;
532
533	ar = (struct ar *)(a->format->data);
534
535	bytes_skipped = __archive_read_consume(a,
536	    ar->entry_bytes_remaining + ar->entry_padding
537	    + ar->entry_bytes_unconsumed);
538	if (bytes_skipped < 0)
539		return (ARCHIVE_FATAL);
540
541	ar->entry_bytes_remaining = 0;
542	ar->entry_bytes_unconsumed = 0;
543	ar->entry_padding = 0;
544
545	return (ARCHIVE_OK);
546}
547
548static int
549ar_parse_gnu_filename_table(struct archive_read *a)
550{
551	struct ar *ar;
552	char *p;
553	size_t size;
554
555	ar = (struct ar*)(a->format->data);
556	size = ar->strtab_size;
557
558	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
559		if (*p == '/') {
560			*p++ = '\0';
561			if (*p != '\n')
562				goto bad_string_table;
563			*p = '\0';
564		}
565	}
566	/*
567	 * GNU ar always pads the table to an even size.
568	 * The pad character is either '\n' or '`'.
569	 */
570	if (p != ar->strtab + size && *p != '\n' && *p != '`')
571		goto bad_string_table;
572
573	/* Enforce zero termination. */
574	ar->strtab[size - 1] = '\0';
575
576	return (ARCHIVE_OK);
577
578bad_string_table:
579	archive_set_error(&a->archive, EINVAL,
580	    "Invalid string table");
581	free(ar->strtab);
582	ar->strtab = NULL;
583	return (ARCHIVE_FATAL);
584}
585
/*
 * Parse an unsigned octal number from a fixed-width header field.
 *
 *   p        - first byte of the field (need not be NUL-terminated).
 *   char_cnt - width of the field; no byte at or beyond p[char_cnt] is
 *              ever examined.
 *
 * Leading spaces and tabs are skipped, then digits '0'..'7' are
 * accumulated until a non-digit or the end of the field.  Returns the
 * parsed value, 0 if the field holds no digits, or UINT64_MAX if the
 * value does not fit in 64 bits.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Test the remaining width first so *p is only read in-field. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
611
/*
 * Parse an unsigned decimal number from a fixed-width header field.
 *
 *   p        - first byte of the field (need not be NUL-terminated).
 *   char_cnt - width of the field; no byte at or beyond p[char_cnt] is
 *              ever examined.
 *
 * Leading spaces and tabs are skipped, then digits '0'..'9' are
 * accumulated until a non-digit or the end of the field.  Returns the
 * parsed value, 0 if the field holds no digits, or UINT64_MAX if the
 * value does not fit in 64 bits.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	/* Test the remaining width first so *p is only read in-field. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	for (; char_cnt > 0; p++, char_cnt--) {
		if (*p < '0')
			break;
		digit = (unsigned int)(*p - '0');
		if (digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
	}
	return (l);
}
636