archive_read_support_format_ar.c revision 248616
1/*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: head/contrib/libarchive/libarchive/archive_read_support_format_ar.c 248616 2013-03-22 13:36:03Z mm $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_STDLIB_H
38#include <stdlib.h>
39#endif
40#ifdef HAVE_STRING_H
41#include <string.h>
42#endif
43#ifdef HAVE_LIMITS_H
44#include <limits.h>
45#endif
46
47#include "archive.h"
48#include "archive_entry.h"
49#include "archive_private.h"
50#include "archive_read_private.h"
51
/*
 * Private reader state for the "ar" format handler.  One instance is
 * allocated when the format is registered and released by the cleanup
 * callback.
 */
struct ar {
	/* Bytes of the current entry body not yet returned to the client. */
	int64_t	 entry_bytes_remaining;
	/*
	 * Bytes already returned from a readahead buffer but not yet
	 * marked as consumed.  Always updated together with
	 * entry_bytes_remaining.
	 */
	size_t   entry_bytes_unconsumed;
	/* Offset, within the current entry, of the next block to return. */
	int64_t	 entry_offset;
	/* Pad bytes that follow the entry body (entry size modulo 2). */
	int64_t	 entry_padding;
	/* In-memory copy of the GNU "//" filename table; NULL if none. */
	char	*strtab;
	/* Size in bytes of the buffer that strtab points to. */
	size_t	 strtab_size;
	/* Nonzero once the 8-byte global archive header was consumed. */
	char	 read_global_header;
};
65
/*
 * Layout of the fixed-size "ar" member header.
 *
 * The header is a sequence of fixed-width character fields packed back
 * to back, so each offset is the end of the preceding field.  The
 * fields add up to 60 bytes: name, date, uid, gid, mode, size and the
 * two-byte trailing magic.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset (AR_name_offset + AR_name_size)
#define AR_date_size 12
#define AR_uid_offset (AR_date_offset + AR_date_size)
#define AR_uid_size 6
#define AR_gid_offset (AR_uid_offset + AR_uid_size)
#define AR_gid_size 6
#define AR_mode_offset (AR_gid_offset + AR_gid_size)
#define AR_mode_size 8
#define AR_size_offset (AR_mode_offset + AR_mode_size)
#define AR_size_size 10
#define AR_fmag_offset (AR_size_offset + AR_size_size)
#define AR_fmag_size 2
83
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_ar_bid(struct archive_read *a,
		    int best_bid);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Header-parsing helpers. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);

/* Bounded octal / decimal field parsers. */
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
96
97int
98archive_read_support_format_ar(struct archive *_a)
99{
100	struct archive_read *a = (struct archive_read *)_a;
101	struct ar *ar;
102	int r;
103
104	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
105	    ARCHIVE_STATE_NEW, "archive_read_support_format_ar");
106
107	ar = (struct ar *)malloc(sizeof(*ar));
108	if (ar == NULL) {
109		archive_set_error(&a->archive, ENOMEM,
110		    "Can't allocate ar data");
111		return (ARCHIVE_FATAL);
112	}
113	memset(ar, 0, sizeof(*ar));
114	ar->strtab = NULL;
115
116	r = __archive_read_register_format(a,
117	    ar,
118	    "ar",
119	    archive_read_format_ar_bid,
120	    NULL,
121	    archive_read_format_ar_read_header,
122	    archive_read_format_ar_read_data,
123	    archive_read_format_ar_skip,
124	    NULL,
125	    archive_read_format_ar_cleanup);
126
127	if (r != ARCHIVE_OK) {
128		free(ar);
129		return (r);
130	}
131	return (ARCHIVE_OK);
132}
133
134static int
135archive_read_format_ar_cleanup(struct archive_read *a)
136{
137	struct ar *ar;
138
139	ar = (struct ar *)(a->format->data);
140	if (ar->strtab)
141		free(ar->strtab);
142	free(ar);
143	(a->format->data) = NULL;
144	return (ARCHIVE_OK);
145}
146
/*
 * Bid callback: claim the input when it begins with the 8-byte
 * signature "!<arch>\n".
 *
 * Returns 64 on a match, and -1 when fewer than 8 bytes are available
 * or the signature differs.  best_bid is ignored.
 * TODO: Do we need to check more than the signature?
 */
static int
archive_read_format_ar_bid(struct archive_read *a, int best_bid)
{
	const void *sig;

	(void)best_bid; /* UNUSED */

	sig = __archive_read_ahead(a, 8, NULL);
	if (sig == NULL || memcmp(sig, "!<arch>\n", 8) != 0)
		return (-1);
	return (64);
}
165
166static int
167_ar_read_header(struct archive_read *a, struct archive_entry *entry,
168	struct ar *ar, const char *h, size_t *unconsumed)
169{
170	char filename[AR_name_size + 1];
171	uint64_t number; /* Used to hold parsed numbers before validation. */
172	size_t bsd_name_length, entry_size;
173	char *p, *st;
174	const void *b;
175	int r;
176
177	/* Verify the magic signature on the file header. */
178	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
179		archive_set_error(&a->archive, EINVAL,
180		    "Incorrect file header signature");
181		return (ARCHIVE_WARN);
182	}
183
184	/* Copy filename into work buffer. */
185	strncpy(filename, h + AR_name_offset, AR_name_size);
186	filename[AR_name_size] = '\0';
187
188	/*
189	 * Guess the format variant based on the filename.
190	 */
191	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
192		/* We don't already know the variant, so let's guess. */
193		/*
194		 * Biggest clue is presence of '/': GNU starts special
195		 * filenames with '/', appends '/' as terminator to
196		 * non-special names, so anything with '/' should be
197		 * GNU except for BSD long filenames.
198		 */
199		if (strncmp(filename, "#1/", 3) == 0)
200			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
201		else if (strchr(filename, '/') != NULL)
202			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
203		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
204			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
205		/*
206		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
207		 * if name exactly fills 16-byte field?  If so, we
208		 * can't assume entries without '/' are BSD. XXX
209		 */
210	}
211
212	/* Update format name from the code. */
213	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
214		a->archive.archive_format_name = "ar (GNU/SVR4)";
215	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
216		a->archive.archive_format_name = "ar (BSD)";
217	else
218		a->archive.archive_format_name = "ar";
219
220	/*
221	 * Remove trailing spaces from the filename.  GNU and BSD
222	 * variants both pad filename area out with spaces.
223	 * This will only be wrong if GNU/SVR4 'ar' implementations
224	 * omit trailing '/' for 16-char filenames and we have
225	 * a 16-char filename that ends in ' '.
226	 */
227	p = filename + AR_name_size - 1;
228	while (p >= filename && *p == ' ') {
229		*p = '\0';
230		p--;
231	}
232
233	/*
234	 * Remove trailing slash unless first character is '/'.
235	 * (BSD entries never end in '/', so this will only trim
236	 * GNU-format entries.  GNU special entries start with '/'
237	 * and are not terminated in '/', so we don't trim anything
238	 * that starts with '/'.)
239	 */
240	if (filename[0] != '/' && *p == '/')
241		*p = '\0';
242
243	/*
244	 * '//' is the GNU filename table.
245	 * Later entries can refer to names in this table.
246	 */
247	if (strcmp(filename, "//") == 0) {
248		/* This must come before any call to _read_ahead. */
249		ar_parse_common_header(ar, entry, h);
250		archive_entry_copy_pathname(entry, filename);
251		archive_entry_set_filetype(entry, AE_IFREG);
252		/* Get the size of the filename table. */
253		number = ar_atol10(h + AR_size_offset, AR_size_size);
254		if (number > SIZE_MAX) {
255			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
256			    "Filename table too large");
257			return (ARCHIVE_FATAL);
258		}
259		entry_size = (size_t)number;
260		if (entry_size == 0) {
261			archive_set_error(&a->archive, EINVAL,
262			    "Invalid string table");
263			return (ARCHIVE_WARN);
264		}
265		if (ar->strtab != NULL) {
266			archive_set_error(&a->archive, EINVAL,
267			    "More than one string tables exist");
268			return (ARCHIVE_WARN);
269		}
270
271		/* Read the filename table into memory. */
272		st = malloc(entry_size);
273		if (st == NULL) {
274			archive_set_error(&a->archive, ENOMEM,
275			    "Can't allocate filename table buffer");
276			return (ARCHIVE_FATAL);
277		}
278		ar->strtab = st;
279		ar->strtab_size = entry_size;
280
281		if (*unconsumed) {
282			__archive_read_consume(a, *unconsumed);
283			*unconsumed = 0;
284		}
285
286		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
287			return (ARCHIVE_FATAL);
288		memcpy(st, b, entry_size);
289		__archive_read_consume(a, entry_size);
290		/* All contents are consumed. */
291		ar->entry_bytes_remaining = 0;
292		archive_entry_set_size(entry, ar->entry_bytes_remaining);
293
294		/* Parse the filename table. */
295		return (ar_parse_gnu_filename_table(a));
296	}
297
298	/*
299	 * GNU variant handles long filenames by storing /<number>
300	 * to indicate a name stored in the filename table.
301	 * XXX TODO: Verify that it's all digits... Don't be fooled
302	 * by "/9xyz" XXX
303	 */
304	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
305		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
306		/*
307		 * If we can't look up the real name, warn and return
308		 * the entry with the wrong name.
309		 */
310		if (ar->strtab == NULL || number > ar->strtab_size) {
311			archive_set_error(&a->archive, EINVAL,
312			    "Can't find long filename for entry");
313			archive_entry_copy_pathname(entry, filename);
314			/* Parse the time, owner, mode, size fields. */
315			ar_parse_common_header(ar, entry, h);
316			return (ARCHIVE_WARN);
317		}
318
319		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
320		/* Parse the time, owner, mode, size fields. */
321		return (ar_parse_common_header(ar, entry, h));
322	}
323
324	/*
325	 * BSD handles long filenames by storing "#1/" followed by the
326	 * length of filename as a decimal number, then prepends the
327	 * the filename to the file contents.
328	 */
329	if (strncmp(filename, "#1/", 3) == 0) {
330		/* Parse the time, owner, mode, size fields. */
331		/* This must occur before _read_ahead is called again. */
332		ar_parse_common_header(ar, entry, h);
333
334		/* Parse the size of the name, adjust the file size. */
335		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
336		bsd_name_length = (size_t)number;
337		/* Guard against the filename + trailing NUL
338		 * overflowing a size_t and against the filename size
339		 * being larger than the entire entry. */
340		if (number > (uint64_t)(bsd_name_length + 1)
341		    || (int64_t)bsd_name_length > ar->entry_bytes_remaining) {
342			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
343			    "Bad input file size");
344			return (ARCHIVE_FATAL);
345		}
346		ar->entry_bytes_remaining -= bsd_name_length;
347		/* Adjust file size reported to client. */
348		archive_entry_set_size(entry, ar->entry_bytes_remaining);
349
350		if (*unconsumed) {
351			__archive_read_consume(a, *unconsumed);
352			*unconsumed = 0;
353		}
354
355		/* Read the long name into memory. */
356		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
357			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
358			    "Truncated input file");
359			return (ARCHIVE_FATAL);
360		}
361		/* Store it in the entry. */
362		p = (char *)malloc(bsd_name_length + 1);
363		if (p == NULL) {
364			archive_set_error(&a->archive, ENOMEM,
365			    "Can't allocate fname buffer");
366			return (ARCHIVE_FATAL);
367		}
368		strncpy(p, b, bsd_name_length);
369		p[bsd_name_length] = '\0';
370
371		__archive_read_consume(a, bsd_name_length);
372
373		archive_entry_copy_pathname(entry, p);
374		free(p);
375		return (ARCHIVE_OK);
376	}
377
378	/*
379	 * "/" is the SVR4/GNU archive symbol table.
380	 */
381	if (strcmp(filename, "/") == 0) {
382		archive_entry_copy_pathname(entry, "/");
383		/* Parse the time, owner, mode, size fields. */
384		r = ar_parse_common_header(ar, entry, h);
385		/* Force the file type to a regular file. */
386		archive_entry_set_filetype(entry, AE_IFREG);
387		return (r);
388	}
389
390	/*
391	 * "__.SYMDEF" is a BSD archive symbol table.
392	 */
393	if (strcmp(filename, "__.SYMDEF") == 0) {
394		archive_entry_copy_pathname(entry, filename);
395		/* Parse the time, owner, mode, size fields. */
396		return (ar_parse_common_header(ar, entry, h));
397	}
398
399	/*
400	 * Otherwise, this is a standard entry.  The filename
401	 * has already been trimmed as much as possible, based
402	 * on our current knowledge of the format.
403	 */
404	archive_entry_copy_pathname(entry, filename);
405	return (ar_parse_common_header(ar, entry, h));
406}
407
408static int
409archive_read_format_ar_read_header(struct archive_read *a,
410    struct archive_entry *entry)
411{
412	struct ar *ar = (struct ar*)(a->format->data);
413	size_t unconsumed;
414	const void *header_data;
415	int ret;
416
417	if (!ar->read_global_header) {
418		/*
419		 * We are now at the beginning of the archive,
420		 * so we need first consume the ar global header.
421		 */
422		__archive_read_consume(a, 8);
423		ar->read_global_header = 1;
424		/* Set a default format code for now. */
425		a->archive.archive_format = ARCHIVE_FORMAT_AR;
426	}
427
428	/* Read the header for the next file entry. */
429	if ((header_data = __archive_read_ahead(a, 60, NULL)) == NULL)
430		/* Broken header. */
431		return (ARCHIVE_EOF);
432
433	unconsumed = 60;
434
435	ret = _ar_read_header(a, entry, ar, (const char *)header_data, &unconsumed);
436
437	if (unconsumed)
438		__archive_read_consume(a, unconsumed);
439
440	return ret;
441}
442
443
444static int
445ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
446    const char *h)
447{
448	uint64_t n;
449
450	/* Copy remaining header */
451	archive_entry_set_mtime(entry,
452	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
453	archive_entry_set_uid(entry,
454	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
455	archive_entry_set_gid(entry,
456	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
457	archive_entry_set_mode(entry,
458	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
459	n = ar_atol10(h + AR_size_offset, AR_size_size);
460
461	ar->entry_offset = 0;
462	ar->entry_padding = n % 2;
463	archive_entry_set_size(entry, n);
464	ar->entry_bytes_remaining = n;
465	return (ARCHIVE_OK);
466}
467
468static int
469archive_read_format_ar_read_data(struct archive_read *a,
470    const void **buff, size_t *size, int64_t *offset)
471{
472	ssize_t bytes_read;
473	struct ar *ar;
474
475	ar = (struct ar *)(a->format->data);
476
477	if (ar->entry_bytes_unconsumed) {
478		__archive_read_consume(a, ar->entry_bytes_unconsumed);
479		ar->entry_bytes_unconsumed = 0;
480	}
481
482	if (ar->entry_bytes_remaining > 0) {
483		*buff = __archive_read_ahead(a, 1, &bytes_read);
484		if (bytes_read == 0) {
485			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
486			    "Truncated ar archive");
487			return (ARCHIVE_FATAL);
488		}
489		if (bytes_read < 0)
490			return (ARCHIVE_FATAL);
491		if (bytes_read > ar->entry_bytes_remaining)
492			bytes_read = (ssize_t)ar->entry_bytes_remaining;
493		*size = bytes_read;
494		ar->entry_bytes_unconsumed = bytes_read;
495		*offset = ar->entry_offset;
496		ar->entry_offset += bytes_read;
497		ar->entry_bytes_remaining -= bytes_read;
498		return (ARCHIVE_OK);
499	} else {
500		int64_t skipped = __archive_read_consume(a, ar->entry_padding);
501		if (skipped >= 0) {
502			ar->entry_padding -= skipped;
503		}
504		if (ar->entry_padding) {
505			if (skipped >= 0) {
506				archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
507					"Truncated ar archive- failed consuming padding");
508			}
509			return (ARCHIVE_FATAL);
510		}
511		*buff = NULL;
512		*size = 0;
513		*offset = ar->entry_offset;
514		return (ARCHIVE_EOF);
515	}
516}
517
518static int
519archive_read_format_ar_skip(struct archive_read *a)
520{
521	int64_t bytes_skipped;
522	struct ar* ar;
523
524	ar = (struct ar *)(a->format->data);
525
526	bytes_skipped = __archive_read_consume(a,
527	    ar->entry_bytes_remaining + ar->entry_padding
528	    + ar->entry_bytes_unconsumed);
529	if (bytes_skipped < 0)
530		return (ARCHIVE_FATAL);
531
532	ar->entry_bytes_remaining = 0;
533	ar->entry_bytes_unconsumed = 0;
534	ar->entry_padding = 0;
535
536	return (ARCHIVE_OK);
537}
538
539static int
540ar_parse_gnu_filename_table(struct archive_read *a)
541{
542	struct ar *ar;
543	char *p;
544	size_t size;
545
546	ar = (struct ar*)(a->format->data);
547	size = ar->strtab_size;
548
549	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
550		if (*p == '/') {
551			*p++ = '\0';
552			if (*p != '\n')
553				goto bad_string_table;
554			*p = '\0';
555		}
556	}
557	/*
558	 * GNU ar always pads the table to an even size.
559	 * The pad character is either '\n' or '`'.
560	 */
561	if (p != ar->strtab + size && *p != '\n' && *p != '`')
562		goto bad_string_table;
563
564	/* Enforce zero termination. */
565	ar->strtab[size - 1] = '\0';
566
567	return (ARCHIVE_OK);
568
569bad_string_table:
570	archive_set_error(&a->archive, EINVAL,
571	    "Invalid string table");
572	free(ar->strtab);
573	ar->strtab = NULL;
574	return (ARCHIVE_WARN);
575}
576
/*
 * Parse an unsigned octal number from a fixed-width, possibly
 * unterminated character field.
 *
 * p        - start of the field.
 * char_cnt - width of the field; at most this many bytes are read.
 *
 * Leading spaces and tabs are skipped, then octal digits are
 * accumulated until a non-digit or the end of the field.  Returns 0
 * when no digits are found and UINT64_MAX when the value would not
 * fit in 64 bits.
 *
 * The remaining count is checked before every dereference so the
 * function never touches the byte following the field.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '7') {
		digit = (unsigned int)(*p - '0');
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
602
/*
 * Parse an unsigned decimal number from a fixed-width, possibly
 * unterminated character field.
 *
 * p        - start of the field.
 * char_cnt - width of the field; at most this many bytes are read.
 *
 * Leading spaces and tabs are skipped, then decimal digits are
 * accumulated until a non-digit or the end of the field.  Returns 0
 * when no digits are found and UINT64_MAX when the value would not
 * fit in 64 bits.
 *
 * The remaining count is checked before every dereference so the
 * function never touches the byte following the field.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;
	unsigned int digit;

	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '9') {
		digit = (unsigned int)(*p - '0');
		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
627