archive_read_support_format_ar.c revision 228761
1/*-
2 * Copyright (c) 2007 Kai Wang
3 * Copyright (c) 2007 Tim Kientzle
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer
11 *    in this position and unchanged.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_ar.c 201101 2009-12-28 03:06:27Z kientzle $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_STDLIB_H
38#include <stdlib.h>
39#endif
40#ifdef HAVE_STRING_H
41#include <string.h>
42#endif
43#ifdef HAVE_LIMITS_H
44#include <limits.h>
45#endif
46
47#include "archive.h"
48#include "archive_entry.h"
49#include "archive_private.h"
50#include "archive_read_private.h"
51
/*
 * Reader state for the "ar" format, attached to the format slot as
 * a->format->data.
 */
struct ar {
	/* Bytes of the current entry body not yet returned to the client. */
	off_t	 entry_bytes_remaining;
	/* Offset within the current entry reported with the next data block. */
	off_t	 entry_offset;
	/* Pad bytes after the current entry body that are still unconsumed. */
	off_t	 entry_padding;
	/* Copy of the GNU "//" filename table; NULL when none is loaded. */
	char	*strtab;
	/* Size in bytes of the buffer strtab points to. */
	size_t	 strtab_size;
};
59
/*
 * Layout of the fixed-size "ar" member header.  Each field starts where
 * the previous one ends, so every offset is derived from the field before
 * it; the fields add up to 60 bytes in total.
 */
#define AR_name_offset 0
#define AR_name_size 16
#define AR_date_offset (AR_name_offset + AR_name_size)
#define AR_date_size 12
#define AR_uid_offset (AR_date_offset + AR_date_size)
#define AR_uid_size 6
#define AR_gid_offset (AR_uid_offset + AR_uid_size)
#define AR_gid_size 6
#define AR_mode_offset (AR_gid_offset + AR_gid_size)
#define AR_mode_size 8
#define AR_size_offset (AR_mode_offset + AR_mode_size)
#define AR_size_size 10
#define AR_fmag_offset (AR_size_offset + AR_size_size)
#define AR_fmag_size 2
77
/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_ar_bid(struct archive_read *a);
static int	archive_read_format_ar_read_header(struct archive_read *a,
		    struct archive_entry *e);
static int	archive_read_format_ar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_ar_skip(struct archive_read *a);
static int	archive_read_format_ar_cleanup(struct archive_read *a);

/* Header-parsing helpers private to this file. */
static int	ar_parse_common_header(struct ar *ar,
		    struct archive_entry *entry, const char *h);
static int	ar_parse_gnu_filename_table(struct archive_read *a);
static uint64_t	ar_atol10(const char *p, unsigned char_cnt);
static uint64_t	ar_atol8(const char *p, unsigned char_cnt);
90
91int
92archive_read_support_format_ar(struct archive *_a)
93{
94	struct archive_read *a = (struct archive_read *)_a;
95	struct ar *ar;
96	int r;
97
98	ar = (struct ar *)malloc(sizeof(*ar));
99	if (ar == NULL) {
100		archive_set_error(&a->archive, ENOMEM,
101		    "Can't allocate ar data");
102		return (ARCHIVE_FATAL);
103	}
104	memset(ar, 0, sizeof(*ar));
105	ar->strtab = NULL;
106
107	r = __archive_read_register_format(a,
108	    ar,
109	    "ar",
110	    archive_read_format_ar_bid,
111	    NULL,
112	    archive_read_format_ar_read_header,
113	    archive_read_format_ar_read_data,
114	    archive_read_format_ar_skip,
115	    archive_read_format_ar_cleanup);
116
117	if (r != ARCHIVE_OK) {
118		free(ar);
119		return (r);
120	}
121	return (ARCHIVE_OK);
122}
123
124static int
125archive_read_format_ar_cleanup(struct archive_read *a)
126{
127	struct ar *ar;
128
129	ar = (struct ar *)(a->format->data);
130	if (ar->strtab)
131		free(ar->strtab);
132	free(ar);
133	(a->format->data) = NULL;
134	return (ARCHIVE_OK);
135}
136
137static int
138archive_read_format_ar_bid(struct archive_read *a)
139{
140	const void *h;
141
142	if (a->archive.archive_format != 0 &&
143	    (a->archive.archive_format & ARCHIVE_FORMAT_BASE_MASK) !=
144	    ARCHIVE_FORMAT_AR)
145		return(0);
146
147	/*
148	 * Verify the 8-byte file signature.
149	 * TODO: Do we need to check more than this?
150	 */
151	if ((h = __archive_read_ahead(a, 8, NULL)) == NULL)
152		return (-1);
153	if (strncmp((const char*)h, "!<arch>\n", 8) == 0) {
154		return (64);
155	}
156	return (-1);
157}
158
159static int
160archive_read_format_ar_read_header(struct archive_read *a,
161    struct archive_entry *entry)
162{
163	char filename[AR_name_size + 1];
164	struct ar *ar;
165	uint64_t number; /* Used to hold parsed numbers before validation. */
166	ssize_t bytes_read;
167	size_t bsd_name_length, entry_size;
168	char *p, *st;
169	const void *b;
170	const char *h;
171	int r;
172
173	ar = (struct ar*)(a->format->data);
174
175	if (a->archive.file_position == 0) {
176		/*
177		 * We are now at the beginning of the archive,
178		 * so we need first consume the ar global header.
179		 */
180		__archive_read_consume(a, 8);
181		/* Set a default format code for now. */
182		a->archive.archive_format = ARCHIVE_FORMAT_AR;
183	}
184
185	/* Read the header for the next file entry. */
186	if ((b = __archive_read_ahead(a, 60, &bytes_read)) == NULL)
187		/* Broken header. */
188		return (ARCHIVE_EOF);
189	__archive_read_consume(a, 60);
190	h = (const char *)b;
191
192	/* Verify the magic signature on the file header. */
193	if (strncmp(h + AR_fmag_offset, "`\n", 2) != 0) {
194		archive_set_error(&a->archive, EINVAL,
195		    "Incorrect file header signature");
196		return (ARCHIVE_WARN);
197	}
198
199	/* Copy filename into work buffer. */
200	strncpy(filename, h + AR_name_offset, AR_name_size);
201	filename[AR_name_size] = '\0';
202
203	/*
204	 * Guess the format variant based on the filename.
205	 */
206	if (a->archive.archive_format == ARCHIVE_FORMAT_AR) {
207		/* We don't already know the variant, so let's guess. */
208		/*
209		 * Biggest clue is presence of '/': GNU starts special
210		 * filenames with '/', appends '/' as terminator to
211		 * non-special names, so anything with '/' should be
212		 * GNU except for BSD long filenames.
213		 */
214		if (strncmp(filename, "#1/", 3) == 0)
215			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
216		else if (strchr(filename, '/') != NULL)
217			a->archive.archive_format = ARCHIVE_FORMAT_AR_GNU;
218		else if (strncmp(filename, "__.SYMDEF", 9) == 0)
219			a->archive.archive_format = ARCHIVE_FORMAT_AR_BSD;
220		/*
221		 * XXX Do GNU/SVR4 'ar' programs ever omit trailing '/'
222		 * if name exactly fills 16-byte field?  If so, we
223		 * can't assume entries without '/' are BSD. XXX
224		 */
225	}
226
227	/* Update format name from the code. */
228	if (a->archive.archive_format == ARCHIVE_FORMAT_AR_GNU)
229		a->archive.archive_format_name = "ar (GNU/SVR4)";
230	else if (a->archive.archive_format == ARCHIVE_FORMAT_AR_BSD)
231		a->archive.archive_format_name = "ar (BSD)";
232	else
233		a->archive.archive_format_name = "ar";
234
235	/*
236	 * Remove trailing spaces from the filename.  GNU and BSD
237	 * variants both pad filename area out with spaces.
238	 * This will only be wrong if GNU/SVR4 'ar' implementations
239	 * omit trailing '/' for 16-char filenames and we have
240	 * a 16-char filename that ends in ' '.
241	 */
242	p = filename + AR_name_size - 1;
243	while (p >= filename && *p == ' ') {
244		*p = '\0';
245		p--;
246	}
247
248	/*
249	 * Remove trailing slash unless first character is '/'.
250	 * (BSD entries never end in '/', so this will only trim
251	 * GNU-format entries.  GNU special entries start with '/'
252	 * and are not terminated in '/', so we don't trim anything
253	 * that starts with '/'.)
254	 */
255	if (filename[0] != '/' && *p == '/')
256		*p = '\0';
257
258	/*
259	 * '//' is the GNU filename table.
260	 * Later entries can refer to names in this table.
261	 */
262	if (strcmp(filename, "//") == 0) {
263		/* This must come before any call to _read_ahead. */
264		ar_parse_common_header(ar, entry, h);
265		archive_entry_copy_pathname(entry, filename);
266		archive_entry_set_filetype(entry, AE_IFREG);
267		/* Get the size of the filename table. */
268		number = ar_atol10(h + AR_size_offset, AR_size_size);
269		if (number > SIZE_MAX) {
270			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
271			    "Filename table too large");
272			return (ARCHIVE_FATAL);
273		}
274		entry_size = (size_t)number;
275		if (entry_size == 0) {
276			archive_set_error(&a->archive, EINVAL,
277			    "Invalid string table");
278			return (ARCHIVE_WARN);
279		}
280		if (ar->strtab != NULL) {
281			archive_set_error(&a->archive, EINVAL,
282			    "More than one string tables exist");
283			return (ARCHIVE_WARN);
284		}
285
286		/* Read the filename table into memory. */
287		st = malloc(entry_size);
288		if (st == NULL) {
289			archive_set_error(&a->archive, ENOMEM,
290			    "Can't allocate filename table buffer");
291			return (ARCHIVE_FATAL);
292		}
293		ar->strtab = st;
294		ar->strtab_size = entry_size;
295		if ((b = __archive_read_ahead(a, entry_size, NULL)) == NULL)
296			return (ARCHIVE_FATAL);
297		memcpy(st, b, entry_size);
298		__archive_read_consume(a, entry_size);
299		/* All contents are consumed. */
300		ar->entry_bytes_remaining = 0;
301		archive_entry_set_size(entry, ar->entry_bytes_remaining);
302
303		/* Parse the filename table. */
304		return (ar_parse_gnu_filename_table(a));
305	}
306
307	/*
308	 * GNU variant handles long filenames by storing /<number>
309	 * to indicate a name stored in the filename table.
310	 * XXX TODO: Verify that it's all digits... Don't be fooled
311	 * by "/9xyz" XXX
312	 */
313	if (filename[0] == '/' && filename[1] >= '0' && filename[1] <= '9') {
314		number = ar_atol10(h + AR_name_offset + 1, AR_name_size - 1);
315		/*
316		 * If we can't look up the real name, warn and return
317		 * the entry with the wrong name.
318		 */
319		if (ar->strtab == NULL || number > ar->strtab_size) {
320			archive_set_error(&a->archive, EINVAL,
321			    "Can't find long filename for entry");
322			archive_entry_copy_pathname(entry, filename);
323			/* Parse the time, owner, mode, size fields. */
324			ar_parse_common_header(ar, entry, h);
325			return (ARCHIVE_WARN);
326		}
327
328		archive_entry_copy_pathname(entry, &ar->strtab[(size_t)number]);
329		/* Parse the time, owner, mode, size fields. */
330		return (ar_parse_common_header(ar, entry, h));
331	}
332
333	/*
334	 * BSD handles long filenames by storing "#1/" followed by the
335	 * length of filename as a decimal number, then prepends the
336	 * the filename to the file contents.
337	 */
338	if (strncmp(filename, "#1/", 3) == 0) {
339		/* Parse the time, owner, mode, size fields. */
340		/* This must occur before _read_ahead is called again. */
341		ar_parse_common_header(ar, entry, h);
342
343		/* Parse the size of the name, adjust the file size. */
344		number = ar_atol10(h + AR_name_offset + 3, AR_name_size - 3);
345		bsd_name_length = (size_t)number;
346		/* Guard against the filename + trailing NUL
347		 * overflowing a size_t and against the filename size
348		 * being larger than the entire entry. */
349		if (number > (uint64_t)(bsd_name_length + 1)
350		    || (off_t)bsd_name_length > ar->entry_bytes_remaining) {
351			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
352			    "Bad input file size");
353			return (ARCHIVE_FATAL);
354		}
355		ar->entry_bytes_remaining -= bsd_name_length;
356		/* Adjust file size reported to client. */
357		archive_entry_set_size(entry, ar->entry_bytes_remaining);
358
359		/* Read the long name into memory. */
360		if ((b = __archive_read_ahead(a, bsd_name_length, NULL)) == NULL) {
361			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
362			    "Truncated input file");
363			return (ARCHIVE_FATAL);
364		}
365		__archive_read_consume(a, bsd_name_length);
366
367		/* Store it in the entry. */
368		p = (char *)malloc(bsd_name_length + 1);
369		if (p == NULL) {
370			archive_set_error(&a->archive, ENOMEM,
371			    "Can't allocate fname buffer");
372			return (ARCHIVE_FATAL);
373		}
374		strncpy(p, b, bsd_name_length);
375		p[bsd_name_length] = '\0';
376		archive_entry_copy_pathname(entry, p);
377		free(p);
378		return (ARCHIVE_OK);
379	}
380
381	/*
382	 * "/" is the SVR4/GNU archive symbol table.
383	 */
384	if (strcmp(filename, "/") == 0) {
385		archive_entry_copy_pathname(entry, "/");
386		/* Parse the time, owner, mode, size fields. */
387		r = ar_parse_common_header(ar, entry, h);
388		/* Force the file type to a regular file. */
389		archive_entry_set_filetype(entry, AE_IFREG);
390		return (r);
391	}
392
393	/*
394	 * "__.SYMDEF" is a BSD archive symbol table.
395	 */
396	if (strcmp(filename, "__.SYMDEF") == 0) {
397		archive_entry_copy_pathname(entry, filename);
398		/* Parse the time, owner, mode, size fields. */
399		return (ar_parse_common_header(ar, entry, h));
400	}
401
402	/*
403	 * Otherwise, this is a standard entry.  The filename
404	 * has already been trimmed as much as possible, based
405	 * on our current knowledge of the format.
406	 */
407	archive_entry_copy_pathname(entry, filename);
408	return (ar_parse_common_header(ar, entry, h));
409}
410
411static int
412ar_parse_common_header(struct ar *ar, struct archive_entry *entry,
413    const char *h)
414{
415	uint64_t n;
416
417	/* Copy remaining header */
418	archive_entry_set_mtime(entry,
419	    (time_t)ar_atol10(h + AR_date_offset, AR_date_size), 0L);
420	archive_entry_set_uid(entry,
421	    (uid_t)ar_atol10(h + AR_uid_offset, AR_uid_size));
422	archive_entry_set_gid(entry,
423	    (gid_t)ar_atol10(h + AR_gid_offset, AR_gid_size));
424	archive_entry_set_mode(entry,
425	    (mode_t)ar_atol8(h + AR_mode_offset, AR_mode_size));
426	n = ar_atol10(h + AR_size_offset, AR_size_size);
427
428	ar->entry_offset = 0;
429	ar->entry_padding = n % 2;
430	archive_entry_set_size(entry, n);
431	ar->entry_bytes_remaining = n;
432	return (ARCHIVE_OK);
433}
434
435static int
436archive_read_format_ar_read_data(struct archive_read *a,
437    const void **buff, size_t *size, off_t *offset)
438{
439	ssize_t bytes_read;
440	struct ar *ar;
441
442	ar = (struct ar *)(a->format->data);
443
444	if (ar->entry_bytes_remaining > 0) {
445		*buff = __archive_read_ahead(a, 1, &bytes_read);
446		if (bytes_read == 0) {
447			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
448			    "Truncated ar archive");
449			return (ARCHIVE_FATAL);
450		}
451		if (bytes_read < 0)
452			return (ARCHIVE_FATAL);
453		if (bytes_read > ar->entry_bytes_remaining)
454			bytes_read = (ssize_t)ar->entry_bytes_remaining;
455		*size = bytes_read;
456		*offset = ar->entry_offset;
457		ar->entry_offset += bytes_read;
458		ar->entry_bytes_remaining -= bytes_read;
459		__archive_read_consume(a, (size_t)bytes_read);
460		return (ARCHIVE_OK);
461	} else {
462		while (ar->entry_padding > 0) {
463			*buff = __archive_read_ahead(a, 1, &bytes_read);
464			if (bytes_read <= 0)
465				return (ARCHIVE_FATAL);
466			if (bytes_read > ar->entry_padding)
467				bytes_read = (ssize_t)ar->entry_padding;
468			__archive_read_consume(a, (size_t)bytes_read);
469			ar->entry_padding -= bytes_read;
470		}
471		*buff = NULL;
472		*size = 0;
473		*offset = ar->entry_offset;
474		return (ARCHIVE_EOF);
475	}
476}
477
478static int
479archive_read_format_ar_skip(struct archive_read *a)
480{
481	off_t bytes_skipped;
482	struct ar* ar;
483
484	ar = (struct ar *)(a->format->data);
485
486	bytes_skipped = __archive_read_skip(a,
487	    ar->entry_bytes_remaining + ar->entry_padding);
488	if (bytes_skipped < 0)
489		return (ARCHIVE_FATAL);
490
491	ar->entry_bytes_remaining = 0;
492	ar->entry_padding = 0;
493
494	return (ARCHIVE_OK);
495}
496
497static int
498ar_parse_gnu_filename_table(struct archive_read *a)
499{
500	struct ar *ar;
501	char *p;
502	size_t size;
503
504	ar = (struct ar*)(a->format->data);
505	size = ar->strtab_size;
506
507	for (p = ar->strtab; p < ar->strtab + size - 1; ++p) {
508		if (*p == '/') {
509			*p++ = '\0';
510			if (*p != '\n')
511				goto bad_string_table;
512			*p = '\0';
513		}
514	}
515	/*
516	 * GNU ar always pads the table to an even size.
517	 * The pad character is either '\n' or '`'.
518	 */
519	if (p != ar->strtab + size && *p != '\n' && *p != '`')
520		goto bad_string_table;
521
522	/* Enforce zero termination. */
523	ar->strtab[size - 1] = '\0';
524
525	return (ARCHIVE_OK);
526
527bad_string_table:
528	archive_set_error(&a->archive, EINVAL,
529	    "Invalid string table");
530	free(ar->strtab);
531	ar->strtab = NULL;
532	return (ARCHIVE_WARN);
533}
534
/*
 * Parse an unsigned octal number from a fixed-width text field.
 *
 * p        - start of the field; it need not be NUL-terminated.
 * char_cnt - width of the field; at most this many bytes are examined.
 *
 * Leading spaces and tabs are skipped, then octal digits are accumulated
 * until a non-digit or the end of the field.  Returns 0 when no digit is
 * found and UINT64_MAX when the value does not fit in 64 bits.
 *
 * The remaining count is tested before each byte is read, so nothing
 * beyond p[char_cnt - 1] is ever dereferenced.
 */
static uint64_t
ar_atol8(const char *p, unsigned char_cnt)
{
	const unsigned int base = 8;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/* Skip leading blanks. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '7') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
560
/*
 * Parse an unsigned decimal number from a fixed-width text field.
 *
 * p        - start of the field; it need not be NUL-terminated.
 * char_cnt - width of the field; at most this many bytes are examined.
 *
 * Leading spaces and tabs are skipped, then decimal digits are
 * accumulated until a non-digit or the end of the field.  Returns 0 when
 * no digit is found and UINT64_MAX when the value does not fit in 64
 * bits.
 *
 * The remaining count is tested before each byte is read, so nothing
 * beyond p[char_cnt - 1] is ever dereferenced.
 */
static uint64_t
ar_atol10(const char *p, unsigned char_cnt)
{
	const unsigned int base = 10;
	const uint64_t limit = UINT64_MAX / base;
	const uint64_t last_digit_limit = UINT64_MAX % base;
	uint64_t l = 0;

	/* Skip leading blanks. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}

	while (char_cnt > 0 && *p >= '0' && *p <= '9') {
		unsigned int digit = (unsigned int)(*p - '0');

		if (l > limit || (l == limit && digit > last_digit_limit))
			return (UINT64_MAX); /* Truncate on overflow. */
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (l);
}
585