archive_read_support_format_mtree.c revision 256281
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2008 Joerg Sonnenberger
4 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: stable/10/contrib/libarchive/libarchive/archive_read_support_format_mtree.c 248616 2013-03-22 13:36:03Z mm $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_FCNTL_H
38#include <fcntl.h>
39#endif
40#include <stddef.h>
41/* #include <stdint.h> */ /* See archive_platform.h */
42#ifdef HAVE_STDLIB_H
43#include <stdlib.h>
44#endif
45#ifdef HAVE_STRING_H
46#include <string.h>
47#endif
48
49#include "archive.h"
50#include "archive_entry.h"
51#include "archive_private.h"
52#include "archive_read_private.h"
53#include "archive_string.h"
54
55#ifndef O_BINARY
56#define	O_BINARY 0
57#endif
58#ifndef O_CLOEXEC
59#define O_CLOEXEC	0
60#endif
61
/*
 * Keyword-presence bits.  Each flag stands for one mtree keyword; the
 * flags are OR-ed together into an int bitmask (parse_file(), for
 * instance, tests its parsed_kws mask against MTREE_HAS_OPTIONAL).
 */
#define	MTREE_HAS_DEVICE	0x0001
#define	MTREE_HAS_FFLAGS	0x0002
#define	MTREE_HAS_GID		0x0004
#define	MTREE_HAS_GNAME		0x0008
#define	MTREE_HAS_MTIME		0x0010
#define	MTREE_HAS_NLINK		0x0020
#define	MTREE_HAS_PERM		0x0040
#define	MTREE_HAS_SIZE		0x0080
#define	MTREE_HAS_TYPE		0x0100
#define	MTREE_HAS_UID		0x0200
#define	MTREE_HAS_UNAME		0x0400

/* Keywords that modify how an entry is treated. */
#define	MTREE_HAS_OPTIONAL	0x0800
#define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */
76
/*
 * One keyword setting, kept as a node of a singly linked list.
 * `value' is a heap-allocated, NUL-terminated copy of the option text
 * (either "keyword" or "keyword=value"); free_options() releases it.
 */
struct mtree_option {
	struct mtree_option *next;	/* Next option, or NULL at the tail. */
	char *value;			/* Owned copy of the option text. */
};
81
/*
 * One entry line of the specification, kept in file order on a singly
 * linked list rooted at struct mtree.entries.
 */
struct mtree_entry {
	struct mtree_entry *next;	/* Next entry in file order. */
	struct mtree_option *options;	/* Keywords that apply to this entry. */
	char *name;			/* Owned, unescaped path name. */
	char full;			/* Non-zero: parse_file() treats `name'
					 * as a complete path instead of one
					 * relative to the current directory. */
	char used;			/* Non-zero once the entry was consumed. */
};
89
90struct mtree {
91	struct archive_string	 line;
92	size_t			 buffsize;
93	char			*buff;
94	int64_t			 offset;
95	int			 fd;
96	int			 archive_format;
97	const char		*archive_format_name;
98	struct mtree_entry	*entries;
99	struct mtree_entry	*this_entry;
100	struct archive_string	 current_dir;
101	struct archive_string	 contents_name;
102
103	struct archive_entry_linkresolver *resolver;
104
105	int64_t			 cur_size;
106};
107
/* Forward declarations of file-local functions. */

/* Format detection (bidding). */
static int	bid_keycmp(const char *p, const char *key, ssize_t len);
static int	detect_form(struct archive_read *a, int *is_form_d);
static int	mtree_bid(struct archive_read *a, int best_bid);

/* Parsing. */
static int	parse_file(struct archive_read *a, struct archive_entry *entry,
		    struct mtree *mtree, struct mtree_entry *mentry,
		    int *use_next);
static void	parse_escapes(char *, struct mtree_entry *);
static int	parse_line(struct archive_read *, struct archive_entry *,
		    struct mtree *, struct mtree_entry *, int *);
static int	parse_keyword(struct archive_read *, struct mtree *,
		    struct archive_entry *, struct mtree_option *, int *);
static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
static int64_t	mtree_atol10(char **);
static int64_t	mtree_atol8(char **);
static int64_t	mtree_atol(char **);

/* Callbacks registered with the archive_read core. */
static int	read_header(struct archive_read *a,
		    struct archive_entry *entry);
static int	read_data(struct archive_read *a,
		    const void **buff, size_t *size, int64_t *offset);
static int	skip(struct archive_read *a);
static int	cleanup(struct archive_read *a);
128
/*
 * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
 * here.  TODO: Move this to configure time, but be careful
 * about cross-compile environments.
 */

/*
 * Return the largest value a time_t can hold.
 *
 * When the platform does not provide TIME_T_MAX the value is derived from
 * the signedness and size of time_t.  The result is computed in int64_t
 * without ever overflowing: probing for the limit by repeatedly doubling a
 * signed time_t relies on signed overflow, which is undefined behaviour.
 *
 * Assumes time_t is an integer type with 8-bit bytes (as POSIX requires).
 */
static int64_t
get_time_t_max(void)
{
#if defined(TIME_T_MAX)
	return TIME_T_MAX;
#else
	/* Width of time_t in bits. */
	const int bits = (int)(sizeof(time_t) * 8);

	if ((time_t)0 < (time_t)-1) {
		/* time_t is unsigned: the maximum has every bit set. */
		return ((int64_t)~(time_t)0);
	}
	/*
	 * time_t is signed: the maximum is 2^(bits-1) - 1, written as
	 * (2^(bits-2) - 1) * 2 + 1 so that no intermediate value exceeds
	 * the result, even when time_t is 64 bits wide.
	 */
	return ((((int64_t)1 << (bits - 2)) - 1) * 2 + 1);
#endif
}
152
/*
 * Return the smallest value a time_t can hold: zero when time_t is
 * unsigned, -2^(bits-1) when it is signed.
 *
 * When the platform does not provide TIME_T_MIN the value is derived from
 * the signedness and size of time_t.  Probing for the limit by repeatedly
 * doubling a negative time_t relies on signed overflow, which is undefined
 * behaviour, so the result is computed in int64_t without overflowing.
 *
 * Assumes time_t is an integer type with 8-bit bytes (as POSIX requires).
 */
static int64_t
get_time_t_min(void)
{
#if defined(TIME_T_MIN)
	return TIME_T_MIN;
#else
	/* Width of time_t in bits. */
	const int bits = (int)(sizeof(time_t) * 8);
	int64_t max;

	if ((time_t)0 < (time_t)-1) {
		/* time_t is unsigned. */
		return (0);
	}
	/* Signed: compute the maximum first, then min == -max - 1. */
	max = (((int64_t)1 << (bits - 2)) - 1) * 2 + 1;
	return (-max - 1);
#endif
}
175
176static void
177free_options(struct mtree_option *head)
178{
179	struct mtree_option *next;
180
181	for (; head != NULL; head = next) {
182		next = head->next;
183		free(head->value);
184		free(head);
185	}
186}
187
188int
189archive_read_support_format_mtree(struct archive *_a)
190{
191	struct archive_read *a = (struct archive_read *)_a;
192	struct mtree *mtree;
193	int r;
194
195	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
196	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
197
198	mtree = (struct mtree *)malloc(sizeof(*mtree));
199	if (mtree == NULL) {
200		archive_set_error(&a->archive, ENOMEM,
201		    "Can't allocate mtree data");
202		return (ARCHIVE_FATAL);
203	}
204	memset(mtree, 0, sizeof(*mtree));
205	mtree->fd = -1;
206
207	r = __archive_read_register_format(a, mtree, "mtree",
208	    mtree_bid, NULL, read_header, read_data, skip, NULL, cleanup);
209
210	if (r != ARCHIVE_OK)
211		free(mtree);
212	return (ARCHIVE_OK);
213}
214
215static int
216cleanup(struct archive_read *a)
217{
218	struct mtree *mtree;
219	struct mtree_entry *p, *q;
220
221	mtree = (struct mtree *)(a->format->data);
222
223	p = mtree->entries;
224	while (p != NULL) {
225		q = p->next;
226		free(p->name);
227		free_options(p->options);
228		free(p);
229		p = q;
230	}
231	archive_string_free(&mtree->line);
232	archive_string_free(&mtree->current_dir);
233	archive_string_free(&mtree->contents_name);
234	archive_entry_linkresolver_free(mtree->resolver);
235
236	free(mtree->buff);
237	free(mtree);
238	(a->format->data) = NULL;
239	return (ARCHIVE_OK);
240}
241
/*
 * Measure the first line stored in b[0 .. avail-1].
 *
 * Returns the length of the line including its terminator and stores the
 * terminator length in *nlsize (2 for "\r\n", 1 for a lone '\r' or '\n').
 * When no terminator is found within `avail' bytes, returns `avail' and
 * stores 0.  When a NUL byte is met first, returns -1 and stores 0.
 * `nlsize' may be NULL if the caller does not need the terminator length.
 */
static ssize_t
get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
{
	ssize_t pos;

	for (pos = 0; pos < avail; pos++) {
		const char c = b[pos];

		if (c == '\0') {
			/* A NUL byte cannot appear in an mtree line. */
			if (nlsize != NULL)
				*nlsize = 0;
			return (-1);
		}
		if (c == '\r' || c == '\n') {
			ssize_t term = 1;

			/* "\r\n" counts as one two-byte terminator, but only
			 * if the '\n' is inside the available bytes. */
			if (c == '\r' && avail - pos > 1 && b[pos + 1] == '\n')
				term = 2;
			if (nlsize != NULL)
				*nlsize = term;
			return (pos + term);
		}
	}

	/* Ran out of bytes before the end of the line. */
	if (nlsize != NULL)
		*nlsize = 0;
	return (avail);
}
275
/*
 * Find the end of the line that starts at *b, reading ahead from the
 * archive as needed until a line terminator shows up.
 *
 *   b       in/out: start of the current line inside the read-ahead
 *           buffer; re-based whenever the buffer is refilled.
 *   avail   in/out: bytes available starting at *b.
 *   ravail  in/out: bytes available from the start of the read-ahead
 *           buffer (so *ravail - *avail bytes precede *b).
 *   nl      out: length of the line terminator, 0 if none was found.
 *
 * Returns the line length including the terminator, 0 when no further
 * data can be read, or -1 when the line contains a NUL byte.
 */
static ssize_t
next_line(struct archive_read *a,
    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
{
	ssize_t len;
	int quit;

	quit = 0;
	if (*avail == 0) {
		*nl = 0;
		len = 0;
	} else
		len = get_line_size(*b, *avail, nl);
	/*
	 * Read bytes more while it does not reach the end of line.
	 */
	while (*nl == 0 && len == *avail && !quit) {
		/* Offset of the current line inside the read-ahead buffer. */
		ssize_t diff = *ravail - *avail;
		/* Round the request up to a multiple of 1024.  The mask is
		 * built in size_t so it is not truncated to 32 bits. */
		size_t nbytes_req = ((size_t)*ravail + 1023) & ~(size_t)1023;
		ssize_t tested;

		/* Increase reading bytes if it is not enough to at least
		 * new two lines. */
		if (nbytes_req < (size_t)*ravail + 160)
			nbytes_req <<= 1;

		*b = __archive_read_ahead(a, nbytes_req, avail);
		if (*b == NULL) {
			if (*ravail >= *avail)
				return (0);
			/* Reading bytes reaches the end of file. */
			*b = __archive_read_ahead(a, *avail, avail);
			if (*b == NULL)
				return (0);
			quit = 1;
		}
		*ravail = *avail;
		*b += diff;
		*avail -= diff;
		/*
		 * The first `len' bytes of the line are already known to hold
		 * no terminator, so resume scanning right after them.
		 * Scanning again from the start of the line and then adding
		 * `len' would count those bytes twice.
		 */
		tested = len;
		len = get_line_size(*b + tested, *avail - tested, nl);
		if (len >= 0)
			len += tested;
	}
	return (len);
}
320
/*
 * Check whether the text at `p' (at most `len' bytes) begins with the
 * keyword `key' and is followed by a character that may end a keyword:
 * '=', blank, tab, CR, LF, or a backslash directly followed by CR or LF.
 *
 * Returns the length of the keyword on a match, 0 otherwise.
 */
static int
bid_keycmp(const char *p, const char *key, ssize_t len)
{
	int matched;

	/* Walk both strings in step for as long as they agree. */
	for (matched = 0; len > 0 && *p != '\0' && *key != '\0'; matched++) {
		if (*p != *key)
			return (0);/* Not match */
		p++;
		key++;
		len--;
	}
	/* The whole keyword has to be consumed. */
	if (*key != '\0')
		return (0);/* Not match */

	/* A following character should be specified characters */
	switch (p[0]) {
	case '=':
	case ' ':
	case '\t':
	case '\n':
	case '\r':
		return (matched);
	case '\\':
		/* Line continuation right after the keyword. */
		if (p[1] == '\n' || p[1] == '\r')
			return (matched);
		return (0);/* Not match */
	default:
		return (0);/* Not match */
	}
}
351
/*
 * Test whether the text at `p' starts with a known mtree keyword.
 * Returns the length of the keyword that was recognised, or 0 when the
 * text does not start with any known keyword.
 */
static int
bid_keyword(const char *p,  ssize_t len)
{
	/* Keyword tables, grouped by the first letter(s) they start with. */
	static const char *keys_c[] = {
		"content", "contents", "cksum", NULL
	};
	static const char *keys_df[] = {
		"device", "flags", NULL
	};
	static const char *keys_g[] = {
		"gid", "gname", NULL
	};
	static const char *keys_il[] = {
		"ignore", "link", NULL
	};
	static const char *keys_m[] = {
		"md5", "md5digest", "mode", NULL
	};
	static const char *keys_no[] = {
		"nlink", "nochange", "optional", NULL
	};
	static const char *keys_r[] = {
		"rmd160", "rmd160digest", NULL
	};
	static const char *keys_s[] = {
		"sha1", "sha1digest",
		"sha256", "sha256digest",
		"sha384", "sha384digest",
		"sha512", "sha512digest",
		"size", NULL
	};
	static const char *keys_t[] = {
		"tags", "time", "type", NULL
	};
	static const char *keys_u[] = {
		"uid", "uname",	NULL
	};
	const char **candidate;

	/* The first character selects the only table that can match. */
	switch (*p) {
	case 'c':
		candidate = keys_c;
		break;
	case 'd':
	case 'f':
		candidate = keys_df;
		break;
	case 'g':
		candidate = keys_g;
		break;
	case 'i':
	case 'l':
		candidate = keys_il;
		break;
	case 'm':
		candidate = keys_m;
		break;
	case 'n':
	case 'o':
		candidate = keys_no;
		break;
	case 'r':
		candidate = keys_r;
		break;
	case 's':
		candidate = keys_s;
		break;
	case 't':
		candidate = keys_t;
		break;
	case 'u':
		candidate = keys_u;
		break;
	default:
		return (0);/* Unknown key */
	}

	/* Try every keyword of the table until one matches. */
	for (; *candidate != NULL; candidate++) {
		int matched = bid_keycmp(p, *candidate, len);

		if (matched > 0)
			return (matched);
	}
	return (0);/* Unknown key */
}
418
/*
 * Test whether the text is a list of mtree keywords.
 *
 * The expected shape is a sequence of "<blanks>keyword=value" items; when
 * `unset' is non-zero the items are plain "<blanks>keyword" and the word
 * "all" is accepted as well.  When `last_is_path' is non-zero the first
 * keyword need not be preceded by a blank, and running out of input after
 * the blanks ends the list.
 *
 * Returns the number of keywords found, or -1 if the sequence is not a
 * valid keyword list.
 */
static int
bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
{
	int found = 0;

	while (len > 0 && *p) {
		int klen;
		int saw_blank = 0;

		/* Test whether there are blank characters in the line. */
		for (; len > 0 && (*p == ' ' || *p == '\t'); ++p, --len)
			saw_blank = 1;

		/* End of line, or a line continuation: the list is over. */
		if (*p == '\n' || *p == '\r')
			break;
		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
			break;
		if (!saw_blank && !last_is_path) /* No blank character. */
			return (-1);
		if (last_is_path && len == 0)
			return (found);

		/* "/unset all" is valid although "all" is not a keyword. */
		if (unset && bid_keycmp(p, "all", len) > 0)
			return (1);

		/* Test whether there is a correct key in the line. */
		klen = bid_keyword(p, len);
		if (klen == 0)
			return (-1);/* Unknown keyword was found. */
		p += klen;
		len -= klen;
		found++;

		/* Skip value */
		if (*p == '=') {
			int has_value = 0;

			for (++p, --len; len > 0 && *p != ' ' && *p != '\t';
			    ++p, --len)
				has_value = 1;
			/* A keyword should have a its value unless
			 * "/unset" operation. */
			if (!unset && !has_value)
				return (-1);
		}
	}
	return (found);
}
481
/*
 * Examine one line that should describe an entry.
 *
 *   p, len        the line, including its terminator.
 *   nl            length of that terminator.
 *   last_is_path  out: 1 when the path name was found at the end of the
 *                 line (the layout written by NetBSD's "mtree -D"),
 *                 0 when it was found at the beginning.
 *
 * Returns the number of keywords on the line, or -1 when the line does
 * not look like an mtree entry.
 */
static int
bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
{
	/* Non-zero for every byte that may appear in an unquoted path. */
	static const unsigned char safe_char[256] = {
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
		/* 0123456789:;<>?  EXCLUSION:(=) */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
		/* @ABCDEFGHIJKLMNO */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
		/* PQRSTUVWXYZ[\]^_  */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
		/* `abcdefghijklmno */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
		/* pqrstuvwxyz{|}~ */
		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
	};
	const char *rest = p;	/* Where the keyword list starts. */
	ssize_t rest_len = len;	/* Bytes left for the keyword list. */
	int leading = 0;	/* Length of a path found at the front. */

	*last_is_path = 0;

	/*
	 * Try to skip a path name at the beginning of the line.
	 */
	for (; rest_len > 0; ++rest, --rest_len, ++leading) {
		if (*rest == ' ' || *rest == '\t' ||
		    *rest == '\r' || *rest == '\n')
			break;
		if (!safe_char[*(const unsigned char *)rest]) {
			/* Not a path after all. */
			leading = 0;
			break;
		}
	}

	/* If a path-name was not found at the first, try to check
	 * a mtree format ``NetBSD's mtree -D'' creates, which
	 * places the path-name at the last. */
	if (leading == 0) {
		const char *tail = p + len - nl; /* Just past the last byte. */
		int name_len = 0;
		int has_slash = 0;

		/* Do not accept multi lines for form D: a line ending in a
		 * backslash, with or without a blank before it, is out. */
		if (tail > p && tail[-1] == '\\')
			return (-1);

		/* Walk backwards over the last field of the line. */
		for (;;) {
			--tail;
			if (tail < p || *tail == ' ' || *tail == '\t')
				break;
			if (!safe_char[*(const unsigned char *)tail])
				return (-1);
			name_len++;
			/* The pathname should have a slash in this
			 * format. */
			if (*tail == '/')
				has_slash = 1;
		}
		if (name_len == 0 || !has_slash)
			return (-1);
		/* If '/' is placed at the first in this field, this is not
		 * a valid filename. */
		if (tail[1] == '/')
			return (-1);

		/* The keyword list is everything in front of the path. */
		rest_len = len - nl - name_len;
		rest = p;
		*last_is_path = 1;
	}

	return (bid_keyword_list(rest, rest_len, 0, *last_is_path));
}
565
/* Number of valid entries after which detect_form() stops looking. */
#define MAX_BID_ENTRY	3

/*
 * Format "bid" callback.  Returns -1 when the start of the input cannot be
 * read, 8 bits per signature byte when the input starts with "#mtree", and
 * otherwise whatever detect_form() makes of the content.
 */
static int
mtree_bid(struct archive_read *a, int best_bid)
{
	static const char signature[] = "#mtree";
	const size_t siglen = sizeof(signature) - 1; /* Without the NUL. */
	const char *head;

	(void)best_bid; /* UNUSED */

	/* Now let's look at the actual header and see if it matches. */
	head = __archive_read_ahead(a, siglen, NULL);
	if (head == NULL)
		return (-1);

	if (memcmp(head, signature, siglen) == 0)
		return (8 * (int)siglen);

	/*
	 * There is not a mtree signature. Let's try to detect mtree format.
	 */
	return (detect_form(a, NULL));
}
589
590static int
591detect_form(struct archive_read *a, int *is_form_d)
592{
593	const char *p;
594	ssize_t avail, ravail;
595	ssize_t detected_bytes = 0, len, nl;
596	int entry_cnt = 0, multiline = 0;
597	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
598			* (In this source we call it `form D') . */
599
600	if (is_form_d != NULL)
601		*is_form_d = 0;
602	p = __archive_read_ahead(a, 1, &avail);
603	if (p == NULL)
604		return (-1);
605	ravail = avail;
606	for (;;) {
607		len = next_line(a, &p, &avail, &ravail, &nl);
608		/* The terminal character of the line should be
609		 * a new line character, '\r\n' or '\n'. */
610		if (len <= 0 || nl == 0)
611			break;
612		if (!multiline) {
613			/* Leading whitespace is never significant,
614			 * ignore it. */
615			while (len > 0 && (*p == ' ' || *p == '\t')) {
616				++p;
617				--avail;
618				--len;
619			}
620			/* Skip comment or empty line. */
621			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
622				p += len;
623				avail -= len;
624				continue;
625			}
626		} else {
627			/* A continuance line; the terminal
628			 * character of previous line was '\' character. */
629			if (bid_keyword_list(p, len, 0, 0) <= 0)
630				break;
631			if (multiline == 1)
632				detected_bytes += len;
633			if (p[len-nl-1] != '\\') {
634				if (multiline == 1 &&
635				    ++entry_cnt >= MAX_BID_ENTRY)
636					break;
637				multiline = 0;
638			}
639			p += len;
640			avail -= len;
641			continue;
642		}
643		if (p[0] != '/') {
644			int last_is_path, keywords;
645
646			keywords = bid_entry(p, len, nl, &last_is_path);
647			if (keywords >= 0) {
648				detected_bytes += len;
649				if (form_D == 0) {
650					if (last_is_path)
651						form_D = 1;
652					else if (keywords > 0)
653						/* This line is not `form D'. */
654						form_D = -1;
655				} else if (form_D == 1) {
656					if (!last_is_path && keywords > 0)
657						/* This this is not `form D'
658						 * and We cannot accept mixed
659						 * format. */
660						break;
661				}
662				if (!last_is_path && p[len-nl-1] == '\\')
663					/* This line continues. */
664					multiline = 1;
665				else {
666					/* We've got plenty of correct lines
667					 * to assume that this file is a mtree
668					 * format. */
669					if (++entry_cnt >= MAX_BID_ENTRY)
670						break;
671				}
672			} else
673				break;
674		} else if (strncmp(p, "/set", 4) == 0) {
675			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
676				break;
677			/* This line continues. */
678			if (p[len-nl-1] == '\\')
679				multiline = 2;
680		} else if (strncmp(p, "/unset", 6) == 0) {
681			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
682				break;
683			/* This line continues. */
684			if (p[len-nl-1] == '\\')
685				multiline = 2;
686		} else
687			break;
688
689		/* Test next line. */
690		p += len;
691		avail -= len;
692	}
693	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
694		if (is_form_d != NULL) {
695			if (form_D == 1)
696				*is_form_d = 1;
697		}
698		return (32);
699	}
700
701	return (0);
702}
703
704/*
705 * The extended mtree format permits multiple lines specifying
706 * attributes for each file.  For those entries, only the last line
707 * is actually used.  Practically speaking, that means we have
708 * to read the entire mtree file into memory up front.
709 *
710 * The parsing is done in two steps.  First, it is decided if a line
711 * changes the global defaults and if it is, processed accordingly.
712 * Otherwise, the options of the line are merged with the current
713 * global options.
714 */
715static int
716add_option(struct archive_read *a, struct mtree_option **global,
717    const char *value, size_t len)
718{
719	struct mtree_option *opt;
720
721	if ((opt = malloc(sizeof(*opt))) == NULL) {
722		archive_set_error(&a->archive, errno, "Can't allocate memory");
723		return (ARCHIVE_FATAL);
724	}
725	if ((opt->value = malloc(len + 1)) == NULL) {
726		free(opt);
727		archive_set_error(&a->archive, errno, "Can't allocate memory");
728		return (ARCHIVE_FATAL);
729	}
730	memcpy(opt->value, value, len);
731	opt->value[len] = '\0';
732	opt->next = *global;
733	*global = opt;
734	return (ARCHIVE_OK);
735}
736
737static void
738remove_option(struct mtree_option **global, const char *value, size_t len)
739{
740	struct mtree_option *iter, *last;
741
742	last = NULL;
743	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
744		if (strncmp(iter->value, value, len) == 0 &&
745		    (iter->value[len] == '\0' ||
746		     iter->value[len] == '='))
747			break;
748	}
749	if (iter == NULL)
750		return;
751	if (last == NULL)
752		*global = iter->next;
753	else
754		last->next = iter->next;
755
756	free(iter->value);
757	free(iter);
758}
759
760static int
761process_global_set(struct archive_read *a,
762    struct mtree_option **global, const char *line)
763{
764	const char *next, *eq;
765	size_t len;
766	int r;
767
768	line += 4;
769	for (;;) {
770		next = line + strspn(line, " \t\r\n");
771		if (*next == '\0')
772			return (ARCHIVE_OK);
773		line = next;
774		next = line + strcspn(line, " \t\r\n");
775		eq = strchr(line, '=');
776		if (eq > next)
777			len = next - line;
778		else
779			len = eq - line;
780
781		remove_option(global, line, len);
782		r = add_option(a, global, line, next - line);
783		if (r != ARCHIVE_OK)
784			return (r);
785		line = next;
786	}
787}
788
789static int
790process_global_unset(struct archive_read *a,
791    struct mtree_option **global, const char *line)
792{
793	const char *next;
794	size_t len;
795
796	line += 6;
797	if (strchr(line, '=') != NULL) {
798		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
799		    "/unset shall not contain `='");
800		return ARCHIVE_FATAL;
801	}
802
803	for (;;) {
804		next = line + strspn(line, " \t\r\n");
805		if (*next == '\0')
806			return (ARCHIVE_OK);
807		line = next;
808		len = strcspn(line, " \t\r\n");
809
810		if (len == 3 && strncmp(line, "all", 3) == 0) {
811			free_options(*global);
812			*global = NULL;
813		} else {
814			remove_option(global, line, len);
815		}
816
817		line += len;
818	}
819}
820
821static int
822process_add_entry(struct archive_read *a, struct mtree *mtree,
823    struct mtree_option **global, const char *line, ssize_t line_len,
824    struct mtree_entry **last_entry, int is_form_d)
825{
826	struct mtree_entry *entry;
827	struct mtree_option *iter;
828	const char *next, *eq, *name, *end;
829	size_t len;
830	int r;
831
832	if ((entry = malloc(sizeof(*entry))) == NULL) {
833		archive_set_error(&a->archive, errno, "Can't allocate memory");
834		return (ARCHIVE_FATAL);
835	}
836	entry->next = NULL;
837	entry->options = NULL;
838	entry->name = NULL;
839	entry->used = 0;
840	entry->full = 0;
841
842	/* Add this entry to list. */
843	if (*last_entry == NULL)
844		mtree->entries = entry;
845	else
846		(*last_entry)->next = entry;
847	*last_entry = entry;
848
849	if (is_form_d) {
850		/*
851		 * This form places the file name as last parameter.
852		 */
853		name = line + line_len -1;
854		while (line_len > 0) {
855			if (*name != '\r' && *name != '\n' &&
856			    *name != '\t' && *name != ' ')
857				break;
858			name--;
859			line_len--;
860		}
861		len = 0;
862		while (line_len > 0) {
863			if (*name == '\r' || *name == '\n' ||
864			    *name == '\t' || *name == ' ') {
865				name++;
866				break;
867			}
868			name--;
869			line_len--;
870			len++;
871		}
872		end = name;
873	} else {
874		len = strcspn(line, " \t\r\n");
875		name = line;
876		line += len;
877		end = line + line_len;
878	}
879
880	if ((entry->name = malloc(len + 1)) == NULL) {
881		archive_set_error(&a->archive, errno, "Can't allocate memory");
882		return (ARCHIVE_FATAL);
883	}
884
885	memcpy(entry->name, name, len);
886	entry->name[len] = '\0';
887	parse_escapes(entry->name, entry);
888
889	for (iter = *global; iter != NULL; iter = iter->next) {
890		r = add_option(a, &entry->options, iter->value,
891		    strlen(iter->value));
892		if (r != ARCHIVE_OK)
893			return (r);
894	}
895
896	for (;;) {
897		next = line + strspn(line, " \t\r\n");
898		if (*next == '\0')
899			return (ARCHIVE_OK);
900		if (next >= end)
901			return (ARCHIVE_OK);
902		line = next;
903		next = line + strcspn(line, " \t\r\n");
904		eq = strchr(line, '=');
905		if (eq == NULL || eq > next)
906			len = next - line;
907		else
908			len = eq - line;
909
910		remove_option(&entry->options, line, len);
911		r = add_option(a, &entry->options, line, next - line);
912		if (r != ARCHIVE_OK)
913			return (r);
914		line = next;
915	}
916}
917
918static int
919read_mtree(struct archive_read *a, struct mtree *mtree)
920{
921	ssize_t len;
922	uintmax_t counter;
923	char *p;
924	struct mtree_option *global;
925	struct mtree_entry *last_entry;
926	int r, is_form_d;
927
928	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
929	mtree->archive_format_name = "mtree";
930
931	global = NULL;
932	last_entry = NULL;
933
934	(void)detect_form(a, &is_form_d);
935
936	for (counter = 1; ; ++counter) {
937		len = readline(a, mtree, &p, 65536);
938		if (len == 0) {
939			mtree->this_entry = mtree->entries;
940			free_options(global);
941			return (ARCHIVE_OK);
942		}
943		if (len < 0) {
944			free_options(global);
945			return ((int)len);
946		}
947		/* Leading whitespace is never significant, ignore it. */
948		while (*p == ' ' || *p == '\t') {
949			++p;
950			--len;
951		}
952		/* Skip content lines and blank lines. */
953		if (*p == '#')
954			continue;
955		if (*p == '\r' || *p == '\n' || *p == '\0')
956			continue;
957		if (*p != '/') {
958			r = process_add_entry(a, mtree, &global, p, len,
959			    &last_entry, is_form_d);
960		} else if (strncmp(p, "/set", 4) == 0) {
961			if (p[4] != ' ' && p[4] != '\t')
962				break;
963			r = process_global_set(a, &global, p);
964		} else if (strncmp(p, "/unset", 6) == 0) {
965			if (p[6] != ' ' && p[6] != '\t')
966				break;
967			r = process_global_unset(a, &global, p);
968		} else
969			break;
970
971		if (r != ARCHIVE_OK) {
972			free_options(global);
973			return r;
974		}
975	}
976
977	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
978	    "Can't parse line %ju", counter);
979	free_options(global);
980	return (ARCHIVE_FATAL);
981}
982
983/*
984 * Read in the entire mtree file into memory on the first request.
985 * Then use the next unused file to satisfy each header request.
986 */
987static int
988read_header(struct archive_read *a, struct archive_entry *entry)
989{
990	struct mtree *mtree;
991	char *p;
992	int r, use_next;
993
994	mtree = (struct mtree *)(a->format->data);
995
996	if (mtree->fd >= 0) {
997		close(mtree->fd);
998		mtree->fd = -1;
999	}
1000
1001	if (mtree->entries == NULL) {
1002		mtree->resolver = archive_entry_linkresolver_new();
1003		if (mtree->resolver == NULL)
1004			return ARCHIVE_FATAL;
1005		archive_entry_linkresolver_set_strategy(mtree->resolver,
1006		    ARCHIVE_FORMAT_MTREE);
1007		r = read_mtree(a, mtree);
1008		if (r != ARCHIVE_OK)
1009			return (r);
1010	}
1011
1012	a->archive.archive_format = mtree->archive_format;
1013	a->archive.archive_format_name = mtree->archive_format_name;
1014
1015	for (;;) {
1016		if (mtree->this_entry == NULL)
1017			return (ARCHIVE_EOF);
1018		if (strcmp(mtree->this_entry->name, "..") == 0) {
1019			mtree->this_entry->used = 1;
1020			if (archive_strlen(&mtree->current_dir) > 0) {
1021				/* Roll back current path. */
1022				p = mtree->current_dir.s
1023				    + mtree->current_dir.length - 1;
1024				while (p >= mtree->current_dir.s && *p != '/')
1025					--p;
1026				if (p >= mtree->current_dir.s)
1027					--p;
1028				mtree->current_dir.length
1029				    = p - mtree->current_dir.s + 1;
1030			}
1031		}
1032		if (!mtree->this_entry->used) {
1033			use_next = 0;
1034			r = parse_file(a, entry, mtree, mtree->this_entry, &use_next);
1035			if (use_next == 0)
1036				return (r);
1037		}
1038		mtree->this_entry = mtree->this_entry->next;
1039	}
1040}
1041
1042/*
1043 * A single file can have multiple lines contribute specifications.
1044 * Parse as many lines as necessary, then pull additional information
1045 * from a backing file on disk as necessary.
1046 */
1047static int
1048parse_file(struct archive_read *a, struct archive_entry *entry,
1049    struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1050{
1051	const char *path;
1052	struct stat st_storage, *st;
1053	struct mtree_entry *mp;
1054	struct archive_entry *sparse_entry;
1055	int r = ARCHIVE_OK, r1, parsed_kws;
1056
1057	mentry->used = 1;
1058
1059	/* Initialize reasonable defaults. */
1060	archive_entry_set_filetype(entry, AE_IFREG);
1061	archive_entry_set_size(entry, 0);
1062	archive_string_empty(&mtree->contents_name);
1063
1064	/* Parse options from this line. */
1065	parsed_kws = 0;
1066	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1067
1068	if (mentry->full) {
1069		archive_entry_copy_pathname(entry, mentry->name);
1070		/*
1071		 * "Full" entries are allowed to have multiple lines
1072		 * and those lines aren't required to be adjacent.  We
1073		 * don't support multiple lines for "relative" entries
1074		 * nor do we make any attempt to merge data from
1075		 * separate "relative" and "full" entries.  (Merging
1076		 * "relative" and "full" entries would require dealing
1077		 * with pathname canonicalization, which is a very
1078		 * tricky subject.)
1079		 */
1080		for (mp = mentry->next; mp != NULL; mp = mp->next) {
1081			if (mp->full && !mp->used
1082			    && strcmp(mentry->name, mp->name) == 0) {
1083				/* Later lines override earlier ones. */
1084				mp->used = 1;
1085				r1 = parse_line(a, entry, mtree, mp,
1086				    &parsed_kws);
1087				if (r1 < r)
1088					r = r1;
1089			}
1090		}
1091	} else {
1092		/*
1093		 * Relative entries require us to construct
1094		 * the full path and possibly update the
1095		 * current directory.
1096		 */
1097		size_t n = archive_strlen(&mtree->current_dir);
1098		if (n > 0)
1099			archive_strcat(&mtree->current_dir, "/");
1100		archive_strcat(&mtree->current_dir, mentry->name);
1101		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1102		if (archive_entry_filetype(entry) != AE_IFDIR)
1103			mtree->current_dir.length = n;
1104	}
1105
1106	/*
1107	 * Try to open and stat the file to get the real size
1108	 * and other file info.  It would be nice to avoid
1109	 * this here so that getting a listing of an mtree
1110	 * wouldn't require opening every referenced contents
1111	 * file.  But then we wouldn't know the actual
1112	 * contents size, so I don't see a really viable way
1113	 * around this.  (Also, we may want to someday pull
1114	 * other unspecified info from the contents file on
1115	 * disk.)
1116	 */
1117	mtree->fd = -1;
1118	if (archive_strlen(&mtree->contents_name) > 0)
1119		path = mtree->contents_name.s;
1120	else
1121		path = archive_entry_pathname(entry);
1122
1123	if (archive_entry_filetype(entry) == AE_IFREG ||
1124	    archive_entry_filetype(entry) == AE_IFDIR) {
1125		mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1126		__archive_ensure_cloexec_flag(mtree->fd);
1127		if (mtree->fd == -1 &&
1128		    (errno != ENOENT ||
1129		     archive_strlen(&mtree->contents_name) > 0)) {
1130			archive_set_error(&a->archive, errno,
1131			    "Can't open %s", path);
1132			r = ARCHIVE_WARN;
1133		}
1134	}
1135
1136	st = &st_storage;
1137	if (mtree->fd >= 0) {
1138		if (fstat(mtree->fd, st) == -1) {
1139			archive_set_error(&a->archive, errno,
1140			    "Could not fstat %s", path);
1141			r = ARCHIVE_WARN;
1142			/* If we can't stat it, don't keep it open. */
1143			close(mtree->fd);
1144			mtree->fd = -1;
1145			st = NULL;
1146		}
1147	} else if (lstat(path, st) == -1) {
1148		st = NULL;
1149	}
1150
1151	/*
1152	 * Check for a mismatch between the type in the specification and
1153	 * the type of the contents object on disk.
1154	 */
1155	if (st != NULL) {
1156		if (
1157		    ((st->st_mode & S_IFMT) == S_IFREG &&
1158		     archive_entry_filetype(entry) == AE_IFREG)
1159#ifdef S_IFLNK
1160		    || ((st->st_mode & S_IFMT) == S_IFLNK &&
1161			archive_entry_filetype(entry) == AE_IFLNK)
1162#endif
1163#ifdef S_IFSOCK
1164		    || ((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1165			archive_entry_filetype(entry) == AE_IFSOCK)
1166#endif
1167#ifdef S_IFCHR
1168		    || ((st->st_mode & S_IFMT) == S_IFCHR &&
1169			archive_entry_filetype(entry) == AE_IFCHR)
1170#endif
1171#ifdef S_IFBLK
1172		    || ((st->st_mode & S_IFMT) == S_IFBLK &&
1173			archive_entry_filetype(entry) == AE_IFBLK)
1174#endif
1175		    || ((st->st_mode & S_IFMT) == S_IFDIR &&
1176			archive_entry_filetype(entry) == AE_IFDIR)
1177#ifdef S_IFIFO
1178		    || ((st->st_mode & S_IFMT) == S_IFIFO &&
1179			archive_entry_filetype(entry) == AE_IFIFO)
1180#endif
1181		    ) {
1182			/* Types match. */
1183		} else {
1184			/* Types don't match; bail out gracefully. */
1185			if (mtree->fd >= 0)
1186				close(mtree->fd);
1187			mtree->fd = -1;
1188			if (parsed_kws & MTREE_HAS_OPTIONAL) {
1189				/* It's not an error for an optional entry
1190				   to not match disk. */
1191				*use_next = 1;
1192			} else if (r == ARCHIVE_OK) {
1193				archive_set_error(&a->archive,
1194				    ARCHIVE_ERRNO_MISC,
1195				    "mtree specification has different type for %s",
1196				    archive_entry_pathname(entry));
1197				r = ARCHIVE_WARN;
1198			}
1199			return r;
1200		}
1201	}
1202
1203	/*
1204	 * If there is a contents file on disk, pick some of the metadata
1205	 * from that file.  For most of these, we only set it from the contents
1206	 * if it wasn't already parsed from the specification.
1207	 */
1208	if (st != NULL) {
1209		if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1210		     (parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1211		    (archive_entry_filetype(entry) == AE_IFCHR ||
1212		     archive_entry_filetype(entry) == AE_IFBLK))
1213			archive_entry_set_rdev(entry, st->st_rdev);
1214		if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME)) == 0 ||
1215		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1216			archive_entry_set_gid(entry, st->st_gid);
1217		if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME)) == 0 ||
1218		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1219			archive_entry_set_uid(entry, st->st_uid);
1220		if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1221		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1222#if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1223			archive_entry_set_mtime(entry, st->st_mtime,
1224			    st->st_mtimespec.tv_nsec);
1225#elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1226			archive_entry_set_mtime(entry, st->st_mtime,
1227			    st->st_mtim.tv_nsec);
1228#elif HAVE_STRUCT_STAT_ST_MTIME_N
1229			archive_entry_set_mtime(entry, st->st_mtime,
1230			    st->st_mtime_n);
1231#elif HAVE_STRUCT_STAT_ST_UMTIME
1232			archive_entry_set_mtime(entry, st->st_mtime,
1233			    st->st_umtime*1000);
1234#elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1235			archive_entry_set_mtime(entry, st->st_mtime,
1236			    st->st_mtime_usec*1000);
1237#else
1238			archive_entry_set_mtime(entry, st->st_mtime, 0);
1239#endif
1240		}
1241		if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1242		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1243			archive_entry_set_nlink(entry, st->st_nlink);
1244		if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1245		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1246			archive_entry_set_perm(entry, st->st_mode);
1247		if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1248		    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1249			archive_entry_set_size(entry, st->st_size);
1250		archive_entry_set_ino(entry, st->st_ino);
1251		archive_entry_set_dev(entry, st->st_dev);
1252
1253		archive_entry_linkify(mtree->resolver, &entry, &sparse_entry);
1254	} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1255		/*
1256		 * Couldn't open the entry, stat it or the on-disk type
1257		 * didn't match.  If this entry is optional, just ignore it
1258		 * and read the next header entry.
1259		 */
1260		*use_next = 1;
1261		return ARCHIVE_OK;
1262	}
1263
1264	mtree->cur_size = archive_entry_size(entry);
1265	mtree->offset = 0;
1266
1267	return r;
1268}
1269
1270/*
1271 * Each line contains a sequence of keywords.
1272 */
1273static int
1274parse_line(struct archive_read *a, struct archive_entry *entry,
1275    struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1276{
1277	struct mtree_option *iter;
1278	int r = ARCHIVE_OK, r1;
1279
1280	for (iter = mp->options; iter != NULL; iter = iter->next) {
1281		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1282		if (r1 < r)
1283			r = r1;
1284	}
1285	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1286		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1287		    "Missing type keyword in mtree specification");
1288		return (ARCHIVE_WARN);
1289	}
1290	return (r);
1291}
1292
1293/*
1294 * Device entries have one of the following forms:
1295 * raw dev_t
1296 * format,major,minor[,subdevice]
1297 *
1298 * Just use major and minor, no translation etc is done
1299 * between formats.
1300 */
1301static int
1302parse_device(struct archive *a, struct archive_entry *entry, char *val)
1303{
1304	char *comma1, *comma2;
1305
1306	comma1 = strchr(val, ',');
1307	if (comma1 == NULL) {
1308		archive_entry_set_dev(entry, (dev_t)mtree_atol10(&val));
1309		return (ARCHIVE_OK);
1310	}
1311	++comma1;
1312	comma2 = strchr(comma1, ',');
1313	if (comma2 == NULL) {
1314		archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1315		    "Malformed device attribute");
1316		return (ARCHIVE_WARN);
1317	}
1318	++comma2;
1319	archive_entry_set_rdevmajor(entry, (dev_t)mtree_atol(&comma1));
1320	archive_entry_set_rdevminor(entry, (dev_t)mtree_atol(&comma2));
1321	return (ARCHIVE_OK);
1322}
1323
1324/*
1325 * Parse a single keyword and its value.
1326 */
1327static int
1328parse_keyword(struct archive_read *a, struct mtree *mtree,
1329    struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1330{
1331	char *val, *key;
1332
1333	key = opt->value;
1334
1335	if (*key == '\0')
1336		return (ARCHIVE_OK);
1337
1338	if (strcmp(key, "nochange") == 0) {
1339		*parsed_kws |= MTREE_HAS_NOCHANGE;
1340		return (ARCHIVE_OK);
1341	}
1342	if (strcmp(key, "optional") == 0) {
1343		*parsed_kws |= MTREE_HAS_OPTIONAL;
1344		return (ARCHIVE_OK);
1345	}
1346	if (strcmp(key, "ignore") == 0) {
1347		/*
1348		 * The mtree processing is not recursive, so
1349		 * recursion will only happen for explicitly listed
1350		 * entries.
1351		 */
1352		return (ARCHIVE_OK);
1353	}
1354
1355	val = strchr(key, '=');
1356	if (val == NULL) {
1357		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1358		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1359		return (ARCHIVE_WARN);
1360	}
1361
1362	*val = '\0';
1363	++val;
1364
1365	switch (key[0]) {
1366	case 'c':
1367		if (strcmp(key, "content") == 0
1368		    || strcmp(key, "contents") == 0) {
1369			parse_escapes(val, NULL);
1370			archive_strcpy(&mtree->contents_name, val);
1371			break;
1372		}
1373		if (strcmp(key, "cksum") == 0)
1374			break;
1375	case 'd':
1376		if (strcmp(key, "device") == 0) {
1377			*parsed_kws |= MTREE_HAS_DEVICE;
1378			return parse_device(&a->archive, entry, val);
1379		}
1380	case 'f':
1381		if (strcmp(key, "flags") == 0) {
1382			*parsed_kws |= MTREE_HAS_FFLAGS;
1383			archive_entry_copy_fflags_text(entry, val);
1384			break;
1385		}
1386	case 'g':
1387		if (strcmp(key, "gid") == 0) {
1388			*parsed_kws |= MTREE_HAS_GID;
1389			archive_entry_set_gid(entry, mtree_atol10(&val));
1390			break;
1391		}
1392		if (strcmp(key, "gname") == 0) {
1393			*parsed_kws |= MTREE_HAS_GNAME;
1394			archive_entry_copy_gname(entry, val);
1395			break;
1396		}
1397	case 'l':
1398		if (strcmp(key, "link") == 0) {
1399			archive_entry_copy_symlink(entry, val);
1400			break;
1401		}
1402	case 'm':
1403		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1404			break;
1405		if (strcmp(key, "mode") == 0) {
1406			if (val[0] >= '0' && val[0] <= '9') {
1407				*parsed_kws |= MTREE_HAS_PERM;
1408				archive_entry_set_perm(entry,
1409				    (mode_t)mtree_atol8(&val));
1410			} else {
1411				archive_set_error(&a->archive,
1412				    ARCHIVE_ERRNO_FILE_FORMAT,
1413				    "Symbolic mode \"%s\" unsupported", val);
1414				return ARCHIVE_WARN;
1415			}
1416			break;
1417		}
1418	case 'n':
1419		if (strcmp(key, "nlink") == 0) {
1420			*parsed_kws |= MTREE_HAS_NLINK;
1421			archive_entry_set_nlink(entry,
1422				(unsigned int)mtree_atol10(&val));
1423			break;
1424		}
1425	case 'r':
1426		if (strcmp(key, "rmd160") == 0 ||
1427		    strcmp(key, "rmd160digest") == 0)
1428			break;
1429	case 's':
1430		if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1431			break;
1432		if (strcmp(key, "sha256") == 0 ||
1433		    strcmp(key, "sha256digest") == 0)
1434			break;
1435		if (strcmp(key, "sha384") == 0 ||
1436		    strcmp(key, "sha384digest") == 0)
1437			break;
1438		if (strcmp(key, "sha512") == 0 ||
1439		    strcmp(key, "sha512digest") == 0)
1440			break;
1441		if (strcmp(key, "size") == 0) {
1442			archive_entry_set_size(entry, mtree_atol10(&val));
1443			break;
1444		}
1445	case 't':
1446		if (strcmp(key, "tags") == 0) {
1447			/*
1448			 * Comma delimited list of tags.
1449			 * Ignore the tags for now, but the interface
1450			 * should be extended to allow inclusion/exclusion.
1451			 */
1452			break;
1453		}
1454		if (strcmp(key, "time") == 0) {
1455			int64_t m;
1456			int64_t my_time_t_max = get_time_t_max();
1457			int64_t my_time_t_min = get_time_t_min();
1458			long ns;
1459
1460			*parsed_kws |= MTREE_HAS_MTIME;
1461			m = mtree_atol10(&val);
1462			/* Replicate an old mtree bug:
1463			 * 123456789.1 represents 123456789
1464			 * seconds and 1 nanosecond. */
1465			if (*val == '.') {
1466				++val;
1467				ns = (long)mtree_atol10(&val);
1468			} else
1469				ns = 0;
1470			if (m > my_time_t_max)
1471				m = my_time_t_max;
1472			else if (m < my_time_t_min)
1473				m = my_time_t_min;
1474			archive_entry_set_mtime(entry, (time_t)m, ns);
1475			break;
1476		}
1477		if (strcmp(key, "type") == 0) {
1478			switch (val[0]) {
1479			case 'b':
1480				if (strcmp(val, "block") == 0) {
1481					archive_entry_set_filetype(entry, AE_IFBLK);
1482					break;
1483				}
1484			case 'c':
1485				if (strcmp(val, "char") == 0) {
1486					archive_entry_set_filetype(entry, AE_IFCHR);
1487					break;
1488				}
1489			case 'd':
1490				if (strcmp(val, "dir") == 0) {
1491					archive_entry_set_filetype(entry, AE_IFDIR);
1492					break;
1493				}
1494			case 'f':
1495				if (strcmp(val, "fifo") == 0) {
1496					archive_entry_set_filetype(entry, AE_IFIFO);
1497					break;
1498				}
1499				if (strcmp(val, "file") == 0) {
1500					archive_entry_set_filetype(entry, AE_IFREG);
1501					break;
1502				}
1503			case 'l':
1504				if (strcmp(val, "link") == 0) {
1505					archive_entry_set_filetype(entry, AE_IFLNK);
1506					break;
1507				}
1508			default:
1509				archive_set_error(&a->archive,
1510				    ARCHIVE_ERRNO_FILE_FORMAT,
1511				    "Unrecognized file type \"%s\"; assuming \"file\"", val);
1512				archive_entry_set_filetype(entry, AE_IFREG);
1513				return (ARCHIVE_WARN);
1514			}
1515			*parsed_kws |= MTREE_HAS_TYPE;
1516			break;
1517		}
1518	case 'u':
1519		if (strcmp(key, "uid") == 0) {
1520			*parsed_kws |= MTREE_HAS_UID;
1521			archive_entry_set_uid(entry, mtree_atol10(&val));
1522			break;
1523		}
1524		if (strcmp(key, "uname") == 0) {
1525			*parsed_kws |= MTREE_HAS_UNAME;
1526			archive_entry_copy_uname(entry, val);
1527			break;
1528		}
1529	default:
1530		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1531		    "Unrecognized key %s=%s", key, val);
1532		return (ARCHIVE_WARN);
1533	}
1534	return (ARCHIVE_OK);
1535}
1536
1537static int
1538read_data(struct archive_read *a, const void **buff, size_t *size, int64_t *offset)
1539{
1540	size_t bytes_to_read;
1541	ssize_t bytes_read;
1542	struct mtree *mtree;
1543
1544	mtree = (struct mtree *)(a->format->data);
1545	if (mtree->fd < 0) {
1546		*buff = NULL;
1547		*offset = 0;
1548		*size = 0;
1549		return (ARCHIVE_EOF);
1550	}
1551	if (mtree->buff == NULL) {
1552		mtree->buffsize = 64 * 1024;
1553		mtree->buff = malloc(mtree->buffsize);
1554		if (mtree->buff == NULL) {
1555			archive_set_error(&a->archive, ENOMEM,
1556			    "Can't allocate memory");
1557			return (ARCHIVE_FATAL);
1558		}
1559	}
1560
1561	*buff = mtree->buff;
1562	*offset = mtree->offset;
1563	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1564		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1565	else
1566		bytes_to_read = mtree->buffsize;
1567	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1568	if (bytes_read < 0) {
1569		archive_set_error(&a->archive, errno, "Can't read");
1570		return (ARCHIVE_WARN);
1571	}
1572	if (bytes_read == 0) {
1573		*size = 0;
1574		return (ARCHIVE_EOF);
1575	}
1576	mtree->offset += bytes_read;
1577	*size = bytes_read;
1578	return (ARCHIVE_OK);
1579}
1580
1581/* Skip does nothing except possibly close the contents file. */
1582static int
1583skip(struct archive_read *a)
1584{
1585	struct mtree *mtree;
1586
1587	mtree = (struct mtree *)(a->format->data);
1588	if (mtree->fd >= 0) {
1589		close(mtree->fd);
1590		mtree->fd = -1;
1591	}
1592	return (ARCHIVE_OK);
1593}
1594
1595/*
1596 * Since parsing backslash sequences always makes strings shorter,
1597 * we can always do this conversion in-place.
1598 */
1599static void
1600parse_escapes(char *src, struct mtree_entry *mentry)
1601{
1602	char *dest = src;
1603	char c;
1604
1605	if (mentry != NULL && strcmp(src, ".") == 0)
1606		mentry->full = 1;
1607
1608	while (*src != '\0') {
1609		c = *src++;
1610		if (c == '/' && mentry != NULL)
1611			mentry->full = 1;
1612		if (c == '\\') {
1613			switch (src[0]) {
1614			case '0':
1615				if (src[1] < '0' || src[1] > '7') {
1616					c = 0;
1617					++src;
1618					break;
1619				}
1620				/* FALLTHROUGH */
1621			case '1':
1622			case '2':
1623			case '3':
1624				if (src[1] >= '0' && src[1] <= '7' &&
1625				    src[2] >= '0' && src[2] <= '7') {
1626					c = (src[0] - '0') << 6;
1627					c |= (src[1] - '0') << 3;
1628					c |= (src[2] - '0');
1629					src += 3;
1630				}
1631				break;
1632			case 'a':
1633				c = '\a';
1634				++src;
1635				break;
1636			case 'b':
1637				c = '\b';
1638				++src;
1639				break;
1640			case 'f':
1641				c = '\f';
1642				++src;
1643				break;
1644			case 'n':
1645				c = '\n';
1646				++src;
1647				break;
1648			case 'r':
1649				c = '\r';
1650				++src;
1651				break;
1652			case 's':
1653				c = ' ';
1654				++src;
1655				break;
1656			case 't':
1657				c = '\t';
1658				++src;
1659				break;
1660			case 'v':
1661				c = '\v';
1662				++src;
1663				break;
1664			}
1665		}
1666		*dest++ = c;
1667	}
1668	*dest = '\0';
1669}
1670
/*
 * Parse an unsigned octal number at *p, advancing *p past the digits
 * that were consumed.
 *
 * Parsing stops at the first character that is not '0'..'7'.  If the
 * value would exceed INT64_MAX, INT64_MAX is returned and *p is left
 * on the digit that caused the overflow.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol8(char **p)
{
	const int64_t max_prefix = INT64_MAX / 8;
	const int64_t max_last = INT64_MAX % 8;
	int64_t value = 0;

	for (;;) {
		int d = **p - '0';

		if (d < 0 || d >= 8)
			break;
		if (value > max_prefix ||
		    (value == max_prefix && d > max_last)) {
			/* Truncate on overflow. */
			value = INT64_MAX;
			break;
		}
		value = value * 8 + d;
		++(*p);
	}
	return (value);
}
1698
/*
 * Parse a signed decimal number at *p, advancing *p past the
 * characters that were consumed.
 *
 * An optional leading '-' is accepted (a leading '+' is not).
 * Parsing stops at the first non-digit.  On overflow the result
 * saturates to INT64_MAX (or INT64_MIN for negative input) and *p is
 * left on the digit that caused the overflow.
 *
 * The magnitude is accumulated in an unsigned 64-bit variable so that
 * the most negative value, whose magnitude does not fit in int64_t,
 * can be parsed without signed overflow.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol10(char **p)
{
	uint64_t magnitude = 0;
	uint64_t bound;		/* Largest magnitude that is representable. */
	int negative = 0;

	if (**p == '-') {
		negative = 1;
		bound = (uint64_t)INT64_MAX + 1;
		++(*p);
	} else {
		bound = (uint64_t)INT64_MAX;
	}

	for (;;) {
		int digit = **p - '0';

		if (digit < 0 || digit > 9)
			break;
		if (magnitude > bound / 10 ||
		    (magnitude == bound / 10 &&
		     (uint64_t)digit > bound % 10))
			return negative ? INT64_MIN : INT64_MAX;
		magnitude = magnitude * 10 + (uint64_t)digit;
		++(*p);
	}

	if (!negative)
		return (int64_t)magnitude;
	/* The magnitude of INT64_MIN cannot be negated as an int64_t. */
	if (magnitude == (uint64_t)INT64_MAX + 1)
		return INT64_MIN;
	return -(int64_t)magnitude;
}
1733
/*
 * Parse a hex digit.
 *
 * Returns the digit's value (0..15) for '0'-'9', 'a'-'f' and 'A'-'F',
 * or -1 if 'c' is not a hexadecimal digit.
 */
static int
parsehex(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	else if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;	/* 'a' is ten, not zero. */
	else if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	else
		return -1;
}
1747
/*
 * Parse a signed hexadecimal number (no "0x" prefix) at *p, advancing
 * *p past the characters that were consumed.
 *
 * An optional leading '-' is accepted.  Parsing stops at the first
 * character parsehex() rejects.  On overflow the result saturates to
 * INT64_MAX (or INT64_MIN for negative input) and *p is left on the
 * digit that caused the overflow.
 *
 * The magnitude is accumulated in an unsigned 64-bit variable so that
 * the most negative value, whose magnitude does not fit in int64_t,
 * can be parsed without signed overflow.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
mtree_atol16(char **p)
{
	uint64_t magnitude = 0;
	uint64_t bound;		/* Largest magnitude that is representable. */
	int negative = 0;
	int digit;

	if (**p == '-') {
		negative = 1;
		bound = (uint64_t)INT64_MAX + 1;
		++(*p);
	} else {
		bound = (uint64_t)INT64_MAX;
	}

	digit = parsehex(**p);
	while (digit >= 0 && digit < 16) {
		if (magnitude > bound / 16 ||
		    (magnitude == bound / 16 &&
		     (uint64_t)digit > bound % 16))
			return negative ? INT64_MIN : INT64_MAX;
		magnitude = magnitude * 16 + (uint64_t)digit;
		digit = parsehex(*++(*p));
	}

	if (!negative)
		return (int64_t)magnitude;
	/* The magnitude of INT64_MIN cannot be negated as an int64_t. */
	if (magnitude == (uint64_t)INT64_MAX + 1)
		return INT64_MIN;
	return -(int64_t)magnitude;
}
1782
/*
 * Parse an integer at *p, choosing the base from its prefix:
 * "0x"/"0X" selects hexadecimal, any other leading '0' selects octal,
 * and everything else is parsed as decimal.  *p is advanced by the
 * base-specific parser (and past the "0x" prefix for hexadecimal).
 */
static int64_t
mtree_atol(char **p)
{
	const char *s = *p;

	if (s[0] != '0')
		return mtree_atol10(p);
	if (s[1] != 'x' && s[1] != 'X')
		return mtree_atol8(p);

	/* Skip the "0x" prefix; mtree_atol16() expects bare digits. */
	*p += 2;
	return mtree_atol16(p);
}
1794
1795/*
1796 * Returns length of line (including trailing newline)
1797 * or negative on error.  'start' argument is updated to
1798 * point to first character of line.
1799 */
1800static ssize_t
1801readline(struct archive_read *a, struct mtree *mtree, char **start, ssize_t limit)
1802{
1803	ssize_t bytes_read;
1804	ssize_t total_size = 0;
1805	ssize_t find_off = 0;
1806	const void *t;
1807	const char *s;
1808	void *p;
1809	char *u;
1810
1811	/* Accumulate line in a line buffer. */
1812	for (;;) {
1813		/* Read some more. */
1814		t = __archive_read_ahead(a, 1, &bytes_read);
1815		if (t == NULL)
1816			return (0);
1817		if (bytes_read < 0)
1818			return (ARCHIVE_FATAL);
1819		s = t;  /* Start of line? */
1820		p = memchr(t, '\n', bytes_read);
1821		/* If we found '\n', trim the read. */
1822		if (p != NULL) {
1823			bytes_read = 1 + ((const char *)p) - s;
1824		}
1825		if (total_size + bytes_read + 1 > limit) {
1826			archive_set_error(&a->archive,
1827			    ARCHIVE_ERRNO_FILE_FORMAT,
1828			    "Line too long");
1829			return (ARCHIVE_FATAL);
1830		}
1831		if (archive_string_ensure(&mtree->line,
1832			total_size + bytes_read + 1) == NULL) {
1833			archive_set_error(&a->archive, ENOMEM,
1834			    "Can't allocate working buffer");
1835			return (ARCHIVE_FATAL);
1836		}
1837		memcpy(mtree->line.s + total_size, t, bytes_read);
1838		__archive_read_consume(a, bytes_read);
1839		total_size += bytes_read;
1840		/* Null terminate. */
1841		mtree->line.s[total_size] = '\0';
1842		/* If we found an unescaped '\n', clean up and return. */
1843		for (u = mtree->line.s + find_off; *u; ++u) {
1844			if (u[0] == '\n') {
1845				*start = mtree->line.s;
1846				return total_size;
1847			}
1848			if (u[0] == '#') {
1849				if (p == NULL)
1850					break;
1851				*start = mtree->line.s;
1852				return total_size;
1853			}
1854			if (u[0] != '\\')
1855				continue;
1856			if (u[1] == '\\') {
1857				++u;
1858				continue;
1859			}
1860			if (u[1] == '\n') {
1861				memmove(u, u + 1,
1862				    total_size - (u - mtree->line.s) + 1);
1863				--total_size;
1864				++u;
1865				break;
1866			}
1867			if (u[1] == '\0')
1868				break;
1869		}
1870		find_off = u - mtree->line.s;
1871	}
1872}
1873