archive_read_support_format_mtree.c revision 313926
1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2008 Joerg Sonnenberger
4 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: stable/11/contrib/libarchive/libarchive/archive_read_support_format_mtree.c 313926 2017-02-18 21:58:56Z mm $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_FCNTL_H
38#include <fcntl.h>
39#endif
40#include <stddef.h>
41/* #include <stdint.h> */ /* See archive_platform.h */
42#ifdef HAVE_STDLIB_H
43#include <stdlib.h>
44#endif
45#ifdef HAVE_STRING_H
46#include <string.h>
47#endif
48
49#include "archive.h"
50#include "archive_entry.h"
51#include "archive_private.h"
52#include "archive_read_private.h"
53#include "archive_string.h"
54#include "archive_pack_dev.h"
55
56#ifndef O_BINARY
57#define	O_BINARY 0
58#endif
59#ifndef O_CLOEXEC
60#define O_CLOEXEC	0
61#endif
62
63#define	MTREE_HAS_DEVICE	0x0001
64#define	MTREE_HAS_FFLAGS	0x0002
65#define	MTREE_HAS_GID		0x0004
66#define	MTREE_HAS_GNAME		0x0008
67#define	MTREE_HAS_MTIME		0x0010
68#define	MTREE_HAS_NLINK		0x0020
69#define	MTREE_HAS_PERM		0x0040
70#define	MTREE_HAS_SIZE		0x0080
71#define	MTREE_HAS_TYPE		0x0100
72#define	MTREE_HAS_UID		0x0200
73#define	MTREE_HAS_UNAME		0x0400
74
75#define	MTREE_HAS_OPTIONAL	0x0800
76#define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */
77
78#define	MTREE_HASHTABLE_SIZE 1024
79
80struct mtree_option {
81	struct mtree_option *next;
82	char *value;
83};
84
85struct mtree_entry {
86	struct mtree_entry *next;
87	struct mtree_option *options;
88	char *name;
89	char full;
90	char used;
91	unsigned int name_hash;
92	struct mtree_entry *hashtable_next;
93};
94
95struct mtree {
96	struct archive_string	 line;
97	size_t			 buffsize;
98	char			*buff;
99	int64_t			 offset;
100	int			 fd;
101	int			 archive_format;
102	const char		*archive_format_name;
103	struct mtree_entry	*entries;
104	struct mtree_entry	*this_entry;
105	struct mtree_entry	*entry_hashtable[MTREE_HASHTABLE_SIZE];
106	struct archive_string	 current_dir;
107	struct archive_string	 contents_name;
108
109	struct archive_entry_linkresolver *resolver;
110
111	int64_t			 cur_size;
112	char checkfs;
113};
114
115static int	bid_keycmp(const char *, const char *, ssize_t);
116static int	cleanup(struct archive_read *);
117static int	detect_form(struct archive_read *, int *);
118static unsigned int	hash(const char *);
119static int	mtree_bid(struct archive_read *, int);
120static int	parse_file(struct archive_read *, struct archive_entry *,
121		    struct mtree *, struct mtree_entry *, int *);
122static void	parse_escapes(char *, struct mtree_entry *);
123static int	parse_line(struct archive_read *, struct archive_entry *,
124		    struct mtree *, struct mtree_entry *, int *);
125static int	parse_keyword(struct archive_read *, struct mtree *,
126		    struct archive_entry *, struct mtree_option *, int *);
127static int	read_data(struct archive_read *a,
128		    const void **buff, size_t *size, int64_t *offset);
129static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
130static int	skip(struct archive_read *a);
131static int	read_header(struct archive_read *,
132		    struct archive_entry *);
133static int64_t	mtree_atol10(char **);
134static int64_t	mtree_atol8(char **);
135static int64_t	mtree_atol(char **);
136
137/*
138 * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
139 * here.  TODO: Move this to configure time, but be careful
140 * about cross-compile environments.
141 */
142static int64_t
143get_time_t_max(void)
144{
145#if defined(TIME_T_MAX)
146	return TIME_T_MAX;
147#else
148	/* ISO C allows time_t to be a floating-point type,
149	   but POSIX requires an integer type.  The following
150	   should work on any system that follows the POSIX
151	   conventions. */
152	if (((time_t)0) < ((time_t)-1)) {
153		/* Time_t is unsigned */
154		return (~(time_t)0);
155	} else {
156		/* Time_t is signed. */
157		/* Assume it's the same as int64_t or int32_t */
158		if (sizeof(time_t) == sizeof(int64_t)) {
159			return (time_t)INT64_MAX;
160		} else {
161			return (time_t)INT32_MAX;
162		}
163	}
164#endif
165}
166
167static int64_t
168get_time_t_min(void)
169{
170#if defined(TIME_T_MIN)
171	return TIME_T_MIN;
172#else
173	if (((time_t)0) < ((time_t)-1)) {
174		/* Time_t is unsigned */
175		return (time_t)0;
176	} else {
177		/* Time_t is signed. */
178		if (sizeof(time_t) == sizeof(int64_t)) {
179			return (time_t)INT64_MIN;
180		} else {
181			return (time_t)INT32_MIN;
182		}
183	}
184#endif
185}
186
187static int
188archive_read_format_mtree_options(struct archive_read *a,
189    const char *key, const char *val)
190{
191	struct mtree *mtree;
192
193	mtree = (struct mtree *)(a->format->data);
194	if (strcmp(key, "checkfs")  == 0) {
195		/* Allows to read information missing from the mtree from the file system */
196		if (val == NULL || val[0] == 0) {
197			mtree->checkfs = 0;
198		} else {
199			mtree->checkfs = 1;
200		}
201		return (ARCHIVE_OK);
202	}
203
204	/* Note: The "warn" return is just to inform the options
205	 * supervisor that we didn't handle it.  It will generate
206	 * a suitable error if no one used this option. */
207	return (ARCHIVE_WARN);
208}
209
210static void
211free_options(struct mtree_option *head)
212{
213	struct mtree_option *next;
214
215	for (; head != NULL; head = next) {
216		next = head->next;
217		free(head->value);
218		free(head);
219	}
220}
221
222int
223archive_read_support_format_mtree(struct archive *_a)
224{
225	struct archive_read *a = (struct archive_read *)_a;
226	struct mtree *mtree;
227	int r;
228
229	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
230	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
231
232	mtree = (struct mtree *)calloc(1, sizeof(*mtree));
233	if (mtree == NULL) {
234		archive_set_error(&a->archive, ENOMEM,
235		    "Can't allocate mtree data");
236		return (ARCHIVE_FATAL);
237	}
238	mtree->fd = -1;
239
240	r = __archive_read_register_format(a, mtree, "mtree",
241           mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
242
243	if (r != ARCHIVE_OK)
244		free(mtree);
245	return (ARCHIVE_OK);
246}
247
248static int
249cleanup(struct archive_read *a)
250{
251	struct mtree *mtree;
252	struct mtree_entry *p, *q;
253
254	mtree = (struct mtree *)(a->format->data);
255
256	p = mtree->entries;
257	while (p != NULL) {
258		q = p->next;
259		free(p->name);
260		free_options(p->options);
261		free(p);
262		p = q;
263	}
264	archive_string_free(&mtree->line);
265	archive_string_free(&mtree->current_dir);
266	archive_string_free(&mtree->contents_name);
267	archive_entry_linkresolver_free(mtree->resolver);
268
269	free(mtree->buff);
270	free(mtree);
271	(a->format->data) = NULL;
272	return (ARCHIVE_OK);
273}
274
275static ssize_t
276get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
277{
278	ssize_t len;
279
280	len = 0;
281	while (len < avail) {
282		switch (*b) {
283		case '\0':/* Non-ascii character or control character. */
284			if (nlsize != NULL)
285				*nlsize = 0;
286			return (-1);
287		case '\r':
288			if (avail-len > 1 && b[1] == '\n') {
289				if (nlsize != NULL)
290					*nlsize = 2;
291				return (len+2);
292			}
293			/* FALL THROUGH */
294		case '\n':
295			if (nlsize != NULL)
296				*nlsize = 1;
297			return (len+1);
298		default:
299			b++;
300			len++;
301			break;
302		}
303	}
304	if (nlsize != NULL)
305		*nlsize = 0;
306	return (avail);
307}
308
309/*
310 *  <---------------- ravail --------------------->
311 *  <-- diff ------> <---  avail ----------------->
312 *                   <---- len ----------->
313 * | Previous lines | line being parsed  nl extra |
314 *                  ^
315 *                  b
316 *
317 */
318static ssize_t
319next_line(struct archive_read *a,
320    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
321{
322	ssize_t len;
323	int quit;
324
325	quit = 0;
326	if (*avail == 0) {
327		*nl = 0;
328		len = 0;
329	} else
330		len = get_line_size(*b, *avail, nl);
331	/*
332	 * Read bytes more while it does not reach the end of line.
333	 */
334	while (*nl == 0 && len == *avail && !quit) {
335		ssize_t diff = *ravail - *avail;
336		size_t nbytes_req = (*ravail+1023) & ~1023U;
337		ssize_t tested;
338
339		/* Increase reading bytes if it is not enough to at least
340		 * new two lines. */
341		if (nbytes_req < (size_t)*ravail + 160)
342			nbytes_req <<= 1;
343
344		*b = __archive_read_ahead(a, nbytes_req, avail);
345		if (*b == NULL) {
346			if (*ravail >= *avail)
347				return (0);
348			/* Reading bytes reaches the end of file. */
349			*b = __archive_read_ahead(a, *avail, avail);
350			quit = 1;
351		}
352		*ravail = *avail;
353		*b += diff;
354		*avail -= diff;
355		tested = len;/* Skip some bytes we already determinated. */
356		len = get_line_size(*b + len, *avail - len, nl);
357		if (len >= 0)
358			len += tested;
359	}
360	return (len);
361}
362
363/*
364 * Compare characters with a mtree keyword.
365 * Returns the length of a mtree keyword if matched.
366 * Returns 0 if not matched.
367 */
368static int
369bid_keycmp(const char *p, const char *key, ssize_t len)
370{
371	int match_len = 0;
372
373	while (len > 0 && *p && *key) {
374		if (*p == *key) {
375			--len;
376			++p;
377			++key;
378			++match_len;
379			continue;
380		}
381		return (0);/* Not match */
382	}
383	if (*key != '\0')
384		return (0);/* Not match */
385
386	/* A following character should be specified characters */
387	if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' ||
388	    p[0] == '\n' || p[0] == '\r' ||
389	   (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')))
390		return (match_len);
391	return (0);/* Not match */
392}
393
394/*
395 * Test whether the characters 'p' has is mtree keyword.
396 * Returns the length of a detected keyword.
397 * Returns 0 if any keywords were not found.
398 */
399static int
400bid_keyword(const char *p,  ssize_t len)
401{
402	static const char *keys_c[] = {
403		"content", "contents", "cksum", NULL
404	};
405	static const char *keys_df[] = {
406		"device", "flags", NULL
407	};
408	static const char *keys_g[] = {
409		"gid", "gname", NULL
410	};
411	static const char *keys_il[] = {
412		"ignore", "inode", "link", NULL
413	};
414	static const char *keys_m[] = {
415		"md5", "md5digest", "mode", NULL
416	};
417	static const char *keys_no[] = {
418		"nlink", "nochange", "optional", NULL
419	};
420	static const char *keys_r[] = {
421		"resdevice", "rmd160", "rmd160digest", NULL
422	};
423	static const char *keys_s[] = {
424		"sha1", "sha1digest",
425		"sha256", "sha256digest",
426		"sha384", "sha384digest",
427		"sha512", "sha512digest",
428		"size", NULL
429	};
430	static const char *keys_t[] = {
431		"tags", "time", "type", NULL
432	};
433	static const char *keys_u[] = {
434		"uid", "uname",	NULL
435	};
436	const char **keys;
437	int i;
438
439	switch (*p) {
440	case 'c': keys = keys_c; break;
441	case 'd': case 'f': keys = keys_df; break;
442	case 'g': keys = keys_g; break;
443	case 'i': case 'l': keys = keys_il; break;
444	case 'm': keys = keys_m; break;
445	case 'n': case 'o': keys = keys_no; break;
446	case 'r': keys = keys_r; break;
447	case 's': keys = keys_s; break;
448	case 't': keys = keys_t; break;
449	case 'u': keys = keys_u; break;
450	default: return (0);/* Unknown key */
451	}
452
453	for (i = 0; keys[i] != NULL; i++) {
454		int l = bid_keycmp(p, keys[i], len);
455		if (l > 0)
456			return (l);
457	}
458	return (0);/* Unknown key */
459}
460
461/*
462 * Test whether there is a set of mtree keywords.
463 * Returns the number of keyword.
464 * Returns -1 if we got incorrect sequence.
465 * This function expects a set of "<space characters>keyword=value".
466 * When "unset" is specified, expects a set of "<space characters>keyword".
467 */
468static int
469bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
470{
471	int l;
472	int keycnt = 0;
473
474	while (len > 0 && *p) {
475		int blank = 0;
476
477		/* Test whether there are blank characters in the line. */
478		while (len >0 && (*p == ' ' || *p == '\t')) {
479			++p;
480			--len;
481			blank = 1;
482		}
483		if (*p == '\n' || *p == '\r')
484			break;
485		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
486			break;
487		if (!blank && !last_is_path) /* No blank character. */
488			return (-1);
489		if (last_is_path && len == 0)
490				return (keycnt);
491
492		if (unset) {
493			l = bid_keycmp(p, "all", len);
494			if (l > 0)
495				return (1);
496		}
497		/* Test whether there is a correct key in the line. */
498		l = bid_keyword(p, len);
499		if (l == 0)
500			return (-1);/* Unknown keyword was found. */
501		p += l;
502		len -= l;
503		keycnt++;
504
505		/* Skip value */
506		if (*p == '=') {
507			int value = 0;
508			++p;
509			--len;
510			while (len > 0 && *p != ' ' && *p != '\t') {
511				++p;
512				--len;
513				value = 1;
514			}
515			/* A keyword should have a its value unless
516			 * "/unset" operation. */
517			if (!unset && value == 0)
518				return (-1);
519		}
520	}
521	return (keycnt);
522}
523
524static int
525bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
526{
527	int f = 0;
528	static const unsigned char safe_char[256] = {
529		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
530		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
531		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
532		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
533		/* 0123456789:;<>?  EXCLUSION:(=) */
534		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
535		/* @ABCDEFGHIJKLMNO */
536		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
537		/* PQRSTUVWXYZ[\]^_  */
538		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
539		/* `abcdefghijklmno */
540		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
541		/* pqrstuvwxyz{|}~ */
542		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
543		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
544		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
545		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
546		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
547		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
548		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
549		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
550		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
551	};
552	ssize_t ll;
553	const char *pp = p;
554	const char * const pp_end = pp + len;
555
556	*last_is_path = 0;
557	/*
558	 * Skip the path-name which is quoted.
559	 */
560	for (;pp < pp_end; ++pp) {
561		if (!safe_char[*(const unsigned char *)pp]) {
562			if (*pp != ' ' && *pp != '\t' && *pp != '\r'
563			    && *pp != '\n')
564				f = 0;
565			break;
566		}
567		f = 1;
568	}
569	ll = pp_end - pp;
570
571	/* If a path-name was not found at the first, try to check
572	 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
573	 * which places the path-name at the last. */
574	if (f == 0) {
575		const char *pb = p + len - nl;
576		int name_len = 0;
577		int slash;
578
579		/* The form D accepts only a single line for an entry. */
580		if (pb-2 >= p &&
581		    pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
582			return (-1);
583		if (pb-1 >= p && pb[-1] == '\\')
584			return (-1);
585
586		slash = 0;
587		while (p <= --pb && *pb != ' ' && *pb != '\t') {
588			if (!safe_char[*(const unsigned char *)pb])
589				return (-1);
590			name_len++;
591			/* The pathname should have a slash in this
592			 * format. */
593			if (*pb == '/')
594				slash = 1;
595		}
596		if (name_len == 0 || slash == 0)
597			return (-1);
598		/* If '/' is placed at the first in this field, this is not
599		 * a valid filename. */
600		if (pb[1] == '/')
601			return (-1);
602		ll = len - nl - name_len;
603		pp = p;
604		*last_is_path = 1;
605	}
606
607	return (bid_keyword_list(pp, ll, 0, *last_is_path));
608}
609
610#define MAX_BID_ENTRY	3
611
612static int
613mtree_bid(struct archive_read *a, int best_bid)
614{
615	const char *signature = "#mtree";
616	const char *p;
617
618	(void)best_bid; /* UNUSED */
619
620	/* Now let's look at the actual header and see if it matches. */
621	p = __archive_read_ahead(a, strlen(signature), NULL);
622	if (p == NULL)
623		return (-1);
624
625	if (memcmp(p, signature, strlen(signature)) == 0)
626		return (8 * (int)strlen(signature));
627
628	/*
629	 * There is not a mtree signature. Let's try to detect mtree format.
630	 */
631	return (detect_form(a, NULL));
632}
633
634static int
635detect_form(struct archive_read *a, int *is_form_d)
636{
637	const char *p;
638	ssize_t avail, ravail;
639	ssize_t detected_bytes = 0, len, nl;
640	int entry_cnt = 0, multiline = 0;
641	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
642			* (In this source we call it `form D') . */
643
644	if (is_form_d != NULL)
645		*is_form_d = 0;
646	p = __archive_read_ahead(a, 1, &avail);
647	if (p == NULL)
648		return (-1);
649	ravail = avail;
650	for (;;) {
651		len = next_line(a, &p, &avail, &ravail, &nl);
652		/* The terminal character of the line should be
653		 * a new line character, '\r\n' or '\n'. */
654		if (len <= 0 || nl == 0)
655			break;
656		if (!multiline) {
657			/* Leading whitespace is never significant,
658			 * ignore it. */
659			while (len > 0 && (*p == ' ' || *p == '\t')) {
660				++p;
661				--avail;
662				--len;
663			}
664			/* Skip comment or empty line. */
665			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
666				p += len;
667				avail -= len;
668				continue;
669			}
670		} else {
671			/* A continuance line; the terminal
672			 * character of previous line was '\' character. */
673			if (bid_keyword_list(p, len, 0, 0) <= 0)
674				break;
675			if (multiline == 1)
676				detected_bytes += len;
677			if (p[len-nl-1] != '\\') {
678				if (multiline == 1 &&
679				    ++entry_cnt >= MAX_BID_ENTRY)
680					break;
681				multiline = 0;
682			}
683			p += len;
684			avail -= len;
685			continue;
686		}
687		if (p[0] != '/') {
688			int last_is_path, keywords;
689
690			keywords = bid_entry(p, len, nl, &last_is_path);
691			if (keywords >= 0) {
692				detected_bytes += len;
693				if (form_D == 0) {
694					if (last_is_path)
695						form_D = 1;
696					else if (keywords > 0)
697						/* This line is not `form D'. */
698						form_D = -1;
699				} else if (form_D == 1) {
700					if (!last_is_path && keywords > 0)
701						/* This this is not `form D'
702						 * and We cannot accept mixed
703						 * format. */
704						break;
705				}
706				if (!last_is_path && p[len-nl-1] == '\\')
707					/* This line continues. */
708					multiline = 1;
709				else {
710					/* We've got plenty of correct lines
711					 * to assume that this file is a mtree
712					 * format. */
713					if (++entry_cnt >= MAX_BID_ENTRY)
714						break;
715				}
716			} else
717				break;
718		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
719			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
720				break;
721			/* This line continues. */
722			if (p[len-nl-1] == '\\')
723				multiline = 2;
724		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
725			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
726				break;
727			/* This line continues. */
728			if (p[len-nl-1] == '\\')
729				multiline = 2;
730		} else
731			break;
732
733		/* Test next line. */
734		p += len;
735		avail -= len;
736	}
737	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
738		if (is_form_d != NULL) {
739			if (form_D == 1)
740				*is_form_d = 1;
741		}
742		return (32);
743	}
744
745	return (0);
746}
747
748/*
749 * The extended mtree format permits multiple lines specifying
750 * attributes for each file.  For those entries, only the last line
751 * is actually used.  Practically speaking, that means we have
752 * to read the entire mtree file into memory up front.
753 *
754 * The parsing is done in two steps.  First, it is decided if a line
755 * changes the global defaults and if it is, processed accordingly.
756 * Otherwise, the options of the line are merged with the current
757 * global options.
758 */
759static int
760add_option(struct archive_read *a, struct mtree_option **global,
761    const char *value, size_t len)
762{
763	struct mtree_option *opt;
764
765	if ((opt = malloc(sizeof(*opt))) == NULL) {
766		archive_set_error(&a->archive, errno, "Can't allocate memory");
767		return (ARCHIVE_FATAL);
768	}
769	if ((opt->value = malloc(len + 1)) == NULL) {
770		free(opt);
771		archive_set_error(&a->archive, errno, "Can't allocate memory");
772		return (ARCHIVE_FATAL);
773	}
774	memcpy(opt->value, value, len);
775	opt->value[len] = '\0';
776	opt->next = *global;
777	*global = opt;
778	return (ARCHIVE_OK);
779}
780
781static void
782remove_option(struct mtree_option **global, const char *value, size_t len)
783{
784	struct mtree_option *iter, *last;
785
786	last = NULL;
787	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
788		if (strncmp(iter->value, value, len) == 0 &&
789		    (iter->value[len] == '\0' ||
790		     iter->value[len] == '='))
791			break;
792	}
793	if (iter == NULL)
794		return;
795	if (last == NULL)
796		*global = iter->next;
797	else
798		last->next = iter->next;
799
800	free(iter->value);
801	free(iter);
802}
803
804static int
805process_global_set(struct archive_read *a,
806    struct mtree_option **global, const char *line)
807{
808	const char *next, *eq;
809	size_t len;
810	int r;
811
812	line += 4;
813	for (;;) {
814		next = line + strspn(line, " \t\r\n");
815		if (*next == '\0')
816			return (ARCHIVE_OK);
817		line = next;
818		next = line + strcspn(line, " \t\r\n");
819		eq = strchr(line, '=');
820		if (eq > next)
821			len = next - line;
822		else
823			len = eq - line;
824
825		remove_option(global, line, len);
826		r = add_option(a, global, line, next - line);
827		if (r != ARCHIVE_OK)
828			return (r);
829		line = next;
830	}
831}
832
833static int
834process_global_unset(struct archive_read *a,
835    struct mtree_option **global, const char *line)
836{
837	const char *next;
838	size_t len;
839
840	line += 6;
841	if (strchr(line, '=') != NULL) {
842		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
843		    "/unset shall not contain `='");
844		return ARCHIVE_FATAL;
845	}
846
847	for (;;) {
848		next = line + strspn(line, " \t\r\n");
849		if (*next == '\0')
850			return (ARCHIVE_OK);
851		line = next;
852		len = strcspn(line, " \t\r\n");
853
854		if (len == 3 && strncmp(line, "all", 3) == 0) {
855			free_options(*global);
856			*global = NULL;
857		} else {
858			remove_option(global, line, len);
859		}
860
861		line += len;
862	}
863}
864
865static int
866process_add_entry(struct archive_read *a, struct mtree *mtree,
867    struct mtree_option **global, const char *line, ssize_t line_len,
868    struct mtree_entry **last_entry, int is_form_d)
869{
870	struct mtree_entry *entry, *ht_iter;
871	struct mtree_option *iter;
872	const char *next, *eq, *name, *end;
873	size_t name_len, len;
874	int r, i;
875	unsigned int ht_idx;
876
877	if ((entry = malloc(sizeof(*entry))) == NULL) {
878		archive_set_error(&a->archive, errno, "Can't allocate memory");
879		return (ARCHIVE_FATAL);
880	}
881	entry->next = NULL;
882	entry->options = NULL;
883	entry->name = NULL;
884	entry->used = 0;
885	entry->full = 0;
886	entry->name_hash = 0;
887	entry->hashtable_next = NULL;
888
889	/* Add this entry to list. */
890	if (*last_entry == NULL)
891		mtree->entries = entry;
892	else
893		(*last_entry)->next = entry;
894	*last_entry = entry;
895
896	if (is_form_d) {
897		/* Filename is last item on line. */
898		/* Adjust line_len to trim trailing whitespace */
899		while (line_len > 0) {
900			char last_character = line[line_len - 1];
901			if (last_character == '\r'
902			    || last_character == '\n'
903			    || last_character == '\t'
904			    || last_character == ' ') {
905				line_len--;
906			} else {
907				break;
908			}
909		}
910		/* Name starts after the last whitespace separator */
911		name = line;
912		for (i = 0; i < line_len; i++) {
913			if (line[i] == '\r'
914			    || line[i] == '\n'
915			    || line[i] == '\t'
916			    || line[i] == ' ') {
917				name = line + i + 1;
918			}
919		}
920		name_len = line + line_len - name;
921		end = name;
922	} else {
923		/* Filename is first item on line */
924		name_len = strcspn(line, " \t\r\n");
925		name = line;
926		line += name_len;
927		end = line + line_len;
928	}
929	/* name/name_len is the name within the line. */
930	/* line..end brackets the entire line except the name */
931
932	if ((entry->name = malloc(name_len + 1)) == NULL) {
933		archive_set_error(&a->archive, errno, "Can't allocate memory");
934		return (ARCHIVE_FATAL);
935	}
936
937	memcpy(entry->name, name, name_len);
938	entry->name[name_len] = '\0';
939	parse_escapes(entry->name, entry);
940	entry->name_hash = hash(entry->name);
941
942	ht_idx = entry->name_hash % MTREE_HASHTABLE_SIZE;
943	if ((ht_iter = mtree->entry_hashtable[ht_idx]) != NULL) {
944		while (ht_iter->hashtable_next)
945			ht_iter = ht_iter->hashtable_next;
946		ht_iter->hashtable_next = entry;
947	} else {
948		mtree->entry_hashtable[ht_idx] = entry;
949	}
950
951	for (iter = *global; iter != NULL; iter = iter->next) {
952		r = add_option(a, &entry->options, iter->value,
953		    strlen(iter->value));
954		if (r != ARCHIVE_OK)
955			return (r);
956	}
957
958	for (;;) {
959		next = line + strspn(line, " \t\r\n");
960		if (*next == '\0')
961			return (ARCHIVE_OK);
962		if (next >= end)
963			return (ARCHIVE_OK);
964		line = next;
965		next = line + strcspn(line, " \t\r\n");
966		eq = strchr(line, '=');
967		if (eq == NULL || eq > next)
968			len = next - line;
969		else
970			len = eq - line;
971
972		remove_option(&entry->options, line, len);
973		r = add_option(a, &entry->options, line, next - line);
974		if (r != ARCHIVE_OK)
975			return (r);
976		line = next;
977	}
978}
979
980static int
981read_mtree(struct archive_read *a, struct mtree *mtree)
982{
983	ssize_t len;
984	uintmax_t counter;
985	char *p;
986	struct mtree_option *global;
987	struct mtree_entry *last_entry;
988	int r, is_form_d;
989
990	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
991	mtree->archive_format_name = "mtree";
992
993	global = NULL;
994	last_entry = NULL;
995
996	(void)detect_form(a, &is_form_d);
997
998	for (counter = 1; ; ++counter) {
999		len = readline(a, mtree, &p, 65536);
1000		if (len == 0) {
1001			mtree->this_entry = mtree->entries;
1002			free_options(global);
1003			return (ARCHIVE_OK);
1004		}
1005		if (len < 0) {
1006			free_options(global);
1007			return ((int)len);
1008		}
1009		/* Leading whitespace is never significant, ignore it. */
1010		while (*p == ' ' || *p == '\t') {
1011			++p;
1012			--len;
1013		}
1014		/* Skip content lines and blank lines. */
1015		if (*p == '#')
1016			continue;
1017		if (*p == '\r' || *p == '\n' || *p == '\0')
1018			continue;
1019		if (*p != '/') {
1020			r = process_add_entry(a, mtree, &global, p, len,
1021			    &last_entry, is_form_d);
1022		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
1023			if (p[4] != ' ' && p[4] != '\t')
1024				break;
1025			r = process_global_set(a, &global, p);
1026		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
1027			if (p[6] != ' ' && p[6] != '\t')
1028				break;
1029			r = process_global_unset(a, &global, p);
1030		} else
1031			break;
1032
1033		if (r != ARCHIVE_OK) {
1034			free_options(global);
1035			return r;
1036		}
1037	}
1038
1039	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1040	    "Can't parse line %ju", counter);
1041	free_options(global);
1042	return (ARCHIVE_FATAL);
1043}
1044
1045/*
1046 * Read in the entire mtree file into memory on the first request.
1047 * Then use the next unused file to satisfy each header request.
1048 */
1049static int
1050read_header(struct archive_read *a, struct archive_entry *entry)
1051{
1052	struct mtree *mtree;
1053	char *p;
1054	int r, use_next;
1055
1056	mtree = (struct mtree *)(a->format->data);
1057
1058	if (mtree->fd >= 0) {
1059		close(mtree->fd);
1060		mtree->fd = -1;
1061	}
1062
1063	if (mtree->entries == NULL) {
1064		mtree->resolver = archive_entry_linkresolver_new();
1065		if (mtree->resolver == NULL)
1066			return ARCHIVE_FATAL;
1067		archive_entry_linkresolver_set_strategy(mtree->resolver,
1068		    ARCHIVE_FORMAT_MTREE);
1069		r = read_mtree(a, mtree);
1070		if (r != ARCHIVE_OK)
1071			return (r);
1072	}
1073
1074	a->archive.archive_format = mtree->archive_format;
1075	a->archive.archive_format_name = mtree->archive_format_name;
1076
1077	for (;;) {
1078		if (mtree->this_entry == NULL)
1079			return (ARCHIVE_EOF);
1080		if (strcmp(mtree->this_entry->name, "..") == 0) {
1081			mtree->this_entry->used = 1;
1082			if (archive_strlen(&mtree->current_dir) > 0) {
1083				/* Roll back current path. */
1084				p = mtree->current_dir.s
1085				    + mtree->current_dir.length - 1;
1086				while (p >= mtree->current_dir.s && *p != '/')
1087					--p;
1088				if (p >= mtree->current_dir.s)
1089					--p;
1090				mtree->current_dir.length
1091				    = p - mtree->current_dir.s + 1;
1092			}
1093		}
1094		if (!mtree->this_entry->used) {
1095			use_next = 0;
1096			r = parse_file(a, entry, mtree, mtree->this_entry,
1097				&use_next);
1098			if (use_next == 0)
1099				return (r);
1100		}
1101		mtree->this_entry = mtree->this_entry->next;
1102	}
1103}
1104
1105/*
1106 * A single file can have multiple lines contribute specifications.
1107 * Parse as many lines as necessary, then pull additional information
1108 * from a backing file on disk as necessary.
1109 */
1110static int
1111parse_file(struct archive_read *a, struct archive_entry *entry,
1112    struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1113{
1114	const char *path;
1115	struct stat st_storage, *st;
1116	struct mtree_entry *mp;
1117	struct archive_entry *sparse_entry;
1118	int r = ARCHIVE_OK, r1, parsed_kws;
1119
1120	mentry->used = 1;
1121
1122	/* Initialize reasonable defaults. */
1123	archive_entry_set_filetype(entry, AE_IFREG);
1124	archive_entry_set_size(entry, 0);
1125	archive_string_empty(&mtree->contents_name);
1126
1127	/* Parse options from this line. */
1128	parsed_kws = 0;
1129	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1130
1131	if (mentry->full) {
1132		archive_entry_copy_pathname(entry, mentry->name);
1133		/*
1134		 * "Full" entries are allowed to have multiple lines
1135		 * and those lines aren't required to be adjacent.  We
1136		 * don't support multiple lines for "relative" entries
1137		 * nor do we make any attempt to merge data from
1138		 * separate "relative" and "full" entries.  (Merging
1139		 * "relative" and "full" entries would require dealing
1140		 * with pathname canonicalization, which is a very
1141		 * tricky subject.)
1142		 */
1143		for (mp = mentry->hashtable_next; mp != NULL; mp = mp->hashtable_next) {
1144			if (mp->full && !mp->used
1145					&& mentry->name_hash == mp->name_hash
1146					&& strcmp(mentry->name, mp->name) == 0) {
1147				/* Later lines override earlier ones. */
1148				mp->used = 1;
1149				r1 = parse_line(a, entry, mtree, mp,
1150				    &parsed_kws);
1151				if (r1 < r)
1152					r = r1;
1153			}
1154		}
1155	} else {
1156		/*
1157		 * Relative entries require us to construct
1158		 * the full path and possibly update the
1159		 * current directory.
1160		 */
1161		size_t n = archive_strlen(&mtree->current_dir);
1162		if (n > 0)
1163			archive_strcat(&mtree->current_dir, "/");
1164		archive_strcat(&mtree->current_dir, mentry->name);
1165		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1166		if (archive_entry_filetype(entry) != AE_IFDIR)
1167			mtree->current_dir.length = n;
1168	}
1169
1170	if (mtree->checkfs) {
1171		/*
1172		 * Try to open and stat the file to get the real size
1173		 * and other file info.  It would be nice to avoid
1174		 * this here so that getting a listing of an mtree
1175		 * wouldn't require opening every referenced contents
1176		 * file.  But then we wouldn't know the actual
1177		 * contents size, so I don't see a really viable way
1178		 * around this.  (Also, we may want to someday pull
1179		 * other unspecified info from the contents file on
1180		 * disk.)
1181		 */
1182		mtree->fd = -1;
1183		if (archive_strlen(&mtree->contents_name) > 0)
1184			path = mtree->contents_name.s;
1185		else
1186			path = archive_entry_pathname(entry);
1187
1188		if (archive_entry_filetype(entry) == AE_IFREG ||
1189				archive_entry_filetype(entry) == AE_IFDIR) {
1190			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1191			__archive_ensure_cloexec_flag(mtree->fd);
1192			if (mtree->fd == -1 &&
1193				(errno != ENOENT ||
1194				 archive_strlen(&mtree->contents_name) > 0)) {
1195				archive_set_error(&a->archive, errno,
1196						"Can't open %s", path);
1197				r = ARCHIVE_WARN;
1198			}
1199		}
1200
1201		st = &st_storage;
1202		if (mtree->fd >= 0) {
1203			if (fstat(mtree->fd, st) == -1) {
1204				archive_set_error(&a->archive, errno,
1205						"Could not fstat %s", path);
1206				r = ARCHIVE_WARN;
1207				/* If we can't stat it, don't keep it open. */
1208				close(mtree->fd);
1209				mtree->fd = -1;
1210				st = NULL;
1211			}
1212		} else if (lstat(path, st) == -1) {
1213			st = NULL;
1214		}
1215
1216		/*
1217		 * Check for a mismatch between the type in the specification
1218		 * and the type of the contents object on disk.
1219		 */
1220		if (st != NULL) {
1221			if (((st->st_mode & S_IFMT) == S_IFREG &&
1222			      archive_entry_filetype(entry) == AE_IFREG)
1223#ifdef S_IFLNK
1224			  ||((st->st_mode & S_IFMT) == S_IFLNK &&
1225			      archive_entry_filetype(entry) == AE_IFLNK)
1226#endif
1227#ifdef S_IFSOCK
1228			  ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1229			      archive_entry_filetype(entry) == AE_IFSOCK)
1230#endif
1231#ifdef S_IFCHR
1232			  ||((st->st_mode & S_IFMT) == S_IFCHR &&
1233			      archive_entry_filetype(entry) == AE_IFCHR)
1234#endif
1235#ifdef S_IFBLK
1236			  ||((st->st_mode & S_IFMT) == S_IFBLK &&
1237			      archive_entry_filetype(entry) == AE_IFBLK)
1238#endif
1239			  ||((st->st_mode & S_IFMT) == S_IFDIR &&
1240			      archive_entry_filetype(entry) == AE_IFDIR)
1241#ifdef S_IFIFO
1242			  ||((st->st_mode & S_IFMT) == S_IFIFO &&
1243			      archive_entry_filetype(entry) == AE_IFIFO)
1244#endif
1245			) {
1246				/* Types match. */
1247			} else {
1248				/* Types don't match; bail out gracefully. */
1249				if (mtree->fd >= 0)
1250					close(mtree->fd);
1251				mtree->fd = -1;
1252				if (parsed_kws & MTREE_HAS_OPTIONAL) {
1253					/* It's not an error for an optional
1254					 * entry to not match disk. */
1255					*use_next = 1;
1256				} else if (r == ARCHIVE_OK) {
1257					archive_set_error(&a->archive,
1258					    ARCHIVE_ERRNO_MISC,
1259					    "mtree specification has different"
1260					    " type for %s",
1261					    archive_entry_pathname(entry));
1262					r = ARCHIVE_WARN;
1263				}
1264				return (r);
1265			}
1266		}
1267
1268		/*
1269		 * If there is a contents file on disk, pick some of the
1270		 * metadata from that file.  For most of these, we only
1271		 * set it from the contents if it wasn't already parsed
1272		 * from the specification.
1273		 */
1274		if (st != NULL) {
1275			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1276				(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1277				(archive_entry_filetype(entry) == AE_IFCHR ||
1278				 archive_entry_filetype(entry) == AE_IFBLK))
1279				archive_entry_set_rdev(entry, st->st_rdev);
1280			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1281				== 0 ||
1282			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1283				archive_entry_set_gid(entry, st->st_gid);
1284			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1285				== 0 ||
1286			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1287				archive_entry_set_uid(entry, st->st_uid);
1288			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1289			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1290#if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1291				archive_entry_set_mtime(entry, st->st_mtime,
1292						st->st_mtimespec.tv_nsec);
1293#elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1294				archive_entry_set_mtime(entry, st->st_mtime,
1295						st->st_mtim.tv_nsec);
1296#elif HAVE_STRUCT_STAT_ST_MTIME_N
1297				archive_entry_set_mtime(entry, st->st_mtime,
1298						st->st_mtime_n);
1299#elif HAVE_STRUCT_STAT_ST_UMTIME
1300				archive_entry_set_mtime(entry, st->st_mtime,
1301						st->st_umtime*1000);
1302#elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1303				archive_entry_set_mtime(entry, st->st_mtime,
1304						st->st_mtime_usec*1000);
1305#else
1306				archive_entry_set_mtime(entry, st->st_mtime, 0);
1307#endif
1308			}
1309			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1310			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1311				archive_entry_set_nlink(entry, st->st_nlink);
1312			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1313			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1314				archive_entry_set_perm(entry, st->st_mode);
1315			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1316			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1317				archive_entry_set_size(entry, st->st_size);
1318			archive_entry_set_ino(entry, st->st_ino);
1319			archive_entry_set_dev(entry, st->st_dev);
1320
1321			archive_entry_linkify(mtree->resolver, &entry,
1322				&sparse_entry);
1323		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1324			/*
1325			 * Couldn't open the entry, stat it or the on-disk type
1326			 * didn't match.  If this entry is optional, just
1327			 * ignore it and read the next header entry.
1328			 */
1329			*use_next = 1;
1330			return ARCHIVE_OK;
1331		}
1332	}
1333
1334	mtree->cur_size = archive_entry_size(entry);
1335	mtree->offset = 0;
1336
1337	return r;
1338}
1339
1340/*
1341 * Each line contains a sequence of keywords.
1342 */
1343static int
1344parse_line(struct archive_read *a, struct archive_entry *entry,
1345    struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1346{
1347	struct mtree_option *iter;
1348	int r = ARCHIVE_OK, r1;
1349
1350	for (iter = mp->options; iter != NULL; iter = iter->next) {
1351		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1352		if (r1 < r)
1353			r = r1;
1354	}
1355	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1356		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1357		    "Missing type keyword in mtree specification");
1358		return (ARCHIVE_WARN);
1359	}
1360	return (r);
1361}
1362
1363/*
1364 * Device entries have one of the following forms:
1365 *  - raw dev_t
1366 *  - format,major,minor[,subdevice]
1367 * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1368 */
1369
1370/* strsep() is not in C90, but strcspn() is. */
1371/* Taken from http://unixpapa.com/incnote/string.html */
1372static char *
1373la_strsep(char **sp, const char *sep)
1374{
1375	char *p, *s;
1376	if (sp == NULL || *sp == NULL || **sp == '\0')
1377		return(NULL);
1378	s = *sp;
1379	p = s + strcspn(s, sep);
1380	if (*p != '\0')
1381		*p++ = '\0';
1382	*sp = p;
1383	return(s);
1384}
1385
1386static int
1387parse_device(dev_t *pdev, struct archive *a, char *val)
1388{
1389#define MAX_PACK_ARGS 3
1390	unsigned long numbers[MAX_PACK_ARGS];
1391	char *p, *dev;
1392	int argc;
1393	pack_t *pack;
1394	dev_t result;
1395	const char *error = NULL;
1396
1397	memset(pdev, 0, sizeof(*pdev));
1398	if ((dev = strchr(val, ',')) != NULL) {
1399		/*
1400		 * Device's major/minor are given in a specified format.
1401		 * Decode and pack it accordingly.
1402		 */
1403		*dev++ = '\0';
1404		if ((pack = pack_find(val)) == NULL) {
1405			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1406			    "Unknown format `%s'", val);
1407			return ARCHIVE_WARN;
1408		}
1409		argc = 0;
1410		while ((p = la_strsep(&dev, ",")) != NULL) {
1411			if (*p == '\0') {
1412				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1413				    "Missing number");
1414				return ARCHIVE_WARN;
1415			}
1416			if (argc >= MAX_PACK_ARGS) {
1417				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1418				    "Too many arguments");
1419				return ARCHIVE_WARN;
1420			}
1421			numbers[argc++] = (unsigned long)mtree_atol(&p);
1422		}
1423		if (argc < 2) {
1424			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1425			    "Not enough arguments");
1426			return ARCHIVE_WARN;
1427		}
1428		result = (*pack)(argc, numbers, &error);
1429		if (error != NULL) {
1430			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1431			    "%s", error);
1432			return ARCHIVE_WARN;
1433		}
1434	} else {
1435		/* file system raw value. */
1436		result = (dev_t)mtree_atol(&val);
1437	}
1438	*pdev = result;
1439	return ARCHIVE_OK;
1440#undef MAX_PACK_ARGS
1441}
1442
1443/*
1444 * Parse a single keyword and its value.
1445 */
1446static int
1447parse_keyword(struct archive_read *a, struct mtree *mtree,
1448    struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1449{
1450	char *val, *key;
1451
1452	key = opt->value;
1453
1454	if (*key == '\0')
1455		return (ARCHIVE_OK);
1456
1457	if (strcmp(key, "nochange") == 0) {
1458		*parsed_kws |= MTREE_HAS_NOCHANGE;
1459		return (ARCHIVE_OK);
1460	}
1461	if (strcmp(key, "optional") == 0) {
1462		*parsed_kws |= MTREE_HAS_OPTIONAL;
1463		return (ARCHIVE_OK);
1464	}
1465	if (strcmp(key, "ignore") == 0) {
1466		/*
1467		 * The mtree processing is not recursive, so
1468		 * recursion will only happen for explicitly listed
1469		 * entries.
1470		 */
1471		return (ARCHIVE_OK);
1472	}
1473
1474	val = strchr(key, '=');
1475	if (val == NULL) {
1476		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1477		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1478		return (ARCHIVE_WARN);
1479	}
1480
1481	*val = '\0';
1482	++val;
1483
1484	switch (key[0]) {
1485	case 'c':
1486		if (strcmp(key, "content") == 0
1487		    || strcmp(key, "contents") == 0) {
1488			parse_escapes(val, NULL);
1489			archive_strcpy(&mtree->contents_name, val);
1490			break;
1491		}
1492		if (strcmp(key, "cksum") == 0)
1493			break;
1494	case 'd':
1495		if (strcmp(key, "device") == 0) {
1496			/* stat(2) st_rdev field, e.g. the major/minor IDs
1497			 * of a char/block special file */
1498			int r;
1499			dev_t dev;
1500
1501			*parsed_kws |= MTREE_HAS_DEVICE;
1502			r = parse_device(&dev, &a->archive, val);
1503			if (r == ARCHIVE_OK)
1504				archive_entry_set_rdev(entry, dev);
1505			return r;
1506		}
1507	case 'f':
1508		if (strcmp(key, "flags") == 0) {
1509			*parsed_kws |= MTREE_HAS_FFLAGS;
1510			archive_entry_copy_fflags_text(entry, val);
1511			break;
1512		}
1513	case 'g':
1514		if (strcmp(key, "gid") == 0) {
1515			*parsed_kws |= MTREE_HAS_GID;
1516			archive_entry_set_gid(entry, mtree_atol10(&val));
1517			break;
1518		}
1519		if (strcmp(key, "gname") == 0) {
1520			*parsed_kws |= MTREE_HAS_GNAME;
1521			archive_entry_copy_gname(entry, val);
1522			break;
1523		}
1524	case 'i':
1525		if (strcmp(key, "inode") == 0) {
1526			archive_entry_set_ino(entry, mtree_atol10(&val));
1527			break;
1528		}
1529	case 'l':
1530		if (strcmp(key, "link") == 0) {
1531			archive_entry_copy_symlink(entry, val);
1532			break;
1533		}
1534	case 'm':
1535		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1536			break;
1537		if (strcmp(key, "mode") == 0) {
1538			if (val[0] >= '0' && val[0] <= '9') {
1539				*parsed_kws |= MTREE_HAS_PERM;
1540				archive_entry_set_perm(entry,
1541				    (mode_t)mtree_atol8(&val));
1542			} else {
1543				archive_set_error(&a->archive,
1544				    ARCHIVE_ERRNO_FILE_FORMAT,
1545				    "Symbolic mode \"%s\" unsupported", val);
1546				return ARCHIVE_WARN;
1547			}
1548			break;
1549		}
1550	case 'n':
1551		if (strcmp(key, "nlink") == 0) {
1552			*parsed_kws |= MTREE_HAS_NLINK;
1553			archive_entry_set_nlink(entry,
1554				(unsigned int)mtree_atol10(&val));
1555			break;
1556		}
1557	case 'r':
1558		if (strcmp(key, "resdevice") == 0) {
1559			/* stat(2) st_dev field, e.g. the device ID where the
1560			 * inode resides */
1561			int r;
1562			dev_t dev;
1563
1564			r = parse_device(&dev, &a->archive, val);
1565			if (r == ARCHIVE_OK)
1566				archive_entry_set_dev(entry, dev);
1567			return r;
1568		}
1569		if (strcmp(key, "rmd160") == 0 ||
1570		    strcmp(key, "rmd160digest") == 0)
1571			break;
1572	case 's':
1573		if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1574			break;
1575		if (strcmp(key, "sha256") == 0 ||
1576		    strcmp(key, "sha256digest") == 0)
1577			break;
1578		if (strcmp(key, "sha384") == 0 ||
1579		    strcmp(key, "sha384digest") == 0)
1580			break;
1581		if (strcmp(key, "sha512") == 0 ||
1582		    strcmp(key, "sha512digest") == 0)
1583			break;
1584		if (strcmp(key, "size") == 0) {
1585			archive_entry_set_size(entry, mtree_atol10(&val));
1586			break;
1587		}
1588	case 't':
1589		if (strcmp(key, "tags") == 0) {
1590			/*
1591			 * Comma delimited list of tags.
1592			 * Ignore the tags for now, but the interface
1593			 * should be extended to allow inclusion/exclusion.
1594			 */
1595			break;
1596		}
1597		if (strcmp(key, "time") == 0) {
1598			int64_t m;
1599			int64_t my_time_t_max = get_time_t_max();
1600			int64_t my_time_t_min = get_time_t_min();
1601			long ns = 0;
1602
1603			*parsed_kws |= MTREE_HAS_MTIME;
1604			m = mtree_atol10(&val);
1605			/* Replicate an old mtree bug:
1606			 * 123456789.1 represents 123456789
1607			 * seconds and 1 nanosecond. */
1608			if (*val == '.') {
1609				++val;
1610				ns = (long)mtree_atol10(&val);
1611				if (ns < 0)
1612					ns = 0;
1613				else if (ns > 999999999)
1614					ns = 999999999;
1615			}
1616			if (m > my_time_t_max)
1617				m = my_time_t_max;
1618			else if (m < my_time_t_min)
1619				m = my_time_t_min;
1620			archive_entry_set_mtime(entry, (time_t)m, ns);
1621			break;
1622		}
1623		if (strcmp(key, "type") == 0) {
1624			switch (val[0]) {
1625			case 'b':
1626				if (strcmp(val, "block") == 0) {
1627					archive_entry_set_filetype(entry, AE_IFBLK);
1628					break;
1629				}
1630			case 'c':
1631				if (strcmp(val, "char") == 0) {
1632					archive_entry_set_filetype(entry,
1633						AE_IFCHR);
1634					break;
1635				}
1636			case 'd':
1637				if (strcmp(val, "dir") == 0) {
1638					archive_entry_set_filetype(entry,
1639						AE_IFDIR);
1640					break;
1641				}
1642			case 'f':
1643				if (strcmp(val, "fifo") == 0) {
1644					archive_entry_set_filetype(entry,
1645						AE_IFIFO);
1646					break;
1647				}
1648				if (strcmp(val, "file") == 0) {
1649					archive_entry_set_filetype(entry,
1650						AE_IFREG);
1651					break;
1652				}
1653			case 'l':
1654				if (strcmp(val, "link") == 0) {
1655					archive_entry_set_filetype(entry,
1656						AE_IFLNK);
1657					break;
1658				}
1659			default:
1660				archive_set_error(&a->archive,
1661				    ARCHIVE_ERRNO_FILE_FORMAT,
1662				    "Unrecognized file type \"%s\"; "
1663				    "assuming \"file\"", val);
1664				archive_entry_set_filetype(entry, AE_IFREG);
1665				return (ARCHIVE_WARN);
1666			}
1667			*parsed_kws |= MTREE_HAS_TYPE;
1668			break;
1669		}
1670	case 'u':
1671		if (strcmp(key, "uid") == 0) {
1672			*parsed_kws |= MTREE_HAS_UID;
1673			archive_entry_set_uid(entry, mtree_atol10(&val));
1674			break;
1675		}
1676		if (strcmp(key, "uname") == 0) {
1677			*parsed_kws |= MTREE_HAS_UNAME;
1678			archive_entry_copy_uname(entry, val);
1679			break;
1680		}
1681	default:
1682		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1683		    "Unrecognized key %s=%s", key, val);
1684		return (ARCHIVE_WARN);
1685	}
1686	return (ARCHIVE_OK);
1687}
1688
1689static int
1690read_data(struct archive_read *a, const void **buff, size_t *size,
1691    int64_t *offset)
1692{
1693	size_t bytes_to_read;
1694	ssize_t bytes_read;
1695	struct mtree *mtree;
1696
1697	mtree = (struct mtree *)(a->format->data);
1698	if (mtree->fd < 0) {
1699		*buff = NULL;
1700		*offset = 0;
1701		*size = 0;
1702		return (ARCHIVE_EOF);
1703	}
1704	if (mtree->buff == NULL) {
1705		mtree->buffsize = 64 * 1024;
1706		mtree->buff = malloc(mtree->buffsize);
1707		if (mtree->buff == NULL) {
1708			archive_set_error(&a->archive, ENOMEM,
1709			    "Can't allocate memory");
1710			return (ARCHIVE_FATAL);
1711		}
1712	}
1713
1714	*buff = mtree->buff;
1715	*offset = mtree->offset;
1716	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1717		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1718	else
1719		bytes_to_read = mtree->buffsize;
1720	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1721	if (bytes_read < 0) {
1722		archive_set_error(&a->archive, errno, "Can't read");
1723		return (ARCHIVE_WARN);
1724	}
1725	if (bytes_read == 0) {
1726		*size = 0;
1727		return (ARCHIVE_EOF);
1728	}
1729	mtree->offset += bytes_read;
1730	*size = bytes_read;
1731	return (ARCHIVE_OK);
1732}
1733
1734/* Skip does nothing except possibly close the contents file. */
1735static int
1736skip(struct archive_read *a)
1737{
1738	struct mtree *mtree;
1739
1740	mtree = (struct mtree *)(a->format->data);
1741	if (mtree->fd >= 0) {
1742		close(mtree->fd);
1743		mtree->fd = -1;
1744	}
1745	return (ARCHIVE_OK);
1746}
1747
1748/*
1749 * Since parsing backslash sequences always makes strings shorter,
1750 * we can always do this conversion in-place.
1751 */
1752static void
1753parse_escapes(char *src, struct mtree_entry *mentry)
1754{
1755	char *dest = src;
1756	char c;
1757
1758	if (mentry != NULL && strcmp(src, ".") == 0)
1759		mentry->full = 1;
1760
1761	while (*src != '\0') {
1762		c = *src++;
1763		if (c == '/' && mentry != NULL)
1764			mentry->full = 1;
1765		if (c == '\\') {
1766			switch (src[0]) {
1767			case '0':
1768				if (src[1] < '0' || src[1] > '7') {
1769					c = 0;
1770					++src;
1771					break;
1772				}
1773				/* FALLTHROUGH */
1774			case '1':
1775			case '2':
1776			case '3':
1777				if (src[1] >= '0' && src[1] <= '7' &&
1778				    src[2] >= '0' && src[2] <= '7') {
1779					c = (src[0] - '0') << 6;
1780					c |= (src[1] - '0') << 3;
1781					c |= (src[2] - '0');
1782					src += 3;
1783				}
1784				break;
1785			case 'a':
1786				c = '\a';
1787				++src;
1788				break;
1789			case 'b':
1790				c = '\b';
1791				++src;
1792				break;
1793			case 'f':
1794				c = '\f';
1795				++src;
1796				break;
1797			case 'n':
1798				c = '\n';
1799				++src;
1800				break;
1801			case 'r':
1802				c = '\r';
1803				++src;
1804				break;
1805			case 's':
1806				c = ' ';
1807				++src;
1808				break;
1809			case 't':
1810				c = '\t';
1811				++src;
1812				break;
1813			case 'v':
1814				c = '\v';
1815				++src;
1816				break;
1817			case '\\':
1818				c = '\\';
1819				++src;
1820				break;
1821			}
1822		}
1823		*dest++ = c;
1824	}
1825	*dest = '\0';
1826}
1827
1828/*
1829 * Note that this implementation does not (and should not!) obey
1830 * locale settings; you cannot simply substitute strtol here, since
1831 * it does obey locale.
1832 */
1833static int64_t
1834mtree_atol8(char **p)
1835{
1836	int64_t	l, limit, last_digit_limit;
1837	int digit, base;
1838
1839	base = 8;
1840	limit = INT64_MAX / base;
1841	last_digit_limit = INT64_MAX % base;
1842
1843	l = 0;
1844	digit = **p - '0';
1845	while (digit >= 0 && digit < base) {
1846		if (l>limit || (l == limit && digit > last_digit_limit)) {
1847			l = INT64_MAX; /* Truncate on overflow. */
1848			break;
1849		}
1850		l = (l * base) + digit;
1851		digit = *++(*p) - '0';
1852	}
1853	return (l);
1854}
1855
1856/*
1857 * Note that this implementation does not (and should not!) obey
1858 * locale settings; you cannot simply substitute strtol here, since
1859 * it does obey locale.
1860 */
1861static int64_t
1862mtree_atol10(char **p)
1863{
1864	int64_t l, limit, last_digit_limit;
1865	int base, digit, sign;
1866
1867	base = 10;
1868
1869	if (**p == '-') {
1870		sign = -1;
1871		limit = ((uint64_t)(INT64_MAX) + 1) / base;
1872		last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1873		++(*p);
1874	} else {
1875		sign = 1;
1876		limit = INT64_MAX / base;
1877		last_digit_limit = INT64_MAX % base;
1878	}
1879
1880	l = 0;
1881	digit = **p - '0';
1882	while (digit >= 0 && digit < base) {
1883		if (l > limit || (l == limit && digit > last_digit_limit))
1884			return (sign < 0) ? INT64_MIN : INT64_MAX;
1885		l = (l * base) + digit;
1886		digit = *++(*p) - '0';
1887	}
1888	return (sign < 0) ? -l : l;
1889}
1890
1891/* Parse a hex digit. */
1892static int
1893parsehex(char c)
1894{
1895	if (c >= '0' && c <= '9')
1896		return c - '0';
1897	else if (c >= 'a' && c <= 'f')
1898		return c - 'a';
1899	else if (c >= 'A' && c <= 'F')
1900		return c - 'A';
1901	else
1902		return -1;
1903}
1904
1905/*
1906 * Note that this implementation does not (and should not!) obey
1907 * locale settings; you cannot simply substitute strtol here, since
1908 * it does obey locale.
1909 */
1910static int64_t
1911mtree_atol16(char **p)
1912{
1913	int64_t l, limit, last_digit_limit;
1914	int base, digit, sign;
1915
1916	base = 16;
1917
1918	if (**p == '-') {
1919		sign = -1;
1920		limit = ((uint64_t)(INT64_MAX) + 1) / base;
1921		last_digit_limit = ((uint64_t)(INT64_MAX) + 1) % base;
1922		++(*p);
1923	} else {
1924		sign = 1;
1925		limit = INT64_MAX / base;
1926		last_digit_limit = INT64_MAX % base;
1927	}
1928
1929	l = 0;
1930	digit = parsehex(**p);
1931	while (digit >= 0 && digit < base) {
1932		if (l > limit || (l == limit && digit > last_digit_limit))
1933			return (sign < 0) ? INT64_MIN : INT64_MAX;
1934		l = (l * base) + digit;
1935		digit = parsehex(*++(*p));
1936	}
1937	return (sign < 0) ? -l : l;
1938}
1939
1940static int64_t
1941mtree_atol(char **p)
1942{
1943	if (**p != '0')
1944		return mtree_atol10(p);
1945	if ((*p)[1] == 'x' || (*p)[1] == 'X') {
1946		*p += 2;
1947		return mtree_atol16(p);
1948	}
1949	return mtree_atol8(p);
1950}
1951
1952/*
1953 * Returns length of line (including trailing newline)
1954 * or negative on error.  'start' argument is updated to
1955 * point to first character of line.
1956 */
1957static ssize_t
1958readline(struct archive_read *a, struct mtree *mtree, char **start,
1959    ssize_t limit)
1960{
1961	ssize_t bytes_read;
1962	ssize_t total_size = 0;
1963	ssize_t find_off = 0;
1964	const void *t;
1965	void *nl;
1966	char *u;
1967
1968	/* Accumulate line in a line buffer. */
1969	for (;;) {
1970		/* Read some more. */
1971		t = __archive_read_ahead(a, 1, &bytes_read);
1972		if (t == NULL)
1973			return (0);
1974		if (bytes_read < 0)
1975			return (ARCHIVE_FATAL);
1976		nl = memchr(t, '\n', bytes_read);
1977		/* If we found '\n', trim the read to end exactly there. */
1978		if (nl != NULL) {
1979			bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1980		}
1981		if (total_size + bytes_read + 1 > limit) {
1982			archive_set_error(&a->archive,
1983			    ARCHIVE_ERRNO_FILE_FORMAT,
1984			    "Line too long");
1985			return (ARCHIVE_FATAL);
1986		}
1987		if (archive_string_ensure(&mtree->line,
1988			total_size + bytes_read + 1) == NULL) {
1989			archive_set_error(&a->archive, ENOMEM,
1990			    "Can't allocate working buffer");
1991			return (ARCHIVE_FATAL);
1992		}
1993		/* Append new bytes to string. */
1994		memcpy(mtree->line.s + total_size, t, bytes_read);
1995		__archive_read_consume(a, bytes_read);
1996		total_size += bytes_read;
1997		mtree->line.s[total_size] = '\0';
1998
1999		for (u = mtree->line.s + find_off; *u; ++u) {
2000			if (u[0] == '\n') {
2001				/* Ends with unescaped newline. */
2002				*start = mtree->line.s;
2003				return total_size;
2004			} else if (u[0] == '#') {
2005				/* Ends with comment sequence #...\n */
2006				if (nl == NULL) {
2007					/* But we've not found the \n yet */
2008					break;
2009				}
2010			} else if (u[0] == '\\') {
2011				if (u[1] == '\n') {
2012					/* Trim escaped newline. */
2013					total_size -= 2;
2014					mtree->line.s[total_size] = '\0';
2015					break;
2016				} else if (u[1] != '\0') {
2017					/* Skip the two-char escape sequence */
2018					++u;
2019				}
2020			}
2021		}
2022		find_off = u - mtree->line.s;
2023	}
2024}
2025
2026static unsigned int
2027hash(const char *p)
2028{
2029	/* A 32-bit version of Peter Weinberger's (PJW) hash algorithm,
2030	   as used by ELF for hashing function names. */
2031	unsigned g, h = 0;
2032	while (*p != '\0') {
2033		h = (h << 4) + *p++;
2034		if ((g = h & 0xF0000000) != 0) {
2035			h ^= g >> 24;
2036			h &= 0x0FFFFFFF;
2037		}
2038	}
2039	return h;
2040}
2041