1/*-
2 * Copyright (c) 2003-2007 Tim Kientzle
3 * Copyright (c) 2008 Joerg Sonnenberger
4 * Copyright (c) 2011-2012 Michihiro NAKAJIMA
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include "archive_platform.h"
29__FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_mtree.c 201165 2009-12-29 05:52:13Z kientzle $");
30
31#ifdef HAVE_SYS_STAT_H
32#include <sys/stat.h>
33#endif
34#ifdef HAVE_ERRNO_H
35#include <errno.h>
36#endif
37#ifdef HAVE_FCNTL_H
38#include <fcntl.h>
39#endif
40#include <stddef.h>
41/* #include <stdint.h> */ /* See archive_platform.h */
42#ifdef HAVE_STDLIB_H
43#include <stdlib.h>
44#endif
45#ifdef HAVE_STRING_H
46#include <string.h>
47#endif
48#ifdef HAVE_CTYPE_H
49#include <ctype.h>
50#endif
51
52#include "archive.h"
53#include "archive_entry.h"
54#include "archive_private.h"
55#include "archive_rb.h"
56#include "archive_read_private.h"
57#include "archive_string.h"
58#include "archive_pack_dev.h"
59
60#ifndef O_BINARY
61#define	O_BINARY 0
62#endif
63#ifndef O_CLOEXEC
64#define O_CLOEXEC	0
65#endif
66
67#define	MTREE_HAS_DEVICE	0x0001
68#define	MTREE_HAS_FFLAGS	0x0002
69#define	MTREE_HAS_GID		0x0004
70#define	MTREE_HAS_GNAME		0x0008
71#define	MTREE_HAS_MTIME		0x0010
72#define	MTREE_HAS_NLINK		0x0020
73#define	MTREE_HAS_PERM		0x0040
74#define	MTREE_HAS_SIZE		0x0080
75#define	MTREE_HAS_TYPE		0x0100
76#define	MTREE_HAS_UID		0x0200
77#define	MTREE_HAS_UNAME		0x0400
78
79#define	MTREE_HAS_OPTIONAL	0x0800
80#define	MTREE_HAS_NOCHANGE	0x1000 /* FreeBSD specific */
81
82#define	MAX_LINE_LEN		(1024 * 1024)
83
84struct mtree_option {
85	struct mtree_option *next;
86	char *value;
87};
88
89struct mtree_entry {
90	struct archive_rb_node rbnode;
91	struct mtree_entry *next_dup;
92	struct mtree_entry *next;
93	struct mtree_option *options;
94	char *name;
95	char full;
96	char used;
97};
98
99struct mtree {
100	struct archive_string	 line;
101	size_t			 buffsize;
102	char			*buff;
103	int64_t			 offset;
104	int			 fd;
105	int			 archive_format;
106	const char		*archive_format_name;
107	struct mtree_entry	*entries;
108	struct mtree_entry	*this_entry;
109	struct archive_rb_tree	 entry_rbtree;
110	struct archive_string	 current_dir;
111	struct archive_string	 contents_name;
112
113	struct archive_entry_linkresolver *resolver;
114	struct archive_rb_tree rbtree;
115
116	int64_t			 cur_size;
117	char checkfs;
118};
119
120static int	bid_keycmp(const char *, const char *, ssize_t);
121static int	cleanup(struct archive_read *);
122static int	detect_form(struct archive_read *, int *);
123static int	mtree_bid(struct archive_read *, int);
124static int	parse_file(struct archive_read *, struct archive_entry *,
125		    struct mtree *, struct mtree_entry *, int *);
126static void	parse_escapes(char *, struct mtree_entry *);
127static int	parse_line(struct archive_read *, struct archive_entry *,
128		    struct mtree *, struct mtree_entry *, int *);
129static int	parse_keyword(struct archive_read *, struct mtree *,
130		    struct archive_entry *, struct mtree_option *, int *);
131static int	read_data(struct archive_read *a,
132		    const void **buff, size_t *size, int64_t *offset);
133static ssize_t	readline(struct archive_read *, struct mtree *, char **, ssize_t);
134static int	skip(struct archive_read *a);
135static int	read_header(struct archive_read *,
136		    struct archive_entry *);
137static int64_t	mtree_atol(char **, int base);
138
139/*
140 * There's no standard for TIME_T_MAX/TIME_T_MIN.  So we compute them
141 * here.  TODO: Move this to configure time, but be careful
142 * about cross-compile environments.
143 */
144static int64_t
145get_time_t_max(void)
146{
147#if defined(TIME_T_MAX)
148	return TIME_T_MAX;
149#else
150	/* ISO C allows time_t to be a floating-point type,
151	   but POSIX requires an integer type.  The following
152	   should work on any system that follows the POSIX
153	   conventions. */
154	if (((time_t)0) < ((time_t)-1)) {
155		/* Time_t is unsigned */
156		return (~(time_t)0);
157	} else {
158		/* Time_t is signed. */
159		/* Assume it's the same as int64_t or int32_t */
160		if (sizeof(time_t) == sizeof(int64_t)) {
161			return (time_t)INT64_MAX;
162		} else {
163			return (time_t)INT32_MAX;
164		}
165	}
166#endif
167}
168
169static int64_t
170get_time_t_min(void)
171{
172#if defined(TIME_T_MIN)
173	return TIME_T_MIN;
174#else
175	if (((time_t)0) < ((time_t)-1)) {
176		/* Time_t is unsigned */
177		return (time_t)0;
178	} else {
179		/* Time_t is signed. */
180		if (sizeof(time_t) == sizeof(int64_t)) {
181			return (time_t)INT64_MIN;
182		} else {
183			return (time_t)INT32_MIN;
184		}
185	}
186#endif
187}
188
189static int
190archive_read_format_mtree_options(struct archive_read *a,
191    const char *key, const char *val)
192{
193	struct mtree *mtree;
194
195	mtree = (struct mtree *)(a->format->data);
196	if (strcmp(key, "checkfs")  == 0) {
197		/* Allows to read information missing from the mtree from the file system */
198		if (val == NULL || val[0] == 0) {
199			mtree->checkfs = 0;
200		} else {
201			mtree->checkfs = 1;
202		}
203		return (ARCHIVE_OK);
204	}
205
206	/* Note: The "warn" return is just to inform the options
207	 * supervisor that we didn't handle it.  It will generate
208	 * a suitable error if no one used this option. */
209	return (ARCHIVE_WARN);
210}
211
212static void
213free_options(struct mtree_option *head)
214{
215	struct mtree_option *next;
216
217	for (; head != NULL; head = next) {
218		next = head->next;
219		free(head->value);
220		free(head);
221	}
222}
223
224static int
225mtree_cmp_node(const struct archive_rb_node *n1,
226    const struct archive_rb_node *n2)
227{
228	const struct mtree_entry *e1 = (const struct mtree_entry *)n1;
229	const struct mtree_entry *e2 = (const struct mtree_entry *)n2;
230
231	return (strcmp(e1->name, e2->name));
232}
233
234static int
235mtree_cmp_key(const struct archive_rb_node *n, const void *key)
236{
237	const struct mtree_entry *e = (const struct mtree_entry *)n;
238
239	return (strcmp(e->name, key));
240}
241
242int
243archive_read_support_format_mtree(struct archive *_a)
244{
245	static const struct archive_rb_tree_ops rb_ops = {
246		mtree_cmp_node, mtree_cmp_key,
247	};
248	struct archive_read *a = (struct archive_read *)_a;
249	struct mtree *mtree;
250	int r;
251
252	archive_check_magic(_a, ARCHIVE_READ_MAGIC,
253	    ARCHIVE_STATE_NEW, "archive_read_support_format_mtree");
254
255	mtree = (struct mtree *)calloc(1, sizeof(*mtree));
256	if (mtree == NULL) {
257		archive_set_error(&a->archive, ENOMEM,
258		    "Can't allocate mtree data");
259		return (ARCHIVE_FATAL);
260	}
261	mtree->fd = -1;
262
263	__archive_rb_tree_init(&mtree->rbtree, &rb_ops);
264
265	r = __archive_read_register_format(a, mtree, "mtree",
266           mtree_bid, archive_read_format_mtree_options, read_header, read_data, skip, NULL, cleanup, NULL, NULL);
267
268	if (r != ARCHIVE_OK)
269		free(mtree);
270	return (ARCHIVE_OK);
271}
272
273static int
274cleanup(struct archive_read *a)
275{
276	struct mtree *mtree;
277	struct mtree_entry *p, *q;
278
279	mtree = (struct mtree *)(a->format->data);
280
281	p = mtree->entries;
282	while (p != NULL) {
283		q = p->next;
284		free(p->name);
285		free_options(p->options);
286		free(p);
287		p = q;
288	}
289	archive_string_free(&mtree->line);
290	archive_string_free(&mtree->current_dir);
291	archive_string_free(&mtree->contents_name);
292	archive_entry_linkresolver_free(mtree->resolver);
293
294	free(mtree->buff);
295	free(mtree);
296	(a->format->data) = NULL;
297	return (ARCHIVE_OK);
298}
299
300static ssize_t
301get_line_size(const char *b, ssize_t avail, ssize_t *nlsize)
302{
303	ssize_t len;
304
305	len = 0;
306	while (len < avail) {
307		switch (*b) {
308		case '\0':/* Non-ascii character or control character. */
309			if (nlsize != NULL)
310				*nlsize = 0;
311			return (-1);
312		case '\r':
313			if (avail-len > 1 && b[1] == '\n') {
314				if (nlsize != NULL)
315					*nlsize = 2;
316				return (len+2);
317			}
318			/* FALL THROUGH */
319		case '\n':
320			if (nlsize != NULL)
321				*nlsize = 1;
322			return (len+1);
323		default:
324			b++;
325			len++;
326			break;
327		}
328	}
329	if (nlsize != NULL)
330		*nlsize = 0;
331	return (avail);
332}
333
334/*
335 *  <---------------- ravail --------------------->
336 *  <-- diff ------> <---  avail ----------------->
337 *                   <---- len ----------->
338 * | Previous lines | line being parsed  nl extra |
339 *                  ^
340 *                  b
341 *
342 */
343static ssize_t
344next_line(struct archive_read *a,
345    const char **b, ssize_t *avail, ssize_t *ravail, ssize_t *nl)
346{
347	ssize_t len;
348	int quit;
349
350	quit = 0;
351	if (*avail == 0) {
352		*nl = 0;
353		len = 0;
354	} else
355		len = get_line_size(*b, *avail, nl);
356	/*
357	 * Read bytes more while it does not reach the end of line.
358	 */
359	while (*nl == 0 && len == *avail && !quit) {
360		ssize_t diff = *ravail - *avail;
361		size_t nbytes_req = (*ravail+1023) & ~1023U;
362		ssize_t tested;
363
364		/*
365		 * Place an arbitrary limit on the line length.
366		 * mtree is almost free-form input and without line length limits,
367		 * it can consume a lot of memory.
368		 */
369		if (len >= MAX_LINE_LEN)
370			return (-1);
371
372		/* Increase reading bytes if it is not enough to at least
373		 * new two lines. */
374		if (nbytes_req < (size_t)*ravail + 160)
375			nbytes_req <<= 1;
376
377		*b = __archive_read_ahead(a, nbytes_req, avail);
378		if (*b == NULL) {
379			if (*ravail >= *avail)
380				return (0);
381			/* Reading bytes reaches the end of file. */
382			*b = __archive_read_ahead(a, *avail, avail);
383			quit = 1;
384		}
385		*ravail = *avail;
386		*b += diff;
387		*avail -= diff;
388		tested = len;/* Skip some bytes we already determinated. */
389		len = get_line_size(*b + len, *avail - len, nl);
390		if (len >= 0)
391			len += tested;
392	}
393	return (len);
394}
395
396/*
397 * Compare characters with a mtree keyword.
398 * Returns the length of a mtree keyword if matched.
399 * Returns 0 if not matched.
400 */
401static int
402bid_keycmp(const char *p, const char *key, ssize_t len)
403{
404	int match_len = 0;
405
406	while (len > 0 && *p && *key) {
407		if (*p == *key) {
408			--len;
409			++p;
410			++key;
411			++match_len;
412			continue;
413		}
414		return (0);/* Not match */
415	}
416	if (*key != '\0')
417		return (0);/* Not match */
418
419	/* A following character should be specified characters */
420	if (p[0] == '=' || p[0] == ' ' || p[0] == '\t' ||
421	    p[0] == '\n' || p[0] == '\r' ||
422	   (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r')))
423		return (match_len);
424	return (0);/* Not match */
425}
426
427/*
428 * Test whether the characters 'p' has is mtree keyword.
429 * Returns the length of a detected keyword.
430 * Returns 0 if any keywords were not found.
431 */
432static int
433bid_keyword(const char *p,  ssize_t len)
434{
435	static const char * const keys_c[] = {
436		"content", "contents", "cksum", NULL
437	};
438	static const char * const keys_df[] = {
439		"device", "flags", NULL
440	};
441	static const char * const keys_g[] = {
442		"gid", "gname", NULL
443	};
444	static const char * const keys_il[] = {
445		"ignore", "inode", "link", NULL
446	};
447	static const char * const keys_m[] = {
448		"md5", "md5digest", "mode", NULL
449	};
450	static const char * const keys_no[] = {
451		"nlink", "nochange", "optional", NULL
452	};
453	static const char * const keys_r[] = {
454		"resdevice", "rmd160", "rmd160digest", NULL
455	};
456	static const char * const keys_s[] = {
457		"sha1", "sha1digest",
458		"sha256", "sha256digest",
459		"sha384", "sha384digest",
460		"sha512", "sha512digest",
461		"size", NULL
462	};
463	static const char * const keys_t[] = {
464		"tags", "time", "type", NULL
465	};
466	static const char * const keys_u[] = {
467		"uid", "uname",	NULL
468	};
469	const char * const *keys;
470	int i;
471
472	switch (*p) {
473	case 'c': keys = keys_c; break;
474	case 'd': case 'f': keys = keys_df; break;
475	case 'g': keys = keys_g; break;
476	case 'i': case 'l': keys = keys_il; break;
477	case 'm': keys = keys_m; break;
478	case 'n': case 'o': keys = keys_no; break;
479	case 'r': keys = keys_r; break;
480	case 's': keys = keys_s; break;
481	case 't': keys = keys_t; break;
482	case 'u': keys = keys_u; break;
483	default: return (0);/* Unknown key */
484	}
485
486	for (i = 0; keys[i] != NULL; i++) {
487		int l = bid_keycmp(p, keys[i], len);
488		if (l > 0)
489			return (l);
490	}
491	return (0);/* Unknown key */
492}
493
494/*
495 * Test whether there is a set of mtree keywords.
496 * Returns the number of keyword.
497 * Returns -1 if we got incorrect sequence.
498 * This function expects a set of "<space characters>keyword=value".
499 * When "unset" is specified, expects a set of "<space characters>keyword".
500 */
501static int
502bid_keyword_list(const char *p,  ssize_t len, int unset, int last_is_path)
503{
504	int l;
505	int keycnt = 0;
506
507	while (len > 0 && *p) {
508		int blank = 0;
509
510		/* Test whether there are blank characters in the line. */
511		while (len >0 && (*p == ' ' || *p == '\t')) {
512			++p;
513			--len;
514			blank = 1;
515		}
516		if (*p == '\n' || *p == '\r')
517			break;
518		if (p[0] == '\\' && (p[1] == '\n' || p[1] == '\r'))
519			break;
520		if (!blank && !last_is_path) /* No blank character. */
521			return (-1);
522		if (last_is_path && len == 0)
523				return (keycnt);
524
525		if (unset) {
526			l = bid_keycmp(p, "all", len);
527			if (l > 0)
528				return (1);
529		}
530		/* Test whether there is a correct key in the line. */
531		l = bid_keyword(p, len);
532		if (l == 0)
533			return (-1);/* Unknown keyword was found. */
534		p += l;
535		len -= l;
536		keycnt++;
537
538		/* Skip value */
539		if (*p == '=') {
540			int value = 0;
541			++p;
542			--len;
543			while (len > 0 && *p != ' ' && *p != '\t') {
544				++p;
545				--len;
546				value = 1;
547			}
548			/* A keyword should have a its value unless
549			 * "/unset" operation. */
550			if (!unset && value == 0)
551				return (-1);
552		}
553	}
554	return (keycnt);
555}
556
557static int
558bid_entry(const char *p, ssize_t len, ssize_t nl, int *last_is_path)
559{
560	int f = 0;
561	static const unsigned char safe_char[256] = {
562		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 00 - 0F */
563		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 10 - 1F */
564		/* !"$%&'()*+,-./  EXCLUSION:( )(#) */
565		0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 20 - 2F */
566		/* 0123456789:;<>?  EXCLUSION:(=) */
567		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, /* 30 - 3F */
568		/* @ABCDEFGHIJKLMNO */
569		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 40 - 4F */
570		/* PQRSTUVWXYZ[\]^_  */
571		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 50 - 5F */
572		/* `abcdefghijklmno */
573		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 60 - 6F */
574		/* pqrstuvwxyz{|}~ */
575		1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* 70 - 7F */
576		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 - 8F */
577		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 90 - 9F */
578		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* A0 - AF */
579		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* B0 - BF */
580		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* C0 - CF */
581		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* D0 - DF */
582		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* E0 - EF */
583		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* F0 - FF */
584	};
585	ssize_t ll;
586	const char *pp = p;
587	const char * const pp_end = pp + len;
588
589	*last_is_path = 0;
590	/*
591	 * Skip the path-name which is quoted.
592	 */
593	for (;pp < pp_end; ++pp) {
594		if (!safe_char[*(const unsigned char *)pp]) {
595			if (*pp != ' ' && *pp != '\t' && *pp != '\r'
596			    && *pp != '\n')
597				f = 0;
598			break;
599		}
600		f = 1;
601	}
602	ll = pp_end - pp;
603
604	/* If a path-name was not found at the first, try to check
605	 * a mtree format(a.k.a form D) ``NetBSD's mtree -D'' creates,
606	 * which places the path-name at the last. */
607	if (f == 0) {
608		const char *pb = p + len - nl;
609		int name_len = 0;
610		int slash;
611
612		/* The form D accepts only a single line for an entry. */
613		if (pb-2 >= p &&
614		    pb[-1] == '\\' && (pb[-2] == ' ' || pb[-2] == '\t'))
615			return (-1);
616		if (pb-1 >= p && pb[-1] == '\\')
617			return (-1);
618
619		slash = 0;
620		while (p <= --pb && *pb != ' ' && *pb != '\t') {
621			if (!safe_char[*(const unsigned char *)pb])
622				return (-1);
623			name_len++;
624			/* The pathname should have a slash in this
625			 * format. */
626			if (*pb == '/')
627				slash = 1;
628		}
629		if (name_len == 0 || slash == 0)
630			return (-1);
631		/* If '/' is placed at the first in this field, this is not
632		 * a valid filename. */
633		if (pb[1] == '/')
634			return (-1);
635		ll = len - nl - name_len;
636		pp = p;
637		*last_is_path = 1;
638	}
639
640	return (bid_keyword_list(pp, ll, 0, *last_is_path));
641}
642
643#define MAX_BID_ENTRY	3
644
645static int
646mtree_bid(struct archive_read *a, int best_bid)
647{
648	const char *signature = "#mtree";
649	const char *p;
650
651	(void)best_bid; /* UNUSED */
652
653	/* Now let's look at the actual header and see if it matches. */
654	p = __archive_read_ahead(a, strlen(signature), NULL);
655	if (p == NULL)
656		return (-1);
657
658	if (memcmp(p, signature, strlen(signature)) == 0)
659		return (8 * (int)strlen(signature));
660
661	/*
662	 * There is not a mtree signature. Let's try to detect mtree format.
663	 */
664	return (detect_form(a, NULL));
665}
666
667static int
668detect_form(struct archive_read *a, int *is_form_d)
669{
670	const char *p;
671	ssize_t avail, ravail;
672	ssize_t detected_bytes = 0, len, nl;
673	int entry_cnt = 0, multiline = 0;
674	int form_D = 0;/* The archive is generated by `NetBSD mtree -D'
675			* (In this source we call it `form D') . */
676
677	if (is_form_d != NULL)
678		*is_form_d = 0;
679	p = __archive_read_ahead(a, 1, &avail);
680	if (p == NULL)
681		return (-1);
682	ravail = avail;
683	for (;;) {
684		len = next_line(a, &p, &avail, &ravail, &nl);
685		/* The terminal character of the line should be
686		 * a new line character, '\r\n' or '\n'. */
687		if (len <= 0 || nl == 0)
688			break;
689		if (!multiline) {
690			/* Leading whitespace is never significant,
691			 * ignore it. */
692			while (len > 0 && (*p == ' ' || *p == '\t')) {
693				++p;
694				--avail;
695				--len;
696			}
697			/* Skip comment or empty line. */
698			if (p[0] == '#' || p[0] == '\n' || p[0] == '\r') {
699				p += len;
700				avail -= len;
701				continue;
702			}
703		} else {
704			/* A continuance line; the terminal
705			 * character of previous line was '\' character. */
706			if (bid_keyword_list(p, len, 0, 0) <= 0)
707				break;
708			if (multiline == 1)
709				detected_bytes += len;
710			if (p[len-nl-1] != '\\') {
711				if (multiline == 1 &&
712				    ++entry_cnt >= MAX_BID_ENTRY)
713					break;
714				multiline = 0;
715			}
716			p += len;
717			avail -= len;
718			continue;
719		}
720		if (p[0] != '/') {
721			int last_is_path, keywords;
722
723			keywords = bid_entry(p, len, nl, &last_is_path);
724			if (keywords >= 0) {
725				detected_bytes += len;
726				if (form_D == 0) {
727					if (last_is_path)
728						form_D = 1;
729					else if (keywords > 0)
730						/* This line is not `form D'. */
731						form_D = -1;
732				} else if (form_D == 1) {
733					if (!last_is_path && keywords > 0)
734						/* This this is not `form D'
735						 * and We cannot accept mixed
736						 * format. */
737						break;
738				}
739				if (!last_is_path && p[len-nl-1] == '\\')
740					/* This line continues. */
741					multiline = 1;
742				else {
743					/* We've got plenty of correct lines
744					 * to assume that this file is a mtree
745					 * format. */
746					if (++entry_cnt >= MAX_BID_ENTRY)
747						break;
748				}
749			} else
750				break;
751		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
752			if (bid_keyword_list(p+4, len-4, 0, 0) <= 0)
753				break;
754			/* This line continues. */
755			if (p[len-nl-1] == '\\')
756				multiline = 2;
757		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
758			if (bid_keyword_list(p+6, len-6, 1, 0) <= 0)
759				break;
760			/* This line continues. */
761			if (p[len-nl-1] == '\\')
762				multiline = 2;
763		} else
764			break;
765
766		/* Test next line. */
767		p += len;
768		avail -= len;
769	}
770	if (entry_cnt >= MAX_BID_ENTRY || (entry_cnt > 0 && len == 0)) {
771		if (is_form_d != NULL) {
772			if (form_D == 1)
773				*is_form_d = 1;
774		}
775		return (32);
776	}
777
778	return (0);
779}
780
781/*
782 * The extended mtree format permits multiple lines specifying
783 * attributes for each file.  For those entries, only the last line
784 * is actually used.  Practically speaking, that means we have
785 * to read the entire mtree file into memory up front.
786 *
787 * The parsing is done in two steps.  First, it is decided if a line
788 * changes the global defaults and if it is, processed accordingly.
789 * Otherwise, the options of the line are merged with the current
790 * global options.
791 */
792static int
793add_option(struct archive_read *a, struct mtree_option **global,
794    const char *value, size_t len)
795{
796	struct mtree_option *opt;
797
798	if ((opt = malloc(sizeof(*opt))) == NULL) {
799		archive_set_error(&a->archive, errno, "Can't allocate memory");
800		return (ARCHIVE_FATAL);
801	}
802	if ((opt->value = malloc(len + 1)) == NULL) {
803		free(opt);
804		archive_set_error(&a->archive, errno, "Can't allocate memory");
805		return (ARCHIVE_FATAL);
806	}
807	memcpy(opt->value, value, len);
808	opt->value[len] = '\0';
809	opt->next = *global;
810	*global = opt;
811	return (ARCHIVE_OK);
812}
813
814static void
815remove_option(struct mtree_option **global, const char *value, size_t len)
816{
817	struct mtree_option *iter, *last;
818
819	last = NULL;
820	for (iter = *global; iter != NULL; last = iter, iter = iter->next) {
821		if (strncmp(iter->value, value, len) == 0 &&
822		    (iter->value[len] == '\0' ||
823		     iter->value[len] == '='))
824			break;
825	}
826	if (iter == NULL)
827		return;
828	if (last == NULL)
829		*global = iter->next;
830	else
831		last->next = iter->next;
832
833	free(iter->value);
834	free(iter);
835}
836
837static int
838process_global_set(struct archive_read *a,
839    struct mtree_option **global, const char *line)
840{
841	const char *next, *eq;
842	size_t len;
843	int r;
844
845	line += 4;
846	for (;;) {
847		next = line + strspn(line, " \t\r\n");
848		if (*next == '\0')
849			return (ARCHIVE_OK);
850		line = next;
851		next = line + strcspn(line, " \t\r\n");
852		eq = strchr(line, '=');
853		if (eq > next)
854			len = next - line;
855		else
856			len = eq - line;
857
858		remove_option(global, line, len);
859		r = add_option(a, global, line, next - line);
860		if (r != ARCHIVE_OK)
861			return (r);
862		line = next;
863	}
864}
865
866static int
867process_global_unset(struct archive_read *a,
868    struct mtree_option **global, const char *line)
869{
870	const char *next;
871	size_t len;
872
873	line += 6;
874	if (strchr(line, '=') != NULL) {
875		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
876		    "/unset shall not contain `='");
877		return ARCHIVE_FATAL;
878	}
879
880	for (;;) {
881		next = line + strspn(line, " \t\r\n");
882		if (*next == '\0')
883			return (ARCHIVE_OK);
884		line = next;
885		len = strcspn(line, " \t\r\n");
886
887		if (len == 3 && strncmp(line, "all", 3) == 0) {
888			free_options(*global);
889			*global = NULL;
890		} else {
891			remove_option(global, line, len);
892		}
893
894		line += len;
895	}
896}
897
898static int
899process_add_entry(struct archive_read *a, struct mtree *mtree,
900    struct mtree_option **global, const char *line, ssize_t line_len,
901    struct mtree_entry **last_entry, int is_form_d)
902{
903	struct mtree_entry *entry;
904	struct mtree_option *iter;
905	const char *next, *eq, *name, *end;
906	size_t name_len, len;
907	int r, i;
908
909	if ((entry = malloc(sizeof(*entry))) == NULL) {
910		archive_set_error(&a->archive, errno, "Can't allocate memory");
911		return (ARCHIVE_FATAL);
912	}
913	entry->next = NULL;
914	entry->options = NULL;
915	entry->name = NULL;
916	entry->used = 0;
917	entry->full = 0;
918
919	/* Add this entry to list. */
920	if (*last_entry == NULL)
921		mtree->entries = entry;
922	else
923		(*last_entry)->next = entry;
924	*last_entry = entry;
925
926	if (is_form_d) {
927		/* Filename is last item on line. */
928		/* Adjust line_len to trim trailing whitespace */
929		while (line_len > 0) {
930			char last_character = line[line_len - 1];
931			if (last_character == '\r'
932			    || last_character == '\n'
933			    || last_character == '\t'
934			    || last_character == ' ') {
935				line_len--;
936			} else {
937				break;
938			}
939		}
940		/* Name starts after the last whitespace separator */
941		name = line;
942		for (i = 0; i < line_len; i++) {
943			if (line[i] == '\r'
944			    || line[i] == '\n'
945			    || line[i] == '\t'
946			    || line[i] == ' ') {
947				name = line + i + 1;
948			}
949		}
950		name_len = line + line_len - name;
951		end = name;
952	} else {
953		/* Filename is first item on line */
954		name_len = strcspn(line, " \t\r\n");
955		name = line;
956		line += name_len;
957		end = line + line_len;
958	}
959	/* name/name_len is the name within the line. */
960	/* line..end brackets the entire line except the name */
961
962	if ((entry->name = malloc(name_len + 1)) == NULL) {
963		archive_set_error(&a->archive, errno, "Can't allocate memory");
964		return (ARCHIVE_FATAL);
965	}
966
967	memcpy(entry->name, name, name_len);
968	entry->name[name_len] = '\0';
969	parse_escapes(entry->name, entry);
970
971	entry->next_dup = NULL;
972	if (entry->full) {
973		if (!__archive_rb_tree_insert_node(&mtree->rbtree, &entry->rbnode)) {
974			struct mtree_entry *alt;
975			alt = (struct mtree_entry *)__archive_rb_tree_find_node(
976			    &mtree->rbtree, entry->name);
977			while (alt->next_dup)
978				alt = alt->next_dup;
979			alt->next_dup = entry;
980		}
981	}
982
983	for (iter = *global; iter != NULL; iter = iter->next) {
984		r = add_option(a, &entry->options, iter->value,
985		    strlen(iter->value));
986		if (r != ARCHIVE_OK)
987			return (r);
988	}
989
990	for (;;) {
991		next = line + strspn(line, " \t\r\n");
992		if (*next == '\0')
993			return (ARCHIVE_OK);
994		if (next >= end)
995			return (ARCHIVE_OK);
996		line = next;
997		next = line + strcspn(line, " \t\r\n");
998		eq = strchr(line, '=');
999		if (eq == NULL || eq > next)
1000			len = next - line;
1001		else
1002			len = eq - line;
1003
1004		remove_option(&entry->options, line, len);
1005		r = add_option(a, &entry->options, line, next - line);
1006		if (r != ARCHIVE_OK)
1007			return (r);
1008		line = next;
1009	}
1010}
1011
1012static int
1013read_mtree(struct archive_read *a, struct mtree *mtree)
1014{
1015	ssize_t len;
1016	uintmax_t counter;
1017	char *p, *s;
1018	struct mtree_option *global;
1019	struct mtree_entry *last_entry;
1020	int r, is_form_d;
1021
1022	mtree->archive_format = ARCHIVE_FORMAT_MTREE;
1023	mtree->archive_format_name = "mtree";
1024
1025	global = NULL;
1026	last_entry = NULL;
1027
1028	(void)detect_form(a, &is_form_d);
1029
1030	for (counter = 1; ; ++counter) {
1031		r = ARCHIVE_OK;
1032		len = readline(a, mtree, &p, 65536);
1033		if (len == 0) {
1034			mtree->this_entry = mtree->entries;
1035			free_options(global);
1036			return (ARCHIVE_OK);
1037		}
1038		if (len < 0) {
1039			free_options(global);
1040			return ((int)len);
1041		}
1042		/* Leading whitespace is never significant, ignore it. */
1043		while (*p == ' ' || *p == '\t') {
1044			++p;
1045			--len;
1046		}
1047		/* Skip content lines and blank lines. */
1048		if (*p == '#')
1049			continue;
1050		if (*p == '\r' || *p == '\n' || *p == '\0')
1051			continue;
1052		/* Non-printable characters are not allowed */
1053		for (s = p;s < p + len - 1; s++) {
1054			if (!isprint((unsigned char)*s)) {
1055				r = ARCHIVE_FATAL;
1056				break;
1057			}
1058		}
1059		if (r != ARCHIVE_OK)
1060			break;
1061		if (*p != '/') {
1062			r = process_add_entry(a, mtree, &global, p, len,
1063			    &last_entry, is_form_d);
1064		} else if (len > 4 && strncmp(p, "/set", 4) == 0) {
1065			if (p[4] != ' ' && p[4] != '\t')
1066				break;
1067			r = process_global_set(a, &global, p);
1068		} else if (len > 6 && strncmp(p, "/unset", 6) == 0) {
1069			if (p[6] != ' ' && p[6] != '\t')
1070				break;
1071			r = process_global_unset(a, &global, p);
1072		} else
1073			break;
1074
1075		if (r != ARCHIVE_OK) {
1076			free_options(global);
1077			return r;
1078		}
1079	}
1080
1081	archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1082	    "Can't parse line %ju", counter);
1083	free_options(global);
1084	return (ARCHIVE_FATAL);
1085}
1086
1087/*
1088 * Read in the entire mtree file into memory on the first request.
1089 * Then use the next unused file to satisfy each header request.
1090 */
1091static int
1092read_header(struct archive_read *a, struct archive_entry *entry)
1093{
1094	struct mtree *mtree;
1095	char *p;
1096	int r, use_next;
1097
1098	mtree = (struct mtree *)(a->format->data);
1099
1100	if (mtree->fd >= 0) {
1101		close(mtree->fd);
1102		mtree->fd = -1;
1103	}
1104
1105	if (mtree->entries == NULL) {
1106		mtree->resolver = archive_entry_linkresolver_new();
1107		if (mtree->resolver == NULL)
1108			return ARCHIVE_FATAL;
1109		archive_entry_linkresolver_set_strategy(mtree->resolver,
1110		    ARCHIVE_FORMAT_MTREE);
1111		r = read_mtree(a, mtree);
1112		if (r != ARCHIVE_OK)
1113			return (r);
1114	}
1115
1116	a->archive.archive_format = mtree->archive_format;
1117	a->archive.archive_format_name = mtree->archive_format_name;
1118
1119	for (;;) {
1120		if (mtree->this_entry == NULL)
1121			return (ARCHIVE_EOF);
1122		if (strcmp(mtree->this_entry->name, "..") == 0) {
1123			mtree->this_entry->used = 1;
1124			if (archive_strlen(&mtree->current_dir) > 0) {
1125				/* Roll back current path. */
1126				p = mtree->current_dir.s
1127				    + mtree->current_dir.length - 1;
1128				while (p >= mtree->current_dir.s && *p != '/')
1129					--p;
1130				if (p >= mtree->current_dir.s)
1131					--p;
1132				mtree->current_dir.length
1133				    = p - mtree->current_dir.s + 1;
1134			}
1135		}
1136		if (!mtree->this_entry->used) {
1137			use_next = 0;
1138			r = parse_file(a, entry, mtree, mtree->this_entry,
1139				&use_next);
1140			if (use_next == 0)
1141				return (r);
1142		}
1143		mtree->this_entry = mtree->this_entry->next;
1144	}
1145}
1146
1147/*
1148 * A single file can have multiple lines contribute specifications.
1149 * Parse as many lines as necessary, then pull additional information
1150 * from a backing file on disk as necessary.
1151 */
1152static int
1153parse_file(struct archive_read *a, struct archive_entry *entry,
1154    struct mtree *mtree, struct mtree_entry *mentry, int *use_next)
1155{
1156	const char *path;
1157	struct stat st_storage, *st;
1158	struct mtree_entry *mp;
1159	struct archive_entry *sparse_entry;
1160	int r = ARCHIVE_OK, r1, parsed_kws;
1161
1162	mentry->used = 1;
1163
1164	/* Initialize reasonable defaults. */
1165	archive_entry_set_filetype(entry, AE_IFREG);
1166	archive_entry_set_size(entry, 0);
1167	archive_string_empty(&mtree->contents_name);
1168
1169	/* Parse options from this line. */
1170	parsed_kws = 0;
1171	r = parse_line(a, entry, mtree, mentry, &parsed_kws);
1172
1173	if (mentry->full) {
1174		archive_entry_copy_pathname(entry, mentry->name);
1175		/*
1176		 * "Full" entries are allowed to have multiple lines
1177		 * and those lines aren't required to be adjacent.  We
1178		 * don't support multiple lines for "relative" entries
1179		 * nor do we make any attempt to merge data from
1180		 * separate "relative" and "full" entries.  (Merging
1181		 * "relative" and "full" entries would require dealing
1182		 * with pathname canonicalization, which is a very
1183		 * tricky subject.)
1184		 */
1185		mp = (struct mtree_entry *)__archive_rb_tree_find_node(
1186		    &mtree->rbtree, mentry->name);
1187		for (; mp; mp = mp->next_dup) {
1188			if (mp->full && !mp->used) {
1189				/* Later lines override earlier ones. */
1190				mp->used = 1;
1191				r1 = parse_line(a, entry, mtree, mp, &parsed_kws);
1192				if (r1 < r)
1193					r = r1;
1194			}
1195		}
1196	} else {
1197		/*
1198		 * Relative entries require us to construct
1199		 * the full path and possibly update the
1200		 * current directory.
1201		 */
1202		size_t n = archive_strlen(&mtree->current_dir);
1203		if (n > 0)
1204			archive_strcat(&mtree->current_dir, "/");
1205		archive_strcat(&mtree->current_dir, mentry->name);
1206		archive_entry_copy_pathname(entry, mtree->current_dir.s);
1207		if (archive_entry_filetype(entry) != AE_IFDIR)
1208			mtree->current_dir.length = n;
1209	}
1210
1211	if (mtree->checkfs) {
1212		/*
1213		 * Try to open and stat the file to get the real size
1214		 * and other file info.  It would be nice to avoid
1215		 * this here so that getting a listing of an mtree
1216		 * wouldn't require opening every referenced contents
1217		 * file.  But then we wouldn't know the actual
1218		 * contents size, so I don't see a really viable way
1219		 * around this.  (Also, we may want to someday pull
1220		 * other unspecified info from the contents file on
1221		 * disk.)
1222		 */
1223		mtree->fd = -1;
1224		if (archive_strlen(&mtree->contents_name) > 0)
1225			path = mtree->contents_name.s;
1226		else
1227			path = archive_entry_pathname(entry);
1228
1229		if (archive_entry_filetype(entry) == AE_IFREG ||
1230				archive_entry_filetype(entry) == AE_IFDIR) {
1231			mtree->fd = open(path, O_RDONLY | O_BINARY | O_CLOEXEC);
1232			__archive_ensure_cloexec_flag(mtree->fd);
1233			if (mtree->fd == -1 &&
1234				(errno != ENOENT ||
1235				 archive_strlen(&mtree->contents_name) > 0)) {
1236				archive_set_error(&a->archive, errno,
1237						"Can't open %s", path);
1238				r = ARCHIVE_WARN;
1239			}
1240		}
1241
1242		st = &st_storage;
1243		if (mtree->fd >= 0) {
1244			if (fstat(mtree->fd, st) == -1) {
1245				archive_set_error(&a->archive, errno,
1246						"Could not fstat %s", path);
1247				r = ARCHIVE_WARN;
1248				/* If we can't stat it, don't keep it open. */
1249				close(mtree->fd);
1250				mtree->fd = -1;
1251				st = NULL;
1252			}
1253		} else if (lstat(path, st) == -1) {
1254			st = NULL;
1255		}
1256
1257		/*
1258		 * Check for a mismatch between the type in the specification
1259		 * and the type of the contents object on disk.
1260		 */
1261		if (st != NULL) {
1262			if (((st->st_mode & S_IFMT) == S_IFREG &&
1263			      archive_entry_filetype(entry) == AE_IFREG)
1264#ifdef S_IFLNK
1265			  ||((st->st_mode & S_IFMT) == S_IFLNK &&
1266			      archive_entry_filetype(entry) == AE_IFLNK)
1267#endif
1268#ifdef S_IFSOCK
1269			  ||((st->st_mode & S_IFSOCK) == S_IFSOCK &&
1270			      archive_entry_filetype(entry) == AE_IFSOCK)
1271#endif
1272#ifdef S_IFCHR
1273			  ||((st->st_mode & S_IFMT) == S_IFCHR &&
1274			      archive_entry_filetype(entry) == AE_IFCHR)
1275#endif
1276#ifdef S_IFBLK
1277			  ||((st->st_mode & S_IFMT) == S_IFBLK &&
1278			      archive_entry_filetype(entry) == AE_IFBLK)
1279#endif
1280			  ||((st->st_mode & S_IFMT) == S_IFDIR &&
1281			      archive_entry_filetype(entry) == AE_IFDIR)
1282#ifdef S_IFIFO
1283			  ||((st->st_mode & S_IFMT) == S_IFIFO &&
1284			      archive_entry_filetype(entry) == AE_IFIFO)
1285#endif
1286			) {
1287				/* Types match. */
1288			} else {
1289				/* Types don't match; bail out gracefully. */
1290				if (mtree->fd >= 0)
1291					close(mtree->fd);
1292				mtree->fd = -1;
1293				if (parsed_kws & MTREE_HAS_OPTIONAL) {
1294					/* It's not an error for an optional
1295					 * entry to not match disk. */
1296					*use_next = 1;
1297				} else if (r == ARCHIVE_OK) {
1298					archive_set_error(&a->archive,
1299					    ARCHIVE_ERRNO_MISC,
1300					    "mtree specification has different"
1301					    " type for %s",
1302					    archive_entry_pathname(entry));
1303					r = ARCHIVE_WARN;
1304				}
1305				return (r);
1306			}
1307		}
1308
1309		/*
1310		 * If there is a contents file on disk, pick some of the
1311		 * metadata from that file.  For most of these, we only
1312		 * set it from the contents if it wasn't already parsed
1313		 * from the specification.
1314		 */
1315		if (st != NULL) {
1316			if (((parsed_kws & MTREE_HAS_DEVICE) == 0 ||
1317				(parsed_kws & MTREE_HAS_NOCHANGE) != 0) &&
1318				(archive_entry_filetype(entry) == AE_IFCHR ||
1319				 archive_entry_filetype(entry) == AE_IFBLK))
1320				archive_entry_set_rdev(entry, st->st_rdev);
1321			if ((parsed_kws & (MTREE_HAS_GID | MTREE_HAS_GNAME))
1322				== 0 ||
1323			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1324				archive_entry_set_gid(entry, st->st_gid);
1325			if ((parsed_kws & (MTREE_HAS_UID | MTREE_HAS_UNAME))
1326				== 0 ||
1327			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1328				archive_entry_set_uid(entry, st->st_uid);
1329			if ((parsed_kws & MTREE_HAS_MTIME) == 0 ||
1330			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0) {
1331#if HAVE_STRUCT_STAT_ST_MTIMESPEC_TV_NSEC
1332				archive_entry_set_mtime(entry, st->st_mtime,
1333						st->st_mtimespec.tv_nsec);
1334#elif HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
1335				archive_entry_set_mtime(entry, st->st_mtime,
1336						st->st_mtim.tv_nsec);
1337#elif HAVE_STRUCT_STAT_ST_MTIME_N
1338				archive_entry_set_mtime(entry, st->st_mtime,
1339						st->st_mtime_n);
1340#elif HAVE_STRUCT_STAT_ST_UMTIME
1341				archive_entry_set_mtime(entry, st->st_mtime,
1342						st->st_umtime*1000);
1343#elif HAVE_STRUCT_STAT_ST_MTIME_USEC
1344				archive_entry_set_mtime(entry, st->st_mtime,
1345						st->st_mtime_usec*1000);
1346#else
1347				archive_entry_set_mtime(entry, st->st_mtime, 0);
1348#endif
1349			}
1350			if ((parsed_kws & MTREE_HAS_NLINK) == 0 ||
1351			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1352				archive_entry_set_nlink(entry, st->st_nlink);
1353			if ((parsed_kws & MTREE_HAS_PERM) == 0 ||
1354			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1355				archive_entry_set_perm(entry, st->st_mode);
1356			if ((parsed_kws & MTREE_HAS_SIZE) == 0 ||
1357			    (parsed_kws & MTREE_HAS_NOCHANGE) != 0)
1358				archive_entry_set_size(entry, st->st_size);
1359			archive_entry_set_ino(entry, st->st_ino);
1360			archive_entry_set_dev(entry, st->st_dev);
1361
1362			archive_entry_linkify(mtree->resolver, &entry,
1363				&sparse_entry);
1364		} else if (parsed_kws & MTREE_HAS_OPTIONAL) {
1365			/*
1366			 * Couldn't open the entry, stat it or the on-disk type
1367			 * didn't match.  If this entry is optional, just
1368			 * ignore it and read the next header entry.
1369			 */
1370			*use_next = 1;
1371			return ARCHIVE_OK;
1372		}
1373	}
1374
1375	mtree->cur_size = archive_entry_size(entry);
1376	mtree->offset = 0;
1377
1378	return r;
1379}
1380
1381/*
1382 * Each line contains a sequence of keywords.
1383 */
1384static int
1385parse_line(struct archive_read *a, struct archive_entry *entry,
1386    struct mtree *mtree, struct mtree_entry *mp, int *parsed_kws)
1387{
1388	struct mtree_option *iter;
1389	int r = ARCHIVE_OK, r1;
1390
1391	for (iter = mp->options; iter != NULL; iter = iter->next) {
1392		r1 = parse_keyword(a, mtree, entry, iter, parsed_kws);
1393		if (r1 < r)
1394			r = r1;
1395	}
1396	if (r == ARCHIVE_OK && (*parsed_kws & MTREE_HAS_TYPE) == 0) {
1397		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1398		    "Missing type keyword in mtree specification");
1399		return (ARCHIVE_WARN);
1400	}
1401	return (r);
1402}
1403
1404/*
1405 * Device entries have one of the following forms:
1406 *  - raw dev_t
1407 *  - format,major,minor[,subdevice]
1408 * When parsing succeeded, `pdev' will contain the appropriate dev_t value.
1409 */
1410
1411/* strsep() is not in C90, but strcspn() is. */
1412/* Taken from http://unixpapa.com/incnote/string.html */
1413static char *
1414la_strsep(char **sp, const char *sep)
1415{
1416	char *p, *s;
1417	if (sp == NULL || *sp == NULL || **sp == '\0')
1418		return(NULL);
1419	s = *sp;
1420	p = s + strcspn(s, sep);
1421	if (*p != '\0')
1422		*p++ = '\0';
1423	*sp = p;
1424	return(s);
1425}
1426
1427static int
1428parse_device(dev_t *pdev, struct archive *a, char *val)
1429{
1430#define MAX_PACK_ARGS 3
1431	unsigned long numbers[MAX_PACK_ARGS];
1432	char *p, *dev;
1433	int argc;
1434	pack_t *pack;
1435	dev_t result;
1436	const char *error = NULL;
1437
1438	memset(pdev, 0, sizeof(*pdev));
1439	if ((dev = strchr(val, ',')) != NULL) {
1440		/*
1441		 * Device's major/minor are given in a specified format.
1442		 * Decode and pack it accordingly.
1443		 */
1444		*dev++ = '\0';
1445		if ((pack = pack_find(val)) == NULL) {
1446			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1447			    "Unknown format `%s'", val);
1448			return ARCHIVE_WARN;
1449		}
1450		argc = 0;
1451		while ((p = la_strsep(&dev, ",")) != NULL) {
1452			if (*p == '\0') {
1453				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1454				    "Missing number");
1455				return ARCHIVE_WARN;
1456			}
1457			if (argc >= MAX_PACK_ARGS) {
1458				archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1459				    "Too many arguments");
1460				return ARCHIVE_WARN;
1461			}
1462			numbers[argc++] = (unsigned long)mtree_atol(&p, 0);
1463		}
1464		if (argc < 2) {
1465			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1466			    "Not enough arguments");
1467			return ARCHIVE_WARN;
1468		}
1469		result = (*pack)(argc, numbers, &error);
1470		if (error != NULL) {
1471			archive_set_error(a, ARCHIVE_ERRNO_FILE_FORMAT,
1472			    "%s", error);
1473			return ARCHIVE_WARN;
1474		}
1475	} else {
1476		/* file system raw value. */
1477		result = (dev_t)mtree_atol(&val, 0);
1478	}
1479	*pdev = result;
1480	return ARCHIVE_OK;
1481#undef MAX_PACK_ARGS
1482}
1483
1484/*
1485 * Parse a single keyword and its value.
1486 */
1487static int
1488parse_keyword(struct archive_read *a, struct mtree *mtree,
1489    struct archive_entry *entry, struct mtree_option *opt, int *parsed_kws)
1490{
1491	char *val, *key;
1492
1493	key = opt->value;
1494
1495	if (*key == '\0')
1496		return (ARCHIVE_OK);
1497
1498	if (strcmp(key, "nochange") == 0) {
1499		*parsed_kws |= MTREE_HAS_NOCHANGE;
1500		return (ARCHIVE_OK);
1501	}
1502	if (strcmp(key, "optional") == 0) {
1503		*parsed_kws |= MTREE_HAS_OPTIONAL;
1504		return (ARCHIVE_OK);
1505	}
1506	if (strcmp(key, "ignore") == 0) {
1507		/*
1508		 * The mtree processing is not recursive, so
1509		 * recursion will only happen for explicitly listed
1510		 * entries.
1511		 */
1512		return (ARCHIVE_OK);
1513	}
1514
1515	val = strchr(key, '=');
1516	if (val == NULL) {
1517		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1518		    "Malformed attribute \"%s\" (%d)", key, key[0]);
1519		return (ARCHIVE_WARN);
1520	}
1521
1522	*val = '\0';
1523	++val;
1524
1525	switch (key[0]) {
1526	case 'c':
1527		if (strcmp(key, "content") == 0
1528		    || strcmp(key, "contents") == 0) {
1529			parse_escapes(val, NULL);
1530			archive_strcpy(&mtree->contents_name, val);
1531			break;
1532		}
1533		if (strcmp(key, "cksum") == 0)
1534			break;
1535		__LA_FALLTHROUGH;
1536	case 'd':
1537		if (strcmp(key, "device") == 0) {
1538			/* stat(2) st_rdev field, e.g. the major/minor IDs
1539			 * of a char/block special file */
1540			int r;
1541			dev_t dev;
1542
1543			*parsed_kws |= MTREE_HAS_DEVICE;
1544			r = parse_device(&dev, &a->archive, val);
1545			if (r == ARCHIVE_OK)
1546				archive_entry_set_rdev(entry, dev);
1547			return r;
1548		}
1549		__LA_FALLTHROUGH;
1550	case 'f':
1551		if (strcmp(key, "flags") == 0) {
1552			*parsed_kws |= MTREE_HAS_FFLAGS;
1553			archive_entry_copy_fflags_text(entry, val);
1554			break;
1555		}
1556		__LA_FALLTHROUGH;
1557	case 'g':
1558		if (strcmp(key, "gid") == 0) {
1559			*parsed_kws |= MTREE_HAS_GID;
1560			archive_entry_set_gid(entry, mtree_atol(&val, 10));
1561			break;
1562		}
1563		if (strcmp(key, "gname") == 0) {
1564			*parsed_kws |= MTREE_HAS_GNAME;
1565			archive_entry_copy_gname(entry, val);
1566			break;
1567		}
1568		__LA_FALLTHROUGH;
1569	case 'i':
1570		if (strcmp(key, "inode") == 0) {
1571			archive_entry_set_ino(entry, mtree_atol(&val, 10));
1572			break;
1573		}
1574		__LA_FALLTHROUGH;
1575	case 'l':
1576		if (strcmp(key, "link") == 0) {
1577			archive_entry_copy_symlink(entry, val);
1578			break;
1579		}
1580		__LA_FALLTHROUGH;
1581	case 'm':
1582		if (strcmp(key, "md5") == 0 || strcmp(key, "md5digest") == 0)
1583			break;
1584		if (strcmp(key, "mode") == 0) {
1585			if (val[0] >= '0' && val[0] <= '7') {
1586				*parsed_kws |= MTREE_HAS_PERM;
1587				archive_entry_set_perm(entry,
1588				    (mode_t)mtree_atol(&val, 8));
1589			} else {
1590				archive_set_error(&a->archive,
1591				    ARCHIVE_ERRNO_FILE_FORMAT,
1592				    "Symbolic or non-octal mode \"%s\" unsupported", val);
1593				return ARCHIVE_WARN;
1594			}
1595			break;
1596		}
1597		__LA_FALLTHROUGH;
1598	case 'n':
1599		if (strcmp(key, "nlink") == 0) {
1600			*parsed_kws |= MTREE_HAS_NLINK;
1601			archive_entry_set_nlink(entry,
1602				(unsigned int)mtree_atol(&val, 10));
1603			break;
1604		}
1605		__LA_FALLTHROUGH;
1606	case 'r':
1607		if (strcmp(key, "resdevice") == 0) {
1608			/* stat(2) st_dev field, e.g. the device ID where the
1609			 * inode resides */
1610			int r;
1611			dev_t dev;
1612
1613			r = parse_device(&dev, &a->archive, val);
1614			if (r == ARCHIVE_OK)
1615				archive_entry_set_dev(entry, dev);
1616			return r;
1617		}
1618		if (strcmp(key, "rmd160") == 0 ||
1619		    strcmp(key, "rmd160digest") == 0)
1620			break;
1621		__LA_FALLTHROUGH;
1622	case 's':
1623		if (strcmp(key, "sha1") == 0 || strcmp(key, "sha1digest") == 0)
1624			break;
1625		if (strcmp(key, "sha256") == 0 ||
1626		    strcmp(key, "sha256digest") == 0)
1627			break;
1628		if (strcmp(key, "sha384") == 0 ||
1629		    strcmp(key, "sha384digest") == 0)
1630			break;
1631		if (strcmp(key, "sha512") == 0 ||
1632		    strcmp(key, "sha512digest") == 0)
1633			break;
1634		if (strcmp(key, "size") == 0) {
1635			archive_entry_set_size(entry, mtree_atol(&val, 10));
1636			break;
1637		}
1638		__LA_FALLTHROUGH;
1639	case 't':
1640		if (strcmp(key, "tags") == 0) {
1641			/*
1642			 * Comma delimited list of tags.
1643			 * Ignore the tags for now, but the interface
1644			 * should be extended to allow inclusion/exclusion.
1645			 */
1646			break;
1647		}
1648		if (strcmp(key, "time") == 0) {
1649			int64_t m;
1650			int64_t my_time_t_max = get_time_t_max();
1651			int64_t my_time_t_min = get_time_t_min();
1652			long ns = 0;
1653
1654			*parsed_kws |= MTREE_HAS_MTIME;
1655			m = mtree_atol(&val, 10);
1656			/* Replicate an old mtree bug:
1657			 * 123456789.1 represents 123456789
1658			 * seconds and 1 nanosecond. */
1659			if (*val == '.') {
1660				++val;
1661				ns = (long)mtree_atol(&val, 10);
1662				if (ns < 0)
1663					ns = 0;
1664				else if (ns > 999999999)
1665					ns = 999999999;
1666			}
1667			if (m > my_time_t_max)
1668				m = my_time_t_max;
1669			else if (m < my_time_t_min)
1670				m = my_time_t_min;
1671			archive_entry_set_mtime(entry, (time_t)m, ns);
1672			break;
1673		}
1674		if (strcmp(key, "type") == 0) {
1675			switch (val[0]) {
1676			case 'b':
1677				if (strcmp(val, "block") == 0) {
1678					archive_entry_set_filetype(entry, AE_IFBLK);
1679					break;
1680				}
1681				__LA_FALLTHROUGH;
1682			case 'c':
1683				if (strcmp(val, "char") == 0) {
1684					archive_entry_set_filetype(entry,
1685						AE_IFCHR);
1686					break;
1687				}
1688				__LA_FALLTHROUGH;
1689			case 'd':
1690				if (strcmp(val, "dir") == 0) {
1691					archive_entry_set_filetype(entry,
1692						AE_IFDIR);
1693					break;
1694				}
1695				__LA_FALLTHROUGH;
1696			case 'f':
1697				if (strcmp(val, "fifo") == 0) {
1698					archive_entry_set_filetype(entry,
1699						AE_IFIFO);
1700					break;
1701				}
1702				if (strcmp(val, "file") == 0) {
1703					archive_entry_set_filetype(entry,
1704						AE_IFREG);
1705					break;
1706				}
1707				__LA_FALLTHROUGH;
1708			case 'l':
1709				if (strcmp(val, "link") == 0) {
1710					archive_entry_set_filetype(entry,
1711						AE_IFLNK);
1712					break;
1713				}
1714				__LA_FALLTHROUGH;
1715			default:
1716				archive_set_error(&a->archive,
1717				    ARCHIVE_ERRNO_FILE_FORMAT,
1718				    "Unrecognized file type \"%s\"; "
1719				    "assuming \"file\"", val);
1720				archive_entry_set_filetype(entry, AE_IFREG);
1721				return (ARCHIVE_WARN);
1722			}
1723			*parsed_kws |= MTREE_HAS_TYPE;
1724			break;
1725		}
1726		__LA_FALLTHROUGH;
1727	case 'u':
1728		if (strcmp(key, "uid") == 0) {
1729			*parsed_kws |= MTREE_HAS_UID;
1730			archive_entry_set_uid(entry, mtree_atol(&val, 10));
1731			break;
1732		}
1733		if (strcmp(key, "uname") == 0) {
1734			*parsed_kws |= MTREE_HAS_UNAME;
1735			archive_entry_copy_uname(entry, val);
1736			break;
1737		}
1738		__LA_FALLTHROUGH;
1739	default:
1740		archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1741		    "Unrecognized key %s=%s", key, val);
1742		return (ARCHIVE_WARN);
1743	}
1744	return (ARCHIVE_OK);
1745}
1746
1747static int
1748read_data(struct archive_read *a, const void **buff, size_t *size,
1749    int64_t *offset)
1750{
1751	size_t bytes_to_read;
1752	ssize_t bytes_read;
1753	struct mtree *mtree;
1754
1755	mtree = (struct mtree *)(a->format->data);
1756	if (mtree->fd < 0) {
1757		*buff = NULL;
1758		*offset = 0;
1759		*size = 0;
1760		return (ARCHIVE_EOF);
1761	}
1762	if (mtree->buff == NULL) {
1763		mtree->buffsize = 64 * 1024;
1764		mtree->buff = malloc(mtree->buffsize);
1765		if (mtree->buff == NULL) {
1766			archive_set_error(&a->archive, ENOMEM,
1767			    "Can't allocate memory");
1768			return (ARCHIVE_FATAL);
1769		}
1770	}
1771
1772	*buff = mtree->buff;
1773	*offset = mtree->offset;
1774	if ((int64_t)mtree->buffsize > mtree->cur_size - mtree->offset)
1775		bytes_to_read = (size_t)(mtree->cur_size - mtree->offset);
1776	else
1777		bytes_to_read = mtree->buffsize;
1778	bytes_read = read(mtree->fd, mtree->buff, bytes_to_read);
1779	if (bytes_read < 0) {
1780		archive_set_error(&a->archive, errno, "Can't read");
1781		return (ARCHIVE_WARN);
1782	}
1783	if (bytes_read == 0) {
1784		*size = 0;
1785		return (ARCHIVE_EOF);
1786	}
1787	mtree->offset += bytes_read;
1788	*size = bytes_read;
1789	return (ARCHIVE_OK);
1790}
1791
1792/* Skip does nothing except possibly close the contents file. */
1793static int
1794skip(struct archive_read *a)
1795{
1796	struct mtree *mtree;
1797
1798	mtree = (struct mtree *)(a->format->data);
1799	if (mtree->fd >= 0) {
1800		close(mtree->fd);
1801		mtree->fd = -1;
1802	}
1803	return (ARCHIVE_OK);
1804}
1805
1806/*
1807 * Since parsing backslash sequences always makes strings shorter,
1808 * we can always do this conversion in-place.
1809 */
1810static void
1811parse_escapes(char *src, struct mtree_entry *mentry)
1812{
1813	char *dest = src;
1814	char c;
1815
1816	if (mentry != NULL && strcmp(src, ".") == 0)
1817		mentry->full = 1;
1818
1819	while (*src != '\0') {
1820		c = *src++;
1821		if (c == '/' && mentry != NULL)
1822			mentry->full = 1;
1823		if (c == '\\') {
1824			switch (src[0]) {
1825			case '0':
1826				if (src[1] < '0' || src[1] > '7') {
1827					c = 0;
1828					++src;
1829					break;
1830				}
1831				/* FALLTHROUGH */
1832			case '1':
1833			case '2':
1834			case '3':
1835				if (src[1] >= '0' && src[1] <= '7' &&
1836				    src[2] >= '0' && src[2] <= '7') {
1837					c = (src[0] - '0') << 6;
1838					c |= (src[1] - '0') << 3;
1839					c |= (src[2] - '0');
1840					src += 3;
1841				}
1842				break;
1843			case 'a':
1844				c = '\a';
1845				++src;
1846				break;
1847			case 'b':
1848				c = '\b';
1849				++src;
1850				break;
1851			case 'f':
1852				c = '\f';
1853				++src;
1854				break;
1855			case 'n':
1856				c = '\n';
1857				++src;
1858				break;
1859			case 'r':
1860				c = '\r';
1861				++src;
1862				break;
1863			case 's':
1864				c = ' ';
1865				++src;
1866				break;
1867			case 't':
1868				c = '\t';
1869				++src;
1870				break;
1871			case 'v':
1872				c = '\v';
1873				++src;
1874				break;
1875			case '\\':
1876				c = '\\';
1877				++src;
1878				break;
1879			}
1880		}
1881		*dest++ = c;
1882	}
1883	*dest = '\0';
1884}
1885
1886/* Parse a hex digit. */
1887static int
1888parsedigit(char c)
1889{
1890	if (c >= '0' && c <= '9')
1891		return c - '0';
1892	else if (c >= 'a' && c <= 'f')
1893		return c - 'a';
1894	else if (c >= 'A' && c <= 'F')
1895		return c - 'A';
1896	else
1897		return -1;
1898}
1899
1900/*
1901 * Note that this implementation does not (and should not!) obey
1902 * locale settings; you cannot simply substitute strtol here, since
1903 * it does obey locale.
1904 */
1905static int64_t
1906mtree_atol(char **p, int base)
1907{
1908	int64_t l, limit;
1909	int digit, last_digit_limit;
1910
1911	if (base == 0) {
1912		if (**p != '0')
1913			base = 10;
1914		else if ((*p)[1] == 'x' || (*p)[1] == 'X') {
1915			*p += 2;
1916			base = 16;
1917		} else {
1918			base = 8;
1919		}
1920	}
1921
1922	if (**p == '-') {
1923		limit = INT64_MIN / base;
1924		last_digit_limit = INT64_MIN % base;
1925		++(*p);
1926
1927		l = 0;
1928		digit = parsedigit(**p);
1929		while (digit >= 0 && digit < base) {
1930			if (l < limit || (l == limit && digit > last_digit_limit))
1931				return INT64_MIN;
1932			l = (l * base) - digit;
1933			digit = parsedigit(*++(*p));
1934		}
1935		return l;
1936	} else {
1937		limit = INT64_MAX / base;
1938		last_digit_limit = INT64_MAX % base;
1939
1940		l = 0;
1941		digit = parsedigit(**p);
1942		while (digit >= 0 && digit < base) {
1943			if (l > limit || (l == limit && digit > last_digit_limit))
1944				return INT64_MAX;
1945			l = (l * base) + digit;
1946			digit = parsedigit(*++(*p));
1947		}
1948		return l;
1949	}
1950}
1951
1952/*
1953 * Returns length of line (including trailing newline)
1954 * or negative on error.  'start' argument is updated to
1955 * point to first character of line.
1956 */
1957static ssize_t
1958readline(struct archive_read *a, struct mtree *mtree, char **start,
1959    ssize_t limit)
1960{
1961	ssize_t bytes_read;
1962	ssize_t total_size = 0;
1963	ssize_t find_off = 0;
1964	const void *t;
1965	void *nl;
1966	char *u;
1967
1968	/* Accumulate line in a line buffer. */
1969	for (;;) {
1970		/* Read some more. */
1971		t = __archive_read_ahead(a, 1, &bytes_read);
1972		if (t == NULL)
1973			return (0);
1974		if (bytes_read < 0)
1975			return (ARCHIVE_FATAL);
1976		nl = memchr(t, '\n', bytes_read);
1977		/* If we found '\n', trim the read to end exactly there. */
1978		if (nl != NULL) {
1979			bytes_read = ((const char *)nl) - ((const char *)t) + 1;
1980		}
1981		if (total_size + bytes_read + 1 > limit) {
1982			archive_set_error(&a->archive,
1983			    ARCHIVE_ERRNO_FILE_FORMAT,
1984			    "Line too long");
1985			return (ARCHIVE_FATAL);
1986		}
1987		if (archive_string_ensure(&mtree->line,
1988			total_size + bytes_read + 1) == NULL) {
1989			archive_set_error(&a->archive, ENOMEM,
1990			    "Can't allocate working buffer");
1991			return (ARCHIVE_FATAL);
1992		}
1993		/* Append new bytes to string. */
1994		memcpy(mtree->line.s + total_size, t, bytes_read);
1995		__archive_read_consume(a, bytes_read);
1996		total_size += bytes_read;
1997		mtree->line.s[total_size] = '\0';
1998
1999		for (u = mtree->line.s + find_off; *u; ++u) {
2000			if (u[0] == '\n') {
2001				/* Ends with unescaped newline. */
2002				*start = mtree->line.s;
2003				return total_size;
2004			} else if (u[0] == '#') {
2005				/* Ends with comment sequence #...\n */
2006				if (nl == NULL) {
2007					/* But we've not found the \n yet */
2008					break;
2009				}
2010			} else if (u[0] == '\\') {
2011				if (u[1] == '\n') {
2012					/* Trim escaped newline. */
2013					total_size -= 2;
2014					mtree->line.s[total_size] = '\0';
2015					break;
2016				} else if (u[1] != '\0') {
2017					/* Skip the two-char escape sequence */
2018					++u;
2019				}
2020			}
2021		}
2022		find_off = u - mtree->line.s;
2023	}
2024}
2025