1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * apprentice - make one pass through /etc/magic, learning its secrets.
30 */
31
32#include "file.h"
33
34#ifndef	lint
35FILE_RCSID("@(#)$File: apprentice.c,v 1.342 2023/07/17 14:38:35 christos Exp $")
36#endif	/* lint */
37
38#include "magic.h"
39#include <stdlib.h>
40#ifdef HAVE_UNISTD_H
41#include <unistd.h>
42#endif
43#include <stddef.h>
44#include <string.h>
45#include <assert.h>
46#include <ctype.h>
47#include <fcntl.h>
48#ifdef QUICK
49#include <sys/mman.h>
50#endif
51#include <dirent.h>
52#include <limits.h>
53#ifdef HAVE_BYTESWAP_H
54#include <byteswap.h>
55#endif
56#ifdef HAVE_SYS_BSWAP_H
57#include <sys/bswap.h>
58#endif
59
60
/*
 * EATAB: advance the pointer variable `l' (taken from the enclosing
 * scope, not passed as an argument) past a run of ASCII whitespace.
 * Expands to a brace-enclosed block.
 */
#define	EATAB {while (isascii(CAST(unsigned char, *l)) && \
		      isspace(CAST(unsigned char, *l)))  ++l;}
/*
 * LOWCASE: yields tolower(l) when l is an upper-case letter and l itself
 * otherwise.  The argument is evaluated more than once, so it must be
 * free of side effects.
 */
#define LOWCASE(l) (isupper(CAST(unsigned char, l)) ? \
			tolower(CAST(unsigned char, l)) : (l))
65/*
66 * Work around a bug in headers on Digital Unix.
67 * At least confirmed for: OSF1 V4.0 878
68 */
69#if defined(__osf__) && defined(__DECC)
70#ifdef MAP_FAILED
71#undef MAP_FAILED
72#endif
73#endif
74
75#ifndef MAP_FAILED
76#define MAP_FAILED (void *) -1
77#endif
78
79#ifndef MAP_FILE
80#define MAP_FILE 0
81#endif
82
/*
 * Allocation step sizes.
 * NOTE(review): not referenced in this part of the file; presumably used
 * when growing the entry arrays while parsing -- confirm at the use sites.
 */
#define ALLOC_CHUNK	CAST(size_t, 10)
#define ALLOC_INCR	CAST(size_t, 200)

/*
 * How the storage behind a struct magic_map must be released; see
 * apprentice_unmap():
 *	USER	- nothing but the map structure itself is freed
 *	MALLOC	- the buffer (and any separately allocated arrays) is free()d
 *	MMAP	- the buffer is munmap()ed (only when built with QUICK)
 */
#define MAP_TYPE_USER	0
#define MAP_TYPE_MALLOC	1
#define MAP_TYPE_MMAP	2
89
/*
 * A magic entry as handled by the sort callback: mp points at its magic
 * record(s) and cont_count is what apprentice_sort() passes to
 * file_magic_strength() as the record count.
 * NOTE(review): max_count is presumably the allocated capacity of mp --
 * confirm against the parser.
 */
struct magic_entry {
	struct magic *mp;
	uint32_t cont_count;
	uint32_t max_count;
};
95
/*
 * A collection of magic_entry items.
 * NOTE(review): count/max look like "entries in use"/"entries allocated"
 * for a growable array; not used in this part of the file -- confirm.
 */
struct magic_entry_set {
	struct magic_entry *me;
	uint32_t count;
	uint32_t max;
};
101
/*
 * One loaded magic database.
 *	p, len	- the backing buffer and its size; released by
 *		  apprentice_unmap() according to `type'
 *	type	- one of the MAP_TYPE_* constants
 *	magic	- per-set array of magic records
 *	nmagic	- number of records in the matching magic[] array
 */
struct magic_map {
	void *p;
	size_t len;
	int type;
	struct magic *magic[MAGIC_SETS];
	uint32_t nmagic[MAGIC_SETS];
};
109
/*
 * Lookup tables indexed by FILE_* type code, filled in from type_tbl by
 * init_file_tables(): the FILE_FMT_* format class and the keyword name
 * of every type.  The *_n* constants give the table sizes.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
114
115file_private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
116file_private int hextoint(int);
117file_private const char *getstr(struct magic_set *, struct magic *, const char *,
118    int);
119file_private int parse(struct magic_set *, struct magic_entry *, const char *,
120    size_t, int);
121file_private void eatsize(const char **);
122file_private int apprentice_1(struct magic_set *, const char *, int);
123file_private ssize_t apprentice_magic_strength_1(const struct magic *);
124file_private int apprentice_sort(const void *, const void *);
125file_private void apprentice_list(struct mlist *, int );
126file_private struct magic_map *apprentice_load(struct magic_set *,
127    const char *, int);
128file_private struct mlist *mlist_alloc(void);
129file_private void mlist_free_all(struct magic_set *);
130file_private void mlist_free(struct mlist *);
131file_private void byteswap(struct magic *, uint32_t);
132file_private void bs1(struct magic *);
133
134#if defined(HAVE_BYTESWAP_H)
135#define swap2(x)	bswap_16(x)
136#define swap4(x)	bswap_32(x)
137#define swap8(x)	bswap_64(x)
138#elif defined(HAVE_SYS_BSWAP_H)
139#define swap2(x)	bswap16(x)
140#define swap4(x)	bswap32(x)
141#define swap8(x)	bswap64(x)
142#else
143file_private uint16_t swap2(uint16_t);
144file_private uint32_t swap4(uint32_t);
145file_private uint64_t swap8(uint64_t);
146#endif
147
148file_private char *mkdbname(struct magic_set *, const char *, int);
149file_private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
150    size_t);
151file_private struct magic_map *apprentice_map(struct magic_set *, const char *);
152file_private int check_buffer(struct magic_set *, struct magic_map *, const char *);
153file_private void apprentice_unmap(struct magic_map *);
154file_private int apprentice_compile(struct magic_set *, struct magic_map *,
155    const char *);
156file_private int check_format_type(const char *, int, const char **);
157file_private int check_format(struct magic_set *, struct magic *);
158file_private int get_op(char);
159file_private int parse_mime(struct magic_set *, struct magic_entry *, const char *,
160    size_t);
161file_private int parse_strength(struct magic_set *, struct magic_entry *,
162    const char *, size_t);
163file_private int parse_apple(struct magic_set *, struct magic_entry *, const char *,
164    size_t);
165file_private int parse_ext(struct magic_set *, struct magic_entry *, const char *,
166    size_t);
167
168
/*
 * Size of struct magic as compiled; apprentice_1() refuses to do anything
 * when it differs from FILE_MAGICSIZE.
 */
file_private size_t magicsize = sizeof(struct magic);

/*
 * Tab-separated column header text.
 * NOTE(review): not referenced in this part of the file.
 */
file_private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
172
/*
 * Keyword dispatch table: each row holds a keyword, its length, and the
 * parse_<keyword>() handler.  The table ends with an all-NULL row.
 * NOTE(review): presumably consulted for "!:keyword" lines in magic
 * files -- confirm in the parser.
 */
file_private struct {
	const char *name;
	size_t len;
	int (*fun)(struct magic_set *, struct magic_entry *, const char *,
	    size_t);
} bang[] = {
	/* Builds { "name", strlen("name"), parse_name } from a bare name. */
#define	DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
	DECLARE_FIELD(mime),
	DECLARE_FIELD(apple),
	DECLARE_FIELD(ext),
	DECLARE_FIELD(strength),
#undef	DECLARE_FIELD
	{ NULL, 0, NULL }
};
187
#ifdef COMPILE_ONLY

int main(int, char *[]);

/*
 * Stand-alone driver built only with COMPILE_ONLY: compiles the single
 * magic file named on the command line.  Exits 0 on success and 1 on a
 * usage error, a failure to create the magic handle, or a compile error.
 */
int
main(int argc, char *argv[])
{
	struct magic_set *ms;
	char *progname;
	int failed;

	/* Report errors under the basename of argv[0]. */
	progname = strrchr(argv[0], '/');
	if (progname == NULL)
		progname = argv[0];
	else
		progname++;

	if (argc != 2) {
		(void)fprintf(stderr, "Usage: %s file\n", progname);
		return 1;
	}

	ms = magic_open(MAGIC_CHECK);
	if (ms == NULL) {
		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
		return 1;
	}

	failed = (magic_compile(ms, argv[1]) == -1);
	if (failed)
		(void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
	magic_close(ms);
	return failed ? 1 : 0;
}
#endif /* COMPILE_ONLY */
220
/*
 * One row of a type keyword table.
 *	name	- the keyword as written in a magic file
 *	len	- length of name; 0 marks the terminating row
 *	type	- the FILE_* type code
 *	format	- the FILE_FMT_* format class
 */
struct type_tbl_s {
	const char name[16];
	const size_t len;
	const int type;
	const int format;
};
227
/*
 * Table of the type keywords, terminated by a zero-length row.
 * get_type() returns the first row whose name is a prefix of the input,
 * so the order of the rows matters.  init_file_tables() asserts that the
 * number of rows before the terminator equals FILE_NAMES_SIZE.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
	/* XX(s) expands to the name and its length; XX_NULL to the sentinel */
# define XX(s)		s, (sizeof(s) - 1)
# define XX_NULL	"", 0
	{ XX("invalid"),	FILE_INVALID,		FILE_FMT_NONE },
	{ XX("byte"),		FILE_BYTE,		FILE_FMT_NUM },
	{ XX("short"),		FILE_SHORT,		FILE_FMT_NUM },
	{ XX("default"),	FILE_DEFAULT,		FILE_FMT_NONE },
	{ XX("long"),		FILE_LONG,		FILE_FMT_NUM },
	{ XX("string"),		FILE_STRING,		FILE_FMT_STR },
	{ XX("date"),		FILE_DATE,		FILE_FMT_STR },
	{ XX("beshort"),	FILE_BESHORT,		FILE_FMT_NUM },
	{ XX("belong"),		FILE_BELONG,		FILE_FMT_NUM },
	{ XX("bedate"),		FILE_BEDATE,		FILE_FMT_STR },
	{ XX("leshort"),	FILE_LESHORT,		FILE_FMT_NUM },
	{ XX("lelong"),		FILE_LELONG,		FILE_FMT_NUM },
	{ XX("ledate"),		FILE_LEDATE,		FILE_FMT_STR },
	{ XX("pstring"),	FILE_PSTRING,		FILE_FMT_STR },
	{ XX("ldate"),		FILE_LDATE,		FILE_FMT_STR },
	{ XX("beldate"),	FILE_BELDATE,		FILE_FMT_STR },
	{ XX("leldate"),	FILE_LELDATE,		FILE_FMT_STR },
	{ XX("regex"),		FILE_REGEX,		FILE_FMT_STR },
	{ XX("bestring16"),	FILE_BESTRING16,	FILE_FMT_STR },
	{ XX("lestring16"),	FILE_LESTRING16,	FILE_FMT_STR },
	{ XX("search"),		FILE_SEARCH,		FILE_FMT_STR },
	{ XX("medate"),		FILE_MEDATE,		FILE_FMT_STR },
	{ XX("meldate"),	FILE_MELDATE,		FILE_FMT_STR },
	{ XX("melong"),		FILE_MELONG,		FILE_FMT_NUM },
	{ XX("quad"),		FILE_QUAD,		FILE_FMT_QUAD },
	{ XX("lequad"),		FILE_LEQUAD,		FILE_FMT_QUAD },
	{ XX("bequad"),		FILE_BEQUAD,		FILE_FMT_QUAD },
	{ XX("qdate"),		FILE_QDATE,		FILE_FMT_STR },
	{ XX("leqdate"),	FILE_LEQDATE,		FILE_FMT_STR },
	{ XX("beqdate"),	FILE_BEQDATE,		FILE_FMT_STR },
	{ XX("qldate"),		FILE_QLDATE,		FILE_FMT_STR },
	{ XX("leqldate"),	FILE_LEQLDATE,		FILE_FMT_STR },
	{ XX("beqldate"),	FILE_BEQLDATE,		FILE_FMT_STR },
	{ XX("float"),		FILE_FLOAT,		FILE_FMT_FLOAT },
	{ XX("befloat"),	FILE_BEFLOAT,		FILE_FMT_FLOAT },
	{ XX("lefloat"),	FILE_LEFLOAT,		FILE_FMT_FLOAT },
	{ XX("double"),		FILE_DOUBLE,		FILE_FMT_DOUBLE },
	{ XX("bedouble"),	FILE_BEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("ledouble"),	FILE_LEDOUBLE,		FILE_FMT_DOUBLE },
	{ XX("leid3"),		FILE_LEID3,		FILE_FMT_NUM },
	{ XX("beid3"),		FILE_BEID3,		FILE_FMT_NUM },
	{ XX("indirect"),	FILE_INDIRECT,		FILE_FMT_NUM },
	{ XX("qwdate"),		FILE_QWDATE,		FILE_FMT_STR },
	{ XX("leqwdate"),	FILE_LEQWDATE,		FILE_FMT_STR },
	{ XX("beqwdate"),	FILE_BEQWDATE,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_NONE },
	{ XX("use"),		FILE_USE,		FILE_FMT_NONE },
	{ XX("clear"),		FILE_CLEAR,		FILE_FMT_NONE },
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("guid"),		FILE_GUID,		FILE_FMT_STR },
	{ XX("offset"),		FILE_OFFSET,		FILE_FMT_QUAD },
	{ XX("bevarint"),	FILE_BEVARINT,		FILE_FMT_STR },
	{ XX("levarint"),	FILE_LEVARINT,		FILE_FMT_STR },
	{ XX("msdosdate"),	FILE_MSDOSDATE,		FILE_FMT_STR },
	{ XX("lemsdosdate"),	FILE_LEMSDOSDATE,	FILE_FMT_STR },
	{ XX("bemsdosdate"),	FILE_BEMSDOSDATE,	FILE_FMT_STR },
	{ XX("msdostime"),	FILE_MSDOSTIME,		FILE_FMT_STR },
	{ XX("lemsdostime"),	FILE_LEMSDOSTIME,	FILE_FMT_STR },
	{ XX("bemsdostime"),	FILE_BEMSDOSTIME,	FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
300
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * Same row layout and zero-length terminator as type_tbl, so the table
 * can be searched with get_type().
 */
static const struct type_tbl_s special_tbl[] = {
	{ XX("der"),		FILE_DER,		FILE_FMT_STR },
	{ XX("name"),		FILE_NAME,		FILE_FMT_STR },
	{ XX("use"),		FILE_USE,		FILE_FMT_STR },
	{ XX("octal"),		FILE_OCTAL,		FILE_FMT_STR },
	{ XX_NULL,		FILE_INVALID,		FILE_FMT_NONE },
};
/* The row-building helpers are only meaningful for the tables above. */
# undef XX
# undef XX_NULL
314
315file_private int
316get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
317{
318	const struct type_tbl_s *p;
319
320	for (p = tbl; p->len; p++) {
321		if (strncmp(l, p->name, p->len) == 0) {
322			if (t)
323				*t = l + p->len;
324			break;
325		}
326	}
327	return p->type;
328}
329
330file_private off_t
331maxoff_t(void) {
332	if (/*CONSTCOND*/sizeof(off_t) == sizeof(int))
333		return CAST(off_t, INT_MAX);
334	if (/*CONSTCOND*/sizeof(off_t) == sizeof(long))
335		return CAST(off_t, LONG_MAX);
336	return 0x7fffffff;
337}
338
339file_private int
340get_standard_integer_type(const char *l, const char **t)
341{
342	int type;
343
344	if (isalpha(CAST(unsigned char, l[1]))) {
345		switch (l[1]) {
346		case 'C':
347			/* "dC" and "uC" */
348			type = FILE_BYTE;
349			break;
350		case 'S':
351			/* "dS" and "uS" */
352			type = FILE_SHORT;
353			break;
354		case 'I':
355		case 'L':
356			/*
357			 * "dI", "dL", "uI", and "uL".
358			 *
359			 * XXX - the actual Single UNIX Specification says
360			 * that "L" means "long", as in the C data type,
361			 * but we treat it as meaning "4-byte integer".
362			 * Given that the OS X version of file 5.04 did
363			 * the same, I guess that passes the actual SUS
364			 * validation suite; having "dL" be dependent on
365			 * how big a "long" is on the machine running
366			 * "file" is silly.
367			 */
368			type = FILE_LONG;
369			break;
370		case 'Q':
371			/* "dQ" and "uQ" */
372			type = FILE_QUAD;
373			break;
374		default:
375			/* "d{anything else}", "u{anything else}" */
376			return FILE_INVALID;
377		}
378		l += 2;
379	} else if (isdigit(CAST(unsigned char, l[1]))) {
380		/*
381		 * "d{num}" and "u{num}"; we only support {num} values
382		 * of 1, 2, 4, and 8 - the Single UNIX Specification
383		 * doesn't say anything about whether arbitrary
384		 * values should be supported, but both the Solaris 10
385		 * and OS X Mountain Lion versions of file passed the
386		 * Single UNIX Specification validation suite, and
387		 * neither of them support values bigger than 8 or
388		 * non-power-of-2 values.
389		 */
390		if (isdigit(CAST(unsigned char, l[2]))) {
391			/* Multi-digit, so > 9 */
392			return FILE_INVALID;
393		}
394		switch (l[1]) {
395		case '1':
396			type = FILE_BYTE;
397			break;
398		case '2':
399			type = FILE_SHORT;
400			break;
401		case '4':
402			type = FILE_LONG;
403			break;
404		case '8':
405			type = FILE_QUAD;
406			break;
407		default:
408			/* XXX - what about 3, 5, 6, or 7? */
409			return FILE_INVALID;
410		}
411		l += 2;
412	} else {
413		/*
414		 * "d" or "u" by itself.
415		 */
416		type = FILE_LONG;
417		++l;
418	}
419	if (t)
420		*t = l;
421	return type;
422}
423
424file_private void
425init_file_tables(void)
426{
427	static int done = 0;
428	const struct type_tbl_s *p;
429
430	if (done)
431		return;
432	done++;
433
434	for (p = type_tbl; p->len; p++) {
435		assert(p->type < FILE_NAMES_SIZE);
436		file_names[p->type] = p->name;
437		file_formats[p->type] = p->format;
438	}
439	assert(p - type_tbl == FILE_NAMES_SIZE);
440}
441
442file_private int
443add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
444{
445	struct mlist *ml;
446
447	mlp->map = NULL;
448	if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
449		return -1;
450
451	ml->map = idx == 0 ? map : NULL;
452	ml->magic = map->magic[idx];
453	ml->nmagic = map->nmagic[idx];
454	if (ml->nmagic) {
455		ml->magic_rxcomp = CAST(file_regex_t **,
456		    calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
457		if (ml->magic_rxcomp == NULL) {
458			free(ml);
459			return -1;
460		}
461	} else
462		ml->magic_rxcomp = NULL;
463	mlp->prev->next = ml;
464	ml->prev = mlp->prev;
465	ml->next = mlp;
466	mlp->prev = ml;
467	return 0;
468}
469
470/*
471 * Handle one file or directory.
472 */
473file_private int
474apprentice_1(struct magic_set *ms, const char *fn, int action)
475{
476	struct magic_map *map;
477#ifndef COMPILE_ONLY
478	size_t i;
479#endif
480
481	if (magicsize != FILE_MAGICSIZE) {
482		file_error(ms, 0, "magic element size %lu != %lu",
483		    CAST(unsigned long, sizeof(*map->magic[0])),
484		    CAST(unsigned long, FILE_MAGICSIZE));
485		return -1;
486	}
487
488	if (action == FILE_COMPILE) {
489		map = apprentice_load(ms, fn, action);
490		if (map == NULL)
491			return -1;
492		return apprentice_compile(ms, map, fn);
493	}
494
495#ifndef COMPILE_ONLY
496	map = apprentice_map(ms, fn);
497	if (map == NULL) {
498		if (ms->flags & MAGIC_CHECK)
499			file_magwarn(NULL, "using regular magic file `%s'", fn);
500		map = apprentice_load(ms, fn, action);
501		if (map == NULL)
502			return -1;
503	}
504
505	for (i = 0; i < MAGIC_SETS; i++) {
506		if (add_mlist(ms->mlist[i], map, i) == -1) {
507			/* failed to add to any list, free explicitly */
508			if (i == 0)
509				apprentice_unmap(map);
510			else
511				mlist_free_all(ms);
512			file_oomem(ms, sizeof(*ms->mlist[0]));
513			return -1;
514		}
515	}
516
517	if (action == FILE_LIST) {
518		for (i = 0; i < MAGIC_SETS; i++) {
519			printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
520			    i);
521			apprentice_list(ms->mlist[i], BINTEST);
522			printf("Text patterns:\n");
523			apprentice_list(ms->mlist[i], TEXTTEST);
524		}
525	}
526	return 0;
527#else
528	return 0;
529#endif /* COMPILE_ONLY */
530}
531
532file_protected void
533file_ms_free(struct magic_set *ms)
534{
535	size_t i;
536	if (ms == NULL)
537		return;
538	for (i = 0; i < MAGIC_SETS; i++)
539		mlist_free(ms->mlist[i]);
540	free(ms->o.pbuf);
541	free(ms->o.buf);
542	free(ms->c.li);
543#ifdef USE_C_LOCALE
544	freelocale(ms->c_lc_ctype);
545#endif
546	free(ms);
547}
548
549file_protected struct magic_set *
550file_ms_alloc(int flags)
551{
552	struct magic_set *ms;
553	size_t i, len;
554
555	if ((ms = CAST(struct magic_set *, calloc(CAST(size_t, 1u),
556	    sizeof(*ms)))) == NULL)
557		return NULL;
558
559	if (magic_setflags(ms, flags) == -1) {
560		errno = EINVAL;
561		goto free;
562	}
563
564	ms->o.buf = ms->o.pbuf = NULL;
565	ms->o.blen = 0;
566	len = (ms->c.len = 10) * sizeof(*ms->c.li);
567
568	if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
569		goto free;
570
571	ms->event_flags = 0;
572	ms->error = -1;
573	for (i = 0; i < MAGIC_SETS; i++)
574		ms->mlist[i] = NULL;
575	ms->file = "unknown";
576	ms->line = 0;
577	ms->indir_max = FILE_INDIR_MAX;
578	ms->name_max = FILE_NAME_MAX;
579	ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
580	ms->elf_shsize_max = FILE_ELF_SHSIZE_MAX;
581	ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
582	ms->elf_notes_max = FILE_ELF_NOTES_MAX;
583	ms->regex_max = FILE_REGEX_MAX;
584	ms->bytes_max = FILE_BYTES_MAX;
585	ms->encoding_max = FILE_ENCODING_MAX;
586#ifdef USE_C_LOCALE
587	ms->c_lc_ctype = newlocale(LC_CTYPE_MASK, "C", 0);
588	assert(ms->c_lc_ctype != NULL);
589#endif
590	return ms;
591free:
592	free(ms);
593	return NULL;
594}
595
596file_private void
597apprentice_unmap(struct magic_map *map)
598{
599	size_t i;
600	char *p;
601	if (map == NULL)
602		return;
603
604	switch (map->type) {
605	case MAP_TYPE_USER:
606		break;
607	case MAP_TYPE_MALLOC:
608		p = CAST(char *, map->p);
609		for (i = 0; i < MAGIC_SETS; i++) {
610			char *b = RCAST(char *, map->magic[i]);
611			if (p != NULL && b >= p && b <= p + map->len)
612				continue;
613			free(b);
614		}
615		free(p);
616		break;
617#ifdef QUICK
618	case MAP_TYPE_MMAP:
619		if (map->p && map->p != MAP_FAILED)
620			(void)munmap(map->p, map->len);
621		break;
622#endif
623	default:
624		fprintf(stderr, "Bad map type %d", map->type);
625		abort();
626	}
627	free(map);
628}
629
630file_private struct mlist *
631mlist_alloc(void)
632{
633	struct mlist *mlist;
634	if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
635		return NULL;
636	}
637	mlist->next = mlist->prev = mlist;
638	return mlist;
639}
640
641file_private void
642mlist_free_all(struct magic_set *ms)
643{
644	size_t i;
645
646	for (i = 0; i < MAGIC_SETS; i++) {
647		mlist_free(ms->mlist[i]);
648		ms->mlist[i] = NULL;
649	}
650}
651
652file_private void
653mlist_free_one(struct mlist *ml)
654{
655	size_t i;
656
657	if (ml->map)
658		apprentice_unmap(CAST(struct magic_map *, ml->map));
659
660	for (i = 0; i < ml->nmagic; ++i) {
661		if (ml->magic_rxcomp[i]) {
662			file_regfree(ml->magic_rxcomp[i]);
663			free(ml->magic_rxcomp[i]);
664			ml->magic_rxcomp[i] = NULL;
665		}
666	}
667	free(ml->magic_rxcomp);
668	ml->magic_rxcomp = NULL;
669	free(ml);
670}
671
672file_private void
673mlist_free(struct mlist *mlist)
674{
675	struct mlist *ml, *next;
676
677	if (mlist == NULL)
678		return;
679
680	for (ml = mlist->next; ml != mlist;) {
681		next = ml->next;
682		mlist_free_one(ml);
683		ml = next;
684	}
685	mlist_free_one(mlist);
686}
687
688#ifndef COMPILE_ONLY
689/* void **bufs: an array of compiled magic files */
690file_protected int
691buffer_apprentice(struct magic_set *ms, struct magic **bufs,
692    size_t *sizes, size_t nbufs)
693{
694	size_t i, j;
695	struct magic_map *map;
696
697	if (nbufs == 0)
698		return -1;
699
700	(void)file_reset(ms, 0);
701
702	init_file_tables();
703
704	for (i = 0; i < MAGIC_SETS; i++) {
705		mlist_free(ms->mlist[i]);
706		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
707			file_oomem(ms, sizeof(*ms->mlist[0]));
708			goto fail;
709		}
710	}
711
712	for (i = 0; i < nbufs; i++) {
713		map = apprentice_buf(ms, bufs[i], sizes[i]);
714		if (map == NULL)
715			goto fail;
716
717		for (j = 0; j < MAGIC_SETS; j++) {
718			if (add_mlist(ms->mlist[j], map, j) == -1) {
719				file_oomem(ms, sizeof(*ms->mlist[0]));
720				goto fail;
721			}
722		}
723	}
724
725	return 0;
726fail:
727	mlist_free_all(ms);
728	return -1;
729}
730#endif
731
732/* const char *fn: list of magic files and directories */
733file_protected int
734file_apprentice(struct magic_set *ms, const char *fn, int action)
735{
736	char *p, *mfn;
737	int fileerr, errs = -1;
738	size_t i, j;
739
740	(void)file_reset(ms, 0);
741
742	if ((fn = magic_getpath(fn, action)) == NULL)
743		return -1;
744
745	init_file_tables();
746
747	if ((mfn = strdup(fn)) == NULL) {
748		file_oomem(ms, strlen(fn));
749		return -1;
750	}
751
752	for (i = 0; i < MAGIC_SETS; i++) {
753		mlist_free(ms->mlist[i]);
754		if ((ms->mlist[i] = mlist_alloc()) == NULL) {
755			file_oomem(ms, sizeof(*ms->mlist[0]));
756			for (j = 0; j < i; j++) {
757				mlist_free(ms->mlist[j]);
758				ms->mlist[j] = NULL;
759			}
760			free(mfn);
761			return -1;
762		}
763	}
764	fn = mfn;
765
766	while (fn) {
767		p = CCAST(char *, strchr(fn, PATHSEP));
768		if (p)
769			*p++ = '\0';
770		if (*fn == '\0')
771			break;
772		fileerr = apprentice_1(ms, fn, action);
773		errs = MAX(errs, fileerr);
774		fn = p;
775	}
776
777	free(mfn);
778
779	if (errs == -1) {
780		for (i = 0; i < MAGIC_SETS; i++) {
781			mlist_free(ms->mlist[i]);
782			ms->mlist[i] = NULL;
783		}
784		file_error(ms, 0, "could not find any valid magic files!");
785		return -1;
786	}
787
788#if 0
789	/*
790	 * Always leave the database loaded
791	 */
792	if (action == FILE_LOAD)
793		return 0;
794
795	for (i = 0; i < MAGIC_SETS; i++) {
796		mlist_free(ms->mlist[i]);
797		ms->mlist[i] = NULL;
798	}
799#endif
800
801	switch (action) {
802	case FILE_LOAD:
803	case FILE_COMPILE:
804	case FILE_CHECK:
805	case FILE_LIST:
806		return 0;
807	default:
808		file_error(ms, 0, "Invalid action %d", action);
809		return -1;
810	}
811}
812
/*
 * Estimate how specific a regular expression is by counting the
 * characters in it that must match literally:
 *	- a backslash together with the character it escapes counts 1
 *	- the operators ? * . + ^ $ count 0
 *	- a [...] expression counts 1 (its closing ']' is what gets counted)
 *	- a {...} expression counts 0
 *	- every other character counts 1
 * The result is never less than 1, even for an empty expression.
 */
file_private size_t
nonmagic(const char *str)
{
	const char *cur = str;
	size_t weight = 0;

	while (*cur != '\0') {
		switch (*cur) {
		case '\\':
			/* Swallow the escaped character unless at the end. */
			if (cur[1] != '\0')
				cur++;
			weight++;
			break;
		case '?':
		case '*':
		case '.':
		case '+':
		case '^':
		case '$':
			break;
		case '[':
			do {
				cur++;
			} while (*cur != '\0' && *cur != ']');
			/*
			 * Back up one so the advance below lands on the
			 * ']' (counted as an ordinary character) or on the
			 * terminator.
			 */
			cur--;
			break;
		case '{':
			do {
				cur++;
			} while (*cur != '\0' && *cur != '}');
			/* Unterminated brace: do not run past the end. */
			if (*cur == '\0')
				cur--;
			break;
		default:
			weight++;
			break;
		}
		cur++;
	}

	return weight != 0 ? weight : 1;
}
861
862
863file_private size_t
864typesize(int type)
865{
866	switch (type) {
867	case FILE_BYTE:
868		return 1;
869
870	case FILE_SHORT:
871	case FILE_LESHORT:
872	case FILE_BESHORT:
873	case FILE_MSDOSDATE:
874	case FILE_BEMSDOSDATE:
875	case FILE_LEMSDOSDATE:
876	case FILE_MSDOSTIME:
877	case FILE_BEMSDOSTIME:
878	case FILE_LEMSDOSTIME:
879		return 2;
880
881	case FILE_LONG:
882	case FILE_LELONG:
883	case FILE_BELONG:
884	case FILE_MELONG:
885		return 4;
886
887	case FILE_DATE:
888	case FILE_LEDATE:
889	case FILE_BEDATE:
890	case FILE_MEDATE:
891	case FILE_LDATE:
892	case FILE_LELDATE:
893	case FILE_BELDATE:
894	case FILE_MELDATE:
895	case FILE_FLOAT:
896	case FILE_BEFLOAT:
897	case FILE_LEFLOAT:
898	case FILE_BEID3:
899	case FILE_LEID3:
900		return 4;
901
902	case FILE_QUAD:
903	case FILE_BEQUAD:
904	case FILE_LEQUAD:
905	case FILE_QDATE:
906	case FILE_LEQDATE:
907	case FILE_BEQDATE:
908	case FILE_QLDATE:
909	case FILE_LEQLDATE:
910	case FILE_BEQLDATE:
911	case FILE_QWDATE:
912	case FILE_LEQWDATE:
913	case FILE_BEQWDATE:
914	case FILE_DOUBLE:
915	case FILE_BEDOUBLE:
916	case FILE_LEDOUBLE:
917	case FILE_OFFSET:
918	case FILE_BEVARINT:
919	case FILE_LEVARINT:
920		return 8;
921
922	case FILE_GUID:
923		return 16;
924
925	default:
926		return FILE_BADSIZE;
927	}
928}
929
930/*
931 * Get weight of this magic entry, for sorting purposes.
932 */
933file_private ssize_t
934apprentice_magic_strength_1(const struct magic *m)
935{
936#define MULT 10U
937	size_t ts, v;
938	ssize_t val = 2 * MULT;	/* baseline strength */
939
940	switch (m->type) {
941	case FILE_DEFAULT:	/* make sure this sorts last */
942		if (m->factor_op != FILE_FACTOR_OP_NONE) {
943			file_magwarn(NULL, "Usupported factor_op in default %d",
944			    m->factor_op);
945		}
946		return 0;
947
948	case FILE_BYTE:
949	case FILE_SHORT:
950	case FILE_LESHORT:
951	case FILE_BESHORT:
952	case FILE_LONG:
953	case FILE_LELONG:
954	case FILE_BELONG:
955	case FILE_MELONG:
956	case FILE_DATE:
957	case FILE_LEDATE:
958	case FILE_BEDATE:
959	case FILE_MEDATE:
960	case FILE_LDATE:
961	case FILE_LELDATE:
962	case FILE_BELDATE:
963	case FILE_MELDATE:
964	case FILE_FLOAT:
965	case FILE_BEFLOAT:
966	case FILE_LEFLOAT:
967	case FILE_QUAD:
968	case FILE_BEQUAD:
969	case FILE_LEQUAD:
970	case FILE_QDATE:
971	case FILE_LEQDATE:
972	case FILE_BEQDATE:
973	case FILE_QLDATE:
974	case FILE_LEQLDATE:
975	case FILE_BEQLDATE:
976	case FILE_QWDATE:
977	case FILE_LEQWDATE:
978	case FILE_BEQWDATE:
979	case FILE_DOUBLE:
980	case FILE_BEDOUBLE:
981	case FILE_LEDOUBLE:
982	case FILE_BEVARINT:
983	case FILE_LEVARINT:
984	case FILE_GUID:
985	case FILE_BEID3:
986	case FILE_LEID3:
987	case FILE_OFFSET:
988	case FILE_MSDOSDATE:
989	case FILE_BEMSDOSDATE:
990	case FILE_LEMSDOSDATE:
991	case FILE_MSDOSTIME:
992	case FILE_BEMSDOSTIME:
993	case FILE_LEMSDOSTIME:
994		ts = typesize(m->type);
995		if (ts == FILE_BADSIZE) {
996			(void)fprintf(stderr, "Bad size for type %d\n",
997			    m->type);
998			abort();
999		}
1000		val += ts * MULT;
1001		break;
1002
1003	case FILE_PSTRING:
1004	case FILE_STRING:
1005	case FILE_OCTAL:
1006		val += m->vallen * MULT;
1007		break;
1008
1009	case FILE_BESTRING16:
1010	case FILE_LESTRING16:
1011		val += m->vallen * MULT / 2;
1012		break;
1013
1014	case FILE_SEARCH:
1015		if (m->vallen == 0)
1016			break;
1017		val += m->vallen * MAX(MULT / m->vallen, 1);
1018		break;
1019
1020	case FILE_REGEX:
1021		v = nonmagic(m->value.s);
1022		val += v * MAX(MULT / v, 1);
1023		break;
1024
1025	case FILE_INDIRECT:
1026	case FILE_NAME:
1027	case FILE_USE:
1028	case FILE_CLEAR:
1029		break;
1030
1031	case FILE_DER:
1032		val += MULT;
1033		break;
1034
1035	default:
1036		(void)fprintf(stderr, "Bad type %d\n", m->type);
1037		abort();
1038	}
1039
1040	switch (m->reln) {
1041	case 'x':	/* matches anything penalize */
1042	case '!':       /* matches almost anything penalize */
1043		val = 0;
1044		break;
1045
1046	case '=':	/* Exact match, prefer */
1047		val += MULT;
1048		break;
1049
1050	case '>':
1051	case '<':	/* comparison match reduce strength */
1052		val -= 2 * MULT;
1053		break;
1054
1055	case '^':
1056	case '&':	/* masking bits, we could count them too */
1057		val -= MULT;
1058		break;
1059
1060	default:
1061		(void)fprintf(stderr, "Bad relation %c\n", m->reln);
1062		abort();
1063	}
1064
1065	return val;
1066}
1067
1068
1069/*ARGSUSED*/
1070file_protected size_t
1071file_magic_strength(const struct magic *m,
1072    size_t nmagic __attribute__((__unused__)))
1073{
1074	ssize_t val = apprentice_magic_strength_1(m);
1075
1076#ifdef notyet
1077	if (m->desc[0] == '\0') {
1078		size_t i;
1079		/*
1080		 * Magic entries with no description get their continuations
1081		 * added
1082		 */
1083		for (i = 1; m[i].cont_level != 0 && i < MIN(nmagic, 3); i++) {
1084			ssize_t v = apprentice_magic_strength_1(&m[i]) >>
1085			    (i + 1);
1086			val += v;
1087			if (m[i].desc[0] != '\0')
1088				break;
1089		}
1090	}
1091#endif
1092
1093	switch (m->factor_op) {
1094	case FILE_FACTOR_OP_NONE:
1095		break;
1096	case FILE_FACTOR_OP_PLUS:
1097		val += m->factor;
1098		break;
1099	case FILE_FACTOR_OP_MINUS:
1100		val -= m->factor;
1101		break;
1102	case FILE_FACTOR_OP_TIMES:
1103		val *= m->factor;
1104		break;
1105	case FILE_FACTOR_OP_DIV:
1106		val /= m->factor;
1107		break;
1108	default:
1109		(void)fprintf(stderr, "Bad factor_op %u\n", m->factor_op);
1110		abort();
1111	}
1112
1113	if (val <= 0)	/* ensure we only return 0 for FILE_DEFAULT */
1114		val = 1;
1115
1116#ifndef notyet
1117	/*
1118	 * Magic entries with no description get a bonus because they depend
1119	 * on subsequent magic entries to print something.
1120	 */
1121	if (m->desc[0] == '\0')
1122		val++;
1123#endif
1124
1125	return val;
1126}
1127
1128/*
1129 * Sort callback for sorting entries by "strength" (basically length)
1130 */
1131file_private int
1132apprentice_sort(const void *a, const void *b)
1133{
1134	const struct magic_entry *ma = CAST(const struct magic_entry *, a);
1135	const struct magic_entry *mb = CAST(const struct magic_entry *, b);
1136	size_t sa = file_magic_strength(ma->mp, ma->cont_count);
1137	size_t sb = file_magic_strength(mb->mp, mb->cont_count);
1138	if (sa == sb)
1139		return 0;
1140	else if (sa > sb)
1141		return -1;
1142	else
1143		return 1;
1144}
1145
1146/*
1147 * Shows sorted patterns list in the order which is used for the matching
1148 */
1149file_private void
1150apprentice_list(struct mlist *mlist, int mode)
1151{
1152	uint32_t magindex, descindex, mimeindex, lineindex;
1153	struct mlist *ml;
1154	for (ml = mlist->next; ml != mlist; ml = ml->next) {
1155		for (magindex = 0; magindex < ml->nmagic; magindex++) {
1156			struct magic *m = &ml->magic[magindex];
1157			if ((m->flag & mode) != mode) {
1158				/* Skip sub-tests */
1159				while (magindex + 1 < ml->nmagic &&
1160				       ml->magic[magindex + 1].cont_level != 0)
1161					++magindex;
1162				continue; /* Skip to next top-level test*/
1163			}
1164
1165			/*
1166			 * Try to iterate over the tree until we find item with
1167			 * description/mimetype.
1168			 */
1169			lineindex = descindex = mimeindex = magindex;
1170			for (; magindex + 1 < ml->nmagic &&
1171			   ml->magic[magindex + 1].cont_level != 0;
1172			   magindex++) {
1173				uint32_t mi = magindex + 1;
1174				if (*ml->magic[descindex].desc == '\0'
1175				    && *ml->magic[mi].desc)
1176					descindex = mi;
1177				if (*ml->magic[mimeindex].mimetype == '\0'
1178				    && *ml->magic[mi].mimetype)
1179					mimeindex = mi;
1180			}
1181
1182			printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
1183			    file_magic_strength(m, ml->nmagic - magindex),
1184			    ml->magic[lineindex].lineno,
1185			    ml->magic[descindex].desc,
1186			    ml->magic[mimeindex].mimetype);
1187		}
1188	}
1189}
1190
1191file_private void
1192set_test_type(struct magic *mstart, struct magic *m)
1193{
1194	switch (m->type) {
1195	case FILE_BYTE:
1196	case FILE_SHORT:
1197	case FILE_LONG:
1198	case FILE_DATE:
1199	case FILE_BESHORT:
1200	case FILE_BELONG:
1201	case FILE_BEDATE:
1202	case FILE_LESHORT:
1203	case FILE_LELONG:
1204	case FILE_LEDATE:
1205	case FILE_LDATE:
1206	case FILE_BELDATE:
1207	case FILE_LELDATE:
1208	case FILE_MEDATE:
1209	case FILE_MELDATE:
1210	case FILE_MELONG:
1211	case FILE_QUAD:
1212	case FILE_LEQUAD:
1213	case FILE_BEQUAD:
1214	case FILE_QDATE:
1215	case FILE_LEQDATE:
1216	case FILE_BEQDATE:
1217	case FILE_QLDATE:
1218	case FILE_LEQLDATE:
1219	case FILE_BEQLDATE:
1220	case FILE_QWDATE:
1221	case FILE_LEQWDATE:
1222	case FILE_BEQWDATE:
1223	case FILE_FLOAT:
1224	case FILE_BEFLOAT:
1225	case FILE_LEFLOAT:
1226	case FILE_DOUBLE:
1227	case FILE_BEDOUBLE:
1228	case FILE_LEDOUBLE:
1229	case FILE_BEVARINT:
1230	case FILE_LEVARINT:
1231	case FILE_DER:
1232	case FILE_GUID:
1233	case FILE_OFFSET:
1234	case FILE_MSDOSDATE:
1235	case FILE_BEMSDOSDATE:
1236	case FILE_LEMSDOSDATE:
1237	case FILE_MSDOSTIME:
1238	case FILE_BEMSDOSTIME:
1239	case FILE_LEMSDOSTIME:
1240	case FILE_OCTAL:
1241		mstart->flag |= BINTEST;
1242		break;
1243	case FILE_STRING:
1244	case FILE_PSTRING:
1245	case FILE_BESTRING16:
1246	case FILE_LESTRING16:
1247		/* Allow text overrides */
1248		if (mstart->str_flags & STRING_TEXTTEST)
1249			mstart->flag |= TEXTTEST;
1250		else
1251			mstart->flag |= BINTEST;
1252		break;
1253	case FILE_REGEX:
1254	case FILE_SEARCH:
1255		/* Check for override */
1256		if (mstart->str_flags & STRING_BINTEST)
1257			mstart->flag |= BINTEST;
1258		if (mstart->str_flags & STRING_TEXTTEST)
1259			mstart->flag |= TEXTTEST;
1260
1261		if (mstart->flag & (TEXTTEST|BINTEST))
1262			break;
1263
1264		/* binary test if pattern is not text */
1265		if (file_looks_utf8(m->value.us, CAST(size_t, m->vallen), NULL,
1266		    NULL) <= 0)
1267			mstart->flag |= BINTEST;
1268		else
1269			mstart->flag |= TEXTTEST;
1270		break;
1271	case FILE_DEFAULT:
1272		/* can't deduce anything; we shouldn't see this at the
1273		   top level anyway */
1274		break;
1275	case FILE_INVALID:
1276	default:
1277		/* invalid search type, but no need to complain here */
1278		break;
1279	}
1280}
1281
1282file_private int
1283addentry(struct magic_set *ms, struct magic_entry *me,
1284   struct magic_entry_set *mset)
1285{
1286	size_t i = me->mp->type == FILE_NAME ? 1 : 0;
1287	if (mset[i].me == NULL || mset[i].count == mset[i].max) {
1288		struct magic_entry *mp;
1289
1290		size_t incr = mset[i].max + ALLOC_INCR;
1291		if ((mp = CAST(struct magic_entry *,
1292		    realloc(mset[i].me, sizeof(*mp) * incr))) ==
1293		    NULL) {
1294			file_oomem(ms, sizeof(*mp) * incr);
1295			return -1;
1296		}
1297		(void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
1298		    ALLOC_INCR);
1299		mset[i].me = mp;
1300		mset[i].max = CAST(uint32_t, incr);
1301		assert(mset[i].max == incr);
1302	}
1303	mset[i].me[mset[i].count++] = *me;
1304	memset(me, 0, sizeof(*me));
1305	return 0;
1306}
1307
1308/*
1309 * Load and parse one file.
1310 */
1311file_private void
1312load_1(struct magic_set *ms, int action, const char *fn, int *errs,
1313   struct magic_entry_set *mset)
1314{
1315	size_t lineno = 0, llen = 0;
1316	char *line = NULL;
1317	ssize_t len;
1318	struct magic_entry me;
1319
1320	FILE *f = fopen(ms->file = fn, "r");
1321	if (f == NULL) {
1322		if (errno != ENOENT)
1323			file_error(ms, errno, "cannot read magic file `%s'",
1324				   fn);
1325		(*errs)++;
1326		return;
1327	}
1328
1329	memset(&me, 0, sizeof(me));
1330	/* read and parse this file */
1331	for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
1332	    ms->line++) {
1333		if (len == 0) /* null line, garbage, etc */
1334			continue;
1335		if (line[len - 1] == '\n') {
1336			lineno++;
1337			line[len - 1] = '\0'; /* delete newline */
1338		}
1339		switch (line[0]) {
1340		case '\0':	/* empty, do not parse */
1341		case '#':	/* comment, do not parse */
1342			continue;
1343		case '!':
1344			if (line[1] == ':') {
1345				size_t i;
1346
1347				for (i = 0; bang[i].name != NULL; i++) {
1348					if (CAST(size_t, len - 2) > bang[i].len &&
1349					    memcmp(bang[i].name, line + 2,
1350					    bang[i].len) == 0)
1351						break;
1352				}
1353				if (bang[i].name == NULL) {
1354					file_error(ms, 0,
1355					    "Unknown !: entry `%s'", line);
1356					(*errs)++;
1357					continue;
1358				}
1359				if (me.mp == NULL) {
1360					file_error(ms, 0,
1361					    "No current entry for :!%s type",
1362						bang[i].name);
1363					(*errs)++;
1364					continue;
1365				}
1366				if ((*bang[i].fun)(ms, &me,
1367				    line + bang[i].len + 2,
1368				    len - bang[i].len - 2) != 0) {
1369					(*errs)++;
1370					continue;
1371				}
1372				continue;
1373			}
1374			/*FALLTHROUGH*/
1375		default:
1376		again:
1377			switch (parse(ms, &me, line, lineno, action)) {
1378			case 0:
1379				continue;
1380			case 1:
1381				(void)addentry(ms, &me, mset);
1382				goto again;
1383			default:
1384				(*errs)++;
1385				break;
1386			}
1387		}
1388	}
1389	if (me.mp)
1390		(void)addentry(ms, &me, mset);
1391	free(line);
1392	(void)fclose(f);
1393}
1394
1395/*
1396 * parse a file or directory of files
1397 * const char *fn: name of magic file or directory
1398 */
1399file_private int
1400cmpstrp(const void *p1, const void *p2)
1401{
1402        return strcmp(*RCAST(char *const *, p1), *RCAST(char *const *, p2));
1403}
1404
1405
1406file_private uint32_t
1407set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1408    uint32_t starttest)
1409{
1410	static const char text[] = "text";
1411	static const char binary[] = "binary";
1412	static const size_t len = sizeof(text);
1413
1414	uint32_t i = starttest;
1415
1416	do {
1417		set_test_type(me[starttest].mp, me[i].mp);
1418		if ((ms->flags & MAGIC_DEBUG) == 0)
1419			continue;
1420		(void)fprintf(stderr, "%s%s%s: %s\n",
1421		    me[i].mp->mimetype,
1422		    me[i].mp->mimetype[0] == '\0' ? "" : "; ",
1423		    me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
1424		    me[i].mp->flag & BINTEST ? binary : text);
1425		if (me[i].mp->flag & BINTEST) {
1426			char *p = strstr(me[i].mp->desc, text);
1427			if (p && (p == me[i].mp->desc ||
1428			    isspace(CAST(unsigned char, p[-1]))) &&
1429			    (p + len - me[i].mp->desc == MAXstring
1430			    || (p[len] == '\0' ||
1431			    isspace(CAST(unsigned char, p[len])))))
1432				(void)fprintf(stderr, "*** Possible "
1433				    "binary test for text type\n");
1434		}
1435	} while (++i < nme && me[i].mp->cont_level != 0);
1436	return i;
1437}
1438
1439file_private void
1440set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
1441{
1442	uint32_t i;
1443	for (i = 0; i < nme; i++) {
1444		if (me[i].mp->cont_level == 0 &&
1445		    me[i].mp->type == FILE_DEFAULT) {
1446			while (++i < nme)
1447				if (me[i].mp->cont_level == 0)
1448					break;
1449			if (i != nme) {
1450				/* XXX - Ugh! */
1451				ms->line = me[i].mp->lineno;
1452				file_magwarn(ms,
1453				    "level 0 \"default\" did not sort last");
1454			}
1455			return;
1456		}
1457	}
1458}
1459
1460file_private int
1461coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
1462    struct magic **ma, uint32_t *nma)
1463{
1464	uint32_t i, mentrycount = 0;
1465	size_t slen;
1466
1467	for (i = 0; i < nme; i++)
1468		mentrycount += me[i].cont_count;
1469
1470	if (mentrycount == 0) {
1471		*ma = NULL;
1472		*nma = 0;
1473		return 0;
1474	}
1475
1476	slen = sizeof(**ma) * mentrycount;
1477	if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
1478		file_oomem(ms, slen);
1479		return -1;
1480	}
1481
1482	mentrycount = 0;
1483	for (i = 0; i < nme; i++) {
1484		(void)memcpy(*ma + mentrycount, me[i].mp,
1485		    me[i].cont_count * sizeof(**ma));
1486		mentrycount += me[i].cont_count;
1487	}
1488	*nma = mentrycount;
1489	return 0;
1490}
1491
1492file_private void
1493magic_entry_free(struct magic_entry *me, uint32_t nme)
1494{
1495	uint32_t i;
1496	if (me == NULL)
1497		return;
1498	for (i = 0; i < nme; i++)
1499		free(me[i].mp);
1500	free(me);
1501}
1502
1503file_private struct magic_map *
1504apprentice_load(struct magic_set *ms, const char *fn, int action)
1505{
1506	int errs = 0;
1507	uint32_t i, j;
1508	size_t files = 0, maxfiles = 0;
1509	char **filearr = NULL, *mfn;
1510	struct stat st;
1511	struct magic_map *map;
1512	struct magic_entry_set mset[MAGIC_SETS];
1513	DIR *dir;
1514	struct dirent *d;
1515
1516	memset(mset, 0, sizeof(mset));
1517	ms->flags |= MAGIC_CHECK;	/* Enable checks for parsed files */
1518
1519
1520	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
1521	{
1522		file_oomem(ms, sizeof(*map));
1523		return NULL;
1524	}
1525	map->type = MAP_TYPE_MALLOC;
1526
1527	/* print silly verbose header for USG compat. */
1528	if (action == FILE_CHECK)
1529		(void)fprintf(stderr, "%s\n", usg_hdr);
1530
1531	/* load directory or file */
1532	if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
1533		dir = opendir(fn);
1534		if (!dir) {
1535			errs++;
1536			goto out;
1537		}
1538		while ((d = readdir(dir)) != NULL) {
1539			if (d->d_name[0] == '.')
1540				continue;
1541			if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
1542				file_oomem(ms,
1543				    strlen(fn) + strlen(d->d_name) + 2);
1544				errs++;
1545				closedir(dir);
1546				goto out;
1547			}
1548			if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
1549				free(mfn);
1550				continue;
1551			}
1552			if (files >= maxfiles) {
1553				size_t mlen;
1554				char **nfilearr;
1555				maxfiles = (maxfiles + 1) * 2;
1556				mlen = maxfiles * sizeof(*filearr);
1557				if ((nfilearr = CAST(char **,
1558				    realloc(filearr, mlen))) == NULL) {
1559					file_oomem(ms, mlen);
1560					free(mfn);
1561					closedir(dir);
1562					errs++;
1563					goto out;
1564				}
1565				filearr = nfilearr;
1566			}
1567			filearr[files++] = mfn;
1568		}
1569		closedir(dir);
1570		if (filearr) {
1571			qsort(filearr, files, sizeof(*filearr), cmpstrp);
1572			for (i = 0; i < files; i++) {
1573				load_1(ms, action, filearr[i], &errs, mset);
1574				free(filearr[i]);
1575			}
1576			free(filearr);
1577			filearr = NULL;
1578		}
1579	} else
1580		load_1(ms, action, fn, &errs, mset);
1581	if (errs)
1582		goto out;
1583
1584	for (j = 0; j < MAGIC_SETS; j++) {
1585		/* Set types of tests */
1586		for (i = 0; i < mset[j].count; ) {
1587			if (mset[j].me[i].mp->cont_level != 0) {
1588				i++;
1589				continue;
1590			}
1591			i = set_text_binary(ms, mset[j].me, mset[j].count, i);
1592		}
1593		if (mset[j].me)
1594			qsort(mset[j].me, mset[j].count, sizeof(*mset[0].me),
1595			    apprentice_sort);
1596
1597		/*
1598		 * Make sure that any level 0 "default" line is last
1599		 * (if one exists).
1600		 */
1601		set_last_default(ms, mset[j].me, mset[j].count);
1602
1603		/* coalesce per file arrays into a single one, if needed */
1604		if (mset[j].count == 0)
1605			continue;
1606
1607		if (coalesce_entries(ms, mset[j].me, mset[j].count,
1608		    &map->magic[j], &map->nmagic[j]) == -1) {
1609			errs++;
1610			goto out;
1611		}
1612	}
1613
1614out:
1615	free(filearr);
1616	for (j = 0; j < MAGIC_SETS; j++)
1617		magic_entry_free(mset[j].me, mset[j].count);
1618
1619	if (errs) {
1620		apprentice_unmap(map);
1621		return NULL;
1622	}
1623	return map;
1624}
1625
1626/*
1627 * extend the sign bit if the comparison is to be signed
1628 */
1629file_protected uint64_t
1630file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
1631{
1632	if (!(m->flag & UNSIGNED)) {
1633		switch(m->type) {
1634		/*
1635		 * Do not remove the casts below.  They are
1636		 * vital.  When later compared with the data,
1637		 * the sign extension must have happened.
1638		 */
1639		case FILE_BYTE:
1640			v = CAST(signed char,  v);
1641			break;
1642		case FILE_SHORT:
1643		case FILE_BESHORT:
1644		case FILE_LESHORT:
1645			v = CAST(short, v);
1646			break;
1647		case FILE_DATE:
1648		case FILE_BEDATE:
1649		case FILE_LEDATE:
1650		case FILE_MEDATE:
1651		case FILE_LDATE:
1652		case FILE_BELDATE:
1653		case FILE_LELDATE:
1654		case FILE_MELDATE:
1655		case FILE_LONG:
1656		case FILE_BELONG:
1657		case FILE_LELONG:
1658		case FILE_MELONG:
1659		case FILE_FLOAT:
1660		case FILE_BEFLOAT:
1661		case FILE_LEFLOAT:
1662		case FILE_MSDOSDATE:
1663		case FILE_BEMSDOSDATE:
1664		case FILE_LEMSDOSDATE:
1665		case FILE_MSDOSTIME:
1666		case FILE_BEMSDOSTIME:
1667		case FILE_LEMSDOSTIME:
1668			v = CAST(int32_t, v);
1669			break;
1670		case FILE_QUAD:
1671		case FILE_BEQUAD:
1672		case FILE_LEQUAD:
1673		case FILE_QDATE:
1674		case FILE_QLDATE:
1675		case FILE_QWDATE:
1676		case FILE_BEQDATE:
1677		case FILE_BEQLDATE:
1678		case FILE_BEQWDATE:
1679		case FILE_LEQDATE:
1680		case FILE_LEQLDATE:
1681		case FILE_LEQWDATE:
1682		case FILE_DOUBLE:
1683		case FILE_BEDOUBLE:
1684		case FILE_LEDOUBLE:
1685		case FILE_OFFSET:
1686		case FILE_BEVARINT:
1687		case FILE_LEVARINT:
1688			v = CAST(int64_t, v);
1689			break;
1690		case FILE_STRING:
1691		case FILE_PSTRING:
1692		case FILE_BESTRING16:
1693		case FILE_LESTRING16:
1694		case FILE_REGEX:
1695		case FILE_SEARCH:
1696		case FILE_DEFAULT:
1697		case FILE_INDIRECT:
1698		case FILE_NAME:
1699		case FILE_USE:
1700		case FILE_CLEAR:
1701		case FILE_DER:
1702		case FILE_GUID:
1703		case FILE_OCTAL:
1704			break;
1705		default:
1706			if (ms->flags & MAGIC_CHECK)
1707			    file_magwarn(ms, "cannot happen: m->type=%d\n",
1708				    m->type);
1709			return FILE_BADSIZE;
1710		}
1711	}
1712	return v;
1713}
1714
1715file_private int
1716string_modifier_check(struct magic_set *ms, struct magic *m)
1717{
1718	if ((ms->flags & MAGIC_CHECK) == 0)
1719		return 0;
1720
1721	if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
1722	    (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
1723		file_magwarn(ms,
1724		    "'/BHhLl' modifiers are only allowed for pascal strings\n");
1725		return -1;
1726	}
1727	switch (m->type) {
1728	case FILE_BESTRING16:
1729	case FILE_LESTRING16:
1730		if (m->str_flags != 0) {
1731			file_magwarn(ms,
1732			    "no modifiers allowed for 16-bit strings\n");
1733			return -1;
1734		}
1735		break;
1736	case FILE_STRING:
1737	case FILE_PSTRING:
1738		if ((m->str_flags & REGEX_OFFSET_START) != 0) {
1739			file_magwarn(ms,
1740			    "'/%c' only allowed on regex and search\n",
1741			    CHAR_REGEX_OFFSET_START);
1742			return -1;
1743		}
1744		break;
1745	case FILE_SEARCH:
1746		if (m->str_range == 0) {
1747			file_magwarn(ms,
1748			    "missing range; defaulting to %d\n",
1749                            STRING_DEFAULT_RANGE);
1750			m->str_range = STRING_DEFAULT_RANGE;
1751			return -1;
1752		}
1753		break;
1754	case FILE_REGEX:
1755		if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
1756			file_magwarn(ms, "'/%c' not allowed on regex\n",
1757			    CHAR_COMPACT_WHITESPACE);
1758			return -1;
1759		}
1760		if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
1761			file_magwarn(ms, "'/%c' not allowed on regex\n",
1762			    CHAR_COMPACT_OPTIONAL_WHITESPACE);
1763			return -1;
1764		}
1765		break;
1766	default:
1767		file_magwarn(ms, "coding error: m->type=%d\n",
1768		    m->type);
1769		return -1;
1770	}
1771	return 0;
1772}
1773
1774file_private int
1775get_op(char c)
1776{
1777	switch (c) {
1778	case '&':
1779		return FILE_OPAND;
1780	case '|':
1781		return FILE_OPOR;
1782	case '^':
1783		return FILE_OPXOR;
1784	case '+':
1785		return FILE_OPADD;
1786	case '-':
1787		return FILE_OPMINUS;
1788	case '*':
1789		return FILE_OPMULTIPLY;
1790	case '/':
1791		return FILE_OPDIVIDE;
1792	case '%':
1793		return FILE_OPMODULO;
1794	default:
1795		return -1;
1796	}
1797}
1798
1799#ifdef ENABLE_CONDITIONALS
1800file_private int
1801get_cond(const char *l, const char **t)
1802{
1803	static const struct cond_tbl_s {
1804		char name[8];
1805		size_t len;
1806		int cond;
1807	} cond_tbl[] = {
1808		{ "if",		2,	COND_IF },
1809		{ "elif",	4,	COND_ELIF },
1810		{ "else",	4,	COND_ELSE },
1811		{ "",		0,	COND_NONE },
1812	};
1813	const struct cond_tbl_s *p;
1814
1815	for (p = cond_tbl; p->len; p++) {
1816		if (strncmp(l, p->name, p->len) == 0 &&
1817		    isspace(CAST(unsigned char, l[p->len]))) {
1818			if (t)
1819				*t = l + p->len;
1820			break;
1821		}
1822	}
1823	return p->cond;
1824}
1825
1826file_private int
1827check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
1828{
1829	int last_cond;
1830	last_cond = ms->c.li[cont_level].last_cond;
1831
1832	switch (cond) {
1833	case COND_IF:
1834		if (last_cond != COND_NONE && last_cond != COND_ELIF) {
1835			if (ms->flags & MAGIC_CHECK)
1836				file_magwarn(ms, "syntax error: `if'");
1837			return -1;
1838		}
1839		last_cond = COND_IF;
1840		break;
1841
1842	case COND_ELIF:
1843		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1844			if (ms->flags & MAGIC_CHECK)
1845				file_magwarn(ms, "syntax error: `elif'");
1846			return -1;
1847		}
1848		last_cond = COND_ELIF;
1849		break;
1850
1851	case COND_ELSE:
1852		if (last_cond != COND_IF && last_cond != COND_ELIF) {
1853			if (ms->flags & MAGIC_CHECK)
1854				file_magwarn(ms, "syntax error: `else'");
1855			return -1;
1856		}
1857		last_cond = COND_NONE;
1858		break;
1859
1860	case COND_NONE:
1861		last_cond = COND_NONE;
1862		break;
1863	}
1864
1865	ms->c.li[cont_level].last_cond = last_cond;
1866	return 0;
1867}
1868#endif /* ENABLE_CONDITIONALS */
1869
1870file_private int
1871parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
1872{
1873	const char *l = *lp;
1874
1875	while (!isspace(CAST(unsigned char, *++l)))
1876		switch (*l) {
1877		case CHAR_INDIRECT_RELATIVE:
1878			m->str_flags |= INDIRECT_RELATIVE;
1879			break;
1880		default:
1881			if (ms->flags & MAGIC_CHECK)
1882				file_magwarn(ms, "indirect modifier `%c' "
1883					"invalid", *l);
1884			*lp = l;
1885			return -1;
1886		}
1887	*lp = l;
1888	return 0;
1889}
1890
1891file_private void
1892parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
1893    int op)
1894{
1895	const char *l = *lp;
1896	char *t;
1897	uint64_t val;
1898
1899	++l;
1900	m->mask_op |= op;
1901	val = CAST(uint64_t, strtoull(l, &t, 0));
1902	l = t;
1903	m->num_mask = file_signextend(ms, m, val);
1904	eatsize(&l);
1905	*lp = l;
1906}
1907
/*
 * Parse the modifiers that follow the operator character of a string
 * type; *lp points at that operator on entry.
 *
 * Each character up to the next whitespace is either the start of a
 * decimal/octal/hex range (stored in m->str_range) or a modifier flag
 * that is recorded in m->str_flags.  The result is then validated with
 * string_modifier_check().
 *
 * Returns 0 on success and -1 on an invalid modifier or failed check;
 * in both cases *lp is updated to the position where scanning stopped.
 */
file_private int
parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
{
	const char *l = *lp;
	char *t;
	int have_range = 0;

	/* the pre-increment steps over the operator / the previous modifier */
	while (!isspace(CAST(unsigned char, *++l))) {
		switch (*l) {
		case '0':  case '1':  case '2':
		case '3':  case '4':  case '5':
		case '6':  case '7':  case '8':
		case '9':
			if (have_range && (ms->flags & MAGIC_CHECK))
				file_magwarn(ms, "multiple ranges");
			have_range = 1;
			m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
			if (m->str_range == 0)
				file_magwarn(ms, "zero range");
			/*
			 * Leave l on the last character of the number; the
			 * loop's pre-increment moves past it.
			 */
			l = t - 1;
			break;
		case CHAR_COMPACT_WHITESPACE:
			m->str_flags |= STRING_COMPACT_WHITESPACE;
			break;
		case CHAR_COMPACT_OPTIONAL_WHITESPACE:
			m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
			break;
		case CHAR_IGNORE_LOWERCASE:
			m->str_flags |= STRING_IGNORE_LOWERCASE;
			break;
		case CHAR_IGNORE_UPPERCASE:
			m->str_flags |= STRING_IGNORE_UPPERCASE;
			break;
		case CHAR_REGEX_OFFSET_START:
			m->str_flags |= REGEX_OFFSET_START;
			break;
		case CHAR_BINTEST:
			m->str_flags |= STRING_BINTEST;
			break;
		case CHAR_TEXTTEST:
			m->str_flags |= STRING_TEXTTEST;
			break;
		case CHAR_TRIM:
			m->str_flags |= STRING_TRIM;
			break;
		case CHAR_FULL_WORD:
			m->str_flags |= STRING_FULL_WORD;
			break;
		case CHAR_PSTRING_1_LE:
/* replace the bits selected by PSTRING_LEN with the new length flag */
#define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_1_LE);
			break;
		case CHAR_PSTRING_2_BE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_2_BE);
			break;
		case CHAR_PSTRING_2_LE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_2_LE);
			break;
		case CHAR_PSTRING_4_BE:
			if (m->type != FILE_PSTRING)
				goto bad;
			SET_LENGTH(PSTRING_4_BE);
			break;
		case CHAR_PSTRING_4_LE:
			/* this one modifier is also accepted on regex */
			switch (m->type) {
			case FILE_PSTRING:
			case FILE_REGEX:
				break;
			default:
				goto bad;
			}
			SET_LENGTH(PSTRING_4_LE);
			break;
		case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
			if (m->type != FILE_PSTRING)
				goto bad;
			m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
			break;
		default:
		bad:
			/* unknown modifier, or one not valid for m->type */
			if (ms->flags & MAGIC_CHECK)
				file_magwarn(ms, "string modifier `%c' "
					"invalid", *l);
			goto out;
		}
		/* allow multiple '/' for readability */
		if (l[1] == '/' && !isspace(CAST(unsigned char, l[2])))
			l++;
	}
	if (string_modifier_check(ms, m) == -1)
		goto out;
	*lp = l;
	return 0;
out:
	*lp = l;
	return -1;
}
2011
2012/*
2013 * parse one line from magic file, put into magic[index++] if valid
2014 */
2015file_private int
2016parse(struct magic_set *ms, struct magic_entry *me, const char *line,
2017    size_t lineno, int action)
2018{
2019#ifdef ENABLE_CONDITIONALS
2020	static uint32_t last_cont_level = 0;
2021#endif
2022	size_t i;
2023	struct magic *m;
2024	const char *l = line;
2025	char *t;
2026	int op;
2027	uint32_t cont_level;
2028	int32_t diff;
2029
2030	cont_level = 0;
2031
2032	/*
2033	 * Parse the offset.
2034	 */
2035	while (*l == '>') {
2036		++l;		/* step over */
2037		cont_level++;
2038	}
2039#ifdef ENABLE_CONDITIONALS
2040	if (cont_level == 0 || cont_level > last_cont_level)
2041		if (file_check_mem(ms, cont_level) == -1)
2042			return -1;
2043	last_cont_level = cont_level;
2044#endif
2045	if (cont_level != 0) {
2046		if (me->mp == NULL) {
2047			file_magerror(ms, "No current entry for continuation");
2048			return -1;
2049		}
2050		if (me->cont_count == 0) {
2051			file_magerror(ms, "Continuations present with 0 count");
2052			return -1;
2053		}
2054		m = &me->mp[me->cont_count - 1];
2055		diff = CAST(int32_t, cont_level) - CAST(int32_t, m->cont_level);
2056		if (diff > 1)
2057			file_magwarn(ms, "New continuation level %u is more "
2058			    "than one larger than current level %u", cont_level,
2059			    m->cont_level);
2060		if (me->cont_count == me->max_count) {
2061			struct magic *nm;
2062			size_t cnt = me->max_count + ALLOC_CHUNK;
2063			if ((nm = CAST(struct magic *, realloc(me->mp,
2064			    sizeof(*nm) * cnt))) == NULL) {
2065				file_oomem(ms, sizeof(*nm) * cnt);
2066				return -1;
2067			}
2068			me->mp = nm;
2069			me->max_count = CAST(uint32_t, cnt);
2070		}
2071		m = &me->mp[me->cont_count++];
2072		(void)memset(m, 0, sizeof(*m));
2073		m->cont_level = cont_level;
2074	} else {
2075		static const size_t len = sizeof(*m) * ALLOC_CHUNK;
2076		if (me->mp != NULL)
2077			return 1;
2078		if ((m = CAST(struct magic *, malloc(len))) == NULL) {
2079			file_oomem(ms, len);
2080			return -1;
2081		}
2082		me->mp = m;
2083		me->max_count = ALLOC_CHUNK;
2084		(void)memset(m, 0, sizeof(*m));
2085		m->factor_op = FILE_FACTOR_OP_NONE;
2086		m->cont_level = 0;
2087		me->cont_count = 1;
2088	}
2089	m->lineno = CAST(uint32_t, lineno);
2090
2091	if (*l == '&') {  /* m->cont_level == 0 checked below. */
2092                ++l;            /* step over */
2093                m->flag |= OFFADD;
2094        }
2095	if (*l == '(') {
2096		++l;		/* step over */
2097		m->flag |= INDIR;
2098		if (m->flag & OFFADD)
2099			m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
2100
2101		if (*l == '&') {  /* m->cont_level == 0 checked below */
2102			++l;            /* step over */
2103			m->flag |= OFFADD;
2104		}
2105	}
2106	/* Indirect offsets are not valid at level 0. */
2107	if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
2108		if (ms->flags & MAGIC_CHECK)
2109			file_magwarn(ms, "relative offset at level 0");
2110		return -1;
2111	}
2112
2113	/* get offset, then skip over it */
2114	if (*l == '-') {
2115		++l;            /* step over */
2116		m->flag |= OFFNEGATIVE;
2117	}
2118	m->offset = CAST(int32_t, strtol(l, &t, 0));
2119        if (l == t) {
2120		if (ms->flags & MAGIC_CHECK)
2121			file_magwarn(ms, "offset `%s' invalid", l);
2122		return -1;
2123	}
2124
2125        l = t;
2126
2127	if (m->flag & INDIR) {
2128		m->in_type = FILE_LONG;
2129		m->in_offset = 0;
2130		m->in_op = 0;
2131		/*
2132		 * read [.,lbs][+-]nnnnn)
2133		 */
2134		if (*l == '.' || *l == ',') {
2135			if (*l == ',')
2136				m->in_op |= FILE_OPSIGNED;
2137			l++;
2138			switch (*l) {
2139			case 'l':
2140				m->in_type = FILE_LELONG;
2141				break;
2142			case 'L':
2143				m->in_type = FILE_BELONG;
2144				break;
2145			case 'm':
2146				m->in_type = FILE_MELONG;
2147				break;
2148			case 'h':
2149			case 's':
2150				m->in_type = FILE_LESHORT;
2151				break;
2152			case 'H':
2153			case 'S':
2154				m->in_type = FILE_BESHORT;
2155				break;
2156			case 'c':
2157			case 'b':
2158			case 'C':
2159			case 'B':
2160				m->in_type = FILE_BYTE;
2161				break;
2162			case 'e':
2163			case 'f':
2164			case 'g':
2165				m->in_type = FILE_LEDOUBLE;
2166				break;
2167			case 'E':
2168			case 'F':
2169			case 'G':
2170				m->in_type = FILE_BEDOUBLE;
2171				break;
2172			case 'i':
2173				m->in_type = FILE_LEID3;
2174				break;
2175			case 'I':
2176				m->in_type = FILE_BEID3;
2177				break;
2178			case 'o':
2179				m->in_type = FILE_OCTAL;
2180				break;
2181			case 'q':
2182				m->in_type = FILE_LEQUAD;
2183				break;
2184			case 'Q':
2185				m->in_type = FILE_BEQUAD;
2186				break;
2187			default:
2188				if (ms->flags & MAGIC_CHECK)
2189					file_magwarn(ms,
2190					    "indirect offset type `%c' invalid",
2191					    *l);
2192				return -1;
2193			}
2194			l++;
2195		}
2196
2197		if (*l == '~') {
2198			m->in_op |= FILE_OPINVERSE;
2199			l++;
2200		}
2201		if ((op = get_op(*l)) != -1) {
2202			m->in_op |= op;
2203			l++;
2204		}
2205		if (*l == '(') {
2206			m->in_op |= FILE_OPINDIRECT;
2207			l++;
2208		}
2209		if (isdigit(CAST(unsigned char, *l)) || *l == '-') {
2210			m->in_offset = CAST(int32_t, strtol(l, &t, 0));
2211			if (l == t) {
2212				if (ms->flags & MAGIC_CHECK)
2213					file_magwarn(ms,
2214					    "in_offset `%s' invalid", l);
2215				return -1;
2216			}
2217			l = t;
2218		}
2219		if (*l++ != ')' ||
2220		    ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
2221			if (ms->flags & MAGIC_CHECK)
2222				file_magwarn(ms,
2223				    "missing ')' in indirect offset");
2224			return -1;
2225		}
2226	}
2227	EATAB;
2228
2229#ifdef ENABLE_CONDITIONALS
2230	m->cond = get_cond(l, &l);
2231	if (check_cond(ms, m->cond, cont_level) == -1)
2232		return -1;
2233
2234	EATAB;
2235#endif
2236
2237	/*
2238	 * Parse the type.
2239	 */
2240	if (*l == 'u') {
2241		/*
2242		 * Try it as a keyword type prefixed by "u"; match what
2243		 * follows the "u".  If that fails, try it as an SUS
2244		 * integer type.
2245		 */
2246		m->type = get_type(type_tbl, l + 1, &l);
2247		if (m->type == FILE_INVALID) {
2248			/*
2249			 * Not a keyword type; parse it as an SUS type,
2250			 * 'u' possibly followed by a number or C/S/L.
2251			 */
2252			m->type = get_standard_integer_type(l, &l);
2253		}
2254		/* It's unsigned. */
2255		if (m->type != FILE_INVALID)
2256			m->flag |= UNSIGNED;
2257	} else {
2258		/*
2259		 * Try it as a keyword type.  If that fails, try it as
2260		 * an SUS integer type if it begins with "d" or as an
2261		 * SUS string type if it begins with "s".  In any case,
2262		 * it's not unsigned.
2263		 */
2264		m->type = get_type(type_tbl, l, &l);
2265		if (m->type == FILE_INVALID) {
2266			/*
2267			 * Not a keyword type; parse it as an SUS type,
2268			 * either 'd' possibly followed by a number or
2269			 * C/S/L, or just 's'.
2270			 */
2271			if (*l == 'd')
2272				m->type = get_standard_integer_type(l, &l);
2273			else if (*l == 's'
2274			    && !isalpha(CAST(unsigned char, l[1]))) {
2275				m->type = FILE_STRING;
2276				++l;
2277			}
2278		}
2279	}
2280
2281	if (m->type == FILE_INVALID) {
2282		/* Not found - try it as a special keyword. */
2283		m->type = get_type(special_tbl, l, &l);
2284	}
2285
2286	if (m->type == FILE_INVALID) {
2287		if (ms->flags & MAGIC_CHECK)
2288			file_magwarn(ms, "type `%s' invalid", l);
2289		return -1;
2290	}
2291
2292	if (m->type == FILE_NAME && cont_level != 0) {
2293		if (ms->flags & MAGIC_CHECK)
2294			file_magwarn(ms, "`name%s' entries can only be "
2295			    "declared at top level", l);
2296		return -1;
2297	}
2298
2299	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
2300	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
2301
2302	m->mask_op = 0;
2303	if (*l == '~') {
2304		if (!IS_STRING(m->type))
2305			m->mask_op |= FILE_OPINVERSE;
2306		else if (ms->flags & MAGIC_CHECK)
2307			file_magwarn(ms, "'~' invalid for string types");
2308		++l;
2309	}
2310	m->str_range = 0;
2311	m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
2312	if ((op = get_op(*l)) != -1) {
2313		if (IS_STRING(m->type)) {
2314			int r;
2315
2316			if (op != FILE_OPDIVIDE) {
2317				if (ms->flags & MAGIC_CHECK)
2318					file_magwarn(ms,
2319					    "invalid string/indirect op: "
2320					    "`%c'", *t);
2321				return -1;
2322			}
2323
2324			if (m->type == FILE_INDIRECT)
2325				r = parse_indirect_modifier(ms, m, &l);
2326			else
2327				r = parse_string_modifier(ms, m, &l);
2328			if (r == -1)
2329				return -1;
2330		} else
2331			parse_op_modifier(ms, m, &l, op);
2332	}
2333
2334	/*
2335	 * We used to set mask to all 1's here, instead let's just not do
2336	 * anything if mask = 0 (unless you have a better idea)
2337	 */
2338	EATAB;
2339
2340	switch (*l) {
2341	case '>':
2342	case '<':
2343  		m->reln = *l;
2344  		++l;
2345		if (*l == '=') {
2346			if (ms->flags & MAGIC_CHECK) {
2347				file_magwarn(ms, "%c= not supported",
2348				    m->reln);
2349				return -1;
2350			}
2351		   ++l;
2352		}
2353		break;
2354	/* Old-style anding: "0 byte &0x80 dynamically linked" */
2355	case '&':
2356	case '^':
2357	case '=':
2358  		m->reln = *l;
2359  		++l;
2360		if (*l == '=') {
2361		   /* HP compat: ignore &= etc. */
2362		   ++l;
2363		}
2364		break;
2365	case '!':
2366		m->reln = *l;
2367		++l;
2368		break;
2369	default:
2370  		m->reln = '=';	/* the default relation */
2371		if (*l == 'x' && ((isascii(CAST(unsigned char, l[1])) &&
2372		    isspace(CAST(unsigned char, l[1]))) || !l[1])) {
2373			m->reln = *l;
2374			++l;
2375		}
2376		break;
2377	}
2378	/*
2379	 * Grab the value part, except for an 'x' reln.
2380	 */
2381	if (m->reln != 'x' && getvalue(ms, m, &l, action))
2382		return -1;
2383
2384	/*
2385	 * TODO finish this macro and start using it!
2386	 * #define offsetcheck {if (offset > ms->bytes_max -1)
2387	 *	magwarn("offset too big"); }
2388	 */
2389
2390	/*
2391	 * Now get last part - the description
2392	 */
2393	EATAB;
2394	if (l[0] == '\b') {
2395		++l;
2396		m->flag |= NOSPACE;
2397	} else if ((l[0] == '\\') && (l[1] == 'b')) {
2398		++l;
2399		++l;
2400		m->flag |= NOSPACE;
2401	}
2402	for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
2403		continue;
2404	if (i == sizeof(m->desc)) {
2405		m->desc[sizeof(m->desc) - 1] = '\0';
2406		if (ms->flags & MAGIC_CHECK)
2407			file_magwarn(ms, "description `%s' truncated", m->desc);
2408	}
2409
2410        /*
2411	 * We only do this check while compiling, or if any of the magic
2412	 * files were not compiled.
2413         */
2414        if (ms->flags & MAGIC_CHECK) {
2415		if (check_format(ms, m) == -1)
2416			return -1;
2417	}
2418#ifndef COMPILE_ONLY
2419	if (action == FILE_CHECK) {
2420		file_mdump(m);
2421	}
2422#endif
2423	m->mimetype[0] = '\0';		/* initialise MIME type to none */
2424	return 0;
2425}
2426
2427/*
2428 * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
2429 * if valid
2430 */
2431/*ARGSUSED*/
2432file_private int
2433parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line,
2434    size_t len __attribute__((__unused__)))
2435{
2436	const char *l = line;
2437	char *el;
2438	unsigned long factor;
2439	char sbuf[512];
2440	struct magic *m = &me->mp[0];
2441
2442	if (m->factor_op != FILE_FACTOR_OP_NONE) {
2443		file_magwarn(ms,
2444		    "Current entry already has a strength type: %c %d",
2445		    m->factor_op, m->factor);
2446		return -1;
2447	}
2448	if (m->type == FILE_NAME) {
2449		file_magwarn(ms, "%s: Strength setting is not supported in "
2450		    "\"name\" magic entries",
2451		    file_printable(ms, sbuf, sizeof(sbuf), m->value.s,
2452		    sizeof(m->value.s)));
2453		return -1;
2454	}
2455	EATAB;
2456	switch (*l) {
2457	case FILE_FACTOR_OP_NONE:
2458		break;
2459	case FILE_FACTOR_OP_PLUS:
2460	case FILE_FACTOR_OP_MINUS:
2461	case FILE_FACTOR_OP_TIMES:
2462	case FILE_FACTOR_OP_DIV:
2463		m->factor_op = *l++;
2464		break;
2465	default:
2466		file_magwarn(ms, "Unknown factor op `%c'", *l);
2467		return -1;
2468	}
2469	EATAB;
2470	factor = strtoul(l, &el, 0);
2471	if (factor > 255) {
2472		file_magwarn(ms, "Too large factor `%lu'", factor);
2473		goto out;
2474	}
2475	if (*el && !isspace(CAST(unsigned char, *el))) {
2476		file_magwarn(ms, "Bad factor `%s'", l);
2477		goto out;
2478	}
2479	m->factor = CAST(uint8_t, factor);
2480	if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
2481		file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
2482		    m->factor_op, m->factor);
2483		goto out;
2484	}
2485	return 0;
2486out:
2487	m->factor_op = FILE_FACTOR_OP_NONE;
2488	m->factor = 0;
2489	return -1;
2490}
2491
2492file_private int
2493goodchar(unsigned char x, const char *extra)
2494{
2495	return (isascii(x) && isalnum(x)) || strchr(extra, x);
2496}
2497
2498file_private int
2499parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
2500    size_t llen, off_t off, size_t len, const char *name, const char *extra,
2501    int nt)
2502{
2503	size_t i;
2504	const char *l = line;
2505	struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
2506	char *buf = CAST(char *, CAST(void *, m)) + off;
2507
2508	if (buf[0] != '\0') {
2509		len = nt ? strlen(buf) : len;
2510		file_magwarn(ms, "Current entry already has a %s type "
2511		    "`%.*s', new type `%s'", name, CAST(int, len), buf, l);
2512		return -1;
2513	}
2514
2515	if (*m->desc == '\0') {
2516		file_magwarn(ms, "Current entry does not yet have a "
2517		    "description for adding a %s type", name);
2518		return -1;
2519	}
2520
2521	EATAB;
2522	for (i = 0; *l && i < llen && i < len && goodchar(*l, extra);
2523	    buf[i++] = *l++)
2524		continue;
2525
2526	if (i == len && *l) {
2527		if (nt)
2528			buf[len - 1] = '\0';
2529		if (ms->flags & MAGIC_CHECK)
2530			file_magwarn(ms, "%s type `%s' truncated %"
2531			    SIZE_T_FORMAT "u", name, line, i);
2532	} else {
2533		if (!isspace(CAST(unsigned char, *l)) && !goodchar(*l, extra))
2534			file_magwarn(ms, "%s type `%s' has bad char '%c'",
2535			    name, line, *l);
2536		if (nt)
2537			buf[i] = '\0';
2538	}
2539
2540	if (i > 0)
2541		return 0;
2542
2543	file_magerror(ms, "Bad magic entry '%s'", line);
2544	return -1;
2545}
2546
2547/*
2548 * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
2549 * magic[index - 1]
2550 */
2551file_private int
2552parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line,
2553    size_t len)
2554{
2555	return parse_extra(ms, me, line, len,
2556	    CAST(off_t, offsetof(struct magic, apple)),
2557	    sizeof(me->mp[0].apple), "APPLE", "!+-./?", 0);
2558}
2559
2560/*
2561 * Parse a comma-separated list of extensions
2562 */
2563file_private int
2564parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line,
2565    size_t len)
2566{
2567	return parse_extra(ms, me, line, len,
2568	    CAST(off_t, offsetof(struct magic, ext)),
2569	    sizeof(me->mp[0].ext), "EXTENSION", ",!+-/@?_$&~", 0);
2570	    /* & for b&w */
2571	    /* ~ for journal~ */
2572}
2573
2574/*
2575 * parse a MIME annotation line from magic file, put into magic[index - 1]
2576 * if valid
2577 */
2578file_private int
2579parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line,
2580    size_t len)
2581{
2582	return parse_extra(ms, me, line, len,
2583	    CAST(off_t, offsetof(struct magic, mimetype)),
2584	    sizeof(me->mp[0].mimetype), "MIME", "+-/.$?:{}", 1);
2585}
2586
2587file_private int
2588check_format_type(const char *ptr, int type, const char **estr)
2589{
2590	int quad = 0, h;
2591	size_t len, cnt;
2592	if (*ptr == '\0') {
2593		/* Missing format string; bad */
2594		*estr = "missing format spec";
2595		return -1;
2596	}
2597
2598	switch (file_formats[type]) {
2599	case FILE_FMT_QUAD:
2600		quad = 1;
2601		/*FALLTHROUGH*/
2602	case FILE_FMT_NUM:
2603		if (quad == 0) {
2604			switch (type) {
2605			case FILE_BYTE:
2606				h = 2;
2607				break;
2608			case FILE_SHORT:
2609			case FILE_BESHORT:
2610			case FILE_LESHORT:
2611				h = 1;
2612				break;
2613			case FILE_LONG:
2614			case FILE_BELONG:
2615			case FILE_LELONG:
2616			case FILE_MELONG:
2617			case FILE_LEID3:
2618			case FILE_BEID3:
2619			case FILE_INDIRECT:
2620				h = 0;
2621				break;
2622			default:
2623				fprintf(stderr, "Bad number format %d", type);
2624				abort();
2625			}
2626		} else
2627			h = 0;
2628		while (*ptr && strchr("-.#", *ptr) != NULL)
2629			ptr++;
2630#define CHECKLEN() do { \
2631	for (len = cnt = 0; isdigit(CAST(unsigned char, *ptr)); ptr++, cnt++) \
2632		len = len * 10 + (*ptr - '0'); \
2633	if (cnt > 5 || len > 1024) \
2634		goto toolong; \
2635} while (/*CONSTCOND*/0)
2636
2637		CHECKLEN();
2638		if (*ptr == '.')
2639			ptr++;
2640		CHECKLEN();
2641		if (quad) {
2642			if (*ptr++ != 'l')
2643				goto invalid;
2644			if (*ptr++ != 'l')
2645				goto invalid;
2646		}
2647
2648		switch (*ptr++) {
2649#ifdef STRICT_FORMAT 	/* "long" formats are int formats for us */
2650		/* so don't accept the 'l' modifier */
2651		case 'l':
2652			switch (*ptr++) {
2653			case 'i':
2654			case 'd':
2655			case 'u':
2656			case 'o':
2657			case 'x':
2658			case 'X':
2659				if (h == 0)
2660					return 0;
2661				/*FALLTHROUGH*/
2662			default:
2663				goto invalid;
2664			}
2665
2666		/*
2667		 * Don't accept h and hh modifiers. They make writing
2668		 * magic entries more complicated, for very little benefit
2669		 */
2670		case 'h':
2671			if (h-- <= 0)
2672				goto invalid;
2673			switch (*ptr++) {
2674			case 'h':
2675				if (h-- <= 0)
2676					goto invalid;
2677				switch (*ptr++) {
2678				case 'i':
2679				case 'd':
2680				case 'u':
2681				case 'o':
2682				case 'x':
2683				case 'X':
2684					return 0;
2685				default:
2686					goto invalid;
2687				}
2688			case 'i':
2689			case 'd':
2690			case 'u':
2691			case 'o':
2692			case 'x':
2693			case 'X':
2694				if (h == 0)
2695					return 0;
2696				/*FALLTHROUGH*/
2697			default:
2698				goto invalid;
2699			}
2700#endif
2701		case 'c':
2702			if (h == 2)
2703				return 0;
2704			goto invalid;
2705		case 'i':
2706		case 'd':
2707		case 'u':
2708		case 'o':
2709		case 'x':
2710		case 'X':
2711#ifdef STRICT_FORMAT
2712			if (h == 0)
2713				return 0;
2714			/*FALLTHROUGH*/
2715#else
2716			return 0;
2717#endif
2718		default:
2719			goto invalid;
2720		}
2721
2722	case FILE_FMT_FLOAT:
2723	case FILE_FMT_DOUBLE:
2724		if (*ptr == '-')
2725			ptr++;
2726		if (*ptr == '.')
2727			ptr++;
2728		CHECKLEN();
2729		if (*ptr == '.')
2730			ptr++;
2731		CHECKLEN();
2732		switch (*ptr++) {
2733		case 'e':
2734		case 'E':
2735		case 'f':
2736		case 'F':
2737		case 'g':
2738		case 'G':
2739			return 0;
2740
2741		default:
2742			goto invalid;
2743		}
2744
2745
2746	case FILE_FMT_STR:
2747		if (*ptr == '-')
2748			ptr++;
2749		while (isdigit(CAST(unsigned char, *ptr)))
2750			ptr++;
2751		if (*ptr == '.') {
2752			ptr++;
2753			while (isdigit(CAST(unsigned char , *ptr)))
2754				ptr++;
2755		}
2756
2757		switch (*ptr++) {
2758		case 's':
2759			return 0;
2760		default:
2761			goto invalid;
2762		}
2763
2764	default:
2765		/* internal error */
2766		fprintf(stderr, "Bad file format %d", type);
2767		abort();
2768	}
2769invalid:
2770	*estr = "not valid";
2771	return -1;
2772toolong:
2773	*estr = "too long";
2774	return -1;
2775}
2776
2777/*
2778 * Check that the optional printf format in description matches
2779 * the type of the magic.
2780 */
2781file_private int
2782check_format(struct magic_set *ms, struct magic *m)
2783{
2784	char *ptr;
2785	const char *estr;
2786
2787	for (ptr = m->desc; *ptr; ptr++)
2788		if (*ptr == '%')
2789			break;
2790	if (*ptr == '\0') {
2791		/* No format string; ok */
2792		return 1;
2793	}
2794
2795	assert(file_nformats == file_nnames);
2796
2797	if (m->type >= file_nformats) {
2798		file_magwarn(ms, "Internal error inconsistency between "
2799		    "m->type and format strings");
2800		return -1;
2801	}
2802	if (file_formats[m->type] == FILE_FMT_NONE) {
2803		file_magwarn(ms, "No format string for `%s' with description "
2804		    "`%s'", m->desc, file_names[m->type]);
2805		return -1;
2806	}
2807
2808	ptr++;
2809	if (check_format_type(ptr, m->type, &estr) == -1) {
2810		/*
2811		 * TODO: this error message is unhelpful if the format
2812		 * string is not one character long
2813		 */
2814		file_magwarn(ms, "Printf format is %s for type "
2815		    "`%s' in description `%s'", estr,
2816		    file_names[m->type], m->desc);
2817		return -1;
2818	}
2819
2820	for (; *ptr; ptr++) {
2821		if (*ptr == '%') {
2822			file_magwarn(ms,
2823			    "Too many format strings (should have at most one) "
2824			    "for `%s' with description `%s'",
2825			    file_names[m->type], m->desc);
2826			return -1;
2827		}
2828	}
2829	return 0;
2830}
2831
2832/*
2833 * Read a numeric value from a pointer, into the value union of a magic
2834 * pointer, according to the magic type.  Update the string pointer to point
2835 * just after the number read.  Return 0 for success, non-zero for failure.
2836 */
2837file_private int
2838getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
2839{
2840	char *ep;
2841	uint64_t ull;
2842	int y;
2843
2844	switch (m->type) {
2845	case FILE_BESTRING16:
2846	case FILE_LESTRING16:
2847	case FILE_STRING:
2848	case FILE_PSTRING:
2849	case FILE_REGEX:
2850	case FILE_SEARCH:
2851	case FILE_NAME:
2852	case FILE_USE:
2853	case FILE_DER:
2854	case FILE_OCTAL:
2855		*p = getstr(ms, m, *p, action == FILE_COMPILE);
2856		if (*p == NULL) {
2857			if (ms->flags & MAGIC_CHECK)
2858				file_magwarn(ms, "cannot get string from `%s'",
2859				    m->value.s);
2860			return -1;
2861		}
2862		if (m->type == FILE_REGEX) {
2863			file_regex_t rx;
2864			int rc =
2865			    file_regcomp(ms, &rx, m->value.s, REG_EXTENDED);
2866			if (rc == 0) {
2867				file_regfree(&rx);
2868			}
2869			return rc ? -1 : 0;
2870		}
2871		return 0;
2872	default:
2873		if (m->reln == 'x')
2874			return 0;
2875		break;
2876	}
2877
2878	switch (m->type) {
2879	case FILE_FLOAT:
2880	case FILE_BEFLOAT:
2881	case FILE_LEFLOAT:
2882		errno = 0;
2883#ifdef HAVE_STRTOF
2884		m->value.f = strtof(*p, &ep);
2885#else
2886		m->value.f = (float)strtod(*p, &ep);
2887#endif
2888		if (errno == 0)
2889			*p = ep;
2890		return 0;
2891	case FILE_DOUBLE:
2892	case FILE_BEDOUBLE:
2893	case FILE_LEDOUBLE:
2894		errno = 0;
2895		m->value.d = strtod(*p, &ep);
2896		if (errno == 0)
2897			*p = ep;
2898		return 0;
2899	case FILE_GUID:
2900		if (file_parse_guid(*p, m->value.guid) == -1)
2901			return -1;
2902		*p += FILE_GUID_SIZE - 1;
2903		return 0;
2904	default:
2905		errno = 0;
2906		ull = CAST(uint64_t, strtoull(*p, &ep, 0));
2907		m->value.q = file_signextend(ms, m, ull);
2908		if (*p == ep) {
2909			file_magwarn(ms, "Unparsable number `%s'", *p);
2910			return -1;
2911		} else {
2912			size_t ts = typesize(m->type);
2913			uint64_t x;
2914			const char *q;
2915
2916			if (ts == FILE_BADSIZE) {
2917				file_magwarn(ms,
2918				    "Expected numeric type got `%s'",
2919				    type_tbl[m->type].name);
2920				return -1;
2921			}
2922			for (q = *p; isspace(CAST(unsigned char, *q)); q++)
2923				continue;
2924			if (*q == '-' && ull != UINT64_MAX)
2925				ull = -CAST(int64_t, ull);
2926			switch (ts) {
2927			case 1:
2928				x = CAST(uint64_t, ull & ~0xffULL);
2929				y = (x & ~0xffULL) != ~0xffULL;
2930				break;
2931			case 2:
2932				x = CAST(uint64_t, ull & ~0xffffULL);
2933				y = (x & ~0xffffULL) != ~0xffffULL;
2934				break;
2935			case 4:
2936				x = CAST(uint64_t, ull & ~0xffffffffULL);
2937				y = (x & ~0xffffffffULL) != ~0xffffffffULL;
2938				break;
2939			case 8:
2940				x = 0;
2941				y = 0;
2942				break;
2943			default:
2944				fprintf(stderr, "Bad width %zu", ts);
2945				abort();
2946			}
2947			if (x && y) {
2948				file_magwarn(ms, "Overflow for numeric"
2949				    " type `%s' value %#" PRIx64,
2950				    type_tbl[m->type].name, ull);
2951				return -1;
2952			}
2953		}
2954		if (errno == 0) {
2955			*p = ep;
2956			eatsize(p);
2957		}
2958		return 0;
2959	}
2960}
2961
2962/*
2963 * Convert a string containing C character escapes.  Stop at an unescaped
2964 * space or tab.
2965 * Copy the converted version to "m->value.s", and the length in m->vallen.
2966 * Return updated scan pointer as function result. Warn if set.
2967 */
2968file_private const char *
2969getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
2970{
2971	const char *origs = s;
2972	char	*p = m->value.s;
2973	size_t  plen = sizeof(m->value.s);
2974	char 	*origp = p;
2975	char	*pmax = p + plen - 1;
2976	int	c;
2977	int	val;
2978	size_t	bracket_nesting = 0;
2979
2980	while ((c = *s++) != '\0') {
2981		if (isspace(CAST(unsigned char, c)))
2982			break;
2983		if (p >= pmax) {
2984			file_error(ms, 0, "string too long: `%s'", origs);
2985			return NULL;
2986		}
2987		if (c != '\\') {
2988		    if (c == '[') {
2989			    bracket_nesting++;
2990		    }
2991		    if (c == ']' && bracket_nesting > 0) {
2992			    bracket_nesting--;
2993		    }
2994		    *p++ = CAST(char, c);
2995		    continue;
2996		}
2997		switch(c = *s++) {
2998
2999		case '\0':
3000			if (warn)
3001				file_magwarn(ms, "incomplete escape");
3002			s--;
3003			goto out;
3004		case '.':
3005			if (m->type == FILE_REGEX &&
3006			    bracket_nesting == 0 && warn) {
3007				file_magwarn(ms, "escaped dot ('.') found, "
3008				    "use \\\\. instead");
3009			}
3010			warn = 0; /* already did */
3011			/*FALLTHROUGH*/
3012		case '\t':
3013			if (warn) {
3014				file_magwarn(ms,
3015				    "escaped tab found, use \\\\t instead");
3016				warn = 0;	/* already did */
3017			}
3018			/*FALLTHROUGH*/
3019		default:
3020			if (warn) {
3021				if (isprint(CAST(unsigned char, c))) {
3022					/* Allow escaping of
3023					 * ``relations'' */
3024					if (strchr("<>&^=!", c) == NULL
3025					    && (m->type != FILE_REGEX ||
3026					    strchr("[]().*?^$|{}", c)
3027					    == NULL)) {
3028						file_magwarn(ms, "no "
3029						    "need to escape "
3030						    "`%c'", c);
3031					}
3032				} else {
3033					file_magwarn(ms,
3034					    "unknown escape sequence: "
3035					    "\\%03o", c);
3036				}
3037			}
3038			/*FALLTHROUGH*/
3039		/* space, perhaps force people to use \040? */
3040		case ' ':
3041#if 0
3042		/*
3043		 * Other things people escape, but shouldn't need to,
3044		 * so we disallow them
3045		 */
3046		case '\'':
3047		case '"':
3048		case '?':
3049#endif
3050		/* Relations */
3051		case '>':
3052		case '<':
3053		case '&':
3054		case '^':
3055		case '=':
3056		case '!':
3057		/* and backslash itself */
3058		case '\\':
3059			*p++ = CAST(char, c);
3060			break;
3061
3062		case 'a':
3063			*p++ = '\a';
3064			break;
3065
3066		case 'b':
3067			*p++ = '\b';
3068			break;
3069
3070		case 'f':
3071			*p++ = '\f';
3072			break;
3073
3074		case 'n':
3075			*p++ = '\n';
3076			break;
3077
3078		case 'r':
3079			*p++ = '\r';
3080			break;
3081
3082		case 't':
3083			*p++ = '\t';
3084			break;
3085
3086		case 'v':
3087			*p++ = '\v';
3088			break;
3089
3090		/* \ and up to 3 octal digits */
3091		case '0':
3092		case '1':
3093		case '2':
3094		case '3':
3095		case '4':
3096		case '5':
3097		case '6':
3098		case '7':
3099			val = c - '0';
3100			c = *s++;  /* try for 2 */
3101			if (c >= '0' && c <= '7') {
3102				val = (val << 3) | (c - '0');
3103				c = *s++;  /* try for 3 */
3104				if (c >= '0' && c <= '7')
3105					val = (val << 3) | (c-'0');
3106				else
3107					--s;
3108			}
3109			else
3110				--s;
3111			*p++ = CAST(char, val);
3112			break;
3113
3114		/* \x and up to 2 hex digits */
3115		case 'x':
3116			val = 'x';	/* Default if no digits */
3117			c = hextoint(*s++);	/* Get next char */
3118			if (c >= 0) {
3119				val = c;
3120				c = hextoint(*s++);
3121				if (c >= 0)
3122					val = (val << 4) + c;
3123				else
3124					--s;
3125			} else
3126				--s;
3127			*p++ = CAST(char, val);
3128			break;
3129		}
3130	}
3131	--s;
3132out:
3133	*p = '\0';
3134	m->vallen = CAST(unsigned char, (p - origp));
3135	if (m->type == FILE_PSTRING) {
3136		size_t l =  file_pstring_length_size(ms, m);
3137		if (l == FILE_BADSIZE)
3138			return NULL;
3139		m->vallen += CAST(unsigned char, l);
3140	}
3141	return s;
3142}
3143
3144
3145/* Single hex char to int; -1 if not a hex char. */
3146file_private int
3147hextoint(int c)
3148{
3149	if (!isascii(CAST(unsigned char, c)))
3150		return -1;
3151	if (isdigit(CAST(unsigned char, c)))
3152		return c - '0';
3153	if ((c >= 'a') && (c <= 'f'))
3154		return c + 10 - 'a';
3155	if (( c>= 'A') && (c <= 'F'))
3156		return c + 10 - 'A';
3157	return -1;
3158}
3159
3160
3161/*
3162 * Print a string containing C character escapes.
3163 */
3164file_protected void
3165file_showstr(FILE *fp, const char *s, size_t len)
3166{
3167	char	c;
3168
3169	for (;;) {
3170		if (len == FILE_BADSIZE) {
3171			c = *s++;
3172			if (c == '\0')
3173				break;
3174		}
3175		else  {
3176			if (len-- == 0)
3177				break;
3178			c = *s++;
3179		}
3180		if (c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
3181			(void) fputc(c, fp);
3182		else {
3183			(void) fputc('\\', fp);
3184			switch (c) {
3185			case '\a':
3186				(void) fputc('a', fp);
3187				break;
3188
3189			case '\b':
3190				(void) fputc('b', fp);
3191				break;
3192
3193			case '\f':
3194				(void) fputc('f', fp);
3195				break;
3196
3197			case '\n':
3198				(void) fputc('n', fp);
3199				break;
3200
3201			case '\r':
3202				(void) fputc('r', fp);
3203				break;
3204
3205			case '\t':
3206				(void) fputc('t', fp);
3207				break;
3208
3209			case '\v':
3210				(void) fputc('v', fp);
3211				break;
3212
3213			default:
3214				(void) fprintf(fp, "%.3o", c & 0377);
3215				break;
3216			}
3217		}
3218	}
3219}
3220
3221/*
3222 * eatsize(): Eat the size spec from a number [eg. 10UL]
3223 */
3224file_private void
3225eatsize(const char **p)
3226{
3227	const char *l = *p;
3228
3229	if (LOWCASE(*l) == 'u')
3230		l++;
3231
3232	switch (LOWCASE(*l)) {
3233	case 'l':    /* long */
3234	case 's':    /* short */
3235	case 'h':    /* short */
3236	case 'b':    /* char/byte */
3237	case 'c':    /* char/byte */
3238		l++;
3239		/*FALLTHROUGH*/
3240	default:
3241		break;
3242	}
3243
3244	*p = l;
3245}
3246
3247/*
3248 * handle a buffer containing a compiled file.
3249 */
3250file_private struct magic_map *
3251apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
3252{
3253	struct magic_map *map;
3254
3255	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3256		file_oomem(ms, sizeof(*map));
3257		return NULL;
3258	}
3259	map->len = len;
3260	map->p = buf;
3261	map->type = MAP_TYPE_USER;
3262	if (check_buffer(ms, map, "buffer") != 0) {
3263		apprentice_unmap(map);
3264		return NULL;
3265	}
3266	return map;
3267}
3268
3269/*
3270 * handle a compiled file.
3271 */
3272
3273file_private struct magic_map *
3274apprentice_map(struct magic_set *ms, const char *fn)
3275{
3276	int fd;
3277	struct stat st;
3278	char *dbname = NULL;
3279	struct magic_map *map;
3280	struct magic_map *rv = NULL;
3281
3282	fd = -1;
3283	if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
3284		file_oomem(ms, sizeof(*map));
3285		goto error;
3286	}
3287	map->type = MAP_TYPE_USER;	/* unspecified */
3288
3289	dbname = mkdbname(ms, fn, 0);
3290	if (dbname == NULL)
3291		goto error;
3292
3293	if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
3294		goto error;
3295
3296	if (fstat(fd, &st) == -1) {
3297		file_error(ms, errno, "cannot stat `%s'", dbname);
3298		goto error;
3299	}
3300	if (st.st_size < 8 || st.st_size > maxoff_t()) {
3301		file_error(ms, 0, "file `%s' is too %s", dbname,
3302		    st.st_size < 8 ? "small" : "large");
3303		goto error;
3304	}
3305
3306	map->len = CAST(size_t, st.st_size);
3307#ifdef QUICK
3308	map->type = MAP_TYPE_MMAP;
3309	if ((map->p = mmap(0, CAST(size_t, st.st_size), PROT_READ|PROT_WRITE,
3310	    MAP_PRIVATE|MAP_FILE, fd, CAST(off_t, 0))) == MAP_FAILED) {
3311		file_error(ms, errno, "cannot map `%s'", dbname);
3312		goto error;
3313	}
3314#else
3315	map->type = MAP_TYPE_MALLOC;
3316	if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
3317		file_oomem(ms, map->len);
3318		goto error;
3319	}
3320	if (read(fd, map->p, map->len) != (ssize_t)map->len) {
3321		file_badread(ms);
3322		goto error;
3323	}
3324#endif
3325	(void)close(fd);
3326	fd = -1;
3327
3328	if (check_buffer(ms, map, dbname) != 0) {
3329		goto error;
3330	}
3331#ifdef QUICK
3332	if (mprotect(map->p, CAST(size_t, st.st_size), PROT_READ) == -1) {
3333		file_error(ms, errno, "cannot mprotect `%s'", dbname);
3334		goto error;
3335	}
3336#endif
3337
3338	free(dbname);
3339	return map;
3340
3341error:
3342	if (fd != -1)
3343		(void)close(fd);
3344	apprentice_unmap(map);
3345	free(dbname);
3346	return rv;
3347}
3348
3349file_private int
3350check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
3351{
3352	uint32_t *ptr;
3353	uint32_t entries, nentries;
3354	uint32_t version;
3355	int i, needsbyteswap;
3356
3357	ptr = CAST(uint32_t *, map->p);
3358	if (*ptr != MAGICNO) {
3359		if (swap4(*ptr) != MAGICNO) {
3360			file_error(ms, 0, "bad magic in `%s'", dbname);
3361			return -1;
3362		}
3363		needsbyteswap = 1;
3364	} else
3365		needsbyteswap = 0;
3366	if (needsbyteswap)
3367		version = swap4(ptr[1]);
3368	else
3369		version = ptr[1];
3370	if (version != VERSIONNO) {
3371		file_error(ms, 0, "File %s supports only version %d magic "
3372		    "files. `%s' is version %d", VERSION,
3373		    VERSIONNO, dbname, version);
3374		return -1;
3375	}
3376	entries = CAST(uint32_t, map->len / sizeof(struct magic));
3377	if ((entries * sizeof(struct magic)) != map->len) {
3378		file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
3379		    "a multiple of %" SIZE_T_FORMAT "u",
3380		    dbname, map->len, sizeof(struct magic));
3381		return -1;
3382	}
3383	map->magic[0] = CAST(struct magic *, map->p) + 1;
3384	nentries = 0;
3385	for (i = 0; i < MAGIC_SETS; i++) {
3386		if (needsbyteswap)
3387			map->nmagic[i] = swap4(ptr[i + 2]);
3388		else
3389			map->nmagic[i] = ptr[i + 2];
3390		if (i != MAGIC_SETS - 1)
3391			map->magic[i + 1] = map->magic[i] + map->nmagic[i];
3392		nentries += map->nmagic[i];
3393	}
3394	if (entries != nentries + 1) {
3395		file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
3396		    dbname, entries, nentries + 1);
3397		return -1;
3398	}
3399	if (needsbyteswap)
3400		for (i = 0; i < MAGIC_SETS; i++)
3401			byteswap(map->magic[i], map->nmagic[i]);
3402	return 0;
3403}
3404
3405/*
3406 * handle an mmaped file.
3407 */
3408file_private int
3409apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
3410{
3411	static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
3412	static const size_t m = sizeof(**map->magic);
3413	int fd = -1;
3414	size_t len;
3415	char *dbname;
3416	int rv = -1;
3417	uint32_t i;
3418	union {
3419		struct magic m;
3420		uint32_t h[2 + MAGIC_SETS];
3421	} hdr;
3422
3423	dbname = mkdbname(ms, fn, 1);
3424
3425	if (dbname == NULL)
3426		goto out;
3427
3428	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
3429	{
3430		file_error(ms, errno, "cannot open `%s'", dbname);
3431		goto out;
3432	}
3433	memset(&hdr, 0, sizeof(hdr));
3434	hdr.h[0] = MAGICNO;
3435	hdr.h[1] = VERSIONNO;
3436	memcpy(hdr.h + 2, map->nmagic, nm);
3437
3438	if (write(fd, &hdr, sizeof(hdr)) != CAST(ssize_t, sizeof(hdr))) {
3439		file_error(ms, errno, "error writing `%s'", dbname);
3440		goto out2;
3441	}
3442
3443	for (i = 0; i < MAGIC_SETS; i++) {
3444		len = m * map->nmagic[i];
3445		if (write(fd, map->magic[i], len) != CAST(ssize_t, len)) {
3446			file_error(ms, errno, "error writing `%s'", dbname);
3447			goto out2;
3448		}
3449	}
3450
3451	rv = 0;
3452out2:
3453	if (fd != -1)
3454		(void)close(fd);
3455out:
3456	apprentice_unmap(map);
3457	free(dbname);
3458	return rv;
3459}
3460
3461file_private const char ext[] = ".mgc";
3462/*
3463 * make a dbname
3464 */
3465file_private char *
3466mkdbname(struct magic_set *ms, const char *fn, int strip)
3467{
3468	const char *p, *q;
3469	char *buf;
3470
3471	if (strip) {
3472		if ((p = strrchr(fn, '/')) != NULL)
3473			fn = ++p;
3474	}
3475
3476	for (q = fn; *q; q++)
3477		continue;
3478	/* Look for .mgc */
3479	for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
3480		if (*p != *q)
3481			break;
3482
3483	/* Did not find .mgc, restore q */
3484	if (p >= ext)
3485		while (*q)
3486			q++;
3487
3488	q++;
3489	/* Compatibility with old code that looked in .mime */
3490	if (ms->flags & MAGIC_MIME) {
3491		if (asprintf(&buf, "%.*s.mime%s", CAST(int, q - fn), fn, ext)
3492		    < 0)
3493			return NULL;
3494		if (access(buf, R_OK) != -1) {
3495			ms->flags &= MAGIC_MIME_TYPE;
3496			return buf;
3497		}
3498		free(buf);
3499	}
3500	if (asprintf(&buf, "%.*s%s", CAST(int, q - fn), fn, ext) < 0)
3501		return NULL;
3502
3503	/* Compatibility with old code that looked in .mime */
3504	if (strstr(fn, ".mime") != NULL)
3505		ms->flags &= MAGIC_MIME_TYPE;
3506	return buf;
3507}
3508
3509/*
3510 * Byteswap an mmap'ed file if needed
3511 */
3512file_private void
3513byteswap(struct magic *magic, uint32_t nmagic)
3514{
3515	uint32_t i;
3516	for (i = 0; i < nmagic; i++)
3517		bs1(&magic[i]);
3518}
3519
3520#if !defined(HAVE_BYTESWAP_H) && !defined(HAVE_SYS_BSWAP_H)
3521/*
3522 * swap a short
3523 */
3524file_private uint16_t
3525swap2(uint16_t sv)
3526{
3527	uint16_t rv;
3528	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3529	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3530	d[0] = s[1];
3531	d[1] = s[0];
3532	return rv;
3533}
3534
3535/*
3536 * swap an int
3537 */
3538file_private uint32_t
3539swap4(uint32_t sv)
3540{
3541	uint32_t rv;
3542	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3543	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3544	d[0] = s[3];
3545	d[1] = s[2];
3546	d[2] = s[1];
3547	d[3] = s[0];
3548	return rv;
3549}
3550
3551/*
3552 * swap a quad
3553 */
3554file_private uint64_t
3555swap8(uint64_t sv)
3556{
3557	uint64_t rv;
3558	uint8_t *s = RCAST(uint8_t *, RCAST(void *, &sv));
3559	uint8_t *d = RCAST(uint8_t *, RCAST(void *, &rv));
3560# if 0
3561	d[0] = s[3];
3562	d[1] = s[2];
3563	d[2] = s[1];
3564	d[3] = s[0];
3565	d[4] = s[7];
3566	d[5] = s[6];
3567	d[6] = s[5];
3568	d[7] = s[4];
3569# else
3570	d[0] = s[7];
3571	d[1] = s[6];
3572	d[2] = s[5];
3573	d[3] = s[4];
3574	d[4] = s[3];
3575	d[5] = s[2];
3576	d[6] = s[1];
3577	d[7] = s[0];
3578# endif
3579	return rv;
3580}
3581#endif
3582
3583file_protected uintmax_t
3584file_varint2uintmax_t(const unsigned char *us, int t, size_t *l)
3585{
3586        uintmax_t x = 0;
3587        const unsigned char *c;
3588        if (t == FILE_LEVARINT) {
3589                for (c = us; *c; c++) {
3590                        if ((*c & 0x80) == 0)
3591                                break;
3592                }
3593		if (l)
3594			*l = c - us + 1;
3595                for (; c >= us; c--) {
3596                        x |= *c & 0x7f;
3597                        x <<= 7;
3598                }
3599        } else {
3600                for (c = us; *c; c++) {
3601			x |= *c & 0x7f;
3602			if ((*c & 0x80) == 0)
3603				break;
3604			x <<= 7;
3605                }
3606		if (l)
3607			*l = c - us + 1;
3608        }
3609	return x;
3610}
3611
3612
3613/*
3614 * byteswap a single magic entry
3615 */
3616file_private void
3617bs1(struct magic *m)
3618{
3619	m->cont_level = swap2(m->cont_level);
3620	m->offset = swap4(CAST(uint32_t, m->offset));
3621	m->in_offset = swap4(CAST(uint32_t, m->in_offset));
3622	m->lineno = swap4(CAST(uint32_t, m->lineno));
3623	if (IS_STRING(m->type)) {
3624		m->str_range = swap4(m->str_range);
3625		m->str_flags = swap4(m->str_flags);
3626	}
3627	else {
3628		m->value.q = swap8(m->value.q);
3629		m->num_mask = swap8(m->num_mask);
3630	}
3631}
3632
3633file_protected size_t
3634file_pstring_length_size(struct magic_set *ms, const struct magic *m)
3635{
3636	switch (m->str_flags & PSTRING_LEN) {
3637	case PSTRING_1_LE:
3638		return 1;
3639	case PSTRING_2_LE:
3640	case PSTRING_2_BE:
3641		return 2;
3642	case PSTRING_4_LE:
3643	case PSTRING_4_BE:
3644		return 4;
3645	default:
3646		file_error(ms, 0, "corrupt magic file "
3647		    "(bad pascal string length %d)",
3648		    m->str_flags & PSTRING_LEN);
3649		return FILE_BADSIZE;
3650	}
3651}
3652file_protected size_t
3653file_pstring_get_length(struct magic_set *ms, const struct magic *m,
3654    const char *ss)
3655{
3656	size_t len = 0;
3657	const unsigned char *s = RCAST(const unsigned char *, ss);
3658	unsigned int s3, s2, s1, s0;
3659
3660	switch (m->str_flags & PSTRING_LEN) {
3661	case PSTRING_1_LE:
3662		len = *s;
3663		break;
3664	case PSTRING_2_LE:
3665		s0 = s[0];
3666		s1 = s[1];
3667		len = (s1 << 8) | s0;
3668		break;
3669	case PSTRING_2_BE:
3670		s0 = s[0];
3671		s1 = s[1];
3672		len = (s0 << 8) | s1;
3673		break;
3674	case PSTRING_4_LE:
3675		s0 = s[0];
3676		s1 = s[1];
3677		s2 = s[2];
3678		s3 = s[3];
3679		len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
3680		break;
3681	case PSTRING_4_BE:
3682		s0 = s[0];
3683		s1 = s[1];
3684		s2 = s[2];
3685		s3 = s[3];
3686		len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
3687		break;
3688	default:
3689		file_error(ms, 0, "corrupt magic file "
3690		    "(bad pascal string length %d)",
3691		    m->str_flags & PSTRING_LEN);
3692		return FILE_BADSIZE;
3693	}
3694
3695	if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF) {
3696		size_t l = file_pstring_length_size(ms, m);
3697		if (l == FILE_BADSIZE)
3698			return l;
3699		len -= l;
3700	}
3701
3702	return len;
3703}
3704
3705file_protected int
3706file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
3707{
3708	uint32_t i, j;
3709	struct mlist *mlist, *ml;
3710
3711	mlist = ms->mlist[1];
3712
3713	for (ml = mlist->next; ml != mlist; ml = ml->next) {
3714		struct magic *ma = ml->magic;
3715		for (i = 0; i < ml->nmagic; i++) {
3716			if (ma[i].type != FILE_NAME)
3717				continue;
3718			if (strcmp(ma[i].value.s, name) == 0) {
3719				v->magic = &ma[i];
3720				v->magic_rxcomp = &(ml->magic_rxcomp[i]);
3721				for (j = i + 1; j < ml->nmagic; j++)
3722				    if (ma[j].cont_level == 0)
3723					    break;
3724				v->nmagic = j - i;
3725				return 0;
3726			}
3727		}
3728	}
3729	return -1;
3730}
3731