apprentice.c revision 111658
1/*
2 * apprentice - make one pass through /etc/magic, learning its secrets.
3 *
4 * Copyright (c) Ian F. Darwin, 1987.
5 * Written by Ian F. Darwin.
6 *
7 * This software is not subject to any license of the American Telephone
8 * and Telegraph Company or of the Regents of the University of California.
9 *
10 * Permission is granted to anyone to use this software for any purpose on
11 * any computer system, and to alter it and redistribute it freely, subject
12 * to the following restrictions:
13 *
14 * 1. The author is not responsible for the consequences of use of this
15 *    software, no matter how awful, even if they arise from flaws in it.
16 *
17 * 2. The origin of this software must not be misrepresented, either by
18 *    explicit claim or by omission.  Since few users ever read sources,
19 *    credits must appear in the documentation.
20 *
21 * 3. Altered versions must be plainly marked as such, and must not be
22 *    misrepresented as being the original software.  Since few users
23 *    ever read sources, credits must appear in the documentation.
24 *
25 * 4. This notice may not be removed or altered.
26 */
27
28#include "file.h"
29#include <stdlib.h>
30#ifdef HAVE_UNISTD_H
31#include <unistd.h>
32#endif
33#include <string.h>
34#include <ctype.h>
35#include <fcntl.h>
36#ifdef QUICK
37#include <sys/mman.h>
38#endif
39
40#ifndef	lint
41FILE_RCSID("@(#)$Id: apprentice.c,v 1.50 2003/02/27 20:47:46 christos Exp $")
42#endif	/* lint */
43
/* Advance the caller's scan pointer `l' past any ASCII whitespace. */
#define	EATAB \
	{ \
		while (isascii((unsigned char) *l) && \
		    isspace((unsigned char) *l)) \
			++l; \
	}
/* `l' converted to lower case when it is an upper-case letter, else `l'. */
#define LOWCASE(l) \
	(isupper((unsigned char) (l)) ? tolower((unsigned char) (l)) : (l))
48/*
49 * Work around a bug in headers on Digital Unix.
50 * At least confirmed for: OSF1 V4.0 878
51 */
52#if defined(__osf__) && defined(__DECC)
53#ifdef MAP_FAILED
54#undef MAP_FAILED
55#endif
56#endif
57
58#ifndef MAP_FAILED
59#define MAP_FAILED (void *) -1
60#endif
61
62#ifndef MAP_FILE
63#define MAP_FILE 0
64#endif
65
/* Character that separates the names in a list of magic files. */
#ifdef __EMX__
char PATHSEP = ';';
#else
char PATHSEP = ':';
#endif
71
72
73static int getvalue(struct magic *, char **);
74static int hextoint(int);
75static char *getstr(char *, char *, int, int *);
76static int parse(struct magic **, uint32_t *, char *, int);
77static void eatsize(char **);
78static int apprentice_1(const char *, int);
79static int apprentice_file(struct magic **, uint32_t *, const char *, int);
80static void byteswap(struct magic *, uint32_t);
81static void bs1(struct magic *);
82static uint16_t swap2(uint16_t);
83static uint32_t swap4(uint32_t);
84static char *mkdbname(const char *);
85static int apprentice_map(struct magic **, uint32_t *, const char *, int);
86static int apprentice_compile(struct magic **, uint32_t *, const char *, int);
87
88static int maxmagic = 0;
89
90struct mlist mlist;
91
92#ifdef COMPILE_ONLY
93const char *magicfile;
94char *progname;
95int lineno;
96
97int main(int, char *[]);
98
99int
100main(int argc, char *argv[])
101{
102	int ret;
103
104	if ((progname = strrchr(argv[0], '/')) != NULL)
105		progname++;
106	else
107		progname = argv[0];
108
109	if (argc != 2) {
110		(void)fprintf(stderr, "usage: %s file\n", progname);
111		exit(1);
112	}
113	magicfile = argv[1];
114
115	exit(apprentice(magicfile, COMPILE));
116}
117#endif /* COMPILE_ONLY */
118
119
120/*
121 * Handle one file.
122 */
123static int
124apprentice_1(const char *fn, int action)
125{
126	struct magic *magic = NULL;
127	uint32_t nmagic = 0;
128	struct mlist *ml;
129	int rv = -1;
130
131	if (action == COMPILE) {
132		rv = apprentice_file(&magic, &nmagic, fn, action);
133		if (rv == 0)
134			return apprentice_compile(&magic, &nmagic, fn, action);
135		else
136			return rv;
137	}
138#ifndef COMPILE_ONLY
139	if ((rv = apprentice_map(&magic, &nmagic, fn, action)) != 0)
140		(void)fprintf(stderr, "%s: Using regular magic file `%s'\n",
141		    progname, fn);
142
143	if (rv != 0)
144		rv = apprentice_file(&magic, &nmagic, fn, action);
145
146	if (rv != 0)
147		return rv;
148
149	if ((ml = malloc(sizeof(*ml))) == NULL) {
150		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
151		    strerror(errno));
152		if (action == CHECK)
153			return -1;
154	}
155
156	if (magic == NULL || nmagic == 0)
157		return rv;
158
159	ml->magic = magic;
160	ml->nmagic = nmagic;
161
162	mlist.prev->next = ml;
163	ml->prev = mlist.prev;
164	ml->next = &mlist;
165	mlist.prev = ml;
166
167	return rv;
168#endif /* COMPILE_ONLY */
169}
170
171
172/* const char *fn: list of magic files */
173int
174apprentice(const char *fn, int action)
175{
176	char *p, *mfn;
177	int file_err, errs = -1;
178
179	mlist.next = mlist.prev = &mlist;
180	mfn = malloc(strlen(fn)+1);
181	if (mfn == NULL) {
182		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
183		    strerror(errno));
184		if (action == CHECK)
185			return -1;
186		else
187			exit(1);
188	}
189	fn = strcpy(mfn, fn);
190
191	while (fn) {
192		p = strchr(fn, PATHSEP);
193		if (p)
194			*p++ = '\0';
195		file_err = apprentice_1(fn, action);
196		if (file_err > errs)
197			errs = file_err;
198		fn = p;
199	}
200	if (errs == -1)
201		(void) fprintf(stderr, "%s: couldn't find any magic files!\n",
202		    progname);
203	if (action == CHECK && errs)
204		exit(1);
205
206	free(mfn);
207	return errs;
208}
209
210/*
211 * parse from a file
212 * const char *fn: name of magic file
213 */
214static int
215apprentice_file(struct magic **magicp, uint32_t *nmagicp, const char *fn,
216		int action)
217{
218	static const char hdr[] =
219		"cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
220	FILE *f;
221	char line[BUFSIZ+1];
222	int errs = 0;
223
224	f = fopen(fn, "r");
225	if (f == NULL) {
226		if (errno != ENOENT)
227			(void) fprintf(stderr,
228			    "%s: can't read magic file %s (%s)\n",
229			    progname, fn, strerror(errno));
230		return -1;
231	}
232
233        maxmagic = MAXMAGIS;
234	*magicp = (struct magic *) calloc(maxmagic, sizeof(struct magic));
235	if (*magicp == NULL) {
236		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
237		    strerror(errno));
238		if (action == CHECK)
239			return -1;
240	}
241
242	/* parse it */
243	if (action == CHECK)	/* print silly verbose header for USG compat. */
244		(void) printf("%s\n", hdr);
245
246	for (lineno = 1;fgets(line, BUFSIZ, f) != NULL; lineno++) {
247		if (line[0]=='#')	/* comment, do not parse */
248			continue;
249		if (strlen(line) <= (unsigned)1) /* null line, garbage, etc */
250			continue;
251		line[strlen(line)-1] = '\0'; /* delete newline */
252		if (parse(magicp, nmagicp, line, action) != 0)
253			errs = 1;
254	}
255
256	(void) fclose(f);
257	if (errs) {
258		free(*magicp);
259		*magicp = NULL;
260		*nmagicp = 0;
261	}
262	return errs;
263}
264
265/*
266 * extend the sign bit if the comparison is to be signed
267 */
268uint32_t
269signextend(struct magic *m, uint32_t v)
270{
271	if (!(m->flag & UNSIGNED))
272		switch(m->type) {
273		/*
274		 * Do not remove the casts below.  They are
275		 * vital.  When later compared with the data,
276		 * the sign extension must have happened.
277		 */
278		case BYTE:
279			v = (char) v;
280			break;
281		case SHORT:
282		case BESHORT:
283		case LESHORT:
284			v = (short) v;
285			break;
286		case DATE:
287		case BEDATE:
288		case LEDATE:
289		case LDATE:
290		case BELDATE:
291		case LELDATE:
292		case LONG:
293		case BELONG:
294		case LELONG:
295			v = (int32_t) v;
296			break;
297		case STRING:
298		case PSTRING:
299			break;
300		case REGEX:
301			break;
302		default:
303			magwarn("can't happen: m->type=%d\n",
304				m->type);
305			return -1;
306		}
307	return v;
308}
309
310/*
311 * parse one line from magic file, put into magic[index++] if valid
312 */
313static int
314parse(struct magic **magicp, uint32_t *nmagicp, char *l, int action)
315{
316	int i = 0;
317	struct magic *m;
318	char *t;
319
320#define ALLOC_INCR	200
321	if (*nmagicp + 1 >= maxmagic){
322		maxmagic += ALLOC_INCR;
323		if ((m = (struct magic *) realloc(*magicp,
324		    sizeof(struct magic) * maxmagic)) == NULL) {
325			(void) fprintf(stderr, "%s: Out of memory (%s).\n",
326			    progname, strerror(errno));
327			if (*magicp)
328				free(*magicp);
329			if (action == CHECK)
330				return -1;
331			else
332				exit(1);
333		}
334		*magicp = m;
335		memset(&(*magicp)[*nmagicp], 0, sizeof(struct magic)
336		    * ALLOC_INCR);
337	}
338	m = &(*magicp)[*nmagicp];
339	m->flag = 0;
340	m->cont_level = 0;
341
342	while (*l == '>') {
343		++l;		/* step over */
344		m->cont_level++;
345	}
346
347	if (m->cont_level != 0 && *l == '(') {
348		++l;		/* step over */
349		m->flag |= INDIR;
350	}
351	if (m->cont_level != 0 && *l == '&') {
352                ++l;            /* step over */
353                m->flag |= OFFADD;
354        }
355
356	/* get offset, then skip over it */
357	m->offset = (int) strtoul(l,&t,0);
358        if (l == t)
359		magwarn("offset %s invalid", l);
360        l = t;
361
362	if (m->flag & INDIR) {
363		m->in_type = LONG;
364		m->in_offset = 0;
365		/*
366		 * read [.lbs][+-]nnnnn)
367		 */
368		if (*l == '.') {
369			l++;
370			switch (*l) {
371			case 'l':
372				m->in_type = LELONG;
373				break;
374			case 'L':
375				m->in_type = BELONG;
376				break;
377			case 'h':
378			case 's':
379				m->in_type = LESHORT;
380				break;
381			case 'H':
382			case 'S':
383				m->in_type = BESHORT;
384				break;
385			case 'c':
386			case 'b':
387			case 'C':
388			case 'B':
389				m->in_type = BYTE;
390				break;
391			default:
392				magwarn("indirect offset type %c invalid", *l);
393				break;
394			}
395			l++;
396		}
397		if (*l == '~') {
398			m->in_op = OPINVERSE;
399			l++;
400		}
401		switch (*l) {
402		case '&':
403			m->in_op |= OPAND;
404			l++;
405			break;
406		case '|':
407			m->in_op |= OPOR;
408			l++;
409			break;
410		case '^':
411			m->in_op |= OPXOR;
412			l++;
413			break;
414		case '+':
415			m->in_op |= OPADD;
416			l++;
417			break;
418		case '-':
419			m->in_op |= OPMINUS;
420			l++;
421			break;
422		case '*':
423			m->in_op |= OPMULTIPLY;
424			l++;
425			break;
426		case '/':
427			m->in_op |= OPDIVIDE;
428			l++;
429			break;
430		case '%':
431			m->in_op |= OPMODULO;
432			l++;
433			break;
434		}
435		if (isdigit((unsigned char)*l))
436			m->in_offset = strtoul(l, &t, 0);
437		else
438			t = l;
439		if (*t++ != ')')
440			magwarn("missing ')' in indirect offset");
441		l = t;
442	}
443
444
445	while (isascii((unsigned char)*l) && isdigit((unsigned char)*l))
446		++l;
447	EATAB;
448
449#define NBYTE		4
450#define NSHORT		5
451#define NLONG		4
452#define NSTRING 	6
453#define NDATE		4
454#define NBESHORT	7
455#define NBELONG		6
456#define NBEDATE		6
457#define NLESHORT	7
458#define NLELONG		6
459#define NLEDATE		6
460#define NPSTRING	7
461#define NLDATE		5
462#define NBELDATE	7
463#define NLELDATE	7
464#define NREGEX		5
465
466	if (*l == 'u') {
467		++l;
468		m->flag |= UNSIGNED;
469	}
470
471	/* get type, skip it */
472	if (strncmp(l, "char", NBYTE)==0) {	/* HP/UX compat */
473		m->type = BYTE;
474		l += NBYTE;
475	} else if (strncmp(l, "byte", NBYTE)==0) {
476		m->type = BYTE;
477		l += NBYTE;
478	} else if (strncmp(l, "short", NSHORT)==0) {
479		m->type = SHORT;
480		l += NSHORT;
481	} else if (strncmp(l, "long", NLONG)==0) {
482		m->type = LONG;
483		l += NLONG;
484	} else if (strncmp(l, "string", NSTRING)==0) {
485		m->type = STRING;
486		l += NSTRING;
487	} else if (strncmp(l, "date", NDATE)==0) {
488		m->type = DATE;
489		l += NDATE;
490	} else if (strncmp(l, "beshort", NBESHORT)==0) {
491		m->type = BESHORT;
492		l += NBESHORT;
493	} else if (strncmp(l, "belong", NBELONG)==0) {
494		m->type = BELONG;
495		l += NBELONG;
496	} else if (strncmp(l, "bedate", NBEDATE)==0) {
497		m->type = BEDATE;
498		l += NBEDATE;
499	} else if (strncmp(l, "leshort", NLESHORT)==0) {
500		m->type = LESHORT;
501		l += NLESHORT;
502	} else if (strncmp(l, "lelong", NLELONG)==0) {
503		m->type = LELONG;
504		l += NLELONG;
505	} else if (strncmp(l, "ledate", NLEDATE)==0) {
506		m->type = LEDATE;
507		l += NLEDATE;
508	} else if (strncmp(l, "pstring", NPSTRING)==0) {
509		m->type = PSTRING;
510		l += NPSTRING;
511	} else if (strncmp(l, "ldate", NLDATE)==0) {
512		m->type = LDATE;
513		l += NLDATE;
514	} else if (strncmp(l, "beldate", NBELDATE)==0) {
515		m->type = BELDATE;
516		l += NBELDATE;
517	} else if (strncmp(l, "leldate", NLELDATE)==0) {
518		m->type = LELDATE;
519		l += NLELDATE;
520	} else if (strncmp(l, "regex", NREGEX)==0) {
521		m->type = REGEX;
522		l += sizeof("regex");
523	} else {
524		magwarn("type %s invalid", l);
525		return -1;
526	}
527	/* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
528	/* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
529	if (*l == '~') {
530		if (STRING != m->type && PSTRING != m->type)
531			m->mask_op = OPINVERSE;
532		++l;
533	}
534	switch (*l) {
535	case '&':
536		m->mask_op |= OPAND;
537		++l;
538		m->mask = signextend(m, strtoul(l, &l, 0));
539		eatsize(&l);
540		break;
541	case '|':
542		m->mask_op |= OPOR;
543		++l;
544		m->mask = signextend(m, strtoul(l, &l, 0));
545		eatsize(&l);
546		break;
547	case '^':
548		m->mask_op |= OPXOR;
549		++l;
550		m->mask = signextend(m, strtoul(l, &l, 0));
551		eatsize(&l);
552		break;
553	case '+':
554		m->mask_op |= OPADD;
555		++l;
556		m->mask = signextend(m, strtoul(l, &l, 0));
557		eatsize(&l);
558		break;
559	case '-':
560		m->mask_op |= OPMINUS;
561		++l;
562		m->mask = signextend(m, strtoul(l, &l, 0));
563		eatsize(&l);
564		break;
565	case '*':
566		m->mask_op |= OPMULTIPLY;
567		++l;
568		m->mask = signextend(m, strtoul(l, &l, 0));
569		eatsize(&l);
570		break;
571	case '%':
572		m->mask_op |= OPMODULO;
573		++l;
574		m->mask = signextend(m, strtoul(l, &l, 0));
575		eatsize(&l);
576		break;
577	case '/':
578		if (STRING != m->type && PSTRING != m->type) {
579			m->mask_op |= OPDIVIDE;
580			++l;
581			m->mask = signextend(m, strtoul(l, &l, 0));
582			eatsize(&l);
583		} else {
584			m->mask = 0L;
585			while (!isspace(*++l)) {
586				switch (*l) {
587				case CHAR_IGNORE_LOWERCASE:
588					m->mask |= STRING_IGNORE_LOWERCASE;
589					break;
590				case CHAR_COMPACT_BLANK:
591					m->mask |= STRING_COMPACT_BLANK;
592					break;
593				case CHAR_COMPACT_OPTIONAL_BLANK:
594					m->mask |=
595					    STRING_COMPACT_OPTIONAL_BLANK;
596					break;
597				default:
598					magwarn("string extension %c invalid",
599					    *l);
600					return -1;
601				}
602			}
603		}
604		break;
605	}
606	/* We used to set mask to all 1's here, instead let's just not do anything
607	   if mask = 0 (unless you have a better idea) */
608	EATAB;
609
610	switch (*l) {
611	case '>':
612	case '<':
613	/* Old-style anding: "0 byte &0x80 dynamically linked" */
614	case '&':
615	case '^':
616	case '=':
617  		m->reln = *l;
618  		++l;
619		if (*l == '=') {
620		   /* HP compat: ignore &= etc. */
621		   ++l;
622		}
623		break;
624	case '!':
625		if (m->type != STRING && m->type != PSTRING) {
626			m->reln = *l;
627			++l;
628			break;
629		}
630		/* FALL THROUGH */
631	default:
632		if (*l == 'x' && isascii((unsigned char)l[1]) &&
633		    isspace((unsigned char)l[1])) {
634			m->reln = *l;
635			++l;
636			goto GetDesc;	/* Bill The Cat */
637		}
638  		m->reln = '=';
639		break;
640	}
641  	EATAB;
642
643	if (getvalue(m, &l))
644		return -1;
645	/*
646	 * TODO finish this macro and start using it!
647	 * #define offsetcheck {if (offset > HOWMANY-1)
648	 *	magwarn("offset too big"); }
649	 */
650
651	/*
652	 * now get last part - the description
653	 */
654GetDesc:
655	EATAB;
656	if (l[0] == '\b') {
657		++l;
658		m->nospflag = 1;
659	} else if ((l[0] == '\\') && (l[1] == 'b')) {
660		++l;
661		++l;
662		m->nospflag = 1;
663	} else
664		m->nospflag = 0;
665	while ((m->desc[i++] = *l++) != '\0' && i<MAXDESC)
666		/* NULLBODY */;
667
668#ifndef COMPILE_ONLY
669	if (action == CHECK) {
670		mdump(m);
671	}
672#endif
673	++(*nmagicp);		/* make room for next */
674	return 0;
675}
676
677/*
678 * Read a numeric value from a pointer, into the value union of a magic
679 * pointer, according to the magic type.  Update the string pointer to point
680 * just after the number read.  Return 0 for success, non-zero for failure.
681 */
682static int
683getvalue(struct magic *m, char **p)
684{
685	int slen;
686
687	if (m->type == STRING || m->type == PSTRING || m->type == REGEX) {
688		*p = getstr(*p, m->value.s, sizeof(m->value.s), &slen);
689		m->vallen = slen;
690	} else
691		if (m->reln != 'x') {
692			m->value.l = signextend(m, strtoul(*p, p, 0));
693			eatsize(p);
694		}
695	return 0;
696}
697
/* Single hex char to int; -1 if not a hex char. */
static int
hextoint(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c + 10 - 'a';
	if (c >= 'A' && c <= 'F')
		return c + 10 - 'A';
	return -1;
}


/*
 * Convert a string containing C character escapes.  Stop at unescaped
 * white space or at the end of the input.
 * Copy the converted version to "p" (at most plen - 1 characters plus a
 * terminating NUL), returning its length in *slen.
 * Return the updated scan pointer as function result: just past the white
 * space character that ended the string, or on the terminating NUL when the
 * input ran out, so the caller never looks beyond the end of its line.
 */
static char *
getstr(char *s, char *p, int plen, int *slen)
{
	char	*origs = s, *origp = p;
	char	*pmax = p + plen - 1;
	int	c;
	int	val;

	while ((c = *s++) != '\0') {
		if (isspace((unsigned char) c))
			break;
		if (p >= pmax) {
			fprintf(stderr, "String too long: %s\n", origs);
			break;
		}
		if(c == '\\') {
			switch(c = *s++) {

			case '\0':
				/* Lone trailing backslash: stay on the NUL. */
				--s;
				goto out;

			default:
				*p++ = (char) c;
				break;

			case 'n':
				*p++ = '\n';
				break;

			case 'r':
				*p++ = '\r';
				break;

			case 'b':
				*p++ = '\b';
				break;

			case 't':
				*p++ = '\t';
				break;

			case 'f':
				*p++ = '\f';
				break;

			case 'v':
				*p++ = '\v';
				break;

			/* \ and up to 3 octal digits */
			case '0':
			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
				val = c - '0';
				c = *s++;  /* try for 2 */
				if(c >= '0' && c <= '7') {
					val = (val<<3) | (c - '0');
					c = *s++;  /* try for 3 */
					if(c >= '0' && c <= '7')
						val = (val<<3) | (c-'0');
					else
						--s;
				}
				else
					--s;
				*p++ = (char)val;
				break;

			/* \x and up to 2 hex digits */
			case 'x':
				val = 'x';	/* Default if no digits */
				c = hextoint(*s++);	/* Get next char */
				if (c >= 0) {
					val = c;
					c = hextoint(*s++);
					if (c >= 0)
						val = (val << 4) + c;
					else
						--s;
				} else
					--s;
				*p++ = (char)val;
				break;
			}
		} else
			*p++ = (char)c;
	}
	/*
	 * The loop steps over every character it reads.  When that was the
	 * terminating NUL, back up so the result still points into the
	 * string.
	 */
	if (c == '\0')
		--s;
out:
	*p = '\0';
	*slen = p - origp;
	return s;
}
817
818
/*
 * Print a string containing C character escapes.
 *
 * fp:  stream to write to
 * s:   characters to print
 * len: number of characters to print, or -1 to print up to (not including)
 *      the terminating NUL
 *
 * Characters in the range 040..0176 are written as they are; \n \r \b \t
 * \f \v are written as those two-character escapes and everything else as
 * a backslash followed by three octal digits.
 */
void
showstr(FILE *fp, const char *s, int len)
{
	char	c;

	for (;;) {
		/*
		 * Decide whether to stop before fetching the next
		 * character, so that s[len] is never read.
		 */
		if (len == -1) {
			if (*s == '\0')
				break;
		}
		else  {
			if (len-- == 0)
				break;
		}
		c = *s++;
		if(c >= 040 && c <= 0176)	/* TODO isprint && !iscntrl */
			(void) fputc(c, fp);
		else {
			(void) fputc('\\', fp);
			switch (c) {

			case '\n':
				(void) fputc('n', fp);
				break;

			case '\r':
				(void) fputc('r', fp);
				break;

			case '\b':
				(void) fputc('b', fp);
				break;

			case '\t':
				(void) fputc('t', fp);
				break;

			case '\f':
				(void) fputc('f', fp);
				break;

			case '\v':
				(void) fputc('v', fp);
				break;

			default:
				(void) fprintf(fp, "%.3o", c & 0377);
				break;
			}
		}
	}
}
874
/*
 * eatsize(): Eat the size spec from a number [eg. 10UL]
 *
 * Advances *p over an optional 'u' followed by at most one of the size
 * letters l, s, h, b or c; all letters are matched in either case.
 */
static void
eatsize(char **p)
{
	char *cur = *p;

	if (tolower((unsigned char) *cur) == 'u')
		cur++;

	switch (tolower((unsigned char) *cur)) {
	case 'b':    /* char/byte */
	case 'c':    /* char/byte */
	case 'h':    /* short */
	case 's':    /* short */
	case 'l':    /* long */
		cur++;
		break;
	default:
		break;
	}

	*p = cur;
}
900
901/*
902 * handle a compiled file.
903 */
904static int
905apprentice_map(struct magic **magicp, uint32_t *nmagicp, const char *fn,
906    int action)
907{
908	int fd;
909	struct stat st;
910	uint32_t *ptr;
911	uint32_t version;
912	int needsbyteswap;
913	char *dbname = mkdbname(fn);
914	void *mm;
915
916	if (dbname == NULL)
917		return -1;
918
919	if ((fd = open(dbname, O_RDONLY)) == -1)
920		return -1;
921
922	if (fstat(fd, &st) == -1) {
923		(void)fprintf(stderr, "%s: Cannot stat `%s' (%s)\n",
924		    progname, dbname, strerror(errno));
925		goto error;
926	}
927
928#ifdef QUICK
929	if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
930	    MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
931		(void)fprintf(stderr, "%s: Cannot map `%s' (%s)\n",
932		    progname, dbname, strerror(errno));
933		goto error;
934	}
935#else
936	if ((mm = malloc((size_t)st.st_size)) == NULL) {
937		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
938		     strerror(errno));
939		goto error;
940	}
941	if (read(fd, mm, (size_t)st.st_size) != (size_t)st.st_size) {
942		(void) fprintf(stderr, "%s: Read failed (%s).\n", progname,
943		    strerror(errno));
944		goto error;
945	}
946#endif
947	*magicp = mm;
948	(void)close(fd);
949	fd = -1;
950	ptr = (uint32_t *) *magicp;
951	if (*ptr != MAGICNO) {
952		if (swap4(*ptr) != MAGICNO) {
953			(void)fprintf(stderr, "%s: Bad magic in `%s'\n",
954			    progname, dbname);
955			goto error;
956		}
957		needsbyteswap = 1;
958	} else
959		needsbyteswap = 0;
960	if (needsbyteswap)
961		version = swap4(ptr[1]);
962	else
963		version = ptr[1];
964	if (version != VERSIONNO) {
965		(void)fprintf(stderr,
966		    "%s: version mismatch (%d != %d) in `%s'\n",
967		    progname, version, VERSIONNO, dbname);
968		goto error;
969	}
970	*nmagicp = (st.st_size / sizeof(struct magic)) - 1;
971	(*magicp)++;
972	if (needsbyteswap)
973		byteswap(*magicp, *nmagicp);
974	return 0;
975
976error:
977	if (fd != -1)
978		(void)close(fd);
979	if (mm) {
980#ifdef QUICK
981		(void)munmap(mm, (size_t)st.st_size);
982#else
983		free(mm);
984#endif
985	} else {
986		*magicp = NULL;
987		*nmagicp = 0;
988	}
989	return -1;
990}
991
992/*
993 * handle an mmaped file.
994 */
995static int
996apprentice_compile(struct magic **magicp, uint32_t *nmagicp, const char *fn,
997    int action)
998{
999	int fd;
1000	char *dbname = mkdbname(fn);
1001	static const uint32_t ar[] = {
1002	    MAGICNO, VERSIONNO
1003	};
1004
1005	if (dbname == NULL)
1006		return -1;
1007
1008	if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC, 0644)) == -1) {
1009		(void)fprintf(stderr, "%s: Cannot open `%s' (%s)\n",
1010		    progname, dbname, strerror(errno));
1011		return -1;
1012	}
1013
1014	if (write(fd, ar, sizeof(ar)) != sizeof(ar)) {
1015		(void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
1016		    progname, dbname, strerror(errno));
1017		return -1;
1018	}
1019
1020	if (lseek(fd, sizeof(struct magic), SEEK_SET) != sizeof(struct magic)) {
1021		(void)fprintf(stderr, "%s: error seeking `%s' (%s)\n",
1022		    progname, dbname, strerror(errno));
1023		return -1;
1024	}
1025
1026	if (write(fd, *magicp,  sizeof(struct magic) * *nmagicp)
1027	    != sizeof(struct magic) * *nmagicp) {
1028		(void)fprintf(stderr, "%s: error writing `%s' (%s)\n",
1029		    progname, dbname, strerror(errno));
1030		return -1;
1031	}
1032
1033	(void)close(fd);
1034	return 0;
1035}
1036
1037/*
1038 * make a dbname
1039 */
1040char *
1041mkdbname(const char *fn)
1042{
1043	static const char ext[] = ".mgc";
1044	static char *buf = NULL;
1045	size_t len = strlen(fn) + sizeof(ext) + 1;
1046	if (buf == NULL)
1047		buf = malloc(len);
1048	else
1049		buf = realloc(buf, len);
1050	if (buf == NULL) {
1051		(void) fprintf(stderr, "%s: Out of memory (%s).\n", progname,
1052		    strerror(errno));
1053		return NULL;
1054	}
1055	(void)strcpy(buf, fn);
1056	(void)strcat(buf, ext);
1057	return buf;
1058}
1059
1060/*
1061 * Byteswap an mmap'ed file if needed
1062 */
1063static void
1064byteswap(struct magic *magic, uint32_t nmagic)
1065{
1066	uint32_t i;
1067	for (i = 0; i < nmagic; i++)
1068		bs1(&magic[i]);
1069}
1070
/*
 * Return sv with its two bytes exchanged.
 */
static uint16_t
swap2(uint16_t sv)
{
	return (uint16_t)((sv << 8) | (sv >> 8));
}
1084
/*
 * Return sv with the order of its four bytes reversed.
 */
static uint32_t
swap4(uint32_t sv)
{
	uint32_t b0 = sv & 0xffU;
	uint32_t b1 = (sv >> 8) & 0xffU;
	uint32_t b2 = (sv >> 16) & 0xffU;
	uint32_t b3 = (sv >> 24) & 0xffU;

	return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1100
1101/*
1102 * byteswap a single magic entry
1103 */
1104static
1105void bs1(struct magic *m)
1106{
1107	m->cont_level = swap2(m->cont_level);
1108	m->offset = swap4(m->offset);
1109	m->in_offset = swap4(m->in_offset);
1110	if (m->type != STRING)
1111		m->value.l = swap4(m->value.l);
1112	m->mask = swap4(m->mask);
1113}
1114