1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * library interface to file
28 *
29 * the sum of the hacks {s5,v10,planix} is _____ than the parts
30 */
31
/*
 * version stamp: carries both the "@(#)" and "$Id: ... $" markers and an
 * embedded NUL ahead of the final newline
 */
static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n";

/*
 * library identification string; the T() macro below hands it to
 * ERROR_translate() along with each message
 */
static const char lib[] = "libast:magic";
35
36#include <ast.h>
37#include <ctype.h>
38#include <ccode.h>
39#include <dt.h>
40#include <modex.h>
41#include <error.h>
42#include <regex.h>
43#include <swap.h>
44
/*
 * T(m): pass message m through ERROR_translate() under the lib[] key;
 * an empty string is returned as is
 *
 * the parameter is parenthesized at every use so that a compound argument
 * such as T(a ? b : c) expands correctly; m is still evaluated more than
 * once, so it must not have side effects
 */
#define T(m)		(*(m)?ERROR_translate(NiL,NiL,lib,(m)):(m))

/*
 * match(s,p): strgrpmatch() of subject s against pattern p with the
 * STR_LEFT, STR_RIGHT and STR_ICASE flags and no sub-match capture
 */
#define match(s,p)	strgrpmatch((s),(p),NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51
52typedef struct				/* identifier dictionary entry	*/
53{
54	const char	name[16];	/* identifier name		*/
55	int		value;		/* identifier value		*/
56	Dtlink_t	link;		/* dictionary link		*/
57} Info_t;
58
/*
 * edit substitution: one node of a singly linked list of compiled patterns
 */
typedef struct Edit
{
	struct Edit*	next;		/* following node, 0 at the end	*/
	regex_t*	from;		/* compiled "from" pattern	*/
} Edit_t;
64
struct Entry;				/* completed by Entry_t below	*/

/*
 * loop info for an entry whose op is 'l'
 *
 * ckmagic() loads count from the datum on the first pass, saves its
 * current base offset in offset, restarts at start, and then advances the
 * base by size for each further pass until count runs out
 */
typedef struct
{
	struct Entry*	lab;		/* entry list run on each pass	*/
	int		start;		/* base offset of first pass	*/
	int		size;		/* base offset step per pass	*/
	int		count;		/* passes left, 0 when idle	*/
	int		offset;		/* base offset to restore	*/
} Loop_t;
75
76typedef struct Entry			/* magic file entry		*/
77{
78	struct Entry*	next;		/* next in list			*/
79	char*		expr;		/* offset expression		*/
80	union
81	{
82	unsigned long	num;
83	char*		str;
84	struct Entry*	lab;
85	regex_t*	sub;
86	Loop_t*		loop;
87	}		value;		/* comparison value		*/
88	char*		desc;		/* file description		*/
89	char*		mime;		/* file mime type		*/
90	unsigned long	offset;		/* offset in bytes		*/
91	unsigned long	mask;		/* mask before compare		*/
92	char		cont;		/* continuation operation	*/
93	char		type;		/* datum type			*/
94	char		op;		/* comparison operation		*/
95	char		nest;		/* { or } nesting operation	*/
96	char		swap;		/* forced swap order		*/
97} Entry_t;
98
/*
 * character code classification
 *
 * cklang() ORs together the cctype[] entry of every byte it scans and then
 * examines the result CC_BIT bits at a time, once per code set (CC_MAPS
 * of them), so a Cctype_t carries CC_MAPS groups of CC_BIT flag bits
 */

#define CC_BIT		5		/* flag bits per code set	*/

#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;	/* all groups fit in 2 bytes	*/
#else
typedef unsigned long Cctype_t;
#endif

#define CC_text		0x01		/* ordinary text byte		*/
#define CC_control	0x02		/* control byte			*/
#define CC_latin	0x04		/* 0200 .. 0240			*/
#define CC_binary	0x08		/* above 0240			*/
#define CC_utf_8	0x10		/* not part of CC_MASK		*/

#define CC_notext	CC_text		/* CC_text is flipped before checking */

#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify one character value c:
 *	above 0240					CC_binary
 *	0200 through 0240				CC_latin
 *	below 040 except 007 011 012 013 015		CC_control
 *	anything else					CC_text
 * c is evaluated several times
 */
#define CCTYPE(c)	( \
	((c)>0240) ? CC_binary : \
	((c)>=0200) ? CC_latin : \
	((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015) ? CC_control : \
	CC_text )
118
/*
 * identifier categories: the value member of each dict[] entry and the
 * index into the identifier[] hit counters
 */

#define ID_NONE		0		/* no category			*/
#define ID_ASM		1		/* bss byte comm data globl ...	*/
#define ID_C		2		/* char double extern int ...	*/
#define ID_COBOL	3		/* COMPUTE DIVISION PERFORM ...	*/
#define ID_COPYBOOK	4		/* COMP FILLER PIC REDEFINES ...*/
#define ID_CPLUSPLUS	5		/* class private public		*/
#define ID_FORTRAN	6		/* common dimension integer ...	*/
#define ID_HTML		7		/* html HTML			*/
#define ID_INCL1	8		/* include			*/
#define ID_INCL2	9		/* libc sfio stdio u		*/
#define ID_INCL3	10		/* h				*/
#define ID_MAM1		11		/* make				*/
#define ID_MAM2		12		/* done				*/
#define ID_MAM3		13		/* attr exec			*/
#define ID_NOTEXT	14		/* clr jmp mov sys tst		*/
#define ID_PL1		15		/* DCL DEFINED FIXED MAIN ...	*/
#define ID_YACC		16		/* binary left right term ...	*/

#define ID_MAX		ID_YACC		/* largest category value	*/
138
/*
 * info keyword codes: the value member of each info[] entry; ckmagic()
 * switches on these when an entry has an expression and a nonzero offset
 */

#define INFO_atime	1		/* st_atime			*/
#define INFO_blocks	2		/* iblocks(st)			*/
#define INFO_ctime	3		/* st_ctime			*/
#define INFO_fstype	4		/* fmtfs(st)			*/
#define INFO_gid	5		/* st_gid			*/
#define INFO_mode	6		/* st_mode			*/
#define INFO_mtime	7		/* modify time			*/
#define INFO_name	8		/* file base name		*/
#define INFO_nlink	9		/* st_nlink			*/
#define INFO_size	10		/* st_size			*/
#define INFO_uid	11		/* st_uid			*/
150
/*
 * private members of the magic handle; defined ahead of the <magic.h>
 * include that follows (presumably spliced into Magic_t there -- confirm
 * against magic.h)
 */
#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* caller discipline	*/ \
	Vmalloc_t*	vm;			/* vmalloc region	*/ \
	Entry_t*	magic;			/* first table entry	*/ \
	Entry_t*	magiclast;		/* last table entry	*/ \
	char*		mime;			/* current MIME type	*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* leading file data	*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* getdata() window	*/ \
	char		nbuf[256];		/* code mapped copy	*/ \
	char		mbuf[64];		/* mime string		*/ \
	char		sbuf[64];		/* type suffix string	*/ \
	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
	int		fbsz;			/* bytes valid in fbuf	*/ \
	int		fbmx;			/* getdata() data limit	*/ \
	int		xbsz;			/* bytes valid in xbuf	*/ \
	int		swap;			/* swapget() operation	*/ \
	unsigned long	flags;			/* disc+open flags	*/ \
	long		xoff;			/* file offset of xbuf	*/ \
	int		identifier[ID_MAX + 1];	/* hits per ID_* value	*/ \
	Sfio_t*		fp;			/* stream behind fbuf	*/ \
	Sfio_t*		tmp;			/* tmp string stream	*/ \
	regdisc_t	redisc;			/* regex discipline	*/ \
	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
	Dt_t*		idtab;			/* identifier dict	*/ \
	Dt_t*		infotab;		/* info keyword dict	*/
184
185#include <magic.h>
186
187static Info_t		dict[] =		/* keyword dictionary	*/
188{
189	{ 	"COMMON",	ID_FORTRAN	},
190	{ 	"COMPUTE",	ID_COBOL	},
191	{ 	"COMP",		ID_COPYBOOK	},
192	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
193	{ 	"DCL",		ID_PL1		},
194	{ 	"DEFINED",	ID_PL1		},
195	{ 	"DIMENSION",	ID_FORTRAN	},
196	{ 	"DIVISION",	ID_COBOL	},
197	{ 	"FILLER",	ID_COPYBOOK	},
198	{ 	"FIXED",	ID_PL1		},
199	{ 	"FUNCTION",	ID_FORTRAN	},
200	{ 	"HTML",		ID_HTML		},
201	{ 	"INTEGER",	ID_FORTRAN	},
202	{ 	"MAIN",		ID_PL1		},
203	{ 	"OPTIONS",	ID_PL1		},
204	{ 	"PERFORM",	ID_COBOL	},
205	{ 	"PIC",		ID_COPYBOOK	},
206	{ 	"REAL",		ID_FORTRAN	},
207	{ 	"REDEFINES",	ID_COPYBOOK	},
208	{ 	"S9",		ID_COPYBOOK	},
209	{ 	"SECTION",	ID_COBOL	},
210	{ 	"SELECT",	ID_COBOL	},
211	{ 	"SUBROUTINE",	ID_FORTRAN	},
212	{ 	"TEXT",		ID_ASM		},
213	{ 	"VALUE",	ID_COPYBOOK	},
214	{ 	"attr",		ID_MAM3		},
215	{ 	"binary",	ID_YACC		},
216	{ 	"block",	ID_FORTRAN	},
217	{ 	"bss",		ID_ASM		},
218	{ 	"byte",		ID_ASM		},
219	{ 	"char",		ID_C		},
220	{ 	"class",	ID_CPLUSPLUS	},
221	{ 	"clr",		ID_NOTEXT	},
222	{ 	"comm",		ID_ASM		},
223	{ 	"common",	ID_FORTRAN	},
224	{ 	"data",		ID_ASM		},
225	{ 	"dimension",	ID_FORTRAN	},
226	{ 	"done",		ID_MAM2		},
227	{ 	"double",	ID_C		},
228	{ 	"even",		ID_ASM		},
229	{ 	"exec",		ID_MAM3		},
230	{ 	"extern",	ID_C		},
231	{ 	"float",	ID_C		},
232	{ 	"function",	ID_FORTRAN	},
233	{ 	"globl",	ID_ASM		},
234	{ 	"h",		ID_INCL3	},
235	{ 	"html",		ID_HTML		},
236	{ 	"include",	ID_INCL1	},
237	{ 	"int",		ID_C		},
238	{ 	"integer",	ID_FORTRAN	},
239	{ 	"jmp",		ID_NOTEXT	},
240	{ 	"left",		ID_YACC		},
241	{ 	"libc",		ID_INCL2	},
242	{ 	"long",		ID_C		},
243	{ 	"make",		ID_MAM1		},
244	{ 	"mov",		ID_NOTEXT	},
245	{ 	"private",	ID_CPLUSPLUS	},
246	{ 	"public",	ID_CPLUSPLUS	},
247	{ 	"real",		ID_FORTRAN	},
248	{ 	"register",	ID_C		},
249	{ 	"right",	ID_YACC		},
250	{ 	"sfio",		ID_INCL2	},
251	{ 	"static",	ID_C		},
252	{ 	"stdio",	ID_INCL2	},
253	{ 	"struct",	ID_C		},
254	{ 	"subroutine",	ID_FORTRAN	},
255	{ 	"sys",		ID_NOTEXT	},
256	{ 	"term",		ID_YACC		},
257	{ 	"text",		ID_ASM		},
258	{ 	"tst",		ID_NOTEXT	},
259	{ 	"type",		ID_YACC		},
260	{ 	"typedef",	ID_C		},
261	{ 	"u",		ID_INCL2	},
262	{ 	"union",	ID_YACC		},
263	{ 	"void",		ID_C		},
264};
265
266static Info_t		info[] =
267{
268	{	"atime",	INFO_atime		},
269	{	"blocks",	INFO_blocks		},
270	{	"ctime",	INFO_ctime		},
271	{	"fstype",	INFO_fstype		},
272	{	"gid",		INFO_gid		},
273	{	"mode",		INFO_mode		},
274	{	"mtime",	INFO_mtime		},
275	{	"name",		INFO_name		},
276	{	"nlink",	INFO_nlink		},
277	{	"size",		INFO_size		},
278	{	"uid",		INFO_uid		},
279};
280
281/*
282 * return pointer to data at offset off and size siz
283 */
284
285static char*
286getdata(register Magic_t* mp, register long off, register int siz)
287{
288	register long	n;
289
290	if (off < 0)
291		return 0;
292	if (off + siz <= mp->fbsz)
293		return mp->fbuf + off;
294	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295	{
296		if (off + siz > mp->fbmx)
297			return 0;
298		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299		if (sfseek(mp->fp, n, SEEK_SET) != n)
300			return 0;
301		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302		{
303			mp->xoff = 0;
304			mp->xbsz = 0;
305			return 0;
306		}
307		mp->xbuf[mp->xbsz] = 0;
308		mp->xoff = n;
309		if (off + siz > mp->xoff + mp->xbsz)
310			return 0;
311	}
312	return mp->xbuf + off - mp->xoff;
313}
314
315/*
316 * @... evaluator for strexpr()
317 */
318
319static long
320indirect(const char* cs, char** e, void* handle)
321{
322	register char*		s = (char*)cs;
323	register Magic_t*	mp = (Magic_t*)handle;
324	register long		n = 0;
325	register char*		p;
326
327	if (s)
328	{
329		if (*s == '@')
330		{
331			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332			switch (*(s = *e))
333			{
334			case 'b':
335			case 'B':
336				s++;
337				if (p = getdata(mp, n, 1))
338					n = *(unsigned char*)p;
339				else
340					s = (char*)cs;
341				break;
342			case 'h':
343			case 'H':
344				s++;
345				if (p = getdata(mp, n, 2))
346					n = swapget(mp->swap, p, 2);
347				else
348					s = (char*)cs;
349				break;
350			case 'q':
351			case 'Q':
352				s++;
353				if (p = getdata(mp, n, 8))
354					n = swapget(mp->swap, p, 8);
355				else
356					s = (char*)cs;
357				break;
358			default:
359				if (isalnum(*s))
360					s++;
361				if (p = getdata(mp, n, 4))
362					n = swapget(mp->swap, p, 4);
363				else
364					s = (char*)cs;
365				break;
366			}
367		}
368		*e = s;
369	}
370	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372	return n;
373}
374
375/*
376 * emit regex error message
377 */
378
379static void
380regmessage(Magic_t* mp, regex_t* re, int code)
381{
382	char	buf[128];
383
384	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385	{
386		regerror(code, re, buf, sizeof(buf));
387		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388	}
389}
390
391/*
392 * decompose vcodex(3) method composition
393 */
394
395static char*
396vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397{
398	unsigned char*	map;
399	const char*	o;
400	int		c;
401	int		n;
402	int		i;
403	int		a;
404
405	map = CCMAP(CC_ASCII, CC_NATIVE);
406	a = 0;
407	i = 1;
408	for (;;)
409	{
410		if (i)
411			i = 0;
412		else
413			*b++ = '^';
414		if (m < (x - 1) && !*(m + 1))
415		{
416			/*
417			 * obsolete indices
418			 */
419
420			if (!a)
421			{
422				a = 1;
423				o = "old, ";
424				while (b < e && (c = *o++))
425					*b++ = c;
426			}
427			switch (*m)
428			{
429			case 0:		o = "delta"; break;
430			case 1:		o = "huffman"; break;
431			case 2:		o = "huffgroup"; break;
432			case 3:		o = "arith"; break;
433			case 4:		o = "bwt"; break;
434			case 5:		o = "rle"; break;
435			case 6:		o = "mtf"; break;
436			case 7:		o = "transpose"; break;
437			case 8:		o = "table"; break;
438			case 9:		o = "huffpart"; break;
439			case 50:	o = "map"; break;
440			case 100:	o = "recfm"; break;
441			case 101:	o = "ss7"; break;
442			default:	o = "UNKNOWN"; break;
443			}
444			m += 2;
445			while (b < e && (c = *o++))
446				*b++ = c;
447		}
448		else
449			while (b < e && m < x && (c = *m++))
450			{
451				if (map)
452					c = map[c];
453				*b++ = c;
454			}
455		if (b >= e)
456			break;
457		n = 0;
458		while (m < x)
459		{
460			n = (n<<7) | (*m & 0x7f);
461			if (!(*m++ & 0x80))
462				break;
463		}
464		if (n >= (x - m))
465			break;
466		m += n;
467	}
468	return b;
469}
470
471/*
472 * check for magic table match in buf
473 */
474
475static char*
476ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
477{
478	register Entry_t*	ep;
479	register char*		p;
480	register char*		b;
481	register int		level = 0;
482	int			call = -1;
483	int			c;
484	char*			q;
485	char*			t;
486	char*			base = 0;
487	unsigned long		num;
488	unsigned long		mask;
489	regmatch_t		matches[10];
490
491	mp->swap = 0;
492	b = mp->msg[0] = buf;
493	mp->mime = mp->cap[0] = 0;
494	mp->keep[0] = 0;
495	for (ep = mp->magic; ep; ep = ep->next)
496	{
497	fun:
498		if (ep->nest == '{')
499		{
500			if (++level >= MAXNEST)
501			{
502				call = -1;
503				level = 0;
504				mp->keep[0] = 0;
505				b = mp->msg[0];
506				mp->mime = mp->cap[0];
507				continue;
508			}
509			mp->keep[level] = mp->keep[level - 1] != 0;
510			mp->msg[level] = b;
511			mp->cap[level] = mp->mime;
512		}
513		switch (ep->cont)
514		{
515		case '#':
516			if (mp->keep[level] && b > buf)
517			{
518				*b = 0;
519				return buf;
520			}
521			mp->swap = 0;
522			b = mp->msg[0] = buf;
523			mp->mime = mp->cap[0] = 0;
524			if (ep->type == ' ')
525				continue;
526			break;
527		case '$':
528			if (mp->keep[level] && call < (MAXNEST - 1))
529			{
530				mp->ret[++call] = ep;
531				ep = ep->value.lab;
532				goto fun;
533			}
534			continue;
535		case ':':
536			ep = mp->ret[call--];
537			if (ep->op == 'l')
538				goto fun;
539			continue;
540		case '|':
541			if (mp->keep[level] > 1)
542				goto checknest;
543			/*FALLTHROUGH*/
544		default:
545			if (!mp->keep[level])
546			{
547				b = mp->msg[level];
548				mp->mime = mp->cap[level];
549				goto checknest;
550			}
551			break;
552		}
553		p = "";
554		num = 0;
555		if (!ep->expr)
556			num = ep->offset + off;
557		else
558			switch (ep->offset)
559			{
560			case 0:
561				num = strexpr(ep->expr, NiL, indirect, mp) + off;
562				break;
563			case INFO_atime:
564				num = st->st_atime;
565				ep->type = 'D';
566				break;
567			case INFO_blocks:
568				num = iblocks(st);
569				ep->type = 'N';
570				break;
571			case INFO_ctime:
572				num = st->st_ctime;
573				ep->type = 'D';
574				break;
575			case INFO_fstype:
576				p = fmtfs(st);
577				ep->type = toupper(ep->type);
578				break;
579			case INFO_gid:
580				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
581				{
582					p = fmtgid(st->st_gid);
583					ep->type = toupper(ep->type);
584				}
585				else
586				{
587					num = st->st_gid;
588					ep->type = 'N';
589				}
590				break;
591			case INFO_mode:
592				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
593				{
594					p = fmtmode(st->st_mode, 0);
595					ep->type = toupper(ep->type);
596				}
597				else
598				{
599					num = modex(st->st_mode);
600					ep->type = 'N';
601				}
602				break;
603			case INFO_mtime:
604				num = st->st_ctime;
605				ep->type = 'D';
606				break;
607			case INFO_name:
608				if (!base)
609				{
610					if (base = strrchr(file, '/'))
611						base++;
612					else
613						base = (char*)file;
614				}
615				p = base;
616				ep->type = toupper(ep->type);
617				break;
618			case INFO_nlink:
619				num = st->st_nlink;
620				ep->type = 'N';
621				break;
622			case INFO_size:
623				num = st->st_size;
624				ep->type = 'N';
625				break;
626			case INFO_uid:
627				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
628				{
629					p = fmtuid(st->st_uid);
630					ep->type = toupper(ep->type);
631				}
632				else
633				{
634					num = st->st_uid;
635					ep->type = 'N';
636				}
637				break;
638			}
639		switch (ep->type)
640		{
641
642		case 'b':
643			if (!(p = getdata(mp, num, 1)))
644				goto next;
645			num = *(unsigned char*)p;
646			break;
647
648		case 'h':
649			if (!(p = getdata(mp, num, 2)))
650				goto next;
651			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
652			break;
653
654		case 'd':
655		case 'l':
656		case 'v':
657			if (!(p = getdata(mp, num, 4)))
658				goto next;
659			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
660			break;
661
662		case 'q':
663			if (!(p = getdata(mp, num, 8)))
664				goto next;
665			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
666			break;
667
668		case 'e':
669			if (!(p = getdata(mp, num, 0)))
670				goto next;
671			/*FALLTHROUGH*/
672		case 'E':
673			if (!ep->value.sub)
674				goto next;
675			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
676			{
677				c = mp->fbsz;
678				if (c >= sizeof(mp->nbuf))
679					c = sizeof(mp->nbuf) - 1;
680				p = (char*)memcpy(mp->nbuf, p, c);
681				p[c] = 0;
682				ccmapstr(mp->x2n, p, c);
683				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
684				{
685					if (c != REG_NOMATCH)
686						regmessage(mp, ep->value.sub, c);
687					goto next;
688				}
689			}
690			p = ep->value.sub->re_sub->re_buf;
691			q = T(ep->desc);
692			t = *q ? q : p;
693			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
694				*b++ = ' ';
695			b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
696			if (ep->mime)
697				mp->mime = ep->mime;
698			goto checknest;
699
700		case 's':
701			if (!(p = getdata(mp, num, ep->mask)))
702				goto next;
703			goto checkstr;
704		case 'm':
705			if (!(p = getdata(mp, num, 0)))
706				goto next;
707			/*FALLTHROUGH*/
708		case 'M':
709		case 'S':
710		checkstr:
711			for (;;)
712			{
713				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
714					break;
715				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
716					break;
717				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
718					goto next;
719				p = (char*)memcpy(mp->nbuf, p, ep->mask);
720				p[ep->mask] = 0;
721				ccmapstr(mp->x2n, p, ep->mask);
722			}
723			q = T(ep->desc);
724			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
725				*b++ = ' ';
726			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
727			*t = 0;
728			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
729			*t = c;
730			if (ep->mime)
731				mp->mime = ep->mime;
732			goto checknest;
733
734		}
735		if (mask = ep->mask)
736			num &= mask;
737		switch (ep->op)
738		{
739
740		case '=':
741		case '@':
742			if (num == ep->value.num)
743				break;
744			if (ep->cont != '#')
745				goto next;
746			if (!mask)
747				mask = ~mask;
748			if (ep->type == 'h')
749			{
750				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
751				{
752					if (!(mp->swap & (mp->swap + 1)))
753						mp->swap = 7;
754					goto swapped;
755				}
756			}
757			else if (ep->type == 'l')
758			{
759				for (c = 1; c < 4; c++)
760					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
761					{
762						if (!(mp->swap & (mp->swap + 1)))
763							mp->swap = 7;
764						goto swapped;
765					}
766			}
767			else if (ep->type == 'q')
768			{
769				for (c = 1; c < 8; c++)
770					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
771						goto swapped;
772			}
773			goto next;
774
775		case '!':
776			if (num != ep->value.num)
777				break;
778			goto next;
779
780		case '^':
781			if (num ^ ep->value.num)
782				break;
783			goto next;
784
785		case '>':
786			if (num > ep->value.num)
787				break;
788			goto next;
789
790		case '<':
791			if (num < ep->value.num)
792				break;
793			goto next;
794
795		case 'l':
796			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
797			{
798				if (!ep->value.loop->count)
799				{
800					ep->value.loop->count = num;
801					ep->value.loop->offset = off;
802					off = ep->value.loop->start;
803				}
804				else if (!--ep->value.loop->count)
805				{
806					off = ep->value.loop->offset;
807					goto next;
808				}
809				else
810					off += ep->value.loop->size;
811				mp->ret[++call] = ep;
812				ep = ep->value.loop->lab;
813				goto fun;
814			}
815			goto next;
816
817		case 'm':
818			c = mp->swap;
819			t = ckmagic(mp, file, b + (b > buf), st, num);
820			mp->swap = c;
821			if (!t)
822				goto next;
823			if (b > buf)
824				*b = ' ';
825			b += strlen(b);
826			break;
827
828		case 'r':
829#if _UWIN
830		{
831			char*			e;
832			Sfio_t*			rp;
833			Sfio_t*			gp;
834
835			if (!(t = strrchr(file, '.')))
836				goto next;
837			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
838			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
839				goto next;
840			*ep->desc = 0;
841			*ep->mime = 0;
842			gp = 0;
843			while (t = sfgetr(rp, '\n', 1))
844			{
845				if (strneq(t, "Content Type=", 13))
846				{
847					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
848					strcpy(ep->mime, t + 13);
849					if (gp)
850						break;
851				}
852				else
853				{
854					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
855					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
856					{
857						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
858						strcpy(ep->desc, t);
859						if (*ep->mime)
860							break;
861					}
862				}
863			}
864			sfclose(rp);
865			if (!gp)
866				goto next;
867			if (!*ep->mime)
868			{
869				t = T(ep->desc);
870				if (!strncasecmp(t, "microsoft", 9))
871					t += 9;
872				while (isspace(*t))
873					t++;
874				e = "application/x-ms-";
875				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
876				e = strcopy(ep->mime, e);
877				while ((c = *t++) && c != '.' && c != ' ')
878					*e++ = isupper(c) ? tolower(c) : c;
879				*e = 0;
880			}
881			while (t = sfgetr(gp, '\n', 1))
882				if (*t && !streq(t, "\"\""))
883				{
884					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
885					strcpy(ep->desc, t);
886					break;
887				}
888			sfclose(gp);
889			if (!*ep->desc)
890				goto next;
891			if (!t)
892				for (t = T(ep->desc); *t; t++)
893					if (*t == '.')
894						*t = ' ';
895			if (!mp->keep[level])
896				mp->keep[level] = 2;
897			mp->mime = ep->mime;
898			break;
899		}
900#else
901			if (ep->cont == '#' && !mp->keep[level])
902				mp->keep[level] = 1;
903			goto next;
904#endif
905
906		case 'v':
907			if (!(p = getdata(mp, num, 4)))
908				goto next;
909			c = 0;
910			do
911			{
912				num++;
913				c = (c<<7) | (*p & 0x7f);
914			} while (*p++ & 0x80);
915			if (!(p = getdata(mp, num, c)))
916				goto next;
917			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
918			{
919				*b++ = ',';
920				*b++ = ' ';
921			}
922			b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
923			goto checknest;
924
925		}
926	swapped:
927		q = T(ep->desc);
928		if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
929			*b++ = ' ';
930		if (ep->type == 'd' || ep->type == 'D')
931			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
932		else if (ep->type == 'v')
933			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
934		else
935			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
936		if (ep->mime && *ep->mime)
937			mp->mime = ep->mime;
938	checknest:
939		if (ep->nest == '}')
940		{
941			if (!mp->keep[level])
942			{
943				b = mp->msg[level];
944				mp->mime = mp->cap[level];
945			}
946			else if (level > 0)
947				mp->keep[level - 1] = mp->keep[level];
948			if (--level < 0)
949			{
950				level = 0;
951				mp->keep[0] = 0;
952			}
953		}
954		continue;
955	next:
956		if (ep->cont == '&')
957			mp->keep[level] = 0;
958		goto checknest;
959	}
960	if (mp->keep[level] && b > buf)
961	{
962		*b = 0;
963		return buf;
964	}
965	return 0;
966}
967
968/*
969 * check english language stats
970 */
971
972static int
973ckenglish(register Magic_t* mp, int pun, int badpun)
974{
975	register char*	s;
976	register int	vowl = 0;
977	register int	freq = 0;
978	register int	rare = 0;
979
980	if (5 * badpun > pun)
981		return 0;
982	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
983		return 0;
984	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
985		return 0;
986	for (s = "aeiou"; *s; s++)
987		vowl += mp->count[toupper(*s)] + mp->count[*s];
988	for (s = "etaion"; *s; s++)
989		freq += mp->count[toupper(*s)] + mp->count[*s];
990	for (s = "vjkqxz"; *s; s++)
991		rare += mp->count[toupper(*s)] + mp->count[*s];
992	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
993}
994
995/*
996 * check programming language stats
997 */
998
999static char*
1000cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
1001{
1002	register int		c;
1003	register unsigned char*	b;
1004	register unsigned char*	e;
1005	register int		q;
1006	register char*		s;
1007	char*			t;
1008	char*			base;
1009	char*			suff;
1010	char*			t1;
1011	char*			t2;
1012	char*			t3;
1013	int			n;
1014	int			badpun;
1015	int			code;
1016	int			pun;
1017	Cctype_t		flags;
1018	Info_t*			ip;
1019
1020	b = (unsigned char*)mp->fbuf;
1021	e = b + mp->fbsz;
1022	memzero(mp->count, sizeof(mp->count));
1023	memzero(mp->multi, sizeof(mp->multi));
1024	memzero(mp->identifier, sizeof(mp->identifier));
1025
1026	/*
1027	 * check character coding
1028	 */
1029
1030	flags = 0;
1031	while (b < e)
1032		flags |= mp->cctype[*b++];
1033	b = (unsigned char*)mp->fbuf;
1034	code = 0;
1035	q = CC_ASCII;
1036	n = CC_MASK;
1037	for (c = 0; c < CC_MAPS; c++)
1038	{
1039		flags ^= CC_text;
1040		if ((flags & CC_MASK) < n)
1041		{
1042			n = flags & CC_MASK;
1043			q = c;
1044		}
1045		flags >>= CC_BIT;
1046	}
1047	flags = n;
1048	if (!(flags & (CC_binary|CC_notext)))
1049	{
1050		if (q != CC_NATIVE)
1051		{
1052			code = q;
1053			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1054		}
1055		if (b[0] == '#' && b[1] == '!')
1056		{
1057			for (b += 2; b < e && isspace(*b); b++);
1058			for (s = (char*)b; b < e && isprint(*b); b++);
1059			c = *b;
1060			*b = 0;
1061			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1062			{
1063				if (t = strrchr(s, '/'))
1064					s = t + 1;
1065				for (t = s; *t; t++)
1066					if (isspace(*t))
1067					{
1068						*t = 0;
1069						break;
1070					}
1071				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1072				mp->mime = mp->mbuf;
1073				if (match(s, "*sh"))
1074				{
1075					t1 = T("command");
1076					if (streq(s, "sh"))
1077						*s = 0;
1078					else
1079					{
1080						*b++ = ' ';
1081						*b = 0;
1082					}
1083				}
1084				else
1085				{
1086					t1 = T("interpreter");
1087					*b++ = ' ';
1088					*b = 0;
1089				}
1090				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1091				s = mp->sbuf;
1092				goto qualify;
1093			}
1094			*b = c;
1095			b = (unsigned char*)mp->fbuf;
1096		}
1097		badpun = 0;
1098		pun = 0;
1099		q = 0;
1100		s = 0;
1101		t = 0;
1102		while (b < e)
1103		{
1104			c = *b++;
1105			mp->count[c]++;
1106			if (c == q && (q != '*' || *b == '/' && b++))
1107			{
1108				mp->multi[q]++;
1109				q = 0;
1110			}
1111			else if (c == '\\')
1112			{
1113				s = 0;
1114				b++;
1115			}
1116			else if (!q)
1117			{
1118				if (isalpha(c) || c == '_')
1119				{
1120					if (!s)
1121						s = (char*)b - 1;
1122				}
1123				else if (!isdigit(c))
1124				{
1125					if (s)
1126					{
1127						if (s > mp->fbuf)
1128							switch (*(s - 1))
1129							{
1130							case ':':
1131								if (*b == ':')
1132									mp->multi[':']++;
1133								break;
1134							case '.':
1135								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1136									mp->multi['.']++;
1137								break;
1138							case '\n':
1139							case '\\':
1140								if (*b == '{')
1141									t = (char*)b + 1;
1142								break;
1143							case '{':
1144								if (s == t && *b == '}')
1145									mp->multi['X']++;
1146								break;
1147							}
1148							if (!mp->idtab)
1149							{
1150								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
1151									for (q = 0; q < elementsof(dict); q++)
1152										dtinsert(mp->idtab, &dict[q]);
1153								else if (mp->disc->errorf)
1154									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1155								q = 0;
1156							}
1157							if (mp->idtab)
1158							{
1159								*(b - 1) = 0;
1160								if (ip = (Info_t*)dtmatch(mp->idtab, s))
1161									mp->identifier[ip->value]++;
1162								*(b - 1) = c;
1163							}
1164							s = 0;
1165						}
1166					switch (c)
1167					{
1168					case '\t':
1169						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1170							mp->multi['\t']++;
1171						break;
1172					case '"':
1173					case '\'':
1174						q = c;
1175						break;
1176					case '/':
1177						if (*b == '*')
1178							q = *b++;
1179						else if (*b == '/')
1180							q = '\n';
1181						break;
1182					case '$':
1183						if (*b == '(' && *(b + 1) != ' ')
1184							mp->multi['$']++;
1185						break;
1186					case '{':
1187					case '}':
1188					case '[':
1189					case ']':
1190					case '(':
1191						mp->multi[c]++;
1192						break;
1193					case ')':
1194						mp->multi[c]++;
1195						goto punctuation;
1196					case ':':
1197						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1198							mp->multi[':']++;
1199						goto punctuation;
1200					case '.':
1201					case ',':
1202					case '%':
1203					case ';':
1204					case '?':
1205					punctuation:
1206						pun++;
1207						if (*b != ' ' && *b != '\n')
1208							badpun++;
1209						break;
1210					}
1211				}
1212			}
1213		}
1214	}
1215	else
1216		while (b < e)
1217			mp->count[*b++]++;
1218	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1219	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1220	if (!flags)
1221	{
1222		if (match(suff, "*sh|bat|cmd"))
1223			goto id_sh;
1224		if (match(base, "*@(mkfile)"))
1225			goto id_mk;
1226		if (match(base, "*@(makefile|.mk)"))
1227			goto id_make;
1228		if (match(base, "*@(mamfile|.mam)"))
1229			goto id_mam;
1230		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1231			goto id_c;
1232		if (match(suff, "f"))
1233			goto id_fortran;
1234		if (match(suff, "htm+(l)"))
1235			goto id_html;
1236		if (match(suff, "cpy"))
1237			goto id_copybook;
1238		if (match(suff, "cob|cbl|cb2"))
1239			goto id_cobol;
1240		if (match(suff, "pl[1i]"))
1241			goto id_pl1;
1242		if (match(suff, "tex"))
1243			goto id_tex;
1244		if (match(suff, "asm|s"))
1245			goto id_asm;
1246		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1247		{
1248		id_sh:
1249			s = T("command script");
1250			mp->mime = "application/sh";
1251			goto qualify;
1252		}
1253		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1254		{
1255			s = T("mail message");
1256			mp->mime = "message/rfc822";
1257			goto qualify;
1258		}
1259		if (match(base, "*@(mkfile)"))
1260		{
1261		id_mk:
1262			s = "mkfile";
1263			mp->mime = "application/mk";
1264			goto qualify;
1265		}
1266		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1267		{
1268		id_make:
1269			s = "makefile";
1270			mp->mime = "application/make";
1271			goto qualify;
1272		}
1273		if (mp->multi['.'] >= 3)
1274		{
1275			s = T("nroff input");
1276			mp->mime = "application/x-troff";
1277			goto qualify;
1278		}
1279		if (mp->multi['X'] >= 3)
1280		{
1281			s = T("TeX input");
1282			mp->mime = "application/x-tex";
1283			goto qualify;
1284		}
1285		if (mp->fbsz < SF_BUFSIZE &&
1286		    (mp->multi['('] == mp->multi[')'] &&
1287		     mp->multi['{'] == mp->multi['}'] &&
1288		     mp->multi['['] == mp->multi[']']) ||
1289		    mp->fbsz >= SF_BUFSIZE &&
1290		    (mp->multi['('] >= mp->multi[')'] &&
1291		     mp->multi['{'] >= mp->multi['}'] &&
1292		     mp->multi['['] >= mp->multi[']']))
1293		{
1294			c = mp->identifier[ID_INCL1];
1295			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1296			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1297			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1298			{
1299			id_c:
1300				t1 = "";
1301				t2 = "c ";
1302				t3 = T("program");
1303				switch (*suff)
1304				{
1305				case 'c':
1306				case 'C':
1307					mp->mime = "application/x-cc";
1308					break;
1309				case 'l':
1310				case 'L':
1311					t1 = "lex ";
1312					mp->mime = "application/x-lex";
1313					break;
1314				default:
1315					t3 = T("header");
1316					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1317					{
1318						mp->mime = "application/x-cc";
1319						break;
1320					}
1321					/*FALLTHROUGH*/
1322				case 'y':
1323				case 'Y':
1324					t1 = "yacc ";
1325					mp->mime = "application/x-yacc";
1326					break;
1327				}
1328				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1329				{
1330					t2 = "c++ ";
1331					mp->mime = "application/x-c++";
1332				}
1333				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1334				s = mp->sbuf;
1335				goto qualify;
1336			}
1337		}
1338		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1339		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1340		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1341		{
1342		id_mam:
1343			s = T("mam program");
1344			mp->mime = "application/x-mam";
1345			goto qualify;
1346		}
1347		if (mp->identifier[ID_FORTRAN] >= 8)
1348		{
1349		id_fortran:
1350			s = T("fortran program");
1351			mp->mime = "application/x-fortran";
1352			goto qualify;
1353		}
1354		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1355		{
1356		id_html:
1357			s = T("html input");
1358			mp->mime = "text/html";
1359			goto qualify;
1360		}
1361		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1362		{
1363		id_copybook:
1364			s = T("cobol copybook");
1365			mp->mime = "application/x-cobol";
1366			goto qualify;
1367		}
1368		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1369		{
1370		id_cobol:
1371			s = T("cobol program");
1372			mp->mime = "application/x-cobol";
1373			goto qualify;
1374		}
1375		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1376		{
1377		id_pl1:
1378			s = T("pl1 program");
1379			mp->mime = "application/x-pl1";
1380			goto qualify;
1381		}
1382		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1383		{
1384		id_tex:
1385			s = T("TeX input");
1386			mp->mime = "text/tex";
1387			goto qualify;
1388		}
1389		if (mp->identifier[ID_ASM] >= 4)
1390		{
1391		id_asm:
1392			s = T("as program");
1393			mp->mime = "application/x-as";
1394			goto qualify;
1395		}
1396		if (ckenglish(mp, pun, badpun))
1397		{
1398			s = T("english text");
1399			mp->mime = "text/plain";
1400			goto qualify;
1401		}
1402	}
1403	else if (streq(base, "core"))
1404	{
1405		mp->mime = "x-system/core";
1406		return T("core dump");
1407	}
1408	if (flags & (CC_binary|CC_notext))
1409	{
1410		b = (unsigned char*)mp->fbuf;
1411		e = b + mp->fbsz;
1412		n = 0;
1413		for (;;)
1414		{
1415			c = *b++;
1416			q = 0;
1417			while (c & 0x80)
1418			{
1419				c <<= 1;
1420				q++;
1421			}
1422			switch (q)
1423			{
1424			case 4:
1425				if (b < e && (*b++ & 0xc0) != 0x80)
1426					break;
1427			case 3:
1428				if (b < e && (*b++ & 0xc0) != 0x80)
1429					break;
1430			case 2:
1431				if (b < e && (*b++ & 0xc0) != 0x80)
1432					break;
1433				n = 1;
1434			case 0:
1435				if (b >= e)
1436				{
1437					if (n)
1438					{
1439						flags &= ~(CC_binary|CC_notext);
1440						flags |= CC_utf_8;
1441					}
1442					break;
1443				}
1444				continue;
1445			}
1446			break;
1447		}
1448	}
1449	if (flags & (CC_binary|CC_notext))
1450	{
1451		unsigned long	d = 0;
1452
1453		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1454		{
1455			/*
1456			 * compression/encryption via standard deviation
1457			 */
1458
1459
1460			for (c = 0; c < UCHAR_MAX; c++)
1461			{
1462				pun = mp->count[c] - q;
1463				d += pun * pun;
1464			}
1465			d /= mp->fbsz;
1466		}
1467		if (d <= 0)
1468			s = T("binary");
1469		else if (d < 4)
1470			s = T("encrypted");
1471		else if (d < 16)
1472			s = T("packed");
1473		else if (d < 64)
1474			s = T("compressed");
1475		else if (d < 256)
1476			s = T("delta");
1477		else
1478			s = T("data");
1479		mp->mime = "application/octet-stream";
1480		return s;
1481	}
1482	mp->mime = "text/plain";
1483	if (flags & CC_utf_8)
1484		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1485	else if (flags & CC_latin)
1486		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1487	else
1488		s = (flags & CC_control) ? T("text with control characters") : T("text");
1489 qualify:
1490	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1491	{
1492		t = "dos ";
1493		mp->mime = "text/dos";
1494	}
1495	else
1496		t = "";
1497	if (code)
1498	{
1499		if (code == CC_ASCII)
1500			sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
1501		else
1502		{
1503			sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
1504			mp->mime = "text/ebcdic";
1505		}
1506		s = buf;
1507	}
1508	else if (*t)
1509	{
1510		sfsprintf(buf, PATH_MAX, "%s%s", t, s);
1511		s = buf;
1512	}
1513	return s;
1514}
1515
1516/*
1517 * return the basic magic string for file,st in buf,size
1518 */
1519
1520static char*
1521type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
1522{
1523	register char*	s;
1524	register char*	t;
1525
1526	mp->mime = 0;
1527	if (!S_ISREG(st->st_mode))
1528	{
1529		if (S_ISDIR(st->st_mode))
1530		{
1531			mp->mime = "x-system/dir";
1532			return T("directory");
1533		}
1534		if (S_ISLNK(st->st_mode))
1535		{
1536			mp->mime = "x-system/lnk";
1537			s = buf;
1538			s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
1539			if (pathgetlink(file, s, size - (s - buf)) < 0)
1540				return T("cannot read symbolic link text");
1541			return buf;
1542		}
1543		if (S_ISBLK(st->st_mode))
1544		{
1545			mp->mime = "x-system/blk";
1546			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1547			return buf;
1548		}
1549		if (S_ISCHR(st->st_mode))
1550		{
1551			mp->mime = "x-system/chr";
1552			sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
1553			return buf;
1554		}
1555		if (S_ISFIFO(st->st_mode))
1556		{
1557			mp->mime = "x-system/fifo";
1558			return "fifo";
1559		}
1560#ifdef S_ISSOCK
1561		if (S_ISSOCK(st->st_mode))
1562		{
1563			mp->mime = "x-system/sock";
1564			return "socket";
1565		}
1566#endif
1567	}
1568	if (!(mp->fbmx = st->st_size))
1569		s = T("empty");
1570	else if (!mp->fp)
1571		s = T("cannot read");
1572	else
1573	{
1574		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1575		if (mp->fbsz < 0)
1576			s = fmterror(errno);
1577		else if (mp->fbsz == 0)
1578			s = T("empty");
1579		else
1580		{
1581			mp->fbuf[mp->fbsz] = 0;
1582			mp->xoff = 0;
1583			mp->xbsz = 0;
1584			if (!(s = ckmagic(mp, file, buf, st, 0)))
1585				s = cklang(mp, file, buf, st);
1586		}
1587	}
1588	if (!mp->mime)
1589		mp->mime = "application/unknown";
1590	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1591	{
1592		register char*	b;
1593		register char*	be;
1594		register char*	m;
1595		register char*	me;
1596
1597		b = mp->mime;
1598		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1599		while (m < me && b < t)
1600			*m++ = *b++;
1601		b = t = s;
1602		for (;;)
1603		{
1604			if (!(be = strchr(t, ' ')))
1605			{
1606				be = b + strlen(b);
1607				break;
1608			}
1609			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1610				break;
1611			b = t;
1612			t = be + 1;
1613		}
1614		while (m < me && b < be)
1615			if ((*m++ = *b++) == ' ')
1616				*(m - 1) = '-';
1617		*m = 0;
1618	}
1619	return s;
1620}
1621
1622/*
1623 * low level for magicload()
1624 */
1625
1626static int
1627load(register Magic_t* mp, char* file, register Sfio_t* fp)
1628{
1629	register Entry_t*	ep;
1630	register char*		p;
1631	register char*		p2;
1632	char*			p3;
1633	char*			next;
1634	int			n;
1635	int			lge;
1636	int			lev;
1637	int			ent;
1638	int			old;
1639	int			cont;
1640	Info_t*			ip;
1641	Entry_t*		ret;
1642	Entry_t*		first;
1643	Entry_t*		last = 0;
1644	Entry_t*		fun['z' - 'a' + 1];
1645
1646	memzero(fun, sizeof(fun));
1647	cont = '$';
1648	ent = 0;
1649	lev = 0;
1650	old = 0;
1651	ret = 0;
1652	error_info.file = file;
1653	error_info.line = 0;
1654	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1655	while (p = sfgetr(fp, '\n', 1))
1656	{
1657		error_info.line++;
1658		for (; isspace(*p); p++);
1659
1660		/*
1661		 * nesting
1662		 */
1663
1664		switch (*p)
1665		{
1666		case 0:
1667		case '#':
1668			cont = '#';
1669			continue;
1670		case '{':
1671			if (++lev < MAXNEST)
1672				ep->nest = *p;
1673			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1674				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1675			continue;
1676		case '}':
1677			if (!last || lev <= 0)
1678			{
1679				if (mp->disc->errorf)
1680					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1681			}
1682			else if (lev-- == ent)
1683			{
1684				ent = 0;
1685				ep->cont = ':';
1686				ep->offset = ret->offset;
1687				ep->nest = ' ';
1688				ep->type = ' ';
1689				ep->op = ' ';
1690				ep->desc = "[RETURN]";
1691				last = ep;
1692				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1693				ret = 0;
1694			}
1695			else
1696				last->nest = *p;
1697			continue;
1698		default:
1699			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1700			{
1701				n = *p++;
1702				if (n >= 'a' && n <= 'z')
1703					n -= 'a';
1704				else
1705				{
1706					if (mp->disc->errorf)
1707						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1708					n = 0;
1709				}
1710				if (ret && mp->disc->errorf)
1711					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1712				if (*p == '{')
1713				{
1714					ent = ++lev;
1715					ret = ep;
1716					ep->desc = "[FUNCTION]";
1717				}
1718				else
1719				{
1720					if (*(p + 1) != ')' && mp->disc->errorf)
1721						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1722					ep->desc = "[CALL]";
1723				}
1724				ep->cont = cont;
1725				ep->offset = n;
1726				ep->nest = ' ';
1727				ep->type = ' ';
1728				ep->op = ' ';
1729				last = ep;
1730				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1731				if (ret)
1732					fun[n] = last->value.lab = ep;
1733				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1734					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1735				continue;
1736			}
1737			if (!ep->nest)
1738				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1739			break;
1740		}
1741
1742		/*
1743		 * continuation
1744		 */
1745
1746		cont = '$';
1747		switch (*p)
1748		{
1749		case '>':
1750			old = 1;
1751			if (*(p + 1) == *p)
1752			{
1753				/*
1754				 * old style nesting push
1755				 */
1756
1757				p++;
1758				old = 2;
1759				if (!lev && last)
1760				{
1761					lev = 1;
1762					last->nest = '{';
1763					if (last->cont == '>')
1764						last->cont = '&';
1765					ep->nest = '1';
1766				}
1767			}
1768			/*FALLTHROUGH*/
1769		case '+':
1770		case '&':
1771		case '|':
1772			ep->cont = *p++;
1773			break;
1774		default:
1775			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1776				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1777			/*FALLTHROUGH*/
1778		case '*':
1779		case '0': case '1': case '2': case '3': case '4':
1780		case '5': case '6': case '7': case '8': case '9':
1781			ep->cont = (lev > 0) ? '&' : '#';
1782			break;
1783		}
1784		switch (old)
1785		{
1786		case 1:
1787			old = 0;
1788			if (lev)
1789			{
1790				/*
1791				 * old style nesting pop
1792				 */
1793
1794				lev = 0;
1795				if (last)
1796					last->nest = '}';
1797				ep->nest = ' ';
1798				if (ep->cont == '&')
1799					ep->cont = '#';
1800			}
1801			break;
1802		case 2:
1803			old = 1;
1804			break;
1805		}
1806		if (isdigit(*p))
1807		{
1808			/*
1809			 * absolute offset
1810			 */
1811
1812			ep->offset = strton(p, &next, NiL, 0);
1813			p2 = next;
1814		}
1815		else
1816		{
1817			for (p2 = p; *p2 && !isspace(*p2); p2++);
1818			if (!*p2)
1819			{
1820				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1821					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1822				continue;
1823			}
1824
1825			/*
1826			 * offset expression
1827			 */
1828
1829			*p2++ = 0;
1830			ep->expr = vmstrdup(mp->vm, p);
1831			if (isalpha(*p))
1832				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1833			else if (*p == '(' && ep->cont == '>')
1834			{
1835				/*
1836				 * convert old style indirection to @
1837				 */
1838
1839				p = ep->expr + 1;
1840				for (;;)
1841				{
1842					switch (*p++)
1843					{
1844					case 0:
1845					case '@':
1846					case '(':
1847						break;
1848					case ')':
1849						break;
1850					default:
1851						continue;
1852					}
1853					break;
1854				}
1855				if (*--p == ')')
1856				{
1857					*p = 0;
1858					*ep->expr = '@';
1859				}
1860			}
1861		}
1862		for (; isspace(*p2); p2++);
1863		for (p = p2; *p2 && !isspace(*p2); p2++);
1864		if (!*p2)
1865		{
1866			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1867				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1868			continue;
1869		}
1870		*p2++ = 0;
1871
1872		/*
1873		 * type
1874		 */
1875
1876		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1877		{
1878			ep->swap = ~(*p == 'l' ? 7 : 0);
1879			p += 2;
1880		}
1881		if (*p == 's')
1882		{
1883			if (*(p + 1) == 'h')
1884				ep->type = 'h';
1885			else
1886				ep->type = 's';
1887		}
1888		else if (*p == 'a')
1889			ep->type = 's';
1890		else
1891			ep->type = *p;
1892		if (p = strchr(p, '&'))
1893		{
1894			/*
1895			 * old style mask
1896			 */
1897
1898			ep->mask = strton(++p, NiL, NiL, 0);
1899		}
1900		for (; isspace(*p2); p2++);
1901		if (ep->mask)
1902			*--p2 = '=';
1903
1904		/*
1905		 * comparison operation
1906		 */
1907
1908		p = p2;
1909		if (p2 = strchr(p, '\t'))
1910			*p2++ = 0;
1911		else
1912		{
1913			int	qe = 0;
1914			int	qn = 0;
1915
1916			/*
1917			 * assume balanced {}[]()\\""'' field
1918			 */
1919
1920			for (p2 = p;;)
1921			{
1922				switch (n = *p2++)
1923				{
1924				case 0:
1925					break;
1926				case '{':
1927					if (!qe)
1928						qe = '}';
1929					if (qe == '}')
1930						qn++;
1931					continue;
1932				case '(':
1933					if (!qe)
1934						qe = ')';
1935					if (qe == ')')
1936						qn++;
1937					continue;
1938				case '[':
1939					if (!qe)
1940						qe = ']';
1941					if (qe == ']')
1942						qn++;
1943					continue;
1944				case '}':
1945				case ')':
1946				case ']':
1947					if (qe == n && qn > 0)
1948						qn--;
1949					continue;
1950				case '"':
1951				case '\'':
1952					if (!qe)
1953						qe = n;
1954					else if (qe == n)
1955						qe = 0;
1956					continue;
1957				case '\\':
1958					if (*p2)
1959						p2++;
1960					continue;
1961				default:
1962					if (!qe && isspace(n))
1963						break;
1964					continue;
1965				}
1966				if (n)
1967					*(p2 - 1) = 0;
1968				else
1969					p2--;
1970				break;
1971			}
1972		}
1973		lge = 0;
1974		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
1975			ep->op = '=';
1976		else
1977		{
1978			if (*p == '&')
1979			{
1980				ep->mask = strton(++p, &next, NiL, 0);
1981				p = next;
1982			}
1983			switch (*p)
1984			{
1985			case '=':
1986			case '>':
1987			case '<':
1988			case '*':
1989				ep->op = *p++;
1990				if (*p == '=')
1991				{
1992					p++;
1993					switch (ep->op)
1994					{
1995					case '>':
1996						lge = -1;
1997						break;
1998					case '<':
1999						lge = 1;
2000						break;
2001					}
2002				}
2003				break;
2004			case '!':
2005			case '@':
2006				ep->op = *p++;
2007				if (*p == '=')
2008					p++;
2009				break;
2010			case 'x':
2011				p++;
2012				ep->op = '*';
2013				break;
2014			default:
2015				ep->op = '=';
2016				if (ep->mask)
2017					ep->value.num = ep->mask;
2018				break;
2019			}
2020		}
2021		if (ep->op != '*' && !ep->value.num)
2022		{
2023			if (ep->type == 'e')
2024			{
2025				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2026				{
2027					ep->value.sub->re_disc = &mp->redisc;
2028					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2029					{
2030						p += ep->value.sub->re_npat;
2031						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2032							p += ep->value.sub->re_npat;
2033					}
2034					if (n)
2035					{
2036						regmessage(mp, ep->value.sub, n);
2037						ep->value.sub = 0;
2038					}
2039					else if (*p && mp->disc->errorf)
2040						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2041				}
2042			}
2043			else if (ep->type == 'm')
2044			{
2045				ep->mask = stresc(p) + 1;
2046				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2047				memcpy(ep->value.str, p, ep->mask);
2048				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2049					ep->value.str[ep->mask - 1] = '*';
2050			}
2051			else if (ep->type == 's')
2052			{
2053				ep->mask = stresc(p);
2054				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2055				memcpy(ep->value.str, p, ep->mask);
2056			}
2057			else if (*p == '\'')
2058			{
2059				stresc(p);
2060				ep->value.num = *(unsigned char*)(p + 1) + lge;
2061			}
2062			else if (strmatch(p, "+([a-z])\\(*\\)"))
2063			{
2064				char*	t;
2065
2066				t = p;
2067				ep->type = 'V';
2068				ep->op = *p;
2069				while (*p && *p++ != '(');
2070				switch (ep->op)
2071				{
2072				case 'l':
2073					n = *p++;
2074					if (n < 'a' || n > 'z')
2075					{
2076						if (mp->disc->errorf)
2077							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2078					}
2079					else if (!fun[n -= 'a'])
2080					{
2081						if (mp->disc->errorf)
2082							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2083					}
2084					else
2085					{
2086						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2087						ep->value.loop->lab = fun[n];
2088						while (*p && *p++ != ',');
2089						ep->value.loop->start = strton(p, &t, NiL, 0);
2090						while (*t && *t++ != ',');
2091						ep->value.loop->size = strton(t, &t, NiL, 0);
2092					}
2093					break;
2094				case 'm':
2095				case 'r':
2096					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2097					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2098					break;
2099				case 'v':
2100					break;
2101				default:
2102					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2103						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2104					break;
2105				}
2106			}
2107			else
2108			{
2109				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2110				if (ep->op == '@')
2111					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2112			}
2113		}
2114
2115		/*
2116		 * file description
2117		 */
2118
2119		if (p2)
2120		{
2121			for (; isspace(*p2); p2++);
2122			if (p = strchr(p2, '\t'))
2123			{
2124				/*
2125				 * check for message catalog index
2126				 */
2127
2128				*p++ = 0;
2129				if (isalpha(*p2))
2130				{
2131					for (p3 = p2; isalnum(*p3); p3++);
2132					if (*p3++ == ':')
2133					{
2134						for (; isdigit(*p3); p3++);
2135						if (!*p3)
2136						{
2137							for (p2 = p; isspace(*p2); p2++);
2138							if (p = strchr(p2, '\t'))
2139								*p++ = 0;
2140						}
2141					}
2142				}
2143			}
2144			stresc(p2);
2145			ep->desc = vmstrdup(mp->vm, p2);
2146			if (p)
2147			{
2148				for (; isspace(*p); p++);
2149				if (*p)
2150					ep->mime = vmstrdup(mp->vm, p);
2151			}
2152		}
2153		else
2154			ep->desc = "";
2155
2156		/*
2157		 * get next entry
2158		 */
2159
2160		last = ep;
2161		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2162	}
2163	if (last)
2164	{
2165		last->next = 0;
2166		if (mp->magiclast)
2167			mp->magiclast->next = first;
2168		else
2169			mp->magic = first;
2170		mp->magiclast = last;
2171	}
2172	vmfree(mp->vm, ep);
2173	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2174	{
2175		if (lev < 0)
2176			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2177		else if (lev > 0)
2178			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2179		if (ret)
2180			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2181	}
2182	error_info.file = 0;
2183	error_info.line = 0;
2184	return 0;
2185}
2186
2187/*
2188 * load a magic file into mp
2189 */
2190
2191int
2192magicload(register Magic_t* mp, const char* file, unsigned long flags)
2193{
2194	register char*		s;
2195	register char*		e;
2196	register char*		t;
2197	int			n;
2198	int			found;
2199	int			list;
2200	Sfio_t*			fp;
2201
2202	mp->flags = mp->disc->flags | flags;
2203	found = 0;
2204	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2205	{
2206		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2207			s = MAGIC_FILE;
2208	}
2209	for (;;)
2210	{
2211		if (!list)
2212			e = 0;
2213		else if (e = strchr(s, ':'))
2214		{
2215			/*
2216			 * ok, so ~ won't work for the last list element
2217			 * we do it for MAGIC_FILES_ENV anyway
2218			 */
2219
2220			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2221			{
2222				sfputr(mp->tmp, t, -1);
2223				s += n - 1;
2224			}
2225			sfwrite(mp->tmp, s, e - s);
2226			if (!(s = sfstruse(mp->tmp)))
2227				goto nospace;
2228		}
2229		if (!*s || streq(s, "-"))
2230			s = MAGIC_FILE;
2231		if (!(fp = sfopen(NiL, s, "r")))
2232		{
2233			if (list)
2234			{
2235				if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
2236				{
2237					strcpy(mp->fbuf, s);
2238					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2239					if (!(s = sfstruse(mp->tmp)))
2240						goto nospace;
2241					if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
2242						goto next;
2243				}
2244				if (!(fp = sfopen(NiL, t, "r")))
2245					goto next;
2246			}
2247			else
2248			{
2249				if (mp->disc->errorf)
2250					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2251				return -1;
2252			}
2253		}
2254		found = 1;
2255		n = load(mp, s, fp);
2256		sfclose(fp);
2257		if (n && !list)
2258			return -1;
2259	next:
2260		if (!e)
2261			break;
2262		s = e + 1;
2263	}
2264	if (!found)
2265	{
2266		if (mp->flags & MAGIC_VERBOSE)
2267		{
2268			if (mp->disc->errorf)
2269				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2270		}
2271		return -1;
2272	}
2273	return 0;
2274 nospace:
2275	if (mp->disc->errorf)
2276		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2277	return -1;
2278}
2279
2280/*
2281 * open a magic session
2282 */
2283
2284Magic_t*
2285magicopen(Magicdisc_t* disc)
2286{
2287	register Magic_t*	mp;
2288	register int		i;
2289	register int		n;
2290	register int		f;
2291	register int		c;
2292	register Vmalloc_t*	vm;
2293	unsigned char*		map[CC_MAPS + 1];
2294
2295	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2296		return 0;
2297	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2298	{
2299		vmclose(vm);
2300		return 0;
2301	}
2302	mp->id = lib;
2303	mp->disc = disc;
2304	mp->vm = vm;
2305	mp->flags = disc->flags;
2306	mp->redisc.re_version = REG_VERSION;
2307	mp->redisc.re_flags = REG_NOFREE;
2308	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2309	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2310	mp->redisc.re_resizehandle = (void*)mp->vm;
2311	mp->dtdisc.key = offsetof(Info_t, name);
2312	mp->dtdisc.link = offsetof(Info_t, link);
2313	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
2314		goto bad;
2315	for (n = 0; n < elementsof(info); n++)
2316		dtinsert(mp->infotab, &info[n]);
2317	for (i = 0; i < CC_MAPS; i++)
2318		map[i] = ccmap(i, CC_ASCII);
2319	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2320	for (n = 0; n <= UCHAR_MAX; n++)
2321	{
2322		f = 0;
2323		i = CC_MAPS;
2324		while (--i >= 0)
2325		{
2326			c = ccmapchr(map[i], n);
2327			f = (f << CC_BIT) | CCTYPE(c);
2328		}
2329		mp->cctype[n] = f;
2330	}
2331	return mp;
2332 bad:
2333	magicclose(mp);
2334	return 0;
2335}
2336
2337/*
2338 * close a magicopen() session
2339 */
2340
2341int
2342magicclose(register Magic_t* mp)
2343{
2344	if (!mp)
2345		return -1;
2346	if (mp->tmp)
2347		sfstrclose(mp->tmp);
2348	if (mp->vm)
2349		vmclose(mp->vm);
2350	return 0;
2351}
2352
2353/*
2354 * return the magic string for file with optional stat info st
2355 */
2356
2357char*
2358magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2359{
2360	off_t	off;
2361	char*	s;
2362
2363	mp->flags = mp->disc->flags;
2364	mp->mime = 0;
2365	if (!st)
2366		s = T("cannot stat");
2367	else
2368	{
2369		if (mp->fp = fp)
2370			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2371		s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
2372		if (mp->fp)
2373			sfseek(mp->fp, off, SEEK_SET);
2374		if (!(mp->flags & MAGIC_MIME))
2375		{
2376			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2377				sfprintf(mp->tmp, "%s ", T("short"));
2378			sfprintf(mp->tmp, "%s", s);
2379			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2380				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2381			if (st->st_mode & S_ISUID)
2382				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2383			if (st->st_mode & S_ISGID)
2384				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2385			if (st->st_mode & S_ISVTX)
2386				sfprintf(mp->tmp, ", sticky");
2387			if (!(s = sfstruse(mp->tmp)))
2388				s = T("out of space");
2389		}
2390	}
2391	if (mp->flags & MAGIC_MIME)
2392		s = mp->mime;
2393	if (!s)
2394		s = T("error");
2395	return s;
2396}
2397
2398/*
2399 * list the magic table in mp on sp
2400 */
2401
2402int
2403magiclist(register Magic_t* mp, register Sfio_t* sp)
2404{
2405	register Entry_t*	ep = mp->magic;
2406	register Entry_t*	rp = 0;
2407
2408	mp->flags = mp->disc->flags;
2409	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2410	while (ep)
2411	{
2412		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2413		if (ep->expr)
2414			sfprintf(sp, "%s", ep->expr);
2415		else
2416			sfprintf(sp, "%ld", ep->offset);
2417		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2418		switch (ep->type)
2419		{
2420		case 'm':
2421		case 's':
2422			sfputr(sp, fmtesc(ep->value.str), -1);
2423			break;
2424		case 'V':
2425			switch (ep->op)
2426			{
2427			case 'l':
2428				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2429				break;
2430			case 'v':
2431				sfprintf(sp, "vcodex()");
2432				break;
2433			default:
2434				sfprintf(sp, "%p", ep->value.str);
2435				break;
2436			}
2437			break;
2438		default:
2439			sfprintf(sp, "%lo", ep->value.num);
2440			break;
2441		}
2442		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2443		if (ep->cont == '$' && !ep->value.lab->mask)
2444		{
2445			rp = ep;
2446			ep = ep->value.lab;
2447		}
2448		else
2449		{
2450			if (ep->cont == ':')
2451			{
2452				ep = rp;
2453				ep->value.lab->mask = 1;
2454			}
2455			ep = ep->next;
2456		}
2457	}
2458	return 0;
2459}
2460