1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                  Common Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*            http://www.opensource.org/licenses/cpl1.0.txt             *
11*         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * library interface to file
28 *
29 * the sum of the hacks {s5,v10,planix} is _____ than the parts
30 */
31
/* embedded identification string for this library */
static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-01-28 $\0\n";

/* passed by the T() macro as the third argument of ERROR_translate() */
static const char lib[] = "libast:magic";
35
36#include <ast.h>
37#include <ctype.h>
38#include <ccode.h>
39#include <dt.h>
40#include <modex.h>
41#include <error.h>
42#include <regex.h>
43#include <swap.h>
44
/*
 * translate message m through ERROR_translate() using lib;
 * an empty m is returned unchanged
 */
#define T(m)		(*m?ERROR_translate(NiL,NiL,lib,m):m)

/*
 * strgrpmatch() of string s against pattern p with
 * STR_LEFT|STR_RIGHT|STR_ICASE (going by the flag names: anchored at
 * both ends and case insensitive -- confirm against strgrpmatch docs)
 */
#define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

/* MAXNEST also sizes the keep/cap/msg/ret stacks in _MAGIC_PRIVATE_ */
#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51
/*
 * name => value pair used by the dict[] and info[] tables
 * NOTE: those tables use positional initializers (name, value) and
 * leave link zero initialized, so the member order matters
 */
typedef struct				/* identifier dictionary entry	*/
{
	const char	name[16];	/* identifier name		*/
	int		value;		/* identifier value		*/
	Dtlink_t	link;		/* dictionary link		*/
} Info_t;
58
/*
 * singly linked list node holding one compiled "from" pattern
 */
typedef struct Edit			/* edit substitution		*/
{
	struct Edit*	next;		/* next in list			*/
	regex_t*	from;		/* from pattern			*/
} Edit_t;
64
struct Entry;				/* defined below; Loop_t only needs a pointer */

/*
 * state for the 'l' (loop) operation in ckmagic(): the first visit
 * sets count from the datum value, saves the caller offset in offset
 * and restarts at start; each later visit decrements count and
 * advances the offset by size until count reaches 0
 */
typedef struct				/* loop info			*/
{
	struct Entry*	lab;		/* call this function		*/
	int		start;		/* start here			*/
	int		size;		/* increment by this amount	*/
	int		count;		/* dynamic loop count		*/
	int		offset;		/* dynamic offset		*/
} Loop_t;
75
/*
 * one parsed magic table entry as interpreted by ckmagic()
 *
 * value is a union read according to the entry kind: num for numeric
 * comparisons, str for the string types, lab for '$' calls, sub for
 * the regex types and loop for the 'l' operation
 *
 * when expr is set, offset holds either 0 (evaluate expr) or one of
 * the INFO_* codes; for string types mask is the byte length compared
 */
typedef struct Entry			/* magic file entry		*/
{
	struct Entry*	next;		/* next in list			*/
	char*		expr;		/* offset expression		*/
	union
	{
	unsigned long	num;
	char*		str;
	struct Entry*	lab;
	regex_t*	sub;
	Loop_t*		loop;
	}		value;		/* comparison value		*/
	char*		desc;		/* file description		*/
	char*		mime;		/* file mime type		*/
	unsigned long	offset;		/* offset in bytes		*/
	unsigned long	mask;		/* mask before compare		*/
	char		cont;		/* continuation operation	*/
	char		type;		/* datum type			*/
	char		op;		/* comparison operation		*/
	char		nest;		/* { or } nesting operation	*/
	char		swap;		/* forced swap order		*/
} Entry_t;
98
/*
 * character code classification
 *
 * a Cctype_t holds CC_BIT flag bits for each of the CC_MAPS code sets;
 * the type is chosen wide enough for CC_MAPS*CC_BIT bits
 */

#define CC_BIT		5

#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
typedef unsigned short Cctype_t;
#else
typedef unsigned long Cctype_t;
#endif

#define CC_text		0x01
#define CC_control	0x02
#define CC_latin	0x04
#define CC_binary	0x08
#define CC_utf_8	0x10

#define CC_notext	CC_text		/* CC_text is flipped before checking */

/* NOTE: CC_utf_8 is not part of CC_MASK */
#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)

/*
 * classify character code c:
 *	above 0240		CC_binary
 *	0200 through 0240	CC_latin
 *	below 040 other than 007 011 012 013 015	CC_control
 *	anything else		CC_text
 */
#define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
118
/*
 * identifier classes: the value member of the dict[] entries and the
 * index into the identifier[] counters (sized ID_MAX + 1)
 */

#define ID_NONE		0
#define ID_ASM		1
#define ID_C		2
#define ID_COBOL	3
#define ID_COPYBOOK	4
#define ID_CPLUSPLUS	5
#define ID_FORTRAN	6
#define ID_HTML		7
#define ID_INCL1	8
#define ID_INCL2	9
#define ID_INCL3	10
#define ID_MAM1		11
#define ID_MAM2		12
#define ID_MAM3		13
#define ID_NOTEXT	14
#define ID_PL1		15
#define ID_YACC		16

#define ID_MAX		ID_YACC		/* keep equal to the largest ID_*	*/
138
/*
 * file info keyword codes: the value member of the info[] entries;
 * ckmagic() switches on these in Entry_t.offset when Entry_t.expr is
 * set, where 0 means "evaluate expr" -- so the codes start at 1
 */

#define INFO_atime	1
#define INFO_blocks	2
#define INFO_ctime	3
#define INFO_fstype	4
#define INFO_gid	5
#define INFO_mode	6
#define INFO_mtime	7
#define INFO_name	8
#define INFO_nlink	9
#define INFO_size	10
#define INFO_uid	11
150
/*
 * private Magic_t members; defined before <magic.h> is included
 * (presumably so that header can splice them into Magic_t -- confirm
 * against magic.h)
 */
#define _MAGIC_PRIVATE_ \
	Magicdisc_t*	disc;			/* discipline		*/ \
	Vmalloc_t*	vm;			/* vmalloc region	*/ \
	Entry_t*	magic;			/* parsed magic table	*/ \
	Entry_t*	magiclast;		/* last entry in magic	*/ \
	char*		mime;			/* MIME type		*/ \
	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
	char		nbuf[256];		/* !CC_NATIVE data	*/ \
	char		mbuf[64];		/* mime string		*/ \
	char		sbuf[64];		/* type suffix string	*/ \
	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
	int		fbsz;			/* fbuf size		*/ \
	int		fbmx;			/* fbuf max size	*/ \
	int		xbsz;			/* xbuf size		*/ \
	int		swap;			/* swap() operation	*/ \
	unsigned long	flags;			/* disc+open flags	*/ \
	long		xoff;			/* xbuf offset		*/ \
	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
	Sfio_t*		fp;			/* fbuf fp		*/ \
	Sfio_t*		tmp;			/* tmp string		*/ \
	regdisc_t	redisc;			/* regex discipline	*/ \
	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
	Dt_t*		idtab;			/* identifier dict	*/ \
	Dt_t*		infotab;		/* info keyword dict	*/
184
185#include <magic.h>
186
/*
 * identifier spelling => ID_* class
 *
 * cklang() inserts these entries into the idtab dictionary on first
 * use and counts each identifier found in the file data under
 * identifier[value]; lookups are exact, so upper and lower case
 * spellings are listed separately
 */
static Info_t		dict[] =		/* keyword dictionary	*/
{
	{ 	"COMMON",	ID_FORTRAN	},
	{ 	"COMPUTE",	ID_COBOL	},
	{ 	"COMP",		ID_COPYBOOK	},
	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
	{ 	"DCL",		ID_PL1		},
	{ 	"DEFINED",	ID_PL1		},
	{ 	"DIMENSION",	ID_FORTRAN	},
	{ 	"DIVISION",	ID_COBOL	},
	{ 	"FILLER",	ID_COPYBOOK	},
	{ 	"FIXED",	ID_PL1		},
	{ 	"FUNCTION",	ID_FORTRAN	},
	{ 	"HTML",		ID_HTML		},
	{ 	"INTEGER",	ID_FORTRAN	},
	{ 	"MAIN",		ID_PL1		},
	{ 	"OPTIONS",	ID_PL1		},
	{ 	"PERFORM",	ID_COBOL	},
	{ 	"PIC",		ID_COPYBOOK	},
	{ 	"REAL",		ID_FORTRAN	},
	{ 	"REDEFINES",	ID_COPYBOOK	},
	{ 	"S9",		ID_COPYBOOK	},
	{ 	"SECTION",	ID_COBOL	},
	{ 	"SELECT",	ID_COBOL	},
	{ 	"SUBROUTINE",	ID_FORTRAN	},
	{ 	"TEXT",		ID_ASM		},
	{ 	"VALUE",	ID_COPYBOOK	},
	{ 	"attr",		ID_MAM3		},
	{ 	"binary",	ID_YACC		},
	{ 	"block",	ID_FORTRAN	},
	{ 	"bss",		ID_ASM		},
	{ 	"byte",		ID_ASM		},
	{ 	"char",		ID_C		},
	{ 	"class",	ID_CPLUSPLUS	},
	{ 	"clr",		ID_NOTEXT	},
	{ 	"comm",		ID_ASM		},
	{ 	"common",	ID_FORTRAN	},
	{ 	"data",		ID_ASM		},
	{ 	"dimension",	ID_FORTRAN	},
	{ 	"done",		ID_MAM2		},
	{ 	"double",	ID_C		},
	{ 	"even",		ID_ASM		},
	{ 	"exec",		ID_MAM3		},
	{ 	"extern",	ID_C		},
	{ 	"float",	ID_C		},
	{ 	"function",	ID_FORTRAN	},
	{ 	"globl",	ID_ASM		},
	{ 	"h",		ID_INCL3	},
	{ 	"html",		ID_HTML		},
	{ 	"include",	ID_INCL1	},
	{ 	"int",		ID_C		},
	{ 	"integer",	ID_FORTRAN	},
	{ 	"jmp",		ID_NOTEXT	},
	{ 	"left",		ID_YACC		},
	{ 	"libc",		ID_INCL2	},
	{ 	"long",		ID_C		},
	{ 	"make",		ID_MAM1		},
	{ 	"mov",		ID_NOTEXT	},
	{ 	"private",	ID_CPLUSPLUS	},
	{ 	"public",	ID_CPLUSPLUS	},
	{ 	"real",		ID_FORTRAN	},
	{ 	"register",	ID_C		},
	{ 	"right",	ID_YACC		},
	{ 	"sfio",		ID_INCL2	},
	{ 	"static",	ID_C		},
	{ 	"stdio",	ID_INCL2	},
	{ 	"struct",	ID_C		},
	{ 	"subroutine",	ID_FORTRAN	},
	{ 	"sys",		ID_NOTEXT	},
	{ 	"term",		ID_YACC		},
	{ 	"text",		ID_ASM		},
	{ 	"tst",		ID_NOTEXT	},
	{ 	"type",		ID_YACC		},
	{ 	"typedef",	ID_C		},
	{ 	"u",		ID_INCL2	},
	{ 	"union",	ID_YACC		},
	{ 	"void",		ID_C		},
};
265
/*
 * file info keyword name => INFO_* code, one entry per INFO_* macro
 * (presumably loaded into the infotab dictionary by the magic table
 * loader -- confirm against the load code)
 */
static Info_t		info[] =
{
	{	"atime",	INFO_atime		},
	{	"blocks",	INFO_blocks		},
	{	"ctime",	INFO_ctime		},
	{	"fstype",	INFO_fstype		},
	{	"gid",		INFO_gid		},
	{	"mode",		INFO_mode		},
	{	"mtime",	INFO_mtime		},
	{	"name",		INFO_name		},
	{	"nlink",	INFO_nlink		},
	{	"size",		INFO_size		},
	{	"uid",		INFO_uid		},
};
280
281/*
282 * return pointer to data at offset off and size siz
283 */
284
285static char*
286getdata(register Magic_t* mp, register long off, register int siz)
287{
288	register long	n;
289
290	if (off < 0)
291		return 0;
292	if (off + siz <= mp->fbsz)
293		return mp->fbuf + off;
294	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295	{
296		if (off + siz > mp->fbmx)
297			return 0;
298		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299		if (sfseek(mp->fp, n, SEEK_SET) != n)
300			return 0;
301		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302		{
303			mp->xoff = 0;
304			mp->xbsz = 0;
305			return 0;
306		}
307		mp->xbuf[mp->xbsz] = 0;
308		mp->xoff = n;
309		if (off + siz > mp->xoff + mp->xbsz)
310			return 0;
311	}
312	return mp->xbuf + off - mp->xoff;
313}
314
315/*
316 * @... evaluator for strexpr()
317 */
318
319static long
320indirect(const char* cs, char** e, void* handle)
321{
322	register char*		s = (char*)cs;
323	register Magic_t*	mp = (Magic_t*)handle;
324	register long		n = 0;
325	register char*		p;
326
327	if (s)
328	{
329		if (*s == '@')
330		{
331			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332			switch (*(s = *e))
333			{
334			case 'b':
335			case 'B':
336				s++;
337				if (p = getdata(mp, n, 1))
338					n = *(unsigned char*)p;
339				else
340					s = (char*)cs;
341				break;
342			case 'h':
343			case 'H':
344				s++;
345				if (p = getdata(mp, n, 2))
346					n = swapget(mp->swap, p, 2);
347				else
348					s = (char*)cs;
349				break;
350			case 'q':
351			case 'Q':
352				s++;
353				if (p = getdata(mp, n, 8))
354					n = swapget(mp->swap, p, 8);
355				else
356					s = (char*)cs;
357				break;
358			default:
359				if (isalnum(*s))
360					s++;
361				if (p = getdata(mp, n, 4))
362					n = swapget(mp->swap, p, 4);
363				else
364					s = (char*)cs;
365				break;
366			}
367		}
368		*e = s;
369	}
370	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372	return n;
373}
374
375/*
376 * emit regex error message
377 */
378
379static void
380regmessage(Magic_t* mp, regex_t* re, int code)
381{
382	char	buf[128];
383
384	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385	{
386		regerror(code, re, buf, sizeof(buf));
387		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388	}
389}
390
391/*
392 * decompose vcodex(3) method composition
393 */
394
395static char*
396vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397{
398	unsigned char*	map;
399	const char*	o;
400	int		c;
401	int		n;
402	int		i;
403	int		a;
404
405	map = CCMAP(CC_ASCII, CC_NATIVE);
406	a = 0;
407	i = 1;
408	for (;;)
409	{
410		if (i)
411			i = 0;
412		else
413			*b++ = '^';
414		if (m < (x - 1) && !*(m + 1))
415		{
416			/*
417			 * obsolete indices
418			 */
419
420			if (!a)
421			{
422				a = 1;
423				o = "old, ";
424				while (b < e && (c = *o++))
425					*b++ = c;
426			}
427			switch (*m)
428			{
429			case 0:		o = "delta"; break;
430			case 1:		o = "huffman"; break;
431			case 2:		o = "huffgroup"; break;
432			case 3:		o = "arith"; break;
433			case 4:		o = "bwt"; break;
434			case 5:		o = "rle"; break;
435			case 6:		o = "mtf"; break;
436			case 7:		o = "transpose"; break;
437			case 8:		o = "table"; break;
438			case 9:		o = "huffpart"; break;
439			case 50:	o = "map"; break;
440			case 100:	o = "recfm"; break;
441			case 101:	o = "ss7"; break;
442			default:	o = "UNKNOWN"; break;
443			}
444			m += 2;
445			while (b < e && (c = *o++))
446				*b++ = c;
447		}
448		else
449			while (b < e && m < x && (c = *m++))
450			{
451				if (map)
452					c = map[c];
453				*b++ = c;
454			}
455		if (b >= e)
456			break;
457		n = 0;
458		while (m < x)
459		{
460			n = (n<<7) | (*m & 0x7f);
461			if (!(*m++ & 0x80))
462				break;
463		}
464		if (n >= (x - m))
465			break;
466		m += n;
467	}
468	return b;
469}
470
471/*
472 * check for magic table match in buf
473 */
474
475static char*
476ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
477{
478	register Entry_t*	ep;
479	register char*		p;
480	register char*		b;
481	register int		level = 0;
482	int			call = -1;
483	int			all = 0;
484	int			c;
485	int			str;
486	char*			q;
487	char*			t;
488	char*			cur;
489	char*			base = 0;
490	unsigned long		num;
491	unsigned long		mask;
492	regmatch_t		matches[10];
493
494	mp->swap = 0;
495	b = mp->msg[0] = cur = buf;
496	mp->mime = mp->cap[0] = 0;
497	mp->keep[0] = 0;
498	for (ep = mp->magic; ep; ep = ep->next)
499	{
500	fun:
501		if (ep->nest == '{')
502		{
503			if (++level >= MAXNEST)
504			{
505				call = -1;
506				level = 0;
507				mp->keep[0] = 0;
508				b = mp->msg[0];
509				mp->mime = mp->cap[0];
510				continue;
511			}
512			mp->keep[level] = mp->keep[level - 1] != 0;
513			mp->msg[level] = b;
514			mp->cap[level] = mp->mime;
515		}
516		switch (ep->cont)
517		{
518		case '#':
519			if (mp->keep[level] && b > cur)
520			{
521				if ((mp->flags & MAGIC_ALL) && b < (end - 3))
522				{
523					all = 1;
524					*b++ = '\n';
525					cur = b;
526					continue;
527				}
528				*b = 0;
529				return buf;
530			}
531			mp->swap = 0;
532			b = mp->msg[0] = cur;
533			mp->mime = mp->cap[0] = 0;
534			if (ep->type == ' ')
535				continue;
536			break;
537		case '$':
538			if (mp->keep[level] && call < (MAXNEST - 1))
539			{
540				mp->ret[++call] = ep;
541				ep = ep->value.lab;
542				goto fun;
543			}
544			continue;
545		case ':':
546			ep = mp->ret[call--];
547			if (ep->op == 'l')
548				goto fun;
549			continue;
550		case '|':
551			if (mp->keep[level] > 1)
552				goto checknest;
553			/*FALLTHROUGH*/
554		default:
555			if (!mp->keep[level])
556			{
557				b = mp->msg[level];
558				mp->mime = mp->cap[level];
559				goto checknest;
560			}
561			break;
562		}
563		p = "";
564		num = 0;
565		if (!ep->expr)
566			num = ep->offset + off;
567		else
568			switch (ep->offset)
569			{
570			case 0:
571				num = strexpr(ep->expr, NiL, indirect, mp) + off;
572				break;
573			case INFO_atime:
574				num = st->st_atime;
575				ep->type = 'D';
576				break;
577			case INFO_blocks:
578				num = iblocks(st);
579				ep->type = 'N';
580				break;
581			case INFO_ctime:
582				num = st->st_ctime;
583				ep->type = 'D';
584				break;
585			case INFO_fstype:
586				p = fmtfs(st);
587				ep->type = toupper(ep->type);
588				break;
589			case INFO_gid:
590				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
591				{
592					p = fmtgid(st->st_gid);
593					ep->type = toupper(ep->type);
594				}
595				else
596				{
597					num = st->st_gid;
598					ep->type = 'N';
599				}
600				break;
601			case INFO_mode:
602				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
603				{
604					p = fmtmode(st->st_mode, 0);
605					ep->type = toupper(ep->type);
606				}
607				else
608				{
609					num = modex(st->st_mode);
610					ep->type = 'N';
611				}
612				break;
613			case INFO_mtime:
614				num = st->st_ctime;
615				ep->type = 'D';
616				break;
617			case INFO_name:
618				if (!base)
619				{
620					if (base = strrchr(file, '/'))
621						base++;
622					else
623						base = (char*)file;
624				}
625				p = base;
626				ep->type = toupper(ep->type);
627				break;
628			case INFO_nlink:
629				num = st->st_nlink;
630				ep->type = 'N';
631				break;
632			case INFO_size:
633				num = st->st_size;
634				ep->type = 'N';
635				break;
636			case INFO_uid:
637				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
638				{
639					p = fmtuid(st->st_uid);
640					ep->type = toupper(ep->type);
641				}
642				else
643				{
644					num = st->st_uid;
645					ep->type = 'N';
646				}
647				break;
648			}
649		switch (ep->type)
650		{
651
652		case 'b':
653			if (!(p = getdata(mp, num, 1)))
654				goto next;
655			num = *(unsigned char*)p;
656			break;
657
658		case 'h':
659			if (!(p = getdata(mp, num, 2)))
660				goto next;
661			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
662			break;
663
664		case 'd':
665		case 'l':
666		case 'v':
667			if (!(p = getdata(mp, num, 4)))
668				goto next;
669			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
670			break;
671
672		case 'q':
673			if (!(p = getdata(mp, num, 8)))
674				goto next;
675			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
676			break;
677
678		case 'e':
679			if (!(p = getdata(mp, num, 0)))
680				goto next;
681			/*FALLTHROUGH*/
682		case 'E':
683			if (!ep->value.sub)
684				goto next;
685			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
686			{
687				c = mp->fbsz;
688				if (c >= sizeof(mp->nbuf))
689					c = sizeof(mp->nbuf) - 1;
690				p = (char*)memcpy(mp->nbuf, p, c);
691				p[c] = 0;
692				ccmapstr(mp->x2n, p, c);
693				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
694				{
695					if (c != REG_NOMATCH)
696						regmessage(mp, ep->value.sub, c);
697					goto next;
698				}
699			}
700			p = ep->value.sub->re_sub->re_buf;
701			q = T(ep->desc);
702			t = *q ? q : p;
703			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
704				*b++ = ' ';
705			b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
706			if (ep->mime)
707				mp->mime = ep->mime;
708			goto checknest;
709
710		case 's':
711			if (!(p = getdata(mp, num, ep->mask)))
712				goto next;
713			goto checkstr;
714		case 'm':
715			if (!(p = getdata(mp, num, 0)))
716				goto next;
717			/*FALLTHROUGH*/
718		case 'M':
719		case 'S':
720		checkstr:
721			for (;;)
722			{
723				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
724					break;
725				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
726					break;
727				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
728					goto next;
729				p = (char*)memcpy(mp->nbuf, p, ep->mask);
730				p[ep->mask] = 0;
731				ccmapstr(mp->x2n, p, ep->mask);
732			}
733			q = T(ep->desc);
734			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
735				*b++ = ' ';
736			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
737			*t = 0;
738			b += sfsprintf(b, end - b, q + (*q == '\b'), p);
739			*t = c;
740			if (ep->mime)
741				mp->mime = ep->mime;
742			goto checknest;
743
744		}
745		if (mask = ep->mask)
746			num &= mask;
747		switch (ep->op)
748		{
749
750		case '=':
751		case '@':
752			if (num == ep->value.num)
753				break;
754			if (ep->cont != '#')
755				goto next;
756			if (!mask)
757				mask = ~mask;
758			if (ep->type == 'h')
759			{
760				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
761				{
762					if (!(mp->swap & (mp->swap + 1)))
763						mp->swap = 7;
764					goto swapped;
765				}
766			}
767			else if (ep->type == 'l')
768			{
769				for (c = 1; c < 4; c++)
770					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
771					{
772						if (!(mp->swap & (mp->swap + 1)))
773							mp->swap = 7;
774						goto swapped;
775					}
776			}
777			else if (ep->type == 'q')
778			{
779				for (c = 1; c < 8; c++)
780					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
781						goto swapped;
782			}
783			goto next;
784
785		case '!':
786			if (num != ep->value.num)
787				break;
788			goto next;
789
790		case '^':
791			if (num ^ ep->value.num)
792				break;
793			goto next;
794
795		case '>':
796			if (num > ep->value.num)
797				break;
798			goto next;
799
800		case '<':
801			if (num < ep->value.num)
802				break;
803			goto next;
804
805		case 'l':
806			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
807			{
808				if (!ep->value.loop->count)
809				{
810					ep->value.loop->count = num;
811					ep->value.loop->offset = off;
812					off = ep->value.loop->start;
813				}
814				else if (!--ep->value.loop->count)
815				{
816					off = ep->value.loop->offset;
817					goto next;
818				}
819				else
820					off += ep->value.loop->size;
821				mp->ret[++call] = ep;
822				ep = ep->value.loop->lab;
823				goto fun;
824			}
825			goto next;
826
827		case 'm':
828			c = mp->swap;
829			t = ckmagic(mp, file, b + (b > cur), end, st, num);
830			mp->swap = c;
831			if (t)
832			{
833				if (b > cur && b < end)
834					*b = ' ';
835				b += strlen(b);
836			}
837			else if (ep->cont == '&')
838				goto next;
839			break;
840
841		case 'r':
842#if _UWIN
843		{
844			char*			e;
845			Sfio_t*			rp;
846			Sfio_t*			gp;
847
848			if (!(t = strrchr(file, '.')))
849				goto next;
850			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
851			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
852				goto next;
853			*ep->desc = 0;
854			*ep->mime = 0;
855			gp = 0;
856			while (t = sfgetr(rp, '\n', 1))
857			{
858				if (strneq(t, "Content Type=", 13))
859				{
860					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
861					strcpy(ep->mime, t + 13);
862					if (gp)
863						break;
864				}
865				else
866				{
867					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
868					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
869					{
870						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
871						strcpy(ep->desc, t);
872						if (*ep->mime)
873							break;
874					}
875				}
876			}
877			sfclose(rp);
878			if (!gp)
879				goto next;
880			if (!*ep->mime)
881			{
882				t = T(ep->desc);
883				if (!strncasecmp(t, "microsoft", 9))
884					t += 9;
885				while (isspace(*t))
886					t++;
887				e = "application/x-ms-";
888				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
889				e = strcopy(ep->mime, e);
890				while ((c = *t++) && c != '.' && c != ' ')
891					*e++ = isupper(c) ? tolower(c) : c;
892				*e = 0;
893			}
894			while (t = sfgetr(gp, '\n', 1))
895				if (*t && !streq(t, "\"\""))
896				{
897					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
898					strcpy(ep->desc, t);
899					break;
900				}
901			sfclose(gp);
902			if (!*ep->desc)
903				goto next;
904			if (!t)
905				for (t = T(ep->desc); *t; t++)
906					if (*t == '.')
907						*t = ' ';
908			if (!mp->keep[level])
909				mp->keep[level] = 2;
910			mp->mime = ep->mime;
911			break;
912		}
913#else
914			if (ep->cont == '#' && !mp->keep[level])
915				mp->keep[level] = 1;
916			goto next;
917#endif
918
919		case 'v':
920			if (!(p = getdata(mp, num, 4)))
921				goto next;
922			c = 0;
923			do
924			{
925				num++;
926				c = (c<<7) | (*p & 0x7f);
927			} while (*p++ & 0x80);
928			if (!(p = getdata(mp, num, c)))
929				goto next;
930			if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
931			{
932				*b++ = ',';
933				*b++ = ' ';
934			}
935			b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
936			goto checknest;
937
938		}
939	swapped:
940		q = T(ep->desc);
941		if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
942			*b++ = ' ';
943		if (*q == '\b')
944			q++;
945		str = 0;
946		for (t = q; *t; t++)
947			if (*t == '%' && (c = *(t + 1)))
948			{
949				if (c == '%')
950					t++;
951				else
952					while (c && c != '%')
953					{
954						if (c == 's')
955						{
956							str = 1;
957							break;
958						}
959						t++;
960						c = *(t + 1);
961					}
962			}
963		if (!str)
964			b += sfsprintf(b, end - b, q, num, 0, 0, 0, 0, 0, 0, 0);
965		else if (ep->type == 'd' || ep->type == 'D')
966			b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
967		else if (ep->type == 'v')
968			b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
969		else
970			b += sfsprintf(b, end - b, q, fmtnum(num, 0), 0, 0, 0, 0, 0, 0, 0);
971		if (ep->mime && *ep->mime)
972			mp->mime = ep->mime;
973	checknest:
974		if (ep->nest == '}')
975		{
976			if (!mp->keep[level])
977			{
978				b = mp->msg[level];
979				mp->mime = mp->cap[level];
980			}
981			else if (level > 0)
982				mp->keep[level - 1] = mp->keep[level];
983			if (--level < 0)
984			{
985				level = 0;
986				mp->keep[0] = 0;
987			}
988		}
989		continue;
990	next:
991		if (ep->cont == '&')
992			mp->keep[level] = 0;
993		goto checknest;
994	}
995	if (all && b-- || mp->keep[level] && b > cur)
996	{
997		*b = 0;
998		return buf;
999	}
1000	return 0;
1001}
1002
1003/*
1004 * check english language stats
1005 */
1006
1007static int
1008ckenglish(register Magic_t* mp, int pun, int badpun)
1009{
1010	register char*	s;
1011	register int	vowl = 0;
1012	register int	freq = 0;
1013	register int	rare = 0;
1014
1015	if (5 * badpun > pun)
1016		return 0;
1017	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
1018		return 0;
1019	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
1020		return 0;
1021	for (s = "aeiou"; *s; s++)
1022		vowl += mp->count[toupper(*s)] + mp->count[*s];
1023	for (s = "etaion"; *s; s++)
1024		freq += mp->count[toupper(*s)] + mp->count[*s];
1025	for (s = "vjkqxz"; *s; s++)
1026		rare += mp->count[toupper(*s)] + mp->count[*s];
1027	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
1028}
1029
1030/*
1031 * check programming language stats
1032 */
1033
1034static char*
1035cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
1036{
1037	register int		c;
1038	register unsigned char*	b;
1039	register unsigned char*	e;
1040	register int		q;
1041	register char*		s;
1042	char*			t;
1043	char*			base;
1044	char*			suff;
1045	char*			t1;
1046	char*			t2;
1047	char*			t3;
1048	int			n;
1049	int			badpun;
1050	int			code;
1051	int			pun;
1052	Cctype_t		flags;
1053	Info_t*			ip;
1054
1055	b = (unsigned char*)mp->fbuf;
1056	e = b + mp->fbsz;
1057	memzero(mp->count, sizeof(mp->count));
1058	memzero(mp->multi, sizeof(mp->multi));
1059	memzero(mp->identifier, sizeof(mp->identifier));
1060
1061	/*
1062	 * check character coding
1063	 */
1064
1065	flags = 0;
1066	while (b < e)
1067		flags |= mp->cctype[*b++];
1068	b = (unsigned char*)mp->fbuf;
1069	code = 0;
1070	q = CC_ASCII;
1071	n = CC_MASK;
1072	for (c = 0; c < CC_MAPS; c++)
1073	{
1074		flags ^= CC_text;
1075		if ((flags & CC_MASK) < n)
1076		{
1077			n = flags & CC_MASK;
1078			q = c;
1079		}
1080		flags >>= CC_BIT;
1081	}
1082	flags = n;
1083	if (!(flags & (CC_binary|CC_notext)))
1084	{
1085		if (q != CC_NATIVE)
1086		{
1087			code = q;
1088			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1089		}
1090		if (b[0] == '#' && b[1] == '!')
1091		{
1092			for (b += 2; b < e && isspace(*b); b++);
1093			for (s = (char*)b; b < e && isprint(*b); b++);
1094			c = *b;
1095			*b = 0;
1096			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1097			{
1098				if (t = strrchr(s, '/'))
1099					s = t + 1;
1100				for (t = s; *t; t++)
1101					if (isspace(*t))
1102					{
1103						*t = 0;
1104						break;
1105					}
1106				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1107				mp->mime = mp->mbuf;
1108				if (match(s, "*sh"))
1109				{
1110					t1 = T("command");
1111					if (streq(s, "sh"))
1112						*s = 0;
1113					else
1114					{
1115						*b++ = ' ';
1116						*b = 0;
1117					}
1118				}
1119				else
1120				{
1121					t1 = T("interpreter");
1122					*b++ = ' ';
1123					*b = 0;
1124				}
1125				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1126				s = mp->sbuf;
1127				goto qualify;
1128			}
1129			*b = c;
1130			b = (unsigned char*)mp->fbuf;
1131		}
1132		badpun = 0;
1133		pun = 0;
1134		q = 0;
1135		s = 0;
1136		t = 0;
1137		while (b < e)
1138		{
1139			c = *b++;
1140			mp->count[c]++;
1141			if (c == q && (q != '*' || *b == '/' && b++))
1142			{
1143				mp->multi[q]++;
1144				q = 0;
1145			}
1146			else if (c == '\\')
1147			{
1148				s = 0;
1149				b++;
1150			}
1151			else if (!q)
1152			{
1153				if (isalpha(c) || c == '_')
1154				{
1155					if (!s)
1156						s = (char*)b - 1;
1157				}
1158				else if (!isdigit(c))
1159				{
1160					if (s)
1161					{
1162						if (s > mp->fbuf)
1163							switch (*(s - 1))
1164							{
1165							case ':':
1166								if (*b == ':')
1167									mp->multi[':']++;
1168								break;
1169							case '.':
1170								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1171									mp->multi['.']++;
1172								break;
1173							case '\n':
1174							case '\\':
1175								if (*b == '{')
1176									t = (char*)b + 1;
1177								break;
1178							case '{':
1179								if (s == t && *b == '}')
1180									mp->multi['X']++;
1181								break;
1182							}
1183							if (!mp->idtab)
1184							{
1185								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
1186									for (q = 0; q < elementsof(dict); q++)
1187										dtinsert(mp->idtab, &dict[q]);
1188								else if (mp->disc->errorf)
1189									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1190								q = 0;
1191							}
1192							if (mp->idtab)
1193							{
1194								*(b - 1) = 0;
1195								if (ip = (Info_t*)dtmatch(mp->idtab, s))
1196									mp->identifier[ip->value]++;
1197								*(b - 1) = c;
1198							}
1199							s = 0;
1200						}
1201					switch (c)
1202					{
1203					case '\t':
1204						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1205							mp->multi['\t']++;
1206						break;
1207					case '"':
1208					case '\'':
1209						q = c;
1210						break;
1211					case '/':
1212						if (*b == '*')
1213							q = *b++;
1214						else if (*b == '/')
1215							q = '\n';
1216						break;
1217					case '$':
1218						if (*b == '(' && *(b + 1) != ' ')
1219							mp->multi['$']++;
1220						break;
1221					case '{':
1222					case '}':
1223					case '[':
1224					case ']':
1225					case '(':
1226						mp->multi[c]++;
1227						break;
1228					case ')':
1229						mp->multi[c]++;
1230						goto punctuation;
1231					case ':':
1232						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1233							mp->multi[':']++;
1234						goto punctuation;
1235					case '.':
1236					case ',':
1237					case '%':
1238					case ';':
1239					case '?':
1240					punctuation:
1241						pun++;
1242						if (*b != ' ' && *b != '\n')
1243							badpun++;
1244						break;
1245					}
1246				}
1247			}
1248		}
1249	}
1250	else
1251		while (b < e)
1252			mp->count[*b++]++;
1253	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1254	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1255	if (!flags)
1256	{
1257		if (match(suff, "*sh|bat|cmd"))
1258			goto id_sh;
1259		if (match(base, "*@(mkfile)"))
1260			goto id_mk;
1261		if (match(base, "*@(makefile|.mk)"))
1262			goto id_make;
1263		if (match(base, "*@(mamfile|.mam)"))
1264			goto id_mam;
1265		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1266			goto id_c;
1267		if (match(suff, "f"))
1268			goto id_fortran;
1269		if (match(suff, "htm+(l)"))
1270			goto id_html;
1271		if (match(suff, "cpy"))
1272			goto id_copybook;
1273		if (match(suff, "cob|cbl|cb2"))
1274			goto id_cobol;
1275		if (match(suff, "pl[1i]"))
1276			goto id_pl1;
1277		if (match(suff, "tex"))
1278			goto id_tex;
1279		if (match(suff, "asm|s"))
1280			goto id_asm;
1281		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1282		{
1283		id_sh:
1284			s = T("command script");
1285			mp->mime = "application/sh";
1286			goto qualify;
1287		}
1288		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1289		{
1290			s = T("mail message");
1291			mp->mime = "message/rfc822";
1292			goto qualify;
1293		}
1294		if (match(base, "*@(mkfile)"))
1295		{
1296		id_mk:
1297			s = "mkfile";
1298			mp->mime = "application/mk";
1299			goto qualify;
1300		}
1301		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1302		{
1303		id_make:
1304			s = "makefile";
1305			mp->mime = "application/make";
1306			goto qualify;
1307		}
1308		if (mp->multi['.'] >= 3)
1309		{
1310			s = T("nroff input");
1311			mp->mime = "application/x-troff";
1312			goto qualify;
1313		}
1314		if (mp->multi['X'] >= 3)
1315		{
1316			s = T("TeX input");
1317			mp->mime = "application/x-tex";
1318			goto qualify;
1319		}
1320		if (mp->fbsz < SF_BUFSIZE &&
1321		    (mp->multi['('] == mp->multi[')'] &&
1322		     mp->multi['{'] == mp->multi['}'] &&
1323		     mp->multi['['] == mp->multi[']']) ||
1324		    mp->fbsz >= SF_BUFSIZE &&
1325		    (mp->multi['('] >= mp->multi[')'] &&
1326		     mp->multi['{'] >= mp->multi['}'] &&
1327		     mp->multi['['] >= mp->multi[']']))
1328		{
1329			c = mp->identifier[ID_INCL1];
1330			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1331			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1332			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1333			{
1334			id_c:
1335				t1 = "";
1336				t2 = "c ";
1337				t3 = T("program");
1338				switch (*suff)
1339				{
1340				case 'c':
1341				case 'C':
1342					mp->mime = "application/x-cc";
1343					break;
1344				case 'l':
1345				case 'L':
1346					t1 = "lex ";
1347					mp->mime = "application/x-lex";
1348					break;
1349				default:
1350					t3 = T("header");
1351					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1352					{
1353						mp->mime = "application/x-cc";
1354						break;
1355					}
1356					/*FALLTHROUGH*/
1357				case 'y':
1358				case 'Y':
1359					t1 = "yacc ";
1360					mp->mime = "application/x-yacc";
1361					break;
1362				}
1363				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1364				{
1365					t2 = "c++ ";
1366					mp->mime = "application/x-c++";
1367				}
1368				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1369				s = mp->sbuf;
1370				goto qualify;
1371			}
1372		}
1373		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1374		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1375		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1376		{
1377		id_mam:
1378			s = T("mam program");
1379			mp->mime = "application/x-mam";
1380			goto qualify;
1381		}
1382		if (mp->identifier[ID_FORTRAN] >= 8)
1383		{
1384		id_fortran:
1385			s = T("fortran program");
1386			mp->mime = "application/x-fortran";
1387			goto qualify;
1388		}
1389		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1390		{
1391		id_html:
1392			s = T("html input");
1393			mp->mime = "text/html";
1394			goto qualify;
1395		}
1396		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1397		{
1398		id_copybook:
1399			s = T("cobol copybook");
1400			mp->mime = "application/x-cobol";
1401			goto qualify;
1402		}
1403		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1404		{
1405		id_cobol:
1406			s = T("cobol program");
1407			mp->mime = "application/x-cobol";
1408			goto qualify;
1409		}
1410		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1411		{
1412		id_pl1:
1413			s = T("pl1 program");
1414			mp->mime = "application/x-pl1";
1415			goto qualify;
1416		}
1417		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1418		{
1419		id_tex:
1420			s = T("TeX input");
1421			mp->mime = "text/tex";
1422			goto qualify;
1423		}
1424		if (mp->identifier[ID_ASM] >= 4)
1425		{
1426		id_asm:
1427			s = T("as program");
1428			mp->mime = "application/x-as";
1429			goto qualify;
1430		}
1431		if (ckenglish(mp, pun, badpun))
1432		{
1433			s = T("english text");
1434			mp->mime = "text/plain";
1435			goto qualify;
1436		}
1437	}
1438	else if (streq(base, "core"))
1439	{
1440		mp->mime = "x-system/core";
1441		return T("core dump");
1442	}
1443	if (flags & (CC_binary|CC_notext))
1444	{
1445		b = (unsigned char*)mp->fbuf;
1446		e = b + mp->fbsz;
1447		n = 0;
1448		for (;;)
1449		{
1450			c = *b++;
1451			q = 0;
1452			while (c & 0x80)
1453			{
1454				c <<= 1;
1455				q++;
1456			}
1457			switch (q)
1458			{
1459			case 4:
1460				if (b < e && (*b++ & 0xc0) != 0x80)
1461					break;
1462			case 3:
1463				if (b < e && (*b++ & 0xc0) != 0x80)
1464					break;
1465			case 2:
1466				if (b < e && (*b++ & 0xc0) != 0x80)
1467					break;
1468				n = 1;
1469			case 0:
1470				if (b >= e)
1471				{
1472					if (n)
1473					{
1474						flags &= ~(CC_binary|CC_notext);
1475						flags |= CC_utf_8;
1476					}
1477					break;
1478				}
1479				continue;
1480			}
1481			break;
1482		}
1483	}
1484	if (flags & (CC_binary|CC_notext))
1485	{
1486		unsigned long	d = 0;
1487
1488		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1489		{
1490			/*
1491			 * compression/encryption via standard deviation
1492			 */
1493
1494
1495			for (c = 0; c < UCHAR_MAX; c++)
1496			{
1497				pun = mp->count[c] - q;
1498				d += pun * pun;
1499			}
1500			d /= mp->fbsz;
1501		}
1502		if (d <= 0)
1503			s = T("binary");
1504		else if (d < 4)
1505			s = T("encrypted");
1506		else if (d < 16)
1507			s = T("packed");
1508		else if (d < 64)
1509			s = T("compressed");
1510		else if (d < 256)
1511			s = T("delta");
1512		else
1513			s = T("data");
1514		mp->mime = "application/octet-stream";
1515		return s;
1516	}
1517	mp->mime = "text/plain";
1518	if (flags & CC_utf_8)
1519		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1520	else if (flags & CC_latin)
1521		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1522	else
1523		s = (flags & CC_control) ? T("text with control characters") : T("text");
1524 qualify:
1525	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1526	{
1527		t = "dos ";
1528		mp->mime = "text/dos";
1529	}
1530	else
1531		t = "";
1532	if (code)
1533	{
1534		if (code == CC_ASCII)
1535			sfsprintf(buf, end - buf, "ascii %s%s", t, s);
1536		else
1537		{
1538			sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
1539			mp->mime = "text/ebcdic";
1540		}
1541		s = buf;
1542	}
1543	else if (*t)
1544	{
1545		sfsprintf(buf, end - buf, "%s%s", t, s);
1546		s = buf;
1547	}
1548	return s;
1549}
1550
1551/*
1552 * return the basic magic string for file,st in buf,size
1553 */
1554
1555static char*
1556type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
1557{
1558	register char*	s;
1559	register char*	t;
1560
1561	mp->mime = 0;
1562	if (!S_ISREG(st->st_mode))
1563	{
1564		if (S_ISDIR(st->st_mode))
1565		{
1566			mp->mime = "x-system/dir";
1567			return T("directory");
1568		}
1569		if (S_ISLNK(st->st_mode))
1570		{
1571			mp->mime = "x-system/lnk";
1572			s = buf;
1573			s += sfsprintf(s, end - s, T("symbolic link to "));
1574			if (pathgetlink(file, s, end - s) < 0)
1575				return T("cannot read symbolic link text");
1576			return buf;
1577		}
1578		if (S_ISBLK(st->st_mode))
1579		{
1580			mp->mime = "x-system/blk";
1581			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1582			return buf;
1583		}
1584		if (S_ISCHR(st->st_mode))
1585		{
1586			mp->mime = "x-system/chr";
1587			sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
1588			return buf;
1589		}
1590		if (S_ISFIFO(st->st_mode))
1591		{
1592			mp->mime = "x-system/fifo";
1593			return "fifo";
1594		}
1595#ifdef S_ISSOCK
1596		if (S_ISSOCK(st->st_mode))
1597		{
1598			mp->mime = "x-system/sock";
1599			return "socket";
1600		}
1601#endif
1602	}
1603	if (!(mp->fbmx = st->st_size))
1604		s = T("empty");
1605	else if (!mp->fp)
1606		s = T("cannot read");
1607	else
1608	{
1609		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1610		if (mp->fbsz < 0)
1611			s = fmterror(errno);
1612		else if (mp->fbsz == 0)
1613			s = T("empty");
1614		else
1615		{
1616			mp->fbuf[mp->fbsz] = 0;
1617			mp->xoff = 0;
1618			mp->xbsz = 0;
1619			if (!(s = ckmagic(mp, file, buf, end, st, 0)))
1620				s = cklang(mp, file, buf, end, st);
1621		}
1622	}
1623	if (!mp->mime)
1624		mp->mime = "application/unknown";
1625	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1626	{
1627		register char*	b;
1628		register char*	be;
1629		register char*	m;
1630		register char*	me;
1631
1632		b = mp->mime;
1633		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1634		while (m < me && b < t)
1635			*m++ = *b++;
1636		b = t = s;
1637		for (;;)
1638		{
1639			if (!(be = strchr(t, ' ')))
1640			{
1641				be = b + strlen(b);
1642				break;
1643			}
1644			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1645				break;
1646			b = t;
1647			t = be + 1;
1648		}
1649		while (m < me && b < be)
1650			if ((*m++ = *b++) == ' ')
1651				*(m - 1) = '-';
1652		*m = 0;
1653	}
1654	return s;
1655}
1656
1657/*
1658 * low level for magicload()
1659 */
1660
1661static int
1662load(register Magic_t* mp, char* file, register Sfio_t* fp)
1663{
1664	register Entry_t*	ep;
1665	register char*		p;
1666	register char*		p2;
1667	char*			p3;
1668	char*			next;
1669	int			n;
1670	int			lge;
1671	int			lev;
1672	int			ent;
1673	int			old;
1674	int			cont;
1675	Info_t*			ip;
1676	Entry_t*		ret;
1677	Entry_t*		first;
1678	Entry_t*		last = 0;
1679	Entry_t*		fun['z' - 'a' + 1];
1680
1681	memzero(fun, sizeof(fun));
1682	cont = '$';
1683	ent = 0;
1684	lev = 0;
1685	old = 0;
1686	ret = 0;
1687	error_info.file = file;
1688	error_info.line = 0;
1689	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1690	while (p = sfgetr(fp, '\n', 1))
1691	{
1692		error_info.line++;
1693		for (; isspace(*p); p++);
1694
1695		/*
1696		 * nesting
1697		 */
1698
1699		switch (*p)
1700		{
1701		case 0:
1702		case '#':
1703			cont = '#';
1704			continue;
1705		case '{':
1706			if (++lev < MAXNEST)
1707				ep->nest = *p;
1708			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1709				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1710			continue;
1711		case '}':
1712			if (!last || lev <= 0)
1713			{
1714				if (mp->disc->errorf)
1715					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1716			}
1717			else if (lev-- == ent)
1718			{
1719				ent = 0;
1720				ep->cont = ':';
1721				ep->offset = ret->offset;
1722				ep->nest = ' ';
1723				ep->type = ' ';
1724				ep->op = ' ';
1725				ep->desc = "[RETURN]";
1726				last = ep;
1727				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1728				ret = 0;
1729			}
1730			else
1731				last->nest = *p;
1732			continue;
1733		default:
1734			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1735			{
1736				n = *p++;
1737				if (n >= 'a' && n <= 'z')
1738					n -= 'a';
1739				else
1740				{
1741					if (mp->disc->errorf)
1742						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1743					n = 0;
1744				}
1745				if (ret && mp->disc->errorf)
1746					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1747				if (*p == '{')
1748				{
1749					ent = ++lev;
1750					ret = ep;
1751					ep->desc = "[FUNCTION]";
1752				}
1753				else
1754				{
1755					if (*(p + 1) != ')' && mp->disc->errorf)
1756						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1757					ep->desc = "[CALL]";
1758				}
1759				ep->cont = cont;
1760				ep->offset = n;
1761				ep->nest = ' ';
1762				ep->type = ' ';
1763				ep->op = ' ';
1764				last = ep;
1765				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1766				if (ret)
1767					fun[n] = last->value.lab = ep;
1768				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1769					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1770				continue;
1771			}
1772			if (!ep->nest)
1773				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1774			break;
1775		}
1776
1777		/*
1778		 * continuation
1779		 */
1780
1781		cont = '$';
1782		switch (*p)
1783		{
1784		case '>':
1785			old = 1;
1786			if (*(p + 1) == *p)
1787			{
1788				/*
1789				 * old style nesting push
1790				 */
1791
1792				p++;
1793				old = 2;
1794				if (!lev && last)
1795				{
1796					lev = 1;
1797					last->nest = '{';
1798					if (last->cont == '>')
1799						last->cont = '&';
1800					ep->nest = '1';
1801				}
1802			}
1803			/*FALLTHROUGH*/
1804		case '+':
1805		case '&':
1806		case '|':
1807			ep->cont = *p++;
1808			break;
1809		default:
1810			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1811				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1812			/*FALLTHROUGH*/
1813		case '*':
1814		case '0': case '1': case '2': case '3': case '4':
1815		case '5': case '6': case '7': case '8': case '9':
1816			ep->cont = (lev > 0) ? '&' : '#';
1817			break;
1818		}
1819		switch (old)
1820		{
1821		case 1:
1822			old = 0;
1823			if (lev)
1824			{
1825				/*
1826				 * old style nesting pop
1827				 */
1828
1829				lev = 0;
1830				if (last)
1831					last->nest = '}';
1832				ep->nest = ' ';
1833				if (ep->cont == '&')
1834					ep->cont = '#';
1835			}
1836			break;
1837		case 2:
1838			old = 1;
1839			break;
1840		}
1841		if (isdigit(*p))
1842		{
1843			/*
1844			 * absolute offset
1845			 */
1846
1847			ep->offset = strton(p, &next, NiL, 0);
1848			p2 = next;
1849		}
1850		else
1851		{
1852			for (p2 = p; *p2 && !isspace(*p2); p2++);
1853			if (!*p2)
1854			{
1855				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1856					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1857				continue;
1858			}
1859
1860			/*
1861			 * offset expression
1862			 */
1863
1864			*p2++ = 0;
1865			ep->expr = vmstrdup(mp->vm, p);
1866			if (isalpha(*p))
1867				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1868			else if (*p == '(' && ep->cont == '>')
1869			{
1870				/*
1871				 * convert old style indirection to @
1872				 */
1873
1874				p = ep->expr + 1;
1875				for (;;)
1876				{
1877					switch (*p++)
1878					{
1879					case 0:
1880					case '@':
1881					case '(':
1882						break;
1883					case ')':
1884						break;
1885					default:
1886						continue;
1887					}
1888					break;
1889				}
1890				if (*--p == ')')
1891				{
1892					*p = 0;
1893					*ep->expr = '@';
1894				}
1895			}
1896		}
1897		for (; isspace(*p2); p2++);
1898		for (p = p2; *p2 && !isspace(*p2); p2++);
1899		if (!*p2)
1900		{
1901			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1902				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1903			continue;
1904		}
1905		*p2++ = 0;
1906
1907		/*
1908		 * type
1909		 */
1910
1911		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1912		{
1913			ep->swap = ~(*p == 'l' ? 7 : 0);
1914			p += 2;
1915		}
1916		if (*p == 's')
1917		{
1918			if (*(p + 1) == 'h')
1919				ep->type = 'h';
1920			else
1921				ep->type = 's';
1922		}
1923		else if (*p == 'a')
1924			ep->type = 's';
1925		else
1926			ep->type = *p;
1927		if (p = strchr(p, '&'))
1928		{
1929			/*
1930			 * old style mask
1931			 */
1932
1933			ep->mask = strton(++p, NiL, NiL, 0);
1934		}
1935		for (; isspace(*p2); p2++);
1936		if (ep->mask)
1937			*--p2 = '=';
1938
1939		/*
1940		 * comparison operation
1941		 */
1942
1943		p = p2;
1944		if (p2 = strchr(p, '\t'))
1945			*p2++ = 0;
1946		else
1947		{
1948			int	qe = 0;
1949			int	qn = 0;
1950
1951			/*
1952			 * assume balanced {}[]()\\""'' field
1953			 */
1954
1955			for (p2 = p;;)
1956			{
1957				switch (n = *p2++)
1958				{
1959				case 0:
1960					break;
1961				case '{':
1962					if (!qe)
1963						qe = '}';
1964					if (qe == '}')
1965						qn++;
1966					continue;
1967				case '(':
1968					if (!qe)
1969						qe = ')';
1970					if (qe == ')')
1971						qn++;
1972					continue;
1973				case '[':
1974					if (!qe)
1975						qe = ']';
1976					if (qe == ']')
1977						qn++;
1978					continue;
1979				case '}':
1980				case ')':
1981				case ']':
1982					if (qe == n && qn > 0)
1983						qn--;
1984					continue;
1985				case '"':
1986				case '\'':
1987					if (!qe)
1988						qe = n;
1989					else if (qe == n)
1990						qe = 0;
1991					continue;
1992				case '\\':
1993					if (*p2)
1994						p2++;
1995					continue;
1996				default:
1997					if (!qe && isspace(n))
1998						break;
1999					continue;
2000				}
2001				if (n)
2002					*(p2 - 1) = 0;
2003				else
2004					p2--;
2005				break;
2006			}
2007		}
2008		lge = 0;
2009		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
2010			ep->op = '=';
2011		else
2012		{
2013			if (*p == '&')
2014			{
2015				ep->mask = strton(++p, &next, NiL, 0);
2016				p = next;
2017			}
2018			switch (*p)
2019			{
2020			case '=':
2021			case '>':
2022			case '<':
2023			case '*':
2024				ep->op = *p++;
2025				if (*p == '=')
2026				{
2027					p++;
2028					switch (ep->op)
2029					{
2030					case '>':
2031						lge = -1;
2032						break;
2033					case '<':
2034						lge = 1;
2035						break;
2036					}
2037				}
2038				break;
2039			case '!':
2040			case '@':
2041				ep->op = *p++;
2042				if (*p == '=')
2043					p++;
2044				break;
2045			case 'x':
2046				p++;
2047				ep->op = '*';
2048				break;
2049			default:
2050				ep->op = '=';
2051				if (ep->mask)
2052					ep->value.num = ep->mask;
2053				break;
2054			}
2055		}
2056		if (ep->op != '*' && !ep->value.num)
2057		{
2058			if (ep->type == 'e')
2059			{
2060				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2061				{
2062					ep->value.sub->re_disc = &mp->redisc;
2063					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2064					{
2065						p += ep->value.sub->re_npat;
2066						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2067							p += ep->value.sub->re_npat;
2068					}
2069					if (n)
2070					{
2071						regmessage(mp, ep->value.sub, n);
2072						ep->value.sub = 0;
2073					}
2074					else if (*p && mp->disc->errorf)
2075						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2076				}
2077			}
2078			else if (ep->type == 'm')
2079			{
2080				ep->mask = stresc(p) + 1;
2081				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2082				memcpy(ep->value.str, p, ep->mask);
2083				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2084					ep->value.str[ep->mask - 1] = '*';
2085			}
2086			else if (ep->type == 's')
2087			{
2088				ep->mask = stresc(p);
2089				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2090				memcpy(ep->value.str, p, ep->mask);
2091			}
2092			else if (*p == '\'')
2093			{
2094				stresc(p);
2095				ep->value.num = *(unsigned char*)(p + 1) + lge;
2096			}
2097			else if (strmatch(p, "+([a-z])\\(*\\)"))
2098			{
2099				char*	t;
2100
2101				t = p;
2102				ep->type = 'V';
2103				ep->op = *p;
2104				while (*p && *p++ != '(');
2105				switch (ep->op)
2106				{
2107				case 'l':
2108					n = *p++;
2109					if (n < 'a' || n > 'z')
2110					{
2111						if (mp->disc->errorf)
2112							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2113					}
2114					else if (!fun[n -= 'a'])
2115					{
2116						if (mp->disc->errorf)
2117							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2118					}
2119					else
2120					{
2121						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2122						ep->value.loop->lab = fun[n];
2123						while (*p && *p++ != ',');
2124						ep->value.loop->start = strton(p, &t, NiL, 0);
2125						while (*t && *t++ != ',');
2126						ep->value.loop->size = strton(t, &t, NiL, 0);
2127					}
2128					break;
2129				case 'm':
2130				case 'r':
2131					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2132					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2133					break;
2134				case 'v':
2135					break;
2136				default:
2137					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2138						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2139					break;
2140				}
2141			}
2142			else
2143			{
2144				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2145				if (ep->op == '@')
2146					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2147			}
2148		}
2149
2150		/*
2151		 * file description
2152		 */
2153
2154		if (p2)
2155		{
2156			for (; isspace(*p2); p2++);
2157			if (p = strchr(p2, '\t'))
2158			{
2159				/*
2160				 * check for message catalog index
2161				 */
2162
2163				*p++ = 0;
2164				if (isalpha(*p2))
2165				{
2166					for (p3 = p2; isalnum(*p3); p3++);
2167					if (*p3++ == ':')
2168					{
2169						for (; isdigit(*p3); p3++);
2170						if (!*p3)
2171						{
2172							for (p2 = p; isspace(*p2); p2++);
2173							if (p = strchr(p2, '\t'))
2174								*p++ = 0;
2175						}
2176					}
2177				}
2178			}
2179			stresc(p2);
2180			ep->desc = vmstrdup(mp->vm, p2);
2181			if (p)
2182			{
2183				for (; isspace(*p); p++);
2184				if (*p)
2185					ep->mime = vmstrdup(mp->vm, p);
2186			}
2187		}
2188		else
2189			ep->desc = "";
2190
2191		/*
2192		 * get next entry
2193		 */
2194
2195		last = ep;
2196		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2197	}
2198	if (last)
2199	{
2200		last->next = 0;
2201		if (mp->magiclast)
2202			mp->magiclast->next = first;
2203		else
2204			mp->magic = first;
2205		mp->magiclast = last;
2206	}
2207	vmfree(mp->vm, ep);
2208	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2209	{
2210		if (lev < 0)
2211			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2212		else if (lev > 0)
2213			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2214		if (ret)
2215			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2216	}
2217	error_info.file = 0;
2218	error_info.line = 0;
2219	return 0;
2220}
2221
2222/*
2223 * load a magic file into mp
2224 */
2225
2226int
2227magicload(register Magic_t* mp, const char* file, unsigned long flags)
2228{
2229	register char*		s;
2230	register char*		e;
2231	register char*		t;
2232	int			n;
2233	int			found;
2234	int			list;
2235	Sfio_t*			fp;
2236
2237	mp->flags = mp->disc->flags | flags;
2238	found = 0;
2239	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2240	{
2241		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2242			s = MAGIC_FILE;
2243	}
2244	for (;;)
2245	{
2246		if (!list)
2247			e = 0;
2248		else if (e = strchr(s, ':'))
2249		{
2250			/*
2251			 * ok, so ~ won't work for the last list element
2252			 * we do it for MAGIC_FILES_ENV anyway
2253			 */
2254
2255			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2256			{
2257				sfputr(mp->tmp, t, -1);
2258				s += n - 1;
2259			}
2260			sfwrite(mp->tmp, s, e - s);
2261			if (!(s = sfstruse(mp->tmp)))
2262				goto nospace;
2263		}
2264		if (!*s || streq(s, "-"))
2265			s = MAGIC_FILE;
2266		if (!(fp = sfopen(NiL, s, "r")))
2267		{
2268			if (list)
2269			{
2270				if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
2271				{
2272					strcpy(mp->fbuf, s);
2273					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2274					if (!(s = sfstruse(mp->tmp)))
2275						goto nospace;
2276					if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
2277						goto next;
2278				}
2279				if (!(fp = sfopen(NiL, t, "r")))
2280					goto next;
2281			}
2282			else
2283			{
2284				if (mp->disc->errorf)
2285					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2286				return -1;
2287			}
2288		}
2289		found = 1;
2290		n = load(mp, s, fp);
2291		sfclose(fp);
2292		if (n && !list)
2293			return -1;
2294	next:
2295		if (!e)
2296			break;
2297		s = e + 1;
2298	}
2299	if (!found)
2300	{
2301		if (mp->flags & MAGIC_VERBOSE)
2302		{
2303			if (mp->disc->errorf)
2304				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2305		}
2306		return -1;
2307	}
2308	return 0;
2309 nospace:
2310	if (mp->disc->errorf)
2311		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2312	return -1;
2313}
2314
2315/*
2316 * open a magic session
2317 */
2318
2319Magic_t*
2320magicopen(Magicdisc_t* disc)
2321{
2322	register Magic_t*	mp;
2323	register int		i;
2324	register int		n;
2325	register int		f;
2326	register int		c;
2327	register Vmalloc_t*	vm;
2328	unsigned char*		map[CC_MAPS + 1];
2329
2330	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2331		return 0;
2332	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2333	{
2334		vmclose(vm);
2335		return 0;
2336	}
2337	mp->id = lib;
2338	mp->disc = disc;
2339	mp->vm = vm;
2340	mp->flags = disc->flags;
2341	mp->redisc.re_version = REG_VERSION;
2342	mp->redisc.re_flags = REG_NOFREE;
2343	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2344	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2345	mp->redisc.re_resizehandle = (void*)mp->vm;
2346	mp->dtdisc.key = offsetof(Info_t, name);
2347	mp->dtdisc.link = offsetof(Info_t, link);
2348	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
2349		goto bad;
2350	for (n = 0; n < elementsof(info); n++)
2351		dtinsert(mp->infotab, &info[n]);
2352	for (i = 0; i < CC_MAPS; i++)
2353		map[i] = ccmap(i, CC_ASCII);
2354	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2355	for (n = 0; n <= UCHAR_MAX; n++)
2356	{
2357		f = 0;
2358		i = CC_MAPS;
2359		while (--i >= 0)
2360		{
2361			c = ccmapchr(map[i], n);
2362			f = (f << CC_BIT) | CCTYPE(c);
2363		}
2364		mp->cctype[n] = f;
2365	}
2366	return mp;
2367 bad:
2368	magicclose(mp);
2369	return 0;
2370}
2371
2372/*
2373 * close a magicopen() session
2374 */
2375
2376int
2377magicclose(register Magic_t* mp)
2378{
2379	if (!mp)
2380		return -1;
2381	if (mp->tmp)
2382		sfstrclose(mp->tmp);
2383	if (mp->vm)
2384		vmclose(mp->vm);
2385	return 0;
2386}
2387
2388/*
2389 * return the magic string for file with optional stat info st
2390 */
2391
2392char*
2393magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2394{
2395	off_t	off;
2396	char*	s;
2397
2398	mp->flags = mp->disc->flags;
2399	mp->mime = 0;
2400	if (!st)
2401		s = T("cannot stat");
2402	else
2403	{
2404		if (mp->fp = fp)
2405			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2406		s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
2407		if (mp->fp)
2408			sfseek(mp->fp, off, SEEK_SET);
2409		if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
2410		{
2411			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2412				sfprintf(mp->tmp, "%s ", T("short"));
2413			sfprintf(mp->tmp, "%s", s);
2414			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2415				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2416			if (st->st_mode & S_ISUID)
2417				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2418			if (st->st_mode & S_ISGID)
2419				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2420			if (st->st_mode & S_ISVTX)
2421				sfprintf(mp->tmp, ", sticky");
2422			if (!(s = sfstruse(mp->tmp)))
2423				s = T("out of space");
2424		}
2425	}
2426	if (mp->flags & MAGIC_MIME)
2427		s = mp->mime;
2428	if (!s)
2429		s = T("error");
2430	return s;
2431}
2432
2433/*
2434 * list the magic table in mp on sp
2435 */
2436
2437int
2438magiclist(register Magic_t* mp, register Sfio_t* sp)
2439{
2440	register Entry_t*	ep = mp->magic;
2441	register Entry_t*	rp = 0;
2442
2443	mp->flags = mp->disc->flags;
2444	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2445	while (ep)
2446	{
2447		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2448		if (ep->expr)
2449			sfprintf(sp, "%s", ep->expr);
2450		else
2451			sfprintf(sp, "%ld", ep->offset);
2452		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2453		switch (ep->type)
2454		{
2455		case 'm':
2456		case 's':
2457			sfputr(sp, fmtesc(ep->value.str), -1);
2458			break;
2459		case 'V':
2460			switch (ep->op)
2461			{
2462			case 'l':
2463				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2464				break;
2465			case 'v':
2466				sfprintf(sp, "vcodex()");
2467				break;
2468			default:
2469				sfprintf(sp, "%p", ep->value.str);
2470				break;
2471			}
2472			break;
2473		default:
2474			sfprintf(sp, "%lo", ep->value.num);
2475			break;
2476		}
2477		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2478		if (ep->cont == '$' && !ep->value.lab->mask)
2479		{
2480			rp = ep;
2481			ep = ep->value.lab;
2482		}
2483		else
2484		{
2485			if (ep->cont == ':')
2486			{
2487				ep = rp;
2488				ep->value.lab->mask = 1;
2489			}
2490			ep = ep->next;
2491		}
2492	}
2493	return 0;
2494}
2495