1/***********************************************************************
2*                                                                      *
3*               This software is part of the ast package               *
4*          Copyright (c) 1985-2011 AT&T Intellectual Property          *
5*                      and is licensed under the                       *
6*                 Eclipse Public License, Version 1.0                  *
7*                    by AT&T Intellectual Property                     *
8*                                                                      *
9*                A copy of the License is available at                 *
10*          http://www.eclipse.org/org/documents/epl-v10.html           *
11*         (with md5 checksum b35adb5213ca9657e911e9befb180842)         *
12*                                                                      *
13*              Information and Software Systems Research               *
14*                            AT&T Research                             *
15*                           Florham Park NJ                            *
16*                                                                      *
17*                 Glenn Fowler <gsf@research.att.com>                  *
18*                  David Korn <dgk@research.att.com>                   *
19*                   Phong Vo <kpv@research.att.com>                    *
20*                                                                      *
21***********************************************************************/
22#pragma prototyped
23/*
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * library interface to file
28 *
29 * the sum of the hacks {s5,v10,planix} is _____ than the parts
30 */
31
/*
 * library identification string; the "@(#)" and "$Id: ... $" markers
 * are embedded in the string so they end up in the object file
 */
32static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n";
33
/*
 * library name passed to ERROR_translate() by the T() macro
 */
34static const char lib[] = "libast:magic";
35
36#include <ast.h>
37#include <ctype.h>
38#include <ccode.h>
39#include <dt.h>
40#include <modex.h>
41#include <error.h>
42#include <regex.h>
43#include <swap.h>
44
/*
 * T(m): pass the non-empty message m through ERROR_translate() using
 * the lib[] name; an empty message is returned unchanged
 * NOTE: m is expanded unparenthesized and evaluated more than once
 */
45#define T(m)		(*m?ERROR_translate(NiL,NiL,lib,m):m)
46
/*
 * match(s,p): strgrpmatch() of string s against pattern p with the
 * STR_LEFT|STR_RIGHT|STR_ICASE flags and no sub-match capture
 * (presumably anchored at both ends and case insensitive -- confirm
 * against the strgrpmatch documentation)
 */
47#define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)
48
49#define MAXNEST		10		/* { ... } nesting limit	*/
50#define MINITEM		4		/* magic buffer rounding	*/
51
/*
 * keyword table entry: maps a name to an integer code
 * the dict[] and info[] tables below are arrays of these, initialized
 * positionally as { name, value }; entries are inserted into a Dt_t
 * dictionary, presumably chained through the embedded link member
 */
52typedef struct				/* identifier dictionary entry	*/
53{
54	const char	name[16];	/* identifier name		*/
55	int		value;		/* identifier value		*/
56	Dtlink_t	link;		/* dictionary link		*/
57} Info_t;
58
/*
 * singly linked list node holding one compiled regular expression
 * (not referenced by the functions in this part of the file)
 */
59typedef struct Edit			/* edit substitution		*/
60{
61	struct Edit*	next;		/* next in list			*/
62	regex_t*	from;		/* from pattern			*/
63} Edit_t;
64
/* forward declaration so that Loop_t can point at an entry */
65struct Entry;
66
/*
 * state for an entry whose op is 'l' (see ckmagic()): the entry at
 * lab is called repeatedly; on the first pass count is set from the
 * entry's datum, the caller's offset is saved in offset and the
 * working offset becomes start; each later pass decrements count
 * and advances the working offset by size; when count reaches 0
 * the saved offset is restored
 */
67typedef struct				/* loop info			*/
68{
69	struct Entry*	lab;		/* call this function		*/
70	int		start;		/* start here			*/
71	int		size;		/* increment by this amount	*/
72	int		count;		/* dynamic loop count		*/
73	int		offset;		/* dynamic offset		*/
74} Loop_t;
75
/*
 * one parsed magic table entry; ckmagic() walks these via next
 *
 * the value union member in use depends on the entry, as read by
 * ckmagic():
 *	num	numeric comparisons (op = @ ! ^ > <)
 *	str	string types (s m M S)
 *	lab	target of a '$' continuation (function call)
 *	sub	regex types (e E)
 *	loop	op 'l'
 *
 * when expr is set, offset selects how it is used: 0 evaluates expr
 * with strexpr(), an INFO_* code selects a stat/name datum instead
 *
 * cont values tested by ckmagic(): '#' '$' ':' '|' '&'
 * nest values tested by ckmagic(): '{' '}'
 */
76typedef struct Entry			/* magic file entry		*/
77{
78	struct Entry*	next;		/* next in list			*/
79	char*		expr;		/* offset expression		*/
80	union
81	{
82	unsigned long	num;
83	char*		str;
84	struct Entry*	lab;
85	regex_t*	sub;
86	Loop_t*		loop;
87	}		value;		/* comparison value		*/
88	char*		desc;		/* file description		*/
89	char*		mime;		/* file mime type		*/
90	unsigned long	offset;		/* offset in bytes		*/
91	unsigned long	mask;		/* mask before compare		*/
92	char		cont;		/* continuation operation	*/
93	char		type;		/* datum type			*/
94	char		op;		/* comparison operation		*/
95	char		nest;		/* { or } nesting operation	*/
96	char		swap;		/* forced swap order		*/
97} Entry_t;
98
/*
 * character classification
 *
 * a Cctype_t holds one CC_BIT wide group of CC_* flags per code set
 * map (CC_MAPS groups); cklang() examines the groups one at a time
 * by shifting right CC_BIT bits per map
 */
99#define CC_BIT		5
100
/* pick the narrowest type that holds CC_MAPS groups of CC_BIT bits */
101#if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
102typedef unsigned short Cctype_t;
103#else
104typedef unsigned long Cctype_t;
105#endif
106
/* per code set flag bits */
107#define CC_text		0x01
108#define CC_control	0x02
109#define CC_latin	0x04
110#define CC_binary	0x08
111#define CC_utf_8	0x10
112
113#define CC_notext	CC_text		/* CC_text is flipped before checking */
114
/* flags compared by cklang() when choosing a code set (CC_utf_8 excluded) */
115#define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)
116
/*
 * classify character code c:
 *	above 0240			CC_binary
 *	0200 .. 0240			CC_latin
 *	below 040, except 007 011 012 013 015	CC_control
 *	anything else			CC_text
 */
117#define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
118
/*
 * identifier classes: the value member of the dict[] entries and the
 * index into the Magic_t identifier[] counters (sized ID_MAX + 1)
 */
119#define ID_NONE		0
120#define ID_ASM		1
121#define ID_C		2
122#define ID_COBOL	3
123#define ID_COPYBOOK	4
124#define ID_CPLUSPLUS	5
125#define ID_FORTRAN	6
126#define ID_HTML		7
127#define ID_INCL1	8
128#define ID_INCL2	9
129#define ID_INCL3	10
130#define ID_MAM1		11
131#define ID_MAM2		12
132#define ID_MAM3		13
133#define ID_NOTEXT	14
134#define ID_PL1		15
135#define ID_YACC		16
136
/* largest ID_* value; must track the last entry above */
137#define ID_MAX		ID_YACC
138
/*
 * info keyword codes: the value member of the info[] entries;
 * ckmagic() switches on these (stored in the entry offset) to pick a
 * stat or file name datum instead of file data
 */
139#define INFO_atime	1
140#define INFO_blocks	2
141#define INFO_ctime	3
142#define INFO_fstype	4
143#define INFO_gid	5
144#define INFO_mode	6
145#define INFO_mtime	7
146#define INFO_name	8
147#define INFO_nlink	9
148#define INFO_size	10
149#define INFO_uid	11
150
/*
 * private Magic_t members; defined before <magic.h> is included,
 * which presumably expands it inside the Magic_t declaration
 * (confirm in magic.h)
 *
 * the keep/cap/msg/ret arrays are the per nesting level and per call
 * stacks used by ckmagic(), all bounded by MAXNEST
 */
151#define _MAGIC_PRIVATE_ \
152	Magicdisc_t*	disc;			/* discipline		*/ \
153	Vmalloc_t*	vm;			/* vmalloc region	*/ \
154	Entry_t*	magic;			/* parsed magic table	*/ \
155	Entry_t*	magiclast;		/* last entry in magic	*/ \
156	char*		mime;			/* MIME type		*/ \
157	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
158	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
159	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
160	char		nbuf[256];		/* !CC_NATIVE data	*/ \
161	char		mbuf[64];		/* mime string		*/ \
162	char		sbuf[64];		/* type suffix string	*/ \
163	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
164	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
165	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
166	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
167	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
168	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
169	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
170	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
171	int		fbsz;			/* fbuf size		*/ \
172	int		fbmx;			/* fbuf max size	*/ \
173	int		xbsz;			/* xbuf size		*/ \
174	int		swap;			/* swap() operation	*/ \
175	unsigned long	flags;			/* disc+open flags	*/ \
176	long		xoff;			/* xbuf offset		*/ \
177	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
178	Sfio_t*		fp;			/* fbuf fp		*/ \
179	Sfio_t*		tmp;			/* tmp string		*/ \
180	regdisc_t	redisc;			/* regex discipline	*/ \
181	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
182	Dt_t*		idtab;			/* identifier dict	*/ \
183	Dt_t*		infotab;		/* info keyword dict	*/
184
185#include <magic.h>
186
/*
 * identifier keyword => ID_* class
 *
 * cklang() inserts every entry into mp->idtab the first time it needs
 * the dictionary and then bumps mp->identifier[value] for each
 * identifier in the file data that matches a name here
 * names must fit the 16 byte Info_t name member, NUL included
 */
187static Info_t		dict[] =		/* keyword dictionary	*/
188{
189	{ 	"COMMON",	ID_FORTRAN	},
190	{ 	"COMPUTE",	ID_COBOL	},
191	{ 	"COMP",		ID_COPYBOOK	},
192	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
193	{ 	"DCL",		ID_PL1		},
194	{ 	"DEFINED",	ID_PL1		},
195	{ 	"DIMENSION",	ID_FORTRAN	},
196	{ 	"DIVISION",	ID_COBOL	},
197	{ 	"FILLER",	ID_COPYBOOK	},
198	{ 	"FIXED",	ID_PL1		},
199	{ 	"FUNCTION",	ID_FORTRAN	},
200	{ 	"HTML",		ID_HTML		},
201	{ 	"INTEGER",	ID_FORTRAN	},
202	{ 	"MAIN",		ID_PL1		},
203	{ 	"OPTIONS",	ID_PL1		},
204	{ 	"PERFORM",	ID_COBOL	},
205	{ 	"PIC",		ID_COPYBOOK	},
206	{ 	"REAL",		ID_FORTRAN	},
207	{ 	"REDEFINES",	ID_COPYBOOK	},
208	{ 	"S9",		ID_COPYBOOK	},
209	{ 	"SECTION",	ID_COBOL	},
210	{ 	"SELECT",	ID_COBOL	},
211	{ 	"SUBROUTINE",	ID_FORTRAN	},
212	{ 	"TEXT",		ID_ASM		},
213	{ 	"VALUE",	ID_COPYBOOK	},
214	{ 	"attr",		ID_MAM3		},
215	{ 	"binary",	ID_YACC		},
216	{ 	"block",	ID_FORTRAN	},
217	{ 	"bss",		ID_ASM		},
218	{ 	"byte",		ID_ASM		},
219	{ 	"char",		ID_C		},
220	{ 	"class",	ID_CPLUSPLUS	},
221	{ 	"clr",		ID_NOTEXT	},
222	{ 	"comm",		ID_ASM		},
223	{ 	"common",	ID_FORTRAN	},
224	{ 	"data",		ID_ASM		},
225	{ 	"dimension",	ID_FORTRAN	},
226	{ 	"done",		ID_MAM2		},
227	{ 	"double",	ID_C		},
228	{ 	"even",		ID_ASM		},
229	{ 	"exec",		ID_MAM3		},
230	{ 	"extern",	ID_C		},
231	{ 	"float",	ID_C		},
232	{ 	"function",	ID_FORTRAN	},
233	{ 	"globl",	ID_ASM		},
234	{ 	"h",		ID_INCL3	},
235	{ 	"html",		ID_HTML		},
236	{ 	"include",	ID_INCL1	},
237	{ 	"int",		ID_C		},
238	{ 	"integer",	ID_FORTRAN	},
239	{ 	"jmp",		ID_NOTEXT	},
240	{ 	"left",		ID_YACC		},
241	{ 	"libc",		ID_INCL2	},
242	{ 	"long",		ID_C		},
243	{ 	"make",		ID_MAM1		},
244	{ 	"mov",		ID_NOTEXT	},
245	{ 	"private",	ID_CPLUSPLUS	},
246	{ 	"public",	ID_CPLUSPLUS	},
247	{ 	"real",		ID_FORTRAN	},
248	{ 	"register",	ID_C		},
249	{ 	"right",	ID_YACC		},
250	{ 	"sfio",		ID_INCL2	},
251	{ 	"static",	ID_C		},
252	{ 	"stdio",	ID_INCL2	},
253	{ 	"struct",	ID_C		},
254	{ 	"subroutine",	ID_FORTRAN	},
255	{ 	"sys",		ID_NOTEXT	},
256	{ 	"term",		ID_YACC		},
257	{ 	"text",		ID_ASM		},
258	{ 	"tst",		ID_NOTEXT	},
259	{ 	"type",		ID_YACC		},
260	{ 	"typedef",	ID_C		},
261	{ 	"u",		ID_INCL2	},
262	{ 	"union",	ID_YACC		},
263	{ 	"void",		ID_C		},
264};
265
/*
 * info keyword => INFO_* code
 *
 * ckmagic() switches on these codes to select a stat or file name
 * datum; presumably this table is loaded into mp->infotab by code
 * outside this part of the file -- confirm
 */
266static Info_t		info[] =
267{
268	{	"atime",	INFO_atime		},
269	{	"blocks",	INFO_blocks		},
270	{	"ctime",	INFO_ctime		},
271	{	"fstype",	INFO_fstype		},
272	{	"gid",		INFO_gid		},
273	{	"mode",		INFO_mode		},
274	{	"mtime",	INFO_mtime		},
275	{	"name",		INFO_name		},
276	{	"nlink",	INFO_nlink		},
277	{	"size",		INFO_size		},
278	{	"uid",		INFO_uid		},
279};
280
281/*
282 * return pointer to data at offset off and size siz
283 */
284
285static char*
286getdata(register Magic_t* mp, register long off, register int siz)
287{
288	register long	n;
289
290	if (off < 0)
291		return 0;
292	if (off + siz <= mp->fbsz)
293		return mp->fbuf + off;
294	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295	{
296		if (off + siz > mp->fbmx)
297			return 0;
298		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299		if (sfseek(mp->fp, n, SEEK_SET) != n)
300			return 0;
301		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302		{
303			mp->xoff = 0;
304			mp->xbsz = 0;
305			return 0;
306		}
307		mp->xbuf[mp->xbsz] = 0;
308		mp->xoff = n;
309		if (off + siz > mp->xoff + mp->xbsz)
310			return 0;
311	}
312	return mp->xbuf + off - mp->xoff;
313}
314
315/*
316 * @... evaluator for strexpr()
317 */
318
319static long
320indirect(const char* cs, char** e, void* handle)
321{
322	register char*		s = (char*)cs;
323	register Magic_t*	mp = (Magic_t*)handle;
324	register long		n = 0;
325	register char*		p;
326
327	if (s)
328	{
329		if (*s == '@')
330		{
331			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332			switch (*(s = *e))
333			{
334			case 'b':
335			case 'B':
336				s++;
337				if (p = getdata(mp, n, 1))
338					n = *(unsigned char*)p;
339				else
340					s = (char*)cs;
341				break;
342			case 'h':
343			case 'H':
344				s++;
345				if (p = getdata(mp, n, 2))
346					n = swapget(mp->swap, p, 2);
347				else
348					s = (char*)cs;
349				break;
350			case 'q':
351			case 'Q':
352				s++;
353				if (p = getdata(mp, n, 8))
354					n = swapget(mp->swap, p, 8);
355				else
356					s = (char*)cs;
357				break;
358			default:
359				if (isalnum(*s))
360					s++;
361				if (p = getdata(mp, n, 4))
362					n = swapget(mp->swap, p, 4);
363				else
364					s = (char*)cs;
365				break;
366			}
367		}
368		*e = s;
369	}
370	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372	return n;
373}
374
375/*
376 * emit regex error message
377 */
378
379static void
380regmessage(Magic_t* mp, regex_t* re, int code)
381{
382	char	buf[128];
383
384	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385	{
386		regerror(code, re, buf, sizeof(buf));
387		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388	}
389}
390
391/*
392 * decompose vcodex(3) method composition
393 */
394
395static char*
396vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397{
398	unsigned char*	map;
399	const char*	o;
400	int		c;
401	int		n;
402	int		i;
403	int		a;
404
405	map = CCMAP(CC_ASCII, CC_NATIVE);
406	a = 0;
407	i = 1;
408	for (;;)
409	{
410		if (i)
411			i = 0;
412		else
413			*b++ = '^';
414		if (m < (x - 1) && !*(m + 1))
415		{
416			/*
417			 * obsolete indices
418			 */
419
420			if (!a)
421			{
422				a = 1;
423				o = "old, ";
424				while (b < e && (c = *o++))
425					*b++ = c;
426			}
427			switch (*m)
428			{
429			case 0:		o = "delta"; break;
430			case 1:		o = "huffman"; break;
431			case 2:		o = "huffgroup"; break;
432			case 3:		o = "arith"; break;
433			case 4:		o = "bwt"; break;
434			case 5:		o = "rle"; break;
435			case 6:		o = "mtf"; break;
436			case 7:		o = "transpose"; break;
437			case 8:		o = "table"; break;
438			case 9:		o = "huffpart"; break;
439			case 50:	o = "map"; break;
440			case 100:	o = "recfm"; break;
441			case 101:	o = "ss7"; break;
442			default:	o = "UNKNOWN"; break;
443			}
444			m += 2;
445			while (b < e && (c = *o++))
446				*b++ = c;
447		}
448		else
449			while (b < e && m < x && (c = *m++))
450			{
451				if (map)
452					c = map[c];
453				*b++ = c;
454			}
455		if (b >= e)
456			break;
457		n = 0;
458		while (m < x)
459		{
460			n = (n<<7) | (*m & 0x7f);
461			if (!(*m++ & 0x80))
462				break;
463		}
464		if (n >= (x - m))
465			break;
466		m += n;
467	}
468	return b;
469}
470
471/*
472 * check for magic table match in buf
473 */
474
475static char*
476ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off)
477{
478	register Entry_t*	ep;
479	register char*		p;
480	register char*		b;
481	register int		level = 0;
482	int			call = -1;
483	int			all = 0;
484	int			c;
485	int			str;
486	char*			q;
487	char*			t;
488	char*			cur;
489	char*			base = 0;
490	unsigned long		num;
491	unsigned long		mask;
492	regmatch_t		matches[10];
493
494	mp->swap = 0;
495	b = mp->msg[0] = cur = buf;
496	mp->mime = mp->cap[0] = 0;
497	mp->keep[0] = 0;
498	for (ep = mp->magic; ep; ep = ep->next)
499	{
500	fun:
501		if (ep->nest == '{')
502		{
503			if (++level >= MAXNEST)
504			{
505				call = -1;
506				level = 0;
507				mp->keep[0] = 0;
508				b = mp->msg[0];
509				mp->mime = mp->cap[0];
510				continue;
511			}
512			mp->keep[level] = mp->keep[level - 1] != 0;
513			mp->msg[level] = b;
514			mp->cap[level] = mp->mime;
515		}
516		switch (ep->cont)
517		{
518		case '#':
519			if (mp->keep[level] && b > cur)
520			{
521				if ((mp->flags & MAGIC_ALL) && b < (end - 3))
522				{
523					all = 1;
524					*b++ = '\n';
525					cur = b;
526					continue;
527				}
528				*b = 0;
529				return buf;
530			}
531			mp->swap = 0;
532			b = mp->msg[0] = cur;
533			mp->mime = mp->cap[0] = 0;
534			if (ep->type == ' ')
535				continue;
536			break;
537		case '$':
538			if (mp->keep[level] && call < (MAXNEST - 1))
539			{
540				mp->ret[++call] = ep;
541				ep = ep->value.lab;
542				goto fun;
543			}
544			continue;
545		case ':':
546			ep = mp->ret[call--];
547			if (ep->op == 'l')
548				goto fun;
549			continue;
550		case '|':
551			if (mp->keep[level] > 1)
552				goto checknest;
553			/*FALLTHROUGH*/
554		default:
555			if (!mp->keep[level])
556			{
557				b = mp->msg[level];
558				mp->mime = mp->cap[level];
559				goto checknest;
560			}
561			break;
562		}
563		p = "";
564		num = 0;
565		if (!ep->expr)
566			num = ep->offset + off;
567		else
568			switch (ep->offset)
569			{
570			case 0:
571				num = strexpr(ep->expr, NiL, indirect, mp) + off;
572				break;
573			case INFO_atime:
574				num = st->st_atime;
575				ep->type = 'D';
576				break;
577			case INFO_blocks:
578				num = iblocks(st);
579				ep->type = 'N';
580				break;
581			case INFO_ctime:
582				num = st->st_ctime;
583				ep->type = 'D';
584				break;
585			case INFO_fstype:
586				p = fmtfs(st);
587				ep->type = toupper(ep->type);
588				break;
589			case INFO_gid:
590				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
591				{
592					p = fmtgid(st->st_gid);
593					ep->type = toupper(ep->type);
594				}
595				else
596				{
597					num = st->st_gid;
598					ep->type = 'N';
599				}
600				break;
601			case INFO_mode:
602				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
603				{
604					p = fmtmode(st->st_mode, 0);
605					ep->type = toupper(ep->type);
606				}
607				else
608				{
609					num = modex(st->st_mode);
610					ep->type = 'N';
611				}
612				break;
613			case INFO_mtime:
614				num = st->st_ctime;
615				ep->type = 'D';
616				break;
617			case INFO_name:
618				if (!base)
619				{
620					if (base = strrchr(file, '/'))
621						base++;
622					else
623						base = (char*)file;
624				}
625				p = base;
626				ep->type = toupper(ep->type);
627				break;
628			case INFO_nlink:
629				num = st->st_nlink;
630				ep->type = 'N';
631				break;
632			case INFO_size:
633				num = st->st_size;
634				ep->type = 'N';
635				break;
636			case INFO_uid:
637				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
638				{
639					p = fmtuid(st->st_uid);
640					ep->type = toupper(ep->type);
641				}
642				else
643				{
644					num = st->st_uid;
645					ep->type = 'N';
646				}
647				break;
648			}
649		switch (ep->type)
650		{
651
652		case 'b':
653			if (!(p = getdata(mp, num, 1)))
654				goto next;
655			num = *(unsigned char*)p;
656			break;
657
658		case 'h':
659			if (!(p = getdata(mp, num, 2)))
660				goto next;
661			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
662			break;
663
664		case 'd':
665		case 'l':
666		case 'v':
667			if (!(p = getdata(mp, num, 4)))
668				goto next;
669			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
670			break;
671
672		case 'q':
673			if (!(p = getdata(mp, num, 8)))
674				goto next;
675			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
676			break;
677
678		case 'e':
679			if (!(p = getdata(mp, num, 0)))
680				goto next;
681			/*FALLTHROUGH*/
682		case 'E':
683			if (!ep->value.sub)
684				goto next;
685			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
686			{
687				c = mp->fbsz;
688				if (c >= sizeof(mp->nbuf))
689					c = sizeof(mp->nbuf) - 1;
690				p = (char*)memcpy(mp->nbuf, p, c);
691				p[c] = 0;
692				ccmapstr(mp->x2n, p, c);
693				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
694				{
695					if (c != REG_NOMATCH)
696						regmessage(mp, ep->value.sub, c);
697					goto next;
698				}
699			}
700			p = ep->value.sub->re_sub->re_buf;
701			q = T(ep->desc);
702			t = *q ? q : p;
703			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
704				*b++ = ' ';
705			b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b'));
706			if (ep->mime)
707				mp->mime = ep->mime;
708			goto checknest;
709
710		case 's':
711			if (!(p = getdata(mp, num, ep->mask)))
712				goto next;
713			goto checkstr;
714		case 'm':
715			if (!(p = getdata(mp, num, 0)))
716				goto next;
717			/*FALLTHROUGH*/
718		case 'M':
719		case 'S':
720		checkstr:
721			for (;;)
722			{
723				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
724					break;
725				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
726					break;
727				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
728					goto next;
729				p = (char*)memcpy(mp->nbuf, p, ep->mask);
730				p[ep->mask] = 0;
731				ccmapstr(mp->x2n, p, ep->mask);
732			}
733			q = T(ep->desc);
734			if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
735				*b++ = ' ';
736			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
737			*t = 0;
738			b += sfsprintf(b, end - b, q + (*q == '\b'), p);
739			*t = c;
740			if (ep->mime)
741				mp->mime = ep->mime;
742			goto checknest;
743
744		}
745		if (mask = ep->mask)
746			num &= mask;
747		switch (ep->op)
748		{
749
750		case '=':
751		case '@':
752			if (num == ep->value.num)
753				break;
754			if (ep->cont != '#')
755				goto next;
756			if (!mask)
757				mask = ~mask;
758			if (ep->type == 'h')
759			{
760				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
761				{
762					if (!(mp->swap & (mp->swap + 1)))
763						mp->swap = 7;
764					goto swapped;
765				}
766			}
767			else if (ep->type == 'l')
768			{
769				for (c = 1; c < 4; c++)
770					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
771					{
772						if (!(mp->swap & (mp->swap + 1)))
773							mp->swap = 7;
774						goto swapped;
775					}
776			}
777			else if (ep->type == 'q')
778			{
779				for (c = 1; c < 8; c++)
780					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
781						goto swapped;
782			}
783			goto next;
784
785		case '!':
786			if (num != ep->value.num)
787				break;
788			goto next;
789
790		case '^':
791			if (num ^ ep->value.num)
792				break;
793			goto next;
794
795		case '>':
796			if (num > ep->value.num)
797				break;
798			goto next;
799
800		case '<':
801			if (num < ep->value.num)
802				break;
803			goto next;
804
805		case 'l':
806			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
807			{
808				if (!ep->value.loop->count)
809				{
810					ep->value.loop->count = num;
811					ep->value.loop->offset = off;
812					off = ep->value.loop->start;
813				}
814				else if (!--ep->value.loop->count)
815				{
816					off = ep->value.loop->offset;
817					goto next;
818				}
819				else
820					off += ep->value.loop->size;
821				mp->ret[++call] = ep;
822				ep = ep->value.loop->lab;
823				goto fun;
824			}
825			goto next;
826
827		case 'm':
828			c = mp->swap;
829			t = ckmagic(mp, file, b + (b > cur), end, st, num);
830			mp->swap = c;
831			if (t)
832			{
833				if (b > cur && b < end)
834					*b = ' ';
835				b += strlen(b);
836			}
837			else if (ep->cont == '&')
838				goto next;
839			break;
840
841		case 'r':
842#if _UWIN
843		{
844			char*			e;
845			Sfio_t*			rp;
846			Sfio_t*			gp;
847
848			if (!(t = strrchr(file, '.')))
849				goto next;
850			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
851			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
852				goto next;
853			*ep->desc = 0;
854			*ep->mime = 0;
855			gp = 0;
856			while (t = sfgetr(rp, '\n', 1))
857			{
858				if (strneq(t, "Content Type=", 13))
859				{
860					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
861					strcpy(ep->mime, t + 13);
862					if (gp)
863						break;
864				}
865				else
866				{
867					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
868					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
869					{
870						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
871						strcpy(ep->desc, t);
872						if (*ep->mime)
873							break;
874					}
875				}
876			}
877			sfclose(rp);
878			if (!gp)
879				goto next;
880			if (!*ep->mime)
881			{
882				t = T(ep->desc);
883				if (!strncasecmp(t, "microsoft", 9))
884					t += 9;
885				while (isspace(*t))
886					t++;
887				e = "application/x-ms-";
888				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
889				e = strcopy(ep->mime, e);
890				while ((c = *t++) && c != '.' && c != ' ')
891					*e++ = isupper(c) ? tolower(c) : c;
892				*e = 0;
893			}
894			while (t = sfgetr(gp, '\n', 1))
895				if (*t && !streq(t, "\"\""))
896				{
897					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
898					strcpy(ep->desc, t);
899					break;
900				}
901			sfclose(gp);
902			if (!*ep->desc)
903				goto next;
904			if (!t)
905				for (t = T(ep->desc); *t; t++)
906					if (*t == '.')
907						*t = ' ';
908			if (!mp->keep[level])
909				mp->keep[level] = 2;
910			mp->mime = ep->mime;
911			break;
912		}
913#else
914			if (ep->cont == '#' && !mp->keep[level])
915				mp->keep[level] = 1;
916			goto next;
917#endif
918
919		case 'v':
920			if (!(p = getdata(mp, num, 4)))
921				goto next;
922			c = 0;
923			do
924			{
925				num++;
926				c = (c<<7) | (*p & 0x7f);
927			} while (*p++ & 0x80);
928			if (!(p = getdata(mp, num, c)))
929				goto next;
930			if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ')
931			{
932				*b++ = ',';
933				*b++ = ' ';
934			}
935			b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
936			goto checknest;
937
938		}
939	swapped:
940		q = T(ep->desc);
941		if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
942			*b++ = ' ';
943		if (*q == '\b')
944			q++;
945		str = 0;
946		for (t = q; *t; t++)
947			if (*t == '%' && (c = *(t + 1)))
948			{
949				if (c == '%')
950					t++;
951				else
952					while (c && c != '%')
953					{
954						if (c == 's')
955						{
956							str = 1;
957							break;
958						}
959						else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X')
960							goto format;
961						t++;
962						c = *(t + 1);
963					}
964			}
965	format:
966		if (!str)
967			b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
968		else if (ep->type == 'd' || ep->type == 'D')
969			b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0);
970		else if (ep->type == 'v')
971			b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0);
972		else
973			b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0);
974		if (ep->mime && *ep->mime)
975			mp->mime = ep->mime;
976	checknest:
977		if (ep->nest == '}')
978		{
979			if (!mp->keep[level])
980			{
981				b = mp->msg[level];
982				mp->mime = mp->cap[level];
983			}
984			else if (level > 0)
985				mp->keep[level - 1] = mp->keep[level];
986			if (--level < 0)
987			{
988				level = 0;
989				mp->keep[0] = 0;
990			}
991		}
992		continue;
993	next:
994		if (ep->cont == '&')
995			mp->keep[level] = 0;
996		goto checknest;
997	}
998	if (all && b-- || mp->keep[level] && b > cur)
999	{
1000		*b = 0;
1001		return buf;
1002	}
1003	return 0;
1004}
1005
1006/*
1007 * check english language stats
1008 */
1009
1010static int
1011ckenglish(register Magic_t* mp, int pun, int badpun)
1012{
1013	register char*	s;
1014	register int	vowl = 0;
1015	register int	freq = 0;
1016	register int	rare = 0;
1017
1018	if (5 * badpun > pun)
1019		return 0;
1020	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
1021		return 0;
1022	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
1023		return 0;
1024	for (s = "aeiou"; *s; s++)
1025		vowl += mp->count[toupper(*s)] + mp->count[*s];
1026	for (s = "etaion"; *s; s++)
1027		freq += mp->count[toupper(*s)] + mp->count[*s];
1028	for (s = "vjkqxz"; *s; s++)
1029		rare += mp->count[toupper(*s)] + mp->count[*s];
1030	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
1031}
1032
1033/*
1034 * check programming language stats
1035 */
1036
1037static char*
1038cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st)
1039{
1040	register int		c;
1041	register unsigned char*	b;
1042	register unsigned char*	e;
1043	register int		q;
1044	register char*		s;
1045	char*			t;
1046	char*			base;
1047	char*			suff;
1048	char*			t1;
1049	char*			t2;
1050	char*			t3;
1051	int			n;
1052	int			badpun;
1053	int			code;
1054	int			pun;
1055	Cctype_t		flags;
1056	Info_t*			ip;
1057
1058	b = (unsigned char*)mp->fbuf;
1059	e = b + mp->fbsz;
1060	memzero(mp->count, sizeof(mp->count));
1061	memzero(mp->multi, sizeof(mp->multi));
1062	memzero(mp->identifier, sizeof(mp->identifier));
1063
1064	/*
1065	 * check character coding
1066	 */
1067
1068	flags = 0;
1069	while (b < e)
1070		flags |= mp->cctype[*b++];
1071	b = (unsigned char*)mp->fbuf;
1072	code = 0;
1073	q = CC_ASCII;
1074	n = CC_MASK;
1075	for (c = 0; c < CC_MAPS; c++)
1076	{
1077		flags ^= CC_text;
1078		if ((flags & CC_MASK) < n)
1079		{
1080			n = flags & CC_MASK;
1081			q = c;
1082		}
1083		flags >>= CC_BIT;
1084	}
1085	flags = n;
1086	if (!(flags & (CC_binary|CC_notext)))
1087	{
1088		if (q != CC_NATIVE)
1089		{
1090			code = q;
1091			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1092		}
1093		if (b[0] == '#' && b[1] == '!')
1094		{
1095			for (b += 2; b < e && isspace(*b); b++);
1096			for (s = (char*)b; b < e && isprint(*b); b++);
1097			c = *b;
1098			*b = 0;
1099			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1100			{
1101				if (t = strrchr(s, '/'))
1102					s = t + 1;
1103				for (t = s; *t; t++)
1104					if (isspace(*t))
1105					{
1106						*t = 0;
1107						break;
1108					}
1109				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1110				mp->mime = mp->mbuf;
1111				if (match(s, "*sh"))
1112				{
1113					t1 = T("command");
1114					if (streq(s, "sh"))
1115						*s = 0;
1116					else
1117					{
1118						*b++ = ' ';
1119						*b = 0;
1120					}
1121				}
1122				else
1123				{
1124					t1 = T("interpreter");
1125					*b++ = ' ';
1126					*b = 0;
1127				}
1128				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1129				s = mp->sbuf;
1130				goto qualify;
1131			}
1132			*b = c;
1133			b = (unsigned char*)mp->fbuf;
1134		}
1135		badpun = 0;
1136		pun = 0;
1137		q = 0;
1138		s = 0;
1139		t = 0;
1140		while (b < e)
1141		{
1142			c = *b++;
1143			mp->count[c]++;
1144			if (c == q && (q != '*' || *b == '/' && b++))
1145			{
1146				mp->multi[q]++;
1147				q = 0;
1148			}
1149			else if (c == '\\')
1150			{
1151				s = 0;
1152				b++;
1153			}
1154			else if (!q)
1155			{
1156				if (isalpha(c) || c == '_')
1157				{
1158					if (!s)
1159						s = (char*)b - 1;
1160				}
1161				else if (!isdigit(c))
1162				{
1163					if (s)
1164					{
1165						if (s > mp->fbuf)
1166							switch (*(s - 1))
1167							{
1168							case ':':
1169								if (*b == ':')
1170									mp->multi[':']++;
1171								break;
1172							case '.':
1173								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1174									mp->multi['.']++;
1175								break;
1176							case '\n':
1177							case '\\':
1178								if (*b == '{')
1179									t = (char*)b + 1;
1180								break;
1181							case '{':
1182								if (s == t && *b == '}')
1183									mp->multi['X']++;
1184								break;
1185							}
1186							if (!mp->idtab)
1187							{
1188								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset))
1189									for (q = 0; q < elementsof(dict); q++)
1190										dtinsert(mp->idtab, &dict[q]);
1191								else if (mp->disc->errorf)
1192									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1193								q = 0;
1194							}
1195							if (mp->idtab)
1196							{
1197								*(b - 1) = 0;
1198								if (ip = (Info_t*)dtmatch(mp->idtab, s))
1199									mp->identifier[ip->value]++;
1200								*(b - 1) = c;
1201							}
1202							s = 0;
1203						}
1204					switch (c)
1205					{
1206					case '\t':
1207						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1208							mp->multi['\t']++;
1209						break;
1210					case '"':
1211					case '\'':
1212						q = c;
1213						break;
1214					case '/':
1215						if (*b == '*')
1216							q = *b++;
1217						else if (*b == '/')
1218							q = '\n';
1219						break;
1220					case '$':
1221						if (*b == '(' && *(b + 1) != ' ')
1222							mp->multi['$']++;
1223						break;
1224					case '{':
1225					case '}':
1226					case '[':
1227					case ']':
1228					case '(':
1229						mp->multi[c]++;
1230						break;
1231					case ')':
1232						mp->multi[c]++;
1233						goto punctuation;
1234					case ':':
1235						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1236							mp->multi[':']++;
1237						goto punctuation;
1238					case '.':
1239					case ',':
1240					case '%':
1241					case ';':
1242					case '?':
1243					punctuation:
1244						pun++;
1245						if (*b != ' ' && *b != '\n')
1246							badpun++;
1247						break;
1248					}
1249				}
1250			}
1251		}
1252	}
1253	else
1254		while (b < e)
1255			mp->count[*b++]++;
1256	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1257	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1258	if (!flags)
1259	{
1260		if (match(suff, "*sh|bat|cmd"))
1261			goto id_sh;
1262		if (match(base, "*@(mkfile)"))
1263			goto id_mk;
1264		if (match(base, "*@(makefile|.mk)"))
1265			goto id_make;
1266		if (match(base, "*@(mamfile|.mam)"))
1267			goto id_mam;
1268		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1269			goto id_c;
1270		if (match(suff, "f"))
1271			goto id_fortran;
1272		if (match(suff, "htm+(l)"))
1273			goto id_html;
1274		if (match(suff, "cpy"))
1275			goto id_copybook;
1276		if (match(suff, "cob|cbl|cb2"))
1277			goto id_cobol;
1278		if (match(suff, "pl[1i]"))
1279			goto id_pl1;
1280		if (match(suff, "tex"))
1281			goto id_tex;
1282		if (match(suff, "asm|s"))
1283			goto id_asm;
1284		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1285		{
1286		id_sh:
1287			s = T("command script");
1288			mp->mime = "application/sh";
1289			goto qualify;
1290		}
1291		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1292		{
1293			s = T("mail message");
1294			mp->mime = "message/rfc822";
1295			goto qualify;
1296		}
1297		if (match(base, "*@(mkfile)"))
1298		{
1299		id_mk:
1300			s = "mkfile";
1301			mp->mime = "application/mk";
1302			goto qualify;
1303		}
1304		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1305		{
1306		id_make:
1307			s = "makefile";
1308			mp->mime = "application/make";
1309			goto qualify;
1310		}
1311		if (mp->multi['.'] >= 3)
1312		{
1313			s = T("nroff input");
1314			mp->mime = "application/x-troff";
1315			goto qualify;
1316		}
1317		if (mp->multi['X'] >= 3)
1318		{
1319			s = T("TeX input");
1320			mp->mime = "application/x-tex";
1321			goto qualify;
1322		}
1323		if (mp->fbsz < SF_BUFSIZE &&
1324		    (mp->multi['('] == mp->multi[')'] &&
1325		     mp->multi['{'] == mp->multi['}'] &&
1326		     mp->multi['['] == mp->multi[']']) ||
1327		    mp->fbsz >= SF_BUFSIZE &&
1328		    (mp->multi['('] >= mp->multi[')'] &&
1329		     mp->multi['{'] >= mp->multi['}'] &&
1330		     mp->multi['['] >= mp->multi[']']))
1331		{
1332			c = mp->identifier[ID_INCL1];
1333			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1334			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1335			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1336			{
1337			id_c:
1338				t1 = "";
1339				t2 = "c ";
1340				t3 = T("program");
1341				switch (*suff)
1342				{
1343				case 'c':
1344				case 'C':
1345					mp->mime = "application/x-cc";
1346					break;
1347				case 'l':
1348				case 'L':
1349					t1 = "lex ";
1350					mp->mime = "application/x-lex";
1351					break;
1352				default:
1353					t3 = T("header");
1354					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1355					{
1356						mp->mime = "application/x-cc";
1357						break;
1358					}
1359					/*FALLTHROUGH*/
1360				case 'y':
1361				case 'Y':
1362					t1 = "yacc ";
1363					mp->mime = "application/x-yacc";
1364					break;
1365				}
1366				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1367				{
1368					t2 = "c++ ";
1369					mp->mime = "application/x-c++";
1370				}
1371				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1372				s = mp->sbuf;
1373				goto qualify;
1374			}
1375		}
1376		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1377		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1378		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1379		{
1380		id_mam:
1381			s = T("mam program");
1382			mp->mime = "application/x-mam";
1383			goto qualify;
1384		}
1385		if (mp->identifier[ID_FORTRAN] >= 8)
1386		{
1387		id_fortran:
1388			s = T("fortran program");
1389			mp->mime = "application/x-fortran";
1390			goto qualify;
1391		}
1392		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1393		{
1394		id_html:
1395			s = T("html input");
1396			mp->mime = "text/html";
1397			goto qualify;
1398		}
1399		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1400		{
1401		id_copybook:
1402			s = T("cobol copybook");
1403			mp->mime = "application/x-cobol";
1404			goto qualify;
1405		}
1406		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1407		{
1408		id_cobol:
1409			s = T("cobol program");
1410			mp->mime = "application/x-cobol";
1411			goto qualify;
1412		}
1413		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1414		{
1415		id_pl1:
1416			s = T("pl1 program");
1417			mp->mime = "application/x-pl1";
1418			goto qualify;
1419		}
1420		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1421		{
1422		id_tex:
1423			s = T("TeX input");
1424			mp->mime = "text/tex";
1425			goto qualify;
1426		}
1427		if (mp->identifier[ID_ASM] >= 4)
1428		{
1429		id_asm:
1430			s = T("as program");
1431			mp->mime = "application/x-as";
1432			goto qualify;
1433		}
1434		if (ckenglish(mp, pun, badpun))
1435		{
1436			s = T("english text");
1437			mp->mime = "text/plain";
1438			goto qualify;
1439		}
1440	}
1441	else if (streq(base, "core"))
1442	{
1443		mp->mime = "x-system/core";
1444		return T("core dump");
1445	}
1446	if (flags & (CC_binary|CC_notext))
1447	{
1448		b = (unsigned char*)mp->fbuf;
1449		e = b + mp->fbsz;
1450		n = 0;
1451		for (;;)
1452		{
1453			c = *b++;
1454			q = 0;
1455			while (c & 0x80)
1456			{
1457				c <<= 1;
1458				q++;
1459			}
1460			switch (q)
1461			{
1462			case 4:
1463				if (b < e && (*b++ & 0xc0) != 0x80)
1464					break;
1465			case 3:
1466				if (b < e && (*b++ & 0xc0) != 0x80)
1467					break;
1468			case 2:
1469				if (b < e && (*b++ & 0xc0) != 0x80)
1470					break;
1471				n = 1;
1472			case 0:
1473				if (b >= e)
1474				{
1475					if (n)
1476					{
1477						flags &= ~(CC_binary|CC_notext);
1478						flags |= CC_utf_8;
1479					}
1480					break;
1481				}
1482				continue;
1483			}
1484			break;
1485		}
1486	}
1487	if (flags & (CC_binary|CC_notext))
1488	{
1489		unsigned long	d = 0;
1490
1491		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1492		{
1493			/*
1494			 * compression/encryption via standard deviation
1495			 */
1496
1497
1498			for (c = 0; c < UCHAR_MAX; c++)
1499			{
1500				pun = mp->count[c] - q;
1501				d += pun * pun;
1502			}
1503			d /= mp->fbsz;
1504		}
1505		if (d <= 0)
1506			s = T("binary");
1507		else if (d < 4)
1508			s = T("encrypted");
1509		else if (d < 16)
1510			s = T("packed");
1511		else if (d < 64)
1512			s = T("compressed");
1513		else if (d < 256)
1514			s = T("delta");
1515		else
1516			s = T("data");
1517		mp->mime = "application/octet-stream";
1518		return s;
1519	}
1520	mp->mime = "text/plain";
1521	if (flags & CC_utf_8)
1522		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1523	else if (flags & CC_latin)
1524		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1525	else
1526		s = (flags & CC_control) ? T("text with control characters") : T("text");
1527 qualify:
1528	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1529	{
1530		t = "dos ";
1531		mp->mime = "text/dos";
1532	}
1533	else
1534		t = "";
1535	if (code)
1536	{
1537		if (code == CC_ASCII)
1538			sfsprintf(buf, end - buf, "ascii %s%s", t, s);
1539		else
1540		{
1541			sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s);
1542			mp->mime = "text/ebcdic";
1543		}
1544		s = buf;
1545	}
1546	else if (*t)
1547	{
1548		sfsprintf(buf, end - buf, "%s%s", t, s);
1549		s = buf;
1550	}
1551	return s;
1552}
1553
1554/*
1555 * return the basic magic string for file,st in buf,size
1556 */
1557
1558static char*
1559type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end)
1560{
1561	register char*	s;
1562	register char*	t;
1563
1564	mp->mime = 0;
1565	if (!S_ISREG(st->st_mode))
1566	{
1567		if (S_ISDIR(st->st_mode))
1568		{
1569			mp->mime = "x-system/dir";
1570			return T("directory");
1571		}
1572		if (S_ISLNK(st->st_mode))
1573		{
1574			mp->mime = "x-system/lnk";
1575			s = buf;
1576			s += sfsprintf(s, end - s, T("symbolic link to "));
1577			if (pathgetlink(file, s, end - s) < 0)
1578				return T("cannot read symbolic link text");
1579			return buf;
1580		}
1581		if (S_ISBLK(st->st_mode))
1582		{
1583			mp->mime = "x-system/blk";
1584			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1585			return buf;
1586		}
1587		if (S_ISCHR(st->st_mode))
1588		{
1589			mp->mime = "x-system/chr";
1590			sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st));
1591			return buf;
1592		}
1593		if (S_ISFIFO(st->st_mode))
1594		{
1595			mp->mime = "x-system/fifo";
1596			return "fifo";
1597		}
1598#ifdef S_ISSOCK
1599		if (S_ISSOCK(st->st_mode))
1600		{
1601			mp->mime = "x-system/sock";
1602			return "socket";
1603		}
1604#endif
1605	}
1606	if (!(mp->fbmx = st->st_size))
1607		s = T("empty");
1608	else if (!mp->fp)
1609		s = T("cannot read");
1610	else
1611	{
1612		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1613		if (mp->fbsz < 0)
1614			s = fmterror(errno);
1615		else if (mp->fbsz == 0)
1616			s = T("empty");
1617		else
1618		{
1619			mp->fbuf[mp->fbsz] = 0;
1620			mp->xoff = 0;
1621			mp->xbsz = 0;
1622			if (!(s = ckmagic(mp, file, buf, end, st, 0)))
1623				s = cklang(mp, file, buf, end, st);
1624		}
1625	}
1626	if (!mp->mime)
1627		mp->mime = "application/unknown";
1628	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1629	{
1630		register char*	b;
1631		register char*	be;
1632		register char*	m;
1633		register char*	me;
1634
1635		b = mp->mime;
1636		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1637		while (m < me && b < t)
1638			*m++ = *b++;
1639		b = t = s;
1640		for (;;)
1641		{
1642			if (!(be = strchr(t, ' ')))
1643			{
1644				be = b + strlen(b);
1645				break;
1646			}
1647			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1648				break;
1649			b = t;
1650			t = be + 1;
1651		}
1652		while (m < me && b < be)
1653			if ((*m++ = *b++) == ' ')
1654				*(m - 1) = '-';
1655		*m = 0;
1656	}
1657	return s;
1658}
1659
1660/*
1661 * low level for magicload()
1662 */
1663
1664static int
1665load(register Magic_t* mp, char* file, register Sfio_t* fp)
1666{
1667	register Entry_t*	ep;
1668	register char*		p;
1669	register char*		p2;
1670	char*			p3;
1671	char*			next;
1672	int			n;
1673	int			lge;
1674	int			lev;
1675	int			ent;
1676	int			old;
1677	int			cont;
1678	Info_t*			ip;
1679	Entry_t*		ret;
1680	Entry_t*		first;
1681	Entry_t*		last = 0;
1682	Entry_t*		fun['z' - 'a' + 1];
1683
1684	memzero(fun, sizeof(fun));
1685	cont = '$';
1686	ent = 0;
1687	lev = 0;
1688	old = 0;
1689	ret = 0;
1690	error_info.file = file;
1691	error_info.line = 0;
1692	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1693	while (p = sfgetr(fp, '\n', 1))
1694	{
1695		error_info.line++;
1696		for (; isspace(*p); p++);
1697
1698		/*
1699		 * nesting
1700		 */
1701
1702		switch (*p)
1703		{
1704		case 0:
1705		case '#':
1706			cont = '#';
1707			continue;
1708		case '{':
1709			if (++lev < MAXNEST)
1710				ep->nest = *p;
1711			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1712				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1713			continue;
1714		case '}':
1715			if (!last || lev <= 0)
1716			{
1717				if (mp->disc->errorf)
1718					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1719			}
1720			else if (lev-- == ent)
1721			{
1722				ent = 0;
1723				ep->cont = ':';
1724				ep->offset = ret->offset;
1725				ep->nest = ' ';
1726				ep->type = ' ';
1727				ep->op = ' ';
1728				ep->desc = "[RETURN]";
1729				last = ep;
1730				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1731				ret = 0;
1732			}
1733			else
1734				last->nest = *p;
1735			continue;
1736		default:
1737			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1738			{
1739				n = *p++;
1740				if (n >= 'a' && n <= 'z')
1741					n -= 'a';
1742				else
1743				{
1744					if (mp->disc->errorf)
1745						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1746					n = 0;
1747				}
1748				if (ret && mp->disc->errorf)
1749					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1750				if (*p == '{')
1751				{
1752					ent = ++lev;
1753					ret = ep;
1754					ep->desc = "[FUNCTION]";
1755				}
1756				else
1757				{
1758					if (*(p + 1) != ')' && mp->disc->errorf)
1759						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1760					ep->desc = "[CALL]";
1761				}
1762				ep->cont = cont;
1763				ep->offset = n;
1764				ep->nest = ' ';
1765				ep->type = ' ';
1766				ep->op = ' ';
1767				last = ep;
1768				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1769				if (ret)
1770					fun[n] = last->value.lab = ep;
1771				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1772					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1773				continue;
1774			}
1775			if (!ep->nest)
1776				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1777			break;
1778		}
1779
1780		/*
1781		 * continuation
1782		 */
1783
1784		cont = '$';
1785		switch (*p)
1786		{
1787		case '>':
1788			old = 1;
1789			if (*(p + 1) == *p)
1790			{
1791				/*
1792				 * old style nesting push
1793				 */
1794
1795				p++;
1796				old = 2;
1797				if (!lev && last)
1798				{
1799					lev = 1;
1800					last->nest = '{';
1801					if (last->cont == '>')
1802						last->cont = '&';
1803					ep->nest = '1';
1804				}
1805			}
1806			/*FALLTHROUGH*/
1807		case '+':
1808		case '&':
1809		case '|':
1810			ep->cont = *p++;
1811			break;
1812		default:
1813			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1814				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1815			/*FALLTHROUGH*/
1816		case '*':
1817		case '0': case '1': case '2': case '3': case '4':
1818		case '5': case '6': case '7': case '8': case '9':
1819			ep->cont = (lev > 0) ? '&' : '#';
1820			break;
1821		}
1822		switch (old)
1823		{
1824		case 1:
1825			old = 0;
1826			if (lev)
1827			{
1828				/*
1829				 * old style nesting pop
1830				 */
1831
1832				lev = 0;
1833				if (last)
1834					last->nest = '}';
1835				ep->nest = ' ';
1836				if (ep->cont == '&')
1837					ep->cont = '#';
1838			}
1839			break;
1840		case 2:
1841			old = 1;
1842			break;
1843		}
1844		if (isdigit(*p))
1845		{
1846			/*
1847			 * absolute offset
1848			 */
1849
1850			ep->offset = strton(p, &next, NiL, 0);
1851			p2 = next;
1852		}
1853		else
1854		{
1855			for (p2 = p; *p2 && !isspace(*p2); p2++);
1856			if (!*p2)
1857			{
1858				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1859					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1860				continue;
1861			}
1862
1863			/*
1864			 * offset expression
1865			 */
1866
1867			*p2++ = 0;
1868			ep->expr = vmstrdup(mp->vm, p);
1869			if (isalpha(*p))
1870				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1871			else if (*p == '(' && ep->cont == '>')
1872			{
1873				/*
1874				 * convert old style indirection to @
1875				 */
1876
1877				p = ep->expr + 1;
1878				for (;;)
1879				{
1880					switch (*p++)
1881					{
1882					case 0:
1883					case '@':
1884					case '(':
1885						break;
1886					case ')':
1887						break;
1888					default:
1889						continue;
1890					}
1891					break;
1892				}
1893				if (*--p == ')')
1894				{
1895					*p = 0;
1896					*ep->expr = '@';
1897				}
1898			}
1899		}
1900		for (; isspace(*p2); p2++);
1901		for (p = p2; *p2 && !isspace(*p2); p2++);
1902		if (!*p2)
1903		{
1904			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1905				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1906			continue;
1907		}
1908		*p2++ = 0;
1909
1910		/*
1911		 * type
1912		 */
1913
1914		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1915		{
1916			ep->swap = ~(*p == 'l' ? 7 : 0);
1917			p += 2;
1918		}
1919		if (*p == 's')
1920		{
1921			if (*(p + 1) == 'h')
1922				ep->type = 'h';
1923			else
1924				ep->type = 's';
1925		}
1926		else if (*p == 'a')
1927			ep->type = 's';
1928		else
1929			ep->type = *p;
1930		if (p = strchr(p, '&'))
1931		{
1932			/*
1933			 * old style mask
1934			 */
1935
1936			ep->mask = strton(++p, NiL, NiL, 0);
1937		}
1938		for (; isspace(*p2); p2++);
1939		if (ep->mask)
1940			*--p2 = '=';
1941
1942		/*
1943		 * comparison operation
1944		 */
1945
1946		p = p2;
1947		if (p2 = strchr(p, '\t'))
1948			*p2++ = 0;
1949		else
1950		{
1951			int	qe = 0;
1952			int	qn = 0;
1953
1954			/*
1955			 * assume balanced {}[]()\\""'' field
1956			 */
1957
1958			for (p2 = p;;)
1959			{
1960				switch (n = *p2++)
1961				{
1962				case 0:
1963					break;
1964				case '{':
1965					if (!qe)
1966						qe = '}';
1967					if (qe == '}')
1968						qn++;
1969					continue;
1970				case '(':
1971					if (!qe)
1972						qe = ')';
1973					if (qe == ')')
1974						qn++;
1975					continue;
1976				case '[':
1977					if (!qe)
1978						qe = ']';
1979					if (qe == ']')
1980						qn++;
1981					continue;
1982				case '}':
1983				case ')':
1984				case ']':
1985					if (qe == n && qn > 0)
1986						qn--;
1987					continue;
1988				case '"':
1989				case '\'':
1990					if (!qe)
1991						qe = n;
1992					else if (qe == n)
1993						qe = 0;
1994					continue;
1995				case '\\':
1996					if (*p2)
1997						p2++;
1998					continue;
1999				default:
2000					if (!qe && isspace(n))
2001						break;
2002					continue;
2003				}
2004				if (n)
2005					*(p2 - 1) = 0;
2006				else
2007					p2--;
2008				break;
2009			}
2010		}
2011		lge = 0;
2012		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
2013			ep->op = '=';
2014		else
2015		{
2016			if (*p == '&')
2017			{
2018				ep->mask = strton(++p, &next, NiL, 0);
2019				p = next;
2020			}
2021			switch (*p)
2022			{
2023			case '=':
2024			case '>':
2025			case '<':
2026			case '*':
2027				ep->op = *p++;
2028				if (*p == '=')
2029				{
2030					p++;
2031					switch (ep->op)
2032					{
2033					case '>':
2034						lge = -1;
2035						break;
2036					case '<':
2037						lge = 1;
2038						break;
2039					}
2040				}
2041				break;
2042			case '!':
2043			case '@':
2044				ep->op = *p++;
2045				if (*p == '=')
2046					p++;
2047				break;
2048			case 'x':
2049				p++;
2050				ep->op = '*';
2051				break;
2052			default:
2053				ep->op = '=';
2054				if (ep->mask)
2055					ep->value.num = ep->mask;
2056				break;
2057			}
2058		}
2059		if (ep->op != '*' && !ep->value.num)
2060		{
2061			if (ep->type == 'e')
2062			{
2063				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2064				{
2065					ep->value.sub->re_disc = &mp->redisc;
2066					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2067					{
2068						p += ep->value.sub->re_npat;
2069						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2070							p += ep->value.sub->re_npat;
2071					}
2072					if (n)
2073					{
2074						regmessage(mp, ep->value.sub, n);
2075						ep->value.sub = 0;
2076					}
2077					else if (*p && mp->disc->errorf)
2078						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2079				}
2080			}
2081			else if (ep->type == 'm')
2082			{
2083				ep->mask = stresc(p) + 1;
2084				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2085				memcpy(ep->value.str, p, ep->mask);
2086				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2087					ep->value.str[ep->mask - 1] = '*';
2088			}
2089			else if (ep->type == 's')
2090			{
2091				ep->mask = stresc(p);
2092				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2093				memcpy(ep->value.str, p, ep->mask);
2094			}
2095			else if (*p == '\'')
2096			{
2097				stresc(p);
2098				ep->value.num = *(unsigned char*)(p + 1) + lge;
2099			}
2100			else if (strmatch(p, "+([a-z])\\(*\\)"))
2101			{
2102				char*	t;
2103
2104				t = p;
2105				ep->type = 'V';
2106				ep->op = *p;
2107				while (*p && *p++ != '(');
2108				switch (ep->op)
2109				{
2110				case 'l':
2111					n = *p++;
2112					if (n < 'a' || n > 'z')
2113					{
2114						if (mp->disc->errorf)
2115							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2116					}
2117					else if (!fun[n -= 'a'])
2118					{
2119						if (mp->disc->errorf)
2120							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2121					}
2122					else
2123					{
2124						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2125						ep->value.loop->lab = fun[n];
2126						while (*p && *p++ != ',');
2127						ep->value.loop->start = strton(p, &t, NiL, 0);
2128						while (*t && *t++ != ',');
2129						ep->value.loop->size = strton(t, &t, NiL, 0);
2130					}
2131					break;
2132				case 'm':
2133				case 'r':
2134					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2135					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2136					break;
2137				case 'v':
2138					break;
2139				default:
2140					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2141						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2142					break;
2143				}
2144			}
2145			else
2146			{
2147				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2148				if (ep->op == '@')
2149					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2150			}
2151		}
2152
2153		/*
2154		 * file description
2155		 */
2156
2157		if (p2)
2158		{
2159			for (; isspace(*p2); p2++);
2160			if (p = strchr(p2, '\t'))
2161			{
2162				/*
2163				 * check for message catalog index
2164				 */
2165
2166				*p++ = 0;
2167				if (isalpha(*p2))
2168				{
2169					for (p3 = p2; isalnum(*p3); p3++);
2170					if (*p3++ == ':')
2171					{
2172						for (; isdigit(*p3); p3++);
2173						if (!*p3)
2174						{
2175							for (p2 = p; isspace(*p2); p2++);
2176							if (p = strchr(p2, '\t'))
2177								*p++ = 0;
2178						}
2179					}
2180				}
2181			}
2182			stresc(p2);
2183			ep->desc = vmstrdup(mp->vm, p2);
2184			if (p)
2185			{
2186				for (; isspace(*p); p++);
2187				if (*p)
2188					ep->mime = vmstrdup(mp->vm, p);
2189			}
2190		}
2191		else
2192			ep->desc = "";
2193
2194		/*
2195		 * get next entry
2196		 */
2197
2198		last = ep;
2199		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2200	}
2201	if (last)
2202	{
2203		last->next = 0;
2204		if (mp->magiclast)
2205			mp->magiclast->next = first;
2206		else
2207			mp->magic = first;
2208		mp->magiclast = last;
2209	}
2210	vmfree(mp->vm, ep);
2211	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2212	{
2213		if (lev < 0)
2214			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2215		else if (lev > 0)
2216			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2217		if (ret)
2218			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2219	}
2220	error_info.file = 0;
2221	error_info.line = 0;
2222	return 0;
2223}
2224
2225/*
2226 * load a magic file into mp
2227 */
2228
2229int
2230magicload(register Magic_t* mp, const char* file, unsigned long flags)
2231{
2232	register char*		s;
2233	register char*		e;
2234	register char*		t;
2235	int			n;
2236	int			found;
2237	int			list;
2238	Sfio_t*			fp;
2239
2240	mp->flags = mp->disc->flags | flags;
2241	found = 0;
2242	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2243	{
2244		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2245			s = MAGIC_FILE;
2246	}
2247	for (;;)
2248	{
2249		if (!list)
2250			e = 0;
2251		else if (e = strchr(s, ':'))
2252		{
2253			/*
2254			 * ok, so ~ won't work for the last list element
2255			 * we do it for MAGIC_FILES_ENV anyway
2256			 */
2257
2258			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2259			{
2260				sfputr(mp->tmp, t, -1);
2261				s += n - 1;
2262			}
2263			sfwrite(mp->tmp, s, e - s);
2264			if (!(s = sfstruse(mp->tmp)))
2265				goto nospace;
2266		}
2267		if (!*s || streq(s, "-"))
2268			s = MAGIC_FILE;
2269		if (!(fp = sfopen(NiL, s, "r")))
2270		{
2271			if (list)
2272			{
2273				if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/'))
2274				{
2275					strcpy(mp->fbuf, s);
2276					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2277					if (!(s = sfstruse(mp->tmp)))
2278						goto nospace;
2279					if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))))
2280						goto next;
2281				}
2282				if (!(fp = sfopen(NiL, t, "r")))
2283					goto next;
2284			}
2285			else
2286			{
2287				if (mp->disc->errorf)
2288					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2289				return -1;
2290			}
2291		}
2292		found = 1;
2293		n = load(mp, s, fp);
2294		sfclose(fp);
2295		if (n && !list)
2296			return -1;
2297	next:
2298		if (!e)
2299			break;
2300		s = e + 1;
2301	}
2302	if (!found)
2303	{
2304		if (mp->flags & MAGIC_VERBOSE)
2305		{
2306			if (mp->disc->errorf)
2307				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2308		}
2309		return -1;
2310	}
2311	return 0;
2312 nospace:
2313	if (mp->disc->errorf)
2314		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2315	return -1;
2316}
2317
2318/*
2319 * open a magic session
2320 */
2321
2322Magic_t*
2323magicopen(Magicdisc_t* disc)
2324{
2325	register Magic_t*	mp;
2326	register int		i;
2327	register int		n;
2328	register int		f;
2329	register int		c;
2330	register Vmalloc_t*	vm;
2331	unsigned char*		map[CC_MAPS + 1];
2332
2333	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2334		return 0;
2335	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2336	{
2337		vmclose(vm);
2338		return 0;
2339	}
2340	mp->id = lib;
2341	mp->disc = disc;
2342	mp->vm = vm;
2343	mp->flags = disc->flags;
2344	mp->redisc.re_version = REG_VERSION;
2345	mp->redisc.re_flags = REG_NOFREE;
2346	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2347	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2348	mp->redisc.re_resizehandle = (void*)mp->vm;
2349	mp->dtdisc.key = offsetof(Info_t, name);
2350	mp->dtdisc.link = offsetof(Info_t, link);
2351	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset)))
2352		goto bad;
2353	for (n = 0; n < elementsof(info); n++)
2354		dtinsert(mp->infotab, &info[n]);
2355	for (i = 0; i < CC_MAPS; i++)
2356		map[i] = ccmap(i, CC_ASCII);
2357	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2358	for (n = 0; n <= UCHAR_MAX; n++)
2359	{
2360		f = 0;
2361		i = CC_MAPS;
2362		while (--i >= 0)
2363		{
2364			c = ccmapchr(map[i], n);
2365			f = (f << CC_BIT) | CCTYPE(c);
2366		}
2367		mp->cctype[n] = f;
2368	}
2369	return mp;
2370 bad:
2371	magicclose(mp);
2372	return 0;
2373}
2374
2375/*
2376 * close a magicopen() session
2377 */
2378
2379int
2380magicclose(register Magic_t* mp)
2381{
2382	if (!mp)
2383		return -1;
2384	if (mp->tmp)
2385		sfstrclose(mp->tmp);
2386	if (mp->vm)
2387		vmclose(mp->vm);
2388	return 0;
2389}
2390
2391/*
2392 * return the magic string for file with optional stat info st
2393 */
2394
2395char*
2396magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2397{
2398	off_t	off;
2399	char*	s;
2400
2401	mp->flags = mp->disc->flags;
2402	mp->mime = 0;
2403	if (!st)
2404		s = T("cannot stat");
2405	else
2406	{
2407		if (mp->fp = fp)
2408			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2409		s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]);
2410		if (mp->fp)
2411			sfseek(mp->fp, off, SEEK_SET);
2412		if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL)))
2413		{
2414			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2415				sfprintf(mp->tmp, "%s ", T("short"));
2416			sfprintf(mp->tmp, "%s", s);
2417			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2418				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2419			if (st->st_mode & S_ISUID)
2420				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2421			if (st->st_mode & S_ISGID)
2422				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2423			if (st->st_mode & S_ISVTX)
2424				sfprintf(mp->tmp, ", sticky");
2425			if (!(s = sfstruse(mp->tmp)))
2426				s = T("out of space");
2427		}
2428	}
2429	if (mp->flags & MAGIC_MIME)
2430		s = mp->mime;
2431	if (!s)
2432		s = T("error");
2433	return s;
2434}
2435
2436/*
2437 * list the magic table in mp on sp
2438 */
2439
2440int
2441magiclist(register Magic_t* mp, register Sfio_t* sp)
2442{
2443	register Entry_t*	ep = mp->magic;
2444	register Entry_t*	rp = 0;
2445
2446	mp->flags = mp->disc->flags;
2447	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2448	while (ep)
2449	{
2450		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2451		if (ep->expr)
2452			sfprintf(sp, "%s", ep->expr);
2453		else
2454			sfprintf(sp, "%ld", ep->offset);
2455		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2456		switch (ep->type)
2457		{
2458		case 'm':
2459		case 's':
2460			sfputr(sp, fmtesc(ep->value.str), -1);
2461			break;
2462		case 'V':
2463			switch (ep->op)
2464			{
2465			case 'l':
2466				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2467				break;
2468			case 'v':
2469				sfprintf(sp, "vcodex()");
2470				break;
2471			default:
2472				sfprintf(sp, "%p", ep->value.str);
2473				break;
2474			}
2475			break;
2476		default:
2477			sfprintf(sp, "%lo", ep->value.num);
2478			break;
2479		}
2480		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2481		if (ep->cont == '$' && !ep->value.lab->mask)
2482		{
2483			rp = ep;
2484			ep = ep->value.lab;
2485		}
2486		else
2487		{
2488			if (ep->cont == ':')
2489			{
2490				ep = rp;
2491				ep->value.lab->mask = 1;
2492			}
2493			ep = ep->next;
2494		}
2495	}
2496	return 0;
2497}
2498