/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * file.h - definitions for file(1) program
 * @(#)$File: file.h,v 1.124 2010/01/16 17:45:12 chl Exp $
 */

33#ifndef __file_h__
34#define __file_h__
35
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>	/* Include that here, to make sure __P gets defined */
#include <errno.h>
#include <fcntl.h>	/* For open and flags */
#ifdef HAVE_STDINT_H
/* Must be defined before <stdint.h> so that C++ builds get the limit macros. */
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#include <regex.h>
#include <sys/types.h>
#include <sys/param.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>

/*
 * Switches on the members and constants in this header that are wrapped in
 * #ifdef ENABLE_CONDITIONALS.
 */
#define ENABLE_CONDITIONALS

/* Default magic file path; a definition supplied earlier (e.g. -DMAGIC=...) wins. */
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif

/* ';' when __EMX__ is defined, ':' everywhere else. */
#ifdef __EMX__
#define PATHSEP	';'
#else
#define PATHSEP	':'
#endif

/*
 * Linkage markers used on declarations: "private" expands to static, while
 * "protected" and "public" expand to nothing.  Only "protected" may be
 * pre-defined by the includer.
 */
#define private static
#ifndef protected
#define protected
#endif
#define public

/*
 * __GNUC_PREREQ__(x, y) is non-zero when __GNUC__.__GNUC_MINOR__ is at
 * least x.y, and is always 0 when __GNUC__ is not defined.  An existing
 * definition (e.g. from system headers) is left untouched.
 */
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define	__GNUC_PREREQ__(x, y)						\
	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
	 (__GNUC__ > (x)))
#else
#define	__GNUC_PREREQ__(x, y)	0
#endif
#endif

/*
 * When __GNUC__ is not defined, make __attribute__((...)) expand to nothing
 * so the annotated declarations in this header still parse.
 */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(a)
#endif
#endif

/*
 * Smaller / larger of two values, defined only when no earlier header has
 * already provided them.  Both macros evaluate one of their arguments twice,
 * so do not pass expressions with side effects.
 */
#ifndef MIN
#define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif

#ifndef HOWMANY
# define HOWMANY (256 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* max length of text description/MIME type */
#define MAXstring 32		/* max length of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	7
/*
 * NOTE(review): 200 equals the 50 four-byte words counted by the layout
 * comments of struct magic; presumably the expected sizeof(struct magic) --
 * confirm against the code that checks it.
 */
#define FILE_MAGICSIZE	200

/* NOTE(review): presumably action codes for file_apprentice() -- confirm. */
#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2

117union VALUETYPE {
118	uint8_t b;
119	uint16_t h;
120	uint32_t l;
121	uint64_t q;
122	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
123	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
124	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
125	char s[MAXstring];	/* the search string or regex pattern */
126	unsigned char us[MAXstring];
127	float f;
128	double d;
129};
130
131struct magic {
132	/* Word 1 */
133	uint16_t cont_level;	/* level of ">" */
134	uint8_t flag;
135#define INDIR		0x01	/* if '(...)' appears */
136#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
137#define INDIROFFADD	0x04	/* if '>&(' appears */
138#define UNSIGNED	0x08	/* comparison is unsigned */
139#define NOSPACE		0x10	/* suppress space character before output */
140#define BINTEST		0x20	/* test is for a binary type (set only
141				   for top-level tests) */
142#define TEXTTEST	0x40	/* for passing to file_softmagic */
143
144	uint8_t factor;
145
146	/* Word 2 */
147	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
148	uint8_t vallen;		/* length of string value, if any */
149	uint8_t type;		/* comparison type (FILE_*) */
150	uint8_t in_type;	/* type of indirection */
151#define 			FILE_INVALID	0
152#define 			FILE_BYTE	1
153#define				FILE_SHORT	2
154#define				FILE_DEFAULT	3
155#define				FILE_LONG	4
156#define				FILE_STRING	5
157#define				FILE_DATE	6
158#define				FILE_BESHORT	7
159#define				FILE_BELONG	8
160#define				FILE_BEDATE	9
161#define				FILE_LESHORT	10
162#define				FILE_LELONG	11
163#define				FILE_LEDATE	12
164#define				FILE_PSTRING	13
165#define				FILE_LDATE	14
166#define				FILE_BELDATE	15
167#define				FILE_LELDATE	16
168#define				FILE_REGEX	17
169#define				FILE_BESTRING16	18
170#define				FILE_LESTRING16	19
171#define				FILE_SEARCH	20
172#define				FILE_MEDATE	21
173#define				FILE_MELDATE	22
174#define				FILE_MELONG	23
175#define				FILE_QUAD	24
176#define				FILE_LEQUAD	25
177#define				FILE_BEQUAD	26
178#define				FILE_QDATE	27
179#define				FILE_LEQDATE	28
180#define				FILE_BEQDATE	29
181#define				FILE_QLDATE	30
182#define				FILE_LEQLDATE	31
183#define				FILE_BEQLDATE	32
184#define				FILE_FLOAT	33
185#define				FILE_BEFLOAT	34
186#define				FILE_LEFLOAT	35
187#define				FILE_DOUBLE	36
188#define				FILE_BEDOUBLE	37
189#define				FILE_LEDOUBLE	38
190#define				FILE_BEID3	39
191#define				FILE_LEID3	40
192#define				FILE_INDIRECT	41
193#define				FILE_NAMES_SIZE	42/* size of array to contain all names */
194
195#define IS_STRING(t) \
196	((t) == FILE_STRING || \
197	 (t) == FILE_PSTRING || \
198	 (t) == FILE_BESTRING16 || \
199	 (t) == FILE_LESTRING16 || \
200	 (t) == FILE_REGEX || \
201	 (t) == FILE_SEARCH || \
202	 (t) == FILE_DEFAULT)
203
204#define FILE_FMT_NONE 0
205#define FILE_FMT_NUM  1 /* "cduxXi" */
206#define FILE_FMT_STR  2 /* "s" */
207#define FILE_FMT_QUAD 3 /* "ll" */
208#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
209#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
210
211	/* Word 3 */
212	uint8_t in_op;		/* operator for indirection */
213	uint8_t mask_op;	/* operator for mask */
214#ifdef ENABLE_CONDITIONALS
215	uint8_t cond;		/* conditional type */
216#else
217	uint8_t dummy;
218#endif
219	uint8_t factor_op;
220#define		FILE_FACTOR_OP_PLUS	'+'
221#define		FILE_FACTOR_OP_MINUS	'-'
222#define		FILE_FACTOR_OP_TIMES	'*'
223#define		FILE_FACTOR_OP_DIV	'/'
224#define		FILE_FACTOR_OP_NONE	'\0'
225
226#define				FILE_OPS	"&|^+-*/%"
227#define				FILE_OPAND	0
228#define				FILE_OPOR	1
229#define				FILE_OPXOR	2
230#define				FILE_OPADD	3
231#define				FILE_OPMINUS	4
232#define				FILE_OPMULTIPLY	5
233#define				FILE_OPDIVIDE	6
234#define				FILE_OPMODULO	7
235#define				FILE_OPS_MASK	0x07 /* mask for above ops */
236#define				FILE_UNUSED_1	0x08
237#define				FILE_UNUSED_2	0x10
238#define				FILE_UNUSED_3	0x20
239#define				FILE_OPINVERSE	0x40
240#define				FILE_OPINDIRECT	0x80
241
242#ifdef ENABLE_CONDITIONALS
243#define				COND_NONE	0
244#define				COND_IF		1
245#define				COND_ELIF	2
246#define				COND_ELSE	3
247#endif /* ENABLE_CONDITIONALS */
248
249	/* Word 4 */
250	uint32_t offset;	/* offset to magic number */
251	/* Word 5 */
252	int32_t in_offset;	/* offset from indirection */
253	/* Word 6 */
254	uint32_t lineno;	/* line number in magic file */
255	/* Word 7,8 */
256	union {
257		uint64_t _mask;	/* for use with numeric and date types */
258		struct {
259			uint32_t _count;	/* repeat/line count */
260			uint32_t _flags;	/* modifier flags */
261		} _s;		/* for use with string types */
262	} _u;
263#define num_mask _u._mask
264#define str_range _u._s._count
265#define str_flags _u._s._flags
266	/* Words 9-16 */
267	union VALUETYPE value;	/* either number or string */
268	/* Words 17-24 */
269	char desc[MAXDESC];	/* description */
270	/* Words 25-32 */
271	char mimetype[MAXDESC]; /* MIME type */
272	/* Words 33-34 */
273	char apple[8];
274};
275
/*
 * BIT(A) is a mask with only bit A set.  The constant is unsigned so that
 * BIT(31) is well defined and positive; a signed 1 would be shifted into
 * the sign bit.
 */
#define BIT(A)   (1U << (A))
/* Modifier flag bits (the comment on _flags in struct magic calls them that). */
#define STRING_COMPACT_WHITESPACE		BIT(0)
#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define STRING_IGNORE_LOWERCASE			BIT(2)
#define STRING_IGNORE_UPPERCASE			BIT(3)
#define REGEX_OFFSET_START			BIT(4)
#define STRING_TEXTTEST				BIT(5)
#define STRING_BINTEST				BIT(6)
/* One character per flag above, in the same order. */
#define CHAR_COMPACT_WHITESPACE			'W'
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
#define CHAR_IGNORE_LOWERCASE			'c'
#define CHAR_IGNORE_UPPERCASE			'C'
#define CHAR_REGEX_OFFSET_START			's'
#define CHAR_TEXTTEST				't'
#define CHAR_BINTEST				'b'
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100


/*
 * list of magic entries: each node owns one array of struct magic and is
 * linked to its neighbours through next/prev.
 */
struct mlist {
	struct magic *magic;		/* array of magic entries */
	uint32_t nmagic;			/* number of entries in array */
	int mapped;  /* allocation type: 0 => apprentice_file
		      *                  1 => apprentice_map + malloc
		      *                  2 => apprentice_map + mmap */
	struct mlist *next, *prev;
};

/*
 * CAST(T, b) converts b to type T: static_cast in C++, a plain cast in C.
 * The C form is wrapped in an outer pair of parentheses so the result can
 * be followed by a postfix operator or used as a sizeof operand, e.g.
 * CAST(const char *, p)[1].
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#endif

/*
 * Per-level state; struct magic_set keeps an array of these in its c.li
 * member.  The last two members exist only with ENABLE_CONDITIONALS.
 */
struct level_info {
	int32_t off;
	int got_match;
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;	/* used for error checking by parse() */
#endif
};
319struct magic_set {
320	struct mlist *mlist;
321	struct cont {
322		size_t len;
323		struct level_info *li;
324	} c;
325	struct out {
326		char *buf;		/* Accumulation buffer */
327		char *pbuf;		/* Printable buffer */
328	} o;
329	uint32_t offset;
330	int error;
331	int flags;			/* Control magic tests. */
332	int event_flags;		/* Note things that happened. */
333#define 		EVENT_HAD_ERR		0x01
334	const char *file;
335	size_t line;			/* current magic line number */
336
337	/* data for searches */
338	struct {
339		const char *s;		/* start of search in original source */
340		size_t s_len;		/* length of search region */
341		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
342		size_t rm_len;		/* match length */
343	} search;
344
345	/* FIXME: Make the string dynamically allocated so that e.g.
346	   strings matched in files can be longer than MAXstring */
347	union VALUETYPE ms_value;	/* either number or string */
348};
349
/* Type for Unicode characters (an unsigned long per character) */
typedef unsigned long unichar;

353struct stat;
354protected const char *file_fmttime(uint32_t, int);
355protected int file_buffer(struct magic_set *, int, const char *, const void *,
356    size_t);
357protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
358protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
359protected int file_vprintf(struct magic_set *, const char *, va_list);
360protected int file_printf(struct magic_set *, const char *, ...)
361    __attribute__((__format__(__printf__, 2, 3)));
362protected int file_reset(struct magic_set *);
363protected int file_tryelf(struct magic_set *, int, const unsigned char *,
364    size_t);
365protected int file_trymacho(struct magic_set *, int, const unsigned char *, size_t, const char *);
366protected int file_trycdf(struct magic_set *, int, const unsigned char *,
367    size_t);
368protected int file_zmagic(struct magic_set *, int, const char *,
369    const unsigned char *, size_t);
370protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
371protected int file_ascmagic_with_encoding(struct magic_set *,
372    const unsigned char *, size_t, unichar *, size_t, const char *,
373    const char *);
374protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
375    unichar **, size_t *, const char **, const char **, const char **);
376protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
377protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
378    int);
379protected struct mlist *file_apprentice(struct magic_set *, const char *, int);
380protected uint64_t file_signextend(struct magic_set *, struct magic *,
381    uint64_t);
382protected void file_delmagic(struct magic *, int type, size_t entries);
383protected void file_badread(struct magic_set *);
384protected void file_badseek(struct magic_set *);
385protected void file_oomem(struct magic_set *, size_t);
386protected void file_error(struct magic_set *, int, const char *, ...)
387    __attribute__((__format__(__printf__, 3, 4)));
388protected void file_magerror(struct magic_set *, const char *, ...)
389    __attribute__((__format__(__printf__, 2, 3)));
390protected void file_magwarn(struct magic_set *, const char *, ...)
391    __attribute__((__format__(__printf__, 2, 3)));
392protected void file_mdump(struct magic *);
393protected void file_showstr(FILE *, const char *, size_t);
394protected size_t file_mbswidth(const char *);
395protected const char *file_getbuffer(struct magic_set *);
396protected ssize_t sread(int, void *, size_t, int);
397protected int file_check_mem(struct magic_set *, unsigned int);
398protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
399    size_t *);
400#ifdef __EMX__
401protected int file_os2_apptype(struct magic_set *, const char *, const void *,
402    size_t);
403#endif /* __EMX__ */
404
405
/*
 * Table of names and its element count, defined in another translation
 * unit; not declared when COMPILE_ONLY is set.
 * NOTE(review): presumably indexed by the FILE_* type values
 * (cf. FILE_NAMES_SIZE) -- confirm at the definition.
 */
#ifndef COMPILE_ONLY
extern const char *file_names[];
extern const size_t file_nnames;
#endif

/*
 * Fallback for systems without strerror(): look the message up in
 * sys_errlist, returning "Unknown error" when e is outside
 * [0, sys_nerr).  The argument is evaluated more than once.
 */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif

/*
 * Fallback for systems without strtoul(): delegate to strtol() and convert
 * the result to unsigned long so the expression has the type callers
 * expect.  Values above LONG_MAX are still limited by strtol().
 */
#ifndef HAVE_STRTOUL
#define strtoul(a, b, c)	((unsigned long)strtol((a), (b), (c)))
#endif

/*
 * Prototypes for functions the platform may lack; each is declared here
 * only when its HAVE_* macro is not defined.
 */
#ifndef HAVE_VASPRINTF
int vasprintf(char **, const char *, va_list);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **ptr, const char *format_string, ...);
#endif

#ifndef HAVE_STRLCPY
size_t strlcpy(char *dst, const char *src, size_t siz);
#endif
#ifndef HAVE_STRLCAT
size_t strlcat(char *dst, const char *src, size_t siz);
#endif

/* Define QUICK when both mmap() and <sys/mman.h> were detected. */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif

/* Where <fcntl.h> has no O_BINARY, make it a no-op flag. */
#ifndef O_BINARY
#define O_BINARY	0
#endif

/*
 * FILE_RCSID(id) embeds the identification string id in the including
 * source file.
 *  - GCC 3 or newer: a static array marked __used__, so it is kept even
 *    though nothing references it.  The macro supplies its own semicolon.
 *  - other C compilers: a static function that refers to itself, which
 *    counts as a use of the otherwise unreferenced symbol; it is not
 *    meant to be called.
 *  - C++: expands to nothing.
 */
#ifndef __cplusplus
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
#else
#define FILE_RCSID(id)
#endif

458#endif /* __file_h__ */
459