file.h revision 186690
/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * file.h - definitions for file(1) program
 * @(#)$File: file.h,v 1.108 2008/07/16 18:00:57 christos Exp $
 */
3268349Sobrien
3368349Sobrien#ifndef __file_h__
3468349Sobrien#define __file_h__
3568349Sobrien
/*
 * System headers used by the declarations in this file.  config.h, when
 * the build provides one, is pulled in first, ahead of the HAVE_* checks
 * that follow.
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include <stdio.h>	/* Include that here, to make sure __P gets defined */
#include <errno.h>
#include <fcntl.h>	/* For open and flags */
#ifdef HAVE_STDINT_H
#include <stdint.h>
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#include <regex.h>
#include <sys/types.h>
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>
5468349Sobrien
/*
 * Build-time switch: when defined, struct magic carries a `cond' member,
 * the COND_* codes exist, and struct level_info tracks last_match and
 * last_cond.
 */
#define ENABLE_CONDITIONALS

/* Fallback value of MAGIC when the build has not already defined it. */
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif

/* Path-list separator character: ';' when built with __EMX__, ':' otherwise. */
#ifdef __EMX__
#define PATHSEP	';'
#else
#define PATHSEP	':'
#endif

/*
 * Visibility markers placed in front of declarations.  `private' maps to
 * static; `protected' (unless the build pre-defines it) and `public'
 * expand to nothing.
 */
#define private static
#ifndef protected
#define protected
#endif
#define public
72133359Sobrien
/*
 * __GNUC_PREREQ__(x, y): non-zero when compiling with a GNU-compatible
 * compiler of version x.y or newer.  Supplied here only if the system
 * headers have not already defined it; always 0 when __GNUC__ is unset.
 */
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define	__GNUC_PREREQ__(x, y)						\
	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
	 (__GNUC__ > (x)))
#else
#define	__GNUC_PREREQ__(x, y)	0
#endif
#endif

/*
 * Compilers that do not define __GNUC__ get __attribute__() defined away,
 * so the annotated prototypes in this header still parse.
 */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(a)
#endif
#endif
88186690Sobrien
/*
 * Smaller / larger of two values, defined only when the system headers
 * have not already provided them.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects (e.g. MIN(i++, n)).
 */
#ifndef MIN
#define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif
96186690Sobrien
/* Size limits.  HOWMANY may be pre-defined by the build to override it. */
#ifndef HOWMANY
# define HOWMANY (256 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* max length of text description/MIME type */
#define MAXstring 32		/* max length of "string" types */

/*
 * NOTE(review): MAGICNO and VERSIONNO presumably tag the compiled magic
 * database format -- confirm against the code that reads and writes it.
 * FILE_MAGICSIZE is 192 bytes, which equals the 48 32-bit words laid out
 * in struct magic.
 */
#define MAGICNO		0xF11E041C
#define VERSIONNO	6
#define FILE_MAGICSIZE	(32 * 6)

/*
 * NOTE(review): presumably the action codes passed as the int argument of
 * file_apprentice() -- confirm against its callers.
 */
#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
11274784Sobrien
113186690Sobrienunion VALUETYPE {
114186690Sobrien	uint8_t b;
115186690Sobrien	uint16_t h;
116186690Sobrien	uint32_t l;
117186690Sobrien	uint64_t q;
118186690Sobrien	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
119186690Sobrien	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
120186690Sobrien	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
121186690Sobrien	char s[MAXstring];	/* the search string or regex pattern */
122186690Sobrien	unsigned char us[MAXstring];
123186690Sobrien	float f;
124186690Sobrien	double d;
125186690Sobrien};
126186690Sobrien
12768349Sobrienstruct magic {
128133359Sobrien	/* Word 1 */
129103373Sobrien	uint16_t cont_level;	/* level of ">" */
130103373Sobrien	uint8_t flag;
131186690Sobrien#define INDIR		0x01	/* if '(...)' appears */
132186690Sobrien#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
133186690Sobrien#define INDIROFFADD	0x04	/* if '>&(' appears */
134186690Sobrien#define UNSIGNED	0x08	/* comparison is unsigned */
135186690Sobrien#define NOSPACE		0x10	/* suppress space character before output */
136186690Sobrien#define BINTEST		0x20	/* test is for a binary type (set only
137186690Sobrien                                   for top-level tests) */
138186690Sobrien#define TEXTTEST	0	/* for passing to file_softmagic */
139169962Sobrien
140186690Sobrien	uint8_t factor;
141186690Sobrien
142133359Sobrien	/* Word 2 */
143103373Sobrien	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
144103373Sobrien	uint8_t vallen;		/* length of string value, if any */
145186690Sobrien	uint8_t type;		/* comparison type (FILE_*) */
146186690Sobrien	uint8_t in_type;	/* type of indirection */
147169962Sobrien#define 			FILE_INVALID	0
148133359Sobrien#define 			FILE_BYTE	1
149133359Sobrien#define				FILE_SHORT	2
150169962Sobrien#define				FILE_DEFAULT	3
151133359Sobrien#define				FILE_LONG	4
152133359Sobrien#define				FILE_STRING	5
153133359Sobrien#define				FILE_DATE	6
154133359Sobrien#define				FILE_BESHORT	7
155133359Sobrien#define				FILE_BELONG	8
156133359Sobrien#define				FILE_BEDATE	9
157133359Sobrien#define				FILE_LESHORT	10
158133359Sobrien#define				FILE_LELONG	11
159133359Sobrien#define				FILE_LEDATE	12
160133359Sobrien#define				FILE_PSTRING	13
161133359Sobrien#define				FILE_LDATE	14
162133359Sobrien#define				FILE_BELDATE	15
163133359Sobrien#define				FILE_LELDATE	16
164133359Sobrien#define				FILE_REGEX	17
165139368Sobrien#define				FILE_BESTRING16	18
166139368Sobrien#define				FILE_LESTRING16	19
167159764Sobrien#define				FILE_SEARCH	20
168159764Sobrien#define				FILE_MEDATE	21
169159764Sobrien#define				FILE_MELDATE	22
170159764Sobrien#define				FILE_MELONG	23
171169942Sobrien#define				FILE_QUAD	24
172169942Sobrien#define				FILE_LEQUAD	25
173169942Sobrien#define				FILE_BEQUAD	26
174169942Sobrien#define				FILE_QDATE	27
175169942Sobrien#define				FILE_LEQDATE	28
176169942Sobrien#define				FILE_BEQDATE	29
177169942Sobrien#define				FILE_QLDATE	30
178169942Sobrien#define				FILE_LEQLDATE	31
179169942Sobrien#define				FILE_BEQLDATE	32
180175296Sobrien#define				FILE_FLOAT	33
181175296Sobrien#define				FILE_BEFLOAT	34
182175296Sobrien#define				FILE_LEFLOAT	35
183175296Sobrien#define				FILE_DOUBLE	36
184175296Sobrien#define				FILE_BEDOUBLE	37
185175296Sobrien#define				FILE_LEDOUBLE	38
186175296Sobrien#define				FILE_NAMES_SIZE	39/* size of array to contain all names */
187133359Sobrien
188169962Sobrien#define IS_STRING(t) \
189169962Sobrien	((t) == FILE_STRING || \
190169962Sobrien	 (t) == FILE_PSTRING || \
191169962Sobrien	 (t) == FILE_BESTRING16 || \
192169962Sobrien	 (t) == FILE_LESTRING16 || \
193169962Sobrien	 (t) == FILE_REGEX || \
194169962Sobrien	 (t) == FILE_SEARCH || \
195169962Sobrien	 (t) == FILE_DEFAULT)
196133359Sobrien
197169942Sobrien#define FILE_FMT_NONE 0
198169942Sobrien#define FILE_FMT_NUM  1 /* "cduxXi" */
199169942Sobrien#define FILE_FMT_STR  2 /* "s" */
200169942Sobrien#define FILE_FMT_QUAD 3 /* "ll" */
201175296Sobrien#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
202175296Sobrien#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
203169942Sobrien
204133359Sobrien	/* Word 3 */
205103373Sobrien	uint8_t in_op;		/* operator for indirection */
206103373Sobrien	uint8_t mask_op;	/* operator for mask */
207169962Sobrien#ifdef ENABLE_CONDITIONALS
208169962Sobrien	uint8_t cond;		/* conditional type */
209169962Sobrien#else
210186690Sobrien	uint8_t dummy;
211169962Sobrien#endif
212186690Sobrien	uint8_t factor_op;
213186690Sobrien#define		FILE_FACTOR_OP_PLUS	'+'
214186690Sobrien#define		FILE_FACTOR_OP_MINUS	'-'
215186690Sobrien#define		FILE_FACTOR_OP_TIMES	'*'
216186690Sobrien#define		FILE_FACTOR_OP_DIV	'/'
217186690Sobrien#define		FILE_FACTOR_OP_NONE	'\0'
218169962Sobrien
219133359Sobrien#define				FILE_OPS	"&|^+-*/%"
220133359Sobrien#define				FILE_OPAND	0
221133359Sobrien#define				FILE_OPOR	1
222133359Sobrien#define				FILE_OPXOR	2
223133359Sobrien#define				FILE_OPADD	3
224133359Sobrien#define				FILE_OPMINUS	4
225133359Sobrien#define				FILE_OPMULTIPLY	5
226133359Sobrien#define				FILE_OPDIVIDE	6
227133359Sobrien#define				FILE_OPMODULO	7
228169962Sobrien#define				FILE_OPS_MASK	0x07 /* mask for above ops */
229169962Sobrien#define				FILE_UNUSED_1	0x08
230169962Sobrien#define				FILE_UNUSED_2	0x10
231169962Sobrien#define				FILE_UNUSED_3	0x20
232159764Sobrien#define				FILE_OPINVERSE	0x40
233159764Sobrien#define				FILE_OPINDIRECT	0x80
234169962Sobrien
235169962Sobrien#ifdef ENABLE_CONDITIONALS
236169962Sobrien#define				COND_NONE	0
237169962Sobrien#define				COND_IF		1
238169962Sobrien#define				COND_ELIF	2
239169962Sobrien#define				COND_ELSE	3
240169962Sobrien#endif /* ENABLE_CONDITIONALS */
241169962Sobrien
242133359Sobrien	/* Word 4 */
243133359Sobrien	uint32_t offset;	/* offset to magic number */
244133359Sobrien	/* Word 5 */
245159764Sobrien	int32_t in_offset;	/* offset from indirection */
246133359Sobrien	/* Word 6 */
247169942Sobrien	uint32_t lineno;	/* line number in magic file */
248169942Sobrien	/* Word 7,8 */
249169962Sobrien	union {
250169962Sobrien		uint64_t _mask;	/* for use with numeric and date types */
251169962Sobrien		struct {
252169962Sobrien			uint32_t _count;	/* repeat/line count */
253169962Sobrien			uint32_t _flags;	/* modifier flags */
254169962Sobrien		} _s;		/* for use with string types */
255169962Sobrien	} _u;
256169962Sobrien#define num_mask _u._mask
257186690Sobrien#define str_range _u._s._count
258169962Sobrien#define str_flags _u._s._flags
259133359Sobrien	/* Words 9-16 */
260186690Sobrien	union VALUETYPE value;		/* either number or string */
261133359Sobrien	/* Words 17..31 */
26268349Sobrien	char desc[MAXDESC];	/* description */
263186690Sobrien	/* Words 32..47 */
264186690Sobrien	char mimetype[MAXDESC]; /* MIME type */
265133359Sobrien};
26668349Sobrien
/*
 * BIT(A) is a signed int with bit A set; keep A below 31 so the shift
 * stays within the range of int.
 */
#define BIT(A)   (1 << (A))
/* STRING_* and REGEX_* are flag bits. */
#define STRING_COMPACT_BLANK		BIT(0)
#define STRING_COMPACT_OPTIONAL_BLANK	BIT(1)
#define STRING_IGNORE_LOWERCASE		BIT(2)
#define STRING_IGNORE_UPPERCASE		BIT(3)
#define REGEX_OFFSET_START		BIT(4)
/* CHAR_* are modifier characters paired by name with the flag bits above. */
#define CHAR_COMPACT_BLANK		'B'
#define CHAR_COMPACT_OPTIONAL_BLANK	'b'
#define CHAR_IGNORE_LOWERCASE		'c'
#define CHAR_IGNORE_UPPERCASE		'C'
#define CHAR_REGEX_OFFSET_START		's'
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100
28068349Sobrien
28168349Sobrien
/*
 * List of magic entries: each node carries one array of struct magic and
 * links to its neighbours in both directions.
 */
struct mlist {
	struct magic *magic;		/* array of magic entries */
	uint32_t nmagic;		/* number of entries in array */
	int mapped;  /* allocation type: 0 => apprentice_file
		      *                  1 => apprentice_map + malloc
		      *                  2 => apprentice_map + mmap */
	struct mlist *next, *prev;	/* neighbouring nodes */
};
29174784Sobrien
/*
 * CAST(T, b): convert expression b to type T.  C++ builds use static_cast;
 * C builds use an ordinary cast, so the conversion really happens in both
 * languages (a bare `(b)' would leave the operand's type unchanged).
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#endif
297186690Sobrien
/*
 * Per-level bookkeeping record; struct magic_set keeps a pointer to these
 * in its `c.li' member.
 *
 * This is a type declaration only.  It deliberately carries no trailing
 * declarator: declaring an object here would define a global in every
 * translation unit that includes this header.
 */
struct level_info {
	int32_t off;
	int got_match;
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;	/* used for error checking by parse() */
#endif
};
306133359Sobrienstruct magic_set {
307169962Sobrien	struct mlist *mlist;
308169962Sobrien	struct cont {
309169962Sobrien		size_t len;
310186690Sobrien		struct level_info *li;
311169962Sobrien	} c;
312169962Sobrien	struct out {
313186690Sobrien		char *buf;		/* Accumulation buffer */
314186690Sobrien		char *pbuf;		/* Printable buffer */
315169962Sobrien	} o;
316169962Sobrien	uint32_t offset;
317169962Sobrien	int error;
318169962Sobrien	int flags;
319169962Sobrien	int haderr;
320169962Sobrien	const char *file;
321169962Sobrien	size_t line;			/* current magic line number */
322169962Sobrien
323169962Sobrien	/* data for searches */
324169962Sobrien	struct {
325169962Sobrien		const char *s;		/* start of search in original source */
326169962Sobrien		size_t s_len;		/* length of search region */
327169962Sobrien		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
328169962Sobrien		size_t rm_len;		/* match length */
329169962Sobrien	} search;
330169962Sobrien
331186690Sobrien	/* FIXME: Make the string dynamically allocated so that e.g.
332186690Sobrien	   strings matched in files can be longer than MAXstring */
333169962Sobrien	union VALUETYPE ms_value;	/* either number or string */
334133359Sobrien};
335133359Sobrien
/* Type for Unicode characters: an unsigned long per character. */
typedef unsigned long unichar;
338186690Sobrien
339103373Sobrienstruct stat;
340159764Sobrienprotected const char *file_fmttime(uint32_t, int);
341169962Sobrienprotected int file_buffer(struct magic_set *, int, const char *, const void *,
342169962Sobrien    size_t);
343133359Sobrienprotected int file_fsmagic(struct magic_set *, const char *, struct stat *);
344133359Sobrienprotected int file_pipe2file(struct magic_set *, int, const void *, size_t);
345186690Sobrienprotected int file_vprintf(struct magic_set *, const char *, va_list);
346186690Sobrienprotected int file_printf(struct magic_set *, const char *, ...)
347186690Sobrien    __attribute__((__format__(__printf__, 2, 3)));
348133359Sobrienprotected int file_reset(struct magic_set *);
349169962Sobrienprotected int file_tryelf(struct magic_set *, int, const unsigned char *,
350169962Sobrien    size_t);
351169962Sobrienprotected int file_zmagic(struct magic_set *, int, const char *,
352169962Sobrien    const unsigned char *, size_t);
353133359Sobrienprotected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
354133359Sobrienprotected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
355186690Sobrienprotected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
356133359Sobrienprotected struct mlist *file_apprentice(struct magic_set *, const char *, int);
357169962Sobrienprotected uint64_t file_signextend(struct magic_set *, struct magic *,
358169962Sobrien    uint64_t);
359133359Sobrienprotected void file_delmagic(struct magic *, int type, size_t entries);
360133359Sobrienprotected void file_badread(struct magic_set *);
361133359Sobrienprotected void file_badseek(struct magic_set *);
362169942Sobrienprotected void file_oomem(struct magic_set *, size_t);
363186690Sobrienprotected void file_error(struct magic_set *, int, const char *, ...)
364186690Sobrien    __attribute__((__format__(__printf__, 3, 4)));
365186690Sobrienprotected void file_magerror(struct magic_set *, const char *, ...)
366186690Sobrien    __attribute__((__format__(__printf__, 2, 3)));
367186690Sobrienprotected void file_magwarn(struct magic_set *, const char *, ...)
368186690Sobrien    __attribute__((__format__(__printf__, 2, 3)));
369133359Sobrienprotected void file_mdump(struct magic *);
370133359Sobrienprotected void file_showstr(FILE *, const char *, size_t);
371133359Sobrienprotected size_t file_mbswidth(const char *);
372133359Sobrienprotected const char *file_getbuffer(struct magic_set *);
373169962Sobrienprotected ssize_t sread(int, void *, size_t, int);
374169962Sobrienprotected int file_check_mem(struct magic_set *, unsigned int);
375186690Sobrienprotected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
37668349Sobrien
/*
 * Table of names and its element count, defined in another translation
 * unit.  Hidden when COMPILE_ONLY is defined.
 */
#ifndef COMPILE_ONLY
extern const char *file_names[];
extern const size_t file_nnames;
#endif
381169942Sobrien
/*
 * Fallbacks for C library routines the build reports as missing (the
 * corresponding HAVE_* macro is undefined).
 */

/* strerror(): look the message up in sys_errlist, bounds-checked. */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif

/*
 * strtoul(): approximated with strtol(), so the result has type long and
 * cannot exceed LONG_MAX.
 */
#ifndef HAVE_STRTOUL
#define strtoul(a, b, c)	strtol(a, b, c)
#endif

/* vasprintf()/asprintf(): only declared here; defined elsewhere. */
#ifndef HAVE_VASPRINTF
int vasprintf(char **, const char *, va_list);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **ptr, const char *format_string, ...);
#endif
399159764Sobrien
/*
 * QUICK is switched on automatically when both mmap() and <sys/mman.h>
 * are reported available, unless the build already defined it.
 */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif

/* Lets O_BINARY be OR-ed into open() flags where the platform lacks it. */
#ifndef O_BINARY
#define O_BINARY	0
#endif
407159764Sobrien
/*
 * FILE_RCSID(id): in C, defines a file-local function rcsid() whose body
 * contains the identification string `id'.  The function calls itself, so
 * it counts as referenced; under GNU compilers it is also marked __used__.
 * In C++ the macro expands to nothing.
 */
#ifndef __cplusplus
#ifdef __GNUC__
static const char *rcsid(const char *) __attribute__((__used__));
#endif
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#else
#define FILE_RCSID(id)
#endif
41968349Sobrien
42068349Sobrien#endif /* __file_h__ */
421