file.h revision 186690
1198157Srrs/*
2198157Srrs * Copyright (c) Ian F. Darwin 1986-1995.
3198157Srrs * Software written by Ian F. Darwin and others;
4198157Srrs * maintained 1995-present by Christos Zoulas and others.
5198157Srrs *
6198157Srrs * Redistribution and use in source and binary forms, with or without
7198157Srrs * modification, are permitted provided that the following conditions
8198157Srrs * are met:
9198157Srrs * 1. Redistributions of source code must retain the above copyright
10198157Srrs *    notice immediately at the beginning of the file, without modification,
11198157Srrs *    this list of conditions, and the following disclaimer.
12198157Srrs * 2. Redistributions in binary form must reproduce the above copyright
13198157Srrs *    notice, this list of conditions and the following disclaimer in the
14198157Srrs *    documentation and/or other materials provided with the distribution.
15198157Srrs *
16198157Srrs * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17198157Srrs * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18198157Srrs * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19198157Srrs * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20198157Srrs * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21198157Srrs * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22198157Srrs * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23198157Srrs * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24198157Srrs * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25198157Srrs * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26198157Srrs * SUCH DAMAGE.
27198157Srrs */
28198157Srrs/*
29198157Srrs * file.h - definitions for file(1) program
30198157Srrs * @(#)$File: file.h,v 1.108 2008/07/16 18:00:57 christos Exp $
31198157Srrs */
32198157Srrs
33198157Srrs#ifndef __file_h__
34198157Srrs#define __file_h__
35198157Srrs
36198157Srrs#ifdef HAVE_CONFIG_H
37198157Srrs#include <config.h>
38198157Srrs#endif
39198157Srrs
40198157Srrs#include <stdio.h>	/* Include that here, to make sure __P gets defined */
41198157Srrs#include <errno.h>
42198157Srrs#include <fcntl.h>	/* For open and flags */
43198157Srrs#ifdef HAVE_STDINT_H
44198157Srrs#include <stdint.h>
45198157Srrs#endif
46198157Srrs#ifdef HAVE_INTTYPES_H
47198157Srrs#include <inttypes.h>
48198157Srrs#endif
49198157Srrs#include <regex.h>
50198157Srrs#include <sys/types.h>
51198157Srrs/* Do this here and now, because struct stat gets re-defined on solaris */
52198157Srrs#include <sys/stat.h>
53198157Srrs#include <stdarg.h>
54198157Srrs
/* Enables the `cond' member of struct magic and the COND_* constants. */
#define ENABLE_CONDITIONALS

/* Fallback value for MAGIC; an earlier definition takes precedence. */
#if !defined(MAGIC)
# define MAGIC "/etc/magic"
#endif

/* Path separator character: ';' under EMX, ':' everywhere else. */
#if defined(__EMX__)
# define PATHSEP	';'
#else
# define PATHSEP	':'
#endif

/*
 * Linkage markers used on declarations: `private' expands to `static',
 * `protected' (unless already defined) and `public' expand to nothing.
 */
#define private static
#if !defined(protected)
# define protected
#endif
#define public
72198157Srrs
/*
 * __GNUC_PREREQ__(x, y): non-zero when the compiler defines __GNUC__ and its
 * version is at least x.y; always 0 when __GNUC__ is not defined.
 * Only supplied when the macro is not already defined.
 */
#if !defined(__GNUC_PREREQ__)
# if defined(__GNUC__)
#  define __GNUC_PREREQ__(x, y)						\
	((__GNUC__ > (x)) ||						\
	 (__GNUC__ == (x) && __GNUC_MINOR__ >= (y)))
# else
#  define __GNUC_PREREQ__(x, y)	0
# endif
#endif

/* Without __GNUC__, make __attribute__((...)) annotations expand to nothing. */
#if !defined(__GNUC__) && !defined(__attribute__)
# define __attribute__(a)
#endif
88198627Srrs
/*
 * Smaller / larger of two values.  Each argument may be evaluated twice, so
 * do not pass expressions with side effects.  Existing definitions are kept.
 */
#if !defined(MIN)
# define MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#if !defined(MAX)
# define MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif
96198157Srrs
#if !defined(HOWMANY)
# define HOWMANY (256 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* max length of text description/MIME type */
#define MAXstring 32		/* max length of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	6
/* 192 bytes; NOTE(review): presumably the expected sizeof(struct magic). */
#define FILE_MAGICSIZE	(32 * 6)

#define FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
112198627Srrs
113198157Srrsunion VALUETYPE {
114198627Srrs	uint8_t b;
115198627Srrs	uint16_t h;
116198157Srrs	uint32_t l;
117198157Srrs	uint64_t q;
118198627Srrs	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
119198157Srrs	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
120198627Srrs	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
121198627Srrs	char s[MAXstring];	/* the search string or regex pattern */
122198627Srrs	unsigned char us[MAXstring];
123198627Srrs	float f;
124198627Srrs	double d;
125198627Srrs};
126198627Srrs
127198627Srrsstruct magic {
128198157Srrs	/* Word 1 */
129198627Srrs	uint16_t cont_level;	/* level of ">" */
130198627Srrs	uint8_t flag;
131198627Srrs#define INDIR		0x01	/* if '(...)' appears */
132198157Srrs#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
133198627Srrs#define INDIROFFADD	0x04	/* if '>&(' appears */
134198627Srrs#define UNSIGNED	0x08	/* comparison is unsigned */
135198627Srrs#define NOSPACE		0x10	/* suppress space character before output */
136198627Srrs#define BINTEST		0x20	/* test is for a binary type (set only
137198157Srrs                                   for top-level tests) */
138198627Srrs#define TEXTTEST	0	/* for passing to file_softmagic */
139198627Srrs
140198627Srrs	uint8_t factor;
141198157Srrs
142198157Srrs	/* Word 2 */
143198627Srrs	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
144198627Srrs	uint8_t vallen;		/* length of string value, if any */
145198627Srrs	uint8_t type;		/* comparison type (FILE_*) */
146198157Srrs	uint8_t in_type;	/* type of indirection */
147198627Srrs#define 			FILE_INVALID	0
148198627Srrs#define 			FILE_BYTE	1
149198627Srrs#define				FILE_SHORT	2
150198157Srrs#define				FILE_DEFAULT	3
151198627Srrs#define				FILE_LONG	4
152198627Srrs#define				FILE_STRING	5
153198627Srrs#define				FILE_DATE	6
154198157Srrs#define				FILE_BESHORT	7
155198627Srrs#define				FILE_BELONG	8
156198627Srrs#define				FILE_BEDATE	9
157198627Srrs#define				FILE_LESHORT	10
158198157Srrs#define				FILE_LELONG	11
159198157Srrs#define				FILE_LEDATE	12
160198627Srrs#define				FILE_PSTRING	13
161198627Srrs#define				FILE_LDATE	14
162198157Srrs#define				FILE_BELDATE	15
163198157Srrs#define				FILE_LELDATE	16
164198157Srrs#define				FILE_REGEX	17
165198627Srrs#define				FILE_BESTRING16	18
166198157Srrs#define				FILE_LESTRING16	19
167198157Srrs#define				FILE_SEARCH	20
168198157Srrs#define				FILE_MEDATE	21
169198157Srrs#define				FILE_MELDATE	22
170198157Srrs#define				FILE_MELONG	23
171198157Srrs#define				FILE_QUAD	24
172198157Srrs#define				FILE_LEQUAD	25
173198157Srrs#define				FILE_BEQUAD	26
174198157Srrs#define				FILE_QDATE	27
175198157Srrs#define				FILE_LEQDATE	28
176198627Srrs#define				FILE_BEQDATE	29
177198627Srrs#define				FILE_QLDATE	30
178198627Srrs#define				FILE_LEQLDATE	31
179198627Srrs#define				FILE_BEQLDATE	32
180198157Srrs#define				FILE_FLOAT	33
181198627Srrs#define				FILE_BEFLOAT	34
182198627Srrs#define				FILE_LEFLOAT	35
183198627Srrs#define				FILE_DOUBLE	36
184198627Srrs#define				FILE_BEDOUBLE	37
185198627Srrs#define				FILE_LEDOUBLE	38
186198627Srrs#define				FILE_NAMES_SIZE	39/* size of array to contain all names */
187198627Srrs
188198627Srrs#define IS_STRING(t) \
189198627Srrs	((t) == FILE_STRING || \
190198157Srrs	 (t) == FILE_PSTRING || \
191198627Srrs	 (t) == FILE_BESTRING16 || \
192198157Srrs	 (t) == FILE_LESTRING16 || \
193198157Srrs	 (t) == FILE_REGEX || \
194198157Srrs	 (t) == FILE_SEARCH || \
195198157Srrs	 (t) == FILE_DEFAULT)
196198157Srrs
197198157Srrs#define FILE_FMT_NONE 0
198198157Srrs#define FILE_FMT_NUM  1 /* "cduxXi" */
199198157Srrs#define FILE_FMT_STR  2 /* "s" */
200198157Srrs#define FILE_FMT_QUAD 3 /* "ll" */
201198157Srrs#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
202198157Srrs#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
203198627Srrs
204198157Srrs	/* Word 3 */
205198627Srrs	uint8_t in_op;		/* operator for indirection */
206198627Srrs	uint8_t mask_op;	/* operator for mask */
207198627Srrs#ifdef ENABLE_CONDITIONALS
208198627Srrs	uint8_t cond;		/* conditional type */
209198157Srrs#else
210198157Srrs	uint8_t dummy;
211198627Srrs#endif
212198627Srrs	uint8_t factor_op;
213198157Srrs#define		FILE_FACTOR_OP_PLUS	'+'
214198157Srrs#define		FILE_FACTOR_OP_MINUS	'-'
215198627Srrs#define		FILE_FACTOR_OP_TIMES	'*'
216198627Srrs#define		FILE_FACTOR_OP_DIV	'/'
217198627Srrs#define		FILE_FACTOR_OP_NONE	'\0'
218198627Srrs
219198627Srrs#define				FILE_OPS	"&|^+-*/%"
220198157Srrs#define				FILE_OPAND	0
221198627Srrs#define				FILE_OPOR	1
222198627Srrs#define				FILE_OPXOR	2
223198627Srrs#define				FILE_OPADD	3
224198157Srrs#define				FILE_OPMINUS	4
225198627Srrs#define				FILE_OPMULTIPLY	5
226198627Srrs#define				FILE_OPDIVIDE	6
227198627Srrs#define				FILE_OPMODULO	7
228198627Srrs#define				FILE_OPS_MASK	0x07 /* mask for above ops */
229198627Srrs#define				FILE_UNUSED_1	0x08
230198627Srrs#define				FILE_UNUSED_2	0x10
231198627Srrs#define				FILE_UNUSED_3	0x20
232198627Srrs#define				FILE_OPINVERSE	0x40
233198627Srrs#define				FILE_OPINDIRECT	0x80
234198627Srrs
235198157Srrs#ifdef ENABLE_CONDITIONALS
236198627Srrs#define				COND_NONE	0
237198627Srrs#define				COND_IF		1
238198627Srrs#define				COND_ELIF	2
239198627Srrs#define				COND_ELSE	3
240198627Srrs#endif /* ENABLE_CONDITIONALS */
241198627Srrs
242198627Srrs	/* Word 4 */
243198627Srrs	uint32_t offset;	/* offset to magic number */
244198627Srrs	/* Word 5 */
245198627Srrs	int32_t in_offset;	/* offset from indirection */
246198627Srrs	/* Word 6 */
247198627Srrs	uint32_t lineno;	/* line number in magic file */
248198627Srrs	/* Word 7,8 */
249198627Srrs	union {
250198627Srrs		uint64_t _mask;	/* for use with numeric and date types */
251198627Srrs		struct {
252198627Srrs			uint32_t _count;	/* repeat/line count */
253198627Srrs			uint32_t _flags;	/* modifier flags */
254198627Srrs		} _s;		/* for use with string types */
255198157Srrs	} _u;
256198157Srrs#define num_mask _u._mask
257198627Srrs#define str_range _u._s._count
258198157Srrs#define str_flags _u._s._flags
259198627Srrs	/* Words 9-16 */
260198627Srrs	union VALUETYPE value;		/* either number or string */
261198627Srrs	/* Words 17..31 */
262198627Srrs	char desc[MAXDESC];	/* description */
263198627Srrs	/* Words 32..47 */
264198627Srrs	char mimetype[MAXDESC]; /* MIME type */
265198627Srrs};
266198627Srrs
/* BIT(A): an int with only bit number A set. */
#define BIT(A)   (1 << (A))

/*
 * String modifier flags (STRING_* / REGEX_*), each paired with a CHAR_*
 * character constant of the corresponding name.
 */
#define STRING_COMPACT_BLANK		BIT(0)
#define CHAR_COMPACT_BLANK		'B'
#define STRING_COMPACT_OPTIONAL_BLANK	BIT(1)
#define CHAR_COMPACT_OPTIONAL_BLANK	'b'
#define STRING_IGNORE_LOWERCASE		BIT(2)
#define CHAR_IGNORE_LOWERCASE		'c'
#define STRING_IGNORE_UPPERCASE		BIT(3)
#define CHAR_IGNORE_UPPERCASE		'C'
#define REGEX_OFFSET_START		BIT(4)
#define CHAR_REGEX_OFFSET_START		's'

/* Both case-insensitivity flags combined. */
#define STRING_IGNORE_CASE \
	(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100
280198627Srrs
281198627Srrs
282198627Srrs/* list of magic entries */
283198627Srrsstruct mlist {
284198627Srrs	struct magic *magic;		/* array of magic entries */
285198627Srrs	uint32_t nmagic;			/* number of entries in array */
286198627Srrs	int mapped;  /* allocation type: 0 => apprentice_file
287198627Srrs		      *                  1 => apprentice_map + malloc
288198627Srrs		      *                  2 => apprentice_map + mmap */
289198627Srrs	struct mlist *next, *prev;
290198627Srrs};
291198627Srrs
/*
 * CAST(T, b): convert expression b to type T.
 *
 * C++ uses static_cast; C uses an ordinary cast.  The C form must really
 * perform the conversion: expanding to a bare (b) would leave the value in
 * its original type and make the two languages disagree.
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#endif
297198627Srrs
298198627Srrsstruct level_info {
299198627Srrs	int32_t off;
300198627Srrs	int got_match;
301198627Srrs#ifdef ENABLE_CONDITIONALS
302198157Srrs	int last_match;
303198627Srrs	int last_cond;	/* used for error checking by parse() */
304198627Srrs#endif
305198157Srrs} *li;
306198157Srrsstruct magic_set {
307198157Srrs	struct mlist *mlist;
308198157Srrs	struct cont {
309198157Srrs		size_t len;
310198157Srrs		struct level_info *li;
311198157Srrs	} c;
312198157Srrs	struct out {
313198157Srrs		char *buf;		/* Accumulation buffer */
314198157Srrs		char *pbuf;		/* Printable buffer */
315198627Srrs	} o;
316198627Srrs	uint32_t offset;
317198627Srrs	int error;
318198157Srrs	int flags;
319198627Srrs	int haderr;
320198627Srrs	const char *file;
321198157Srrs	size_t line;			/* current magic line number */
322198627Srrs
323198627Srrs	/* data for searches */
324198627Srrs	struct {
325198157Srrs		const char *s;		/* start of search in original source */
326198627Srrs		size_t s_len;		/* length of search region */
327198157Srrs		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
328198627Srrs		size_t rm_len;		/* match length */
329198157Srrs	} search;
330198157Srrs
331198157Srrs	/* FIXME: Make the string dynamically allocated so that e.g.
332198157Srrs	   strings matched in files can be longer than MAXstring */
333198627Srrs	union VALUETYPE ms_value;	/* either number or string */
334198627Srrs};
335198157Srrs
/* Unicode character type: an unsigned long. */
typedef unsigned long int unichar;
338198157Srrs
339198627Srrsstruct stat;
340198627Srrsprotected const char *file_fmttime(uint32_t, int);
341198157Srrsprotected int file_buffer(struct magic_set *, int, const char *, const void *,
342198627Srrs    size_t);
343198157Srrsprotected int file_fsmagic(struct magic_set *, const char *, struct stat *);
344198157Srrsprotected int file_pipe2file(struct magic_set *, int, const void *, size_t);
345198627Srrsprotected int file_vprintf(struct magic_set *, const char *, va_list);
346198627Srrsprotected int file_printf(struct magic_set *, const char *, ...)
347198627Srrs    __attribute__((__format__(__printf__, 2, 3)));
348198627Srrsprotected int file_reset(struct magic_set *);
349198627Srrsprotected int file_tryelf(struct magic_set *, int, const unsigned char *,
350198627Srrs    size_t);
351198157Srrsprotected int file_zmagic(struct magic_set *, int, const char *,
352198627Srrs    const unsigned char *, size_t);
353198627Srrsprotected int file_ascmagic(struct magic_set *, const unsigned char *, size_t);
354198627Srrsprotected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
355198627Srrsprotected int file_softmagic(struct magic_set *, const unsigned char *, size_t, int);
356198627Srrsprotected struct mlist *file_apprentice(struct magic_set *, const char *, int);
357198157Srrsprotected uint64_t file_signextend(struct magic_set *, struct magic *,
358198627Srrs    uint64_t);
359198627Srrsprotected void file_delmagic(struct magic *, int type, size_t entries);
360198627Srrsprotected void file_badread(struct magic_set *);
361198627Srrsprotected void file_badseek(struct magic_set *);
362198627Srrsprotected void file_oomem(struct magic_set *, size_t);
363198157Srrsprotected void file_error(struct magic_set *, int, const char *, ...)
364198627Srrs    __attribute__((__format__(__printf__, 3, 4)));
365198627Srrsprotected void file_magerror(struct magic_set *, const char *, ...)
366198627Srrs    __attribute__((__format__(__printf__, 2, 3)));
367198627Srrsprotected void file_magwarn(struct magic_set *, const char *, ...)
368198157Srrs    __attribute__((__format__(__printf__, 2, 3)));
369198627Srrsprotected void file_mdump(struct magic *);
370198627Srrsprotected void file_showstr(FILE *, const char *, size_t);
371198157Srrsprotected size_t file_mbswidth(const char *);
372198157Srrsprotected const char *file_getbuffer(struct magic_set *);
373198157Srrsprotected ssize_t sread(int, void *, size_t, int);
374198157Srrsprotected int file_check_mem(struct magic_set *, unsigned int);
375198157Srrsprotected int file_looks_utf8(const unsigned char *, size_t, unichar *, size_t *);
376198157Srrs
/* Declared only when COMPILE_ONLY is not defined; defined in another file. */
#if !defined(COMPILE_ONLY)
extern const char *file_names[];
extern const size_t file_nnames;
#endif
381198627Srrs
/*
 * strerror() fallback used when HAVE_STRERROR is not defined: looks the
 * error number up in sys_errlist[], yielding "Unknown error" for values
 * outside 0 .. sys_nerr-1.  The argument may be evaluated more than once.
 */
#if !defined(HAVE_STRERROR)
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) < 0 || (e) >= sys_nerr) ? "Unknown error" : sys_errlist[(e)])
#endif
388198627Srrs
/*
 * strtoul() fallback used when HAVE_STRTOUL is not defined.
 *
 * Built on strtol(); the result is converted to unsigned long so the
 * expression has the type callers of strtoul() expect (e.g. "-1" yields
 * ULONG_MAX rather than a negative long).  Values above LONG_MAX are still
 * limited by strtol()'s range.
 */
#ifndef HAVE_STRTOUL
#define strtoul(a, b, c)	((unsigned long)strtol(a, b, c))
#endif
392198627Srrs
/* Prototypes for asprintf()/vasprintf() where HAVE_* is not defined. */
#if !defined(HAVE_VASPRINTF)
int vasprintf(char **ptr, const char *format_string, va_list vargs);
#endif
#if !defined(HAVE_ASPRINTF)
int asprintf(char **ptr, const char *format_string, ...);
#endif
399198627Srrs
/* Define QUICK when both HAVE_MMAP and HAVE_SYS_MMAN_H are defined. */
#if !defined(QUICK) && defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H)
# define QUICK
#endif

/* Where O_BINARY is not provided, make it a no-op flag value. */
#if !defined(O_BINARY)
# define O_BINARY	0
#endif
407198627Srrs
/*
 * FILE_RCSID(id): in C, defines a static function rcsid() whose body
 * refers to the string `id' and calls itself; in C++ it expands to nothing.
 * With __GNUC__ the function is additionally declared with the __used__
 * attribute.
 * NOTE(review): presumably the self-reference exists so the id string is
 * kept in the object file without unused-function warnings - confirm.
 */
#ifdef __cplusplus
#define FILE_RCSID(id)
#else
#ifdef __GNUC__
static const char *rcsid(const char *) __attribute__((__used__));
#endif
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
419198627Srrs
420198627Srrs#endif /* __file_h__ */
421198627Srrs