file.h revision 276415
/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * file.h - definitions for file(1) program
 * @(#)$File: file.h,v 1.161 2014/12/04 15:56:46 christos Exp $
 */

3390792Sgshapiro#ifndef __file_h__
3490792Sgshapiro#define __file_h__
35168515Sgshapiro
/* Pull in the build-configuration header (HAVE_* feature macros) if present. */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * printf(3) length modifiers for size_t and int64_t arguments, meant to be
 * spliced into a format string, e.g. "%" SIZE_T_FORMAT "u".
 * WIN32 builds use the "I64" spelling (empty for a 32-bit size_t);
 * every other platform uses the C99 "z" and "ll" modifiers.
 */
#ifdef WIN32
  #ifdef _WIN64
    #define SIZE_T_FORMAT "I64"
  #else
    #define SIZE_T_FORMAT ""
  #endif
  #define INT64_T_FORMAT "I64"
#else
  #define SIZE_T_FORMAT "z"
  #define INT64_T_FORMAT "ll"
#endif

#include <stdio.h>	/* Include that here, to make sure __P gets defined */
#include <errno.h>
#include <fcntl.h>	/* For open and flags */
#ifdef HAVE_STDINT_H
/* Must be defined before <stdint.h> is first included to have any effect. */
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#endif
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif
#include <regex.h>
#include <time.h>
#include <sys/types.h>
#ifndef WIN32
#include <sys/param.h>
#endif
/* Do this here and now, because struct stat gets re-defined on solaris */
#include <sys/stat.h>
#include <stdarg.h>

/*
 * Enables the conditional-entry support in this header: the `cond' member
 * of struct magic, the COND_* values and the extra level_info members.
 */
#define ENABLE_CONDITIONALS

/* Default magic file location, unless the build already supplied one. */
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif

/* Path-list separator: ';' on EMX and WIN32 builds, ':' everywhere else. */
#if defined(__EMX__) || defined (WIN32)
#define PATHSEP	';'
#else
#define PATHSEP	':'
#endif

/*
 * Linkage annotations placed in front of declarations:
 *   private   - file-local (expands to `static')
 *   public    - exported symbol ("default" visibility when supported)
 *   protected - library-internal symbol ("hidden" visibility when supported)
 * When HAVE_VISIBILITY is false, or on WIN32, public and protected expand
 * to nothing.  An already existing definition of `protected' is kept.
 */
#define private static

#if HAVE_VISIBILITY && !defined(WIN32)
#define public  __attribute__ ((__visibility__("default")))
#ifndef protected
#define protected __attribute__ ((__visibility__("hidden")))
#endif
#else
#define public
#ifndef protected
#define protected
#endif
#endif

/*
 * Number of elements in the array `a'.  Only meaningful for true arrays,
 * never for pointers.  The argument is parenthesized before it is indexed
 * so that an expression argument such as *(int (*)[4])p is measured as a
 * whole instead of having the subscript bind to its last operand.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/*
 * __GNUC_PREREQ__(x, y): non-zero when the compiler defines __GNUC__ and
 * reports a version of at least x.y; always 0 for other compilers.
 * A definition supplied by the system headers takes precedence.
 */
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define	__GNUC_PREREQ__(x, y)						\
	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
	 (__GNUC__ > (x)))
#else
#define	__GNUC_PREREQ__(x, y)	0
#endif
#endif

/*
 * Compilers that do not define __GNUC__ get __attribute__((...)) erased,
 * so the annotated declarations in this header still compile there.
 */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(a)
#endif
#endif

/*
 * Smaller / larger of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.  Definitions already
 * provided by system headers are kept.
 */
#ifndef MIN
#define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif

#ifndef HOWMANY
# define HOWMANY (256 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* max len of text description */
#define MAXMIME	80		/* max len of text MIME type */
#define MAXstring 64		/* max len of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	12
/* The members of struct magic add up to exactly this many bytes. */
#define FILE_MAGICSIZE	248

#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
#define FILE_LIST	3

146union VALUETYPE {
147	uint8_t b;
148	uint16_t h;
149	uint32_t l;
150	uint64_t q;
151	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
152	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
153	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
154	char s[MAXstring];	/* the search string or regex pattern */
155	unsigned char us[MAXstring];
156	float f;
157	double d;
158};
159
160struct magic {
161	/* Word 1 */
162	uint16_t cont_level;	/* level of ">" */
163	uint8_t flag;
164#define INDIR		0x01	/* if '(...)' appears */
165#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
166#define INDIROFFADD	0x04	/* if '>&(' appears */
167#define UNSIGNED	0x08	/* comparison is unsigned */
168#define NOSPACE		0x10	/* suppress space character before output */
169#define BINTEST		0x20	/* test is for a binary type (set only
170				   for top-level tests) */
171#define TEXTTEST	0x40	/* for passing to file_softmagic */
172
173	uint8_t factor;
174
175	/* Word 2 */
176	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
177	uint8_t vallen;		/* length of string value, if any */
178	uint8_t type;		/* comparison type (FILE_*) */
179	uint8_t in_type;	/* type of indirection */
180#define 			FILE_INVALID	0
181#define 			FILE_BYTE	1
182#define				FILE_SHORT	2
183#define				FILE_DEFAULT	3
184#define				FILE_LONG	4
185#define				FILE_STRING	5
186#define				FILE_DATE	6
187#define				FILE_BESHORT	7
188#define				FILE_BELONG	8
189#define				FILE_BEDATE	9
190#define				FILE_LESHORT	10
191#define				FILE_LELONG	11
192#define				FILE_LEDATE	12
193#define				FILE_PSTRING	13
194#define				FILE_LDATE	14
195#define				FILE_BELDATE	15
196#define				FILE_LELDATE	16
197#define				FILE_REGEX	17
198#define				FILE_BESTRING16	18
199#define				FILE_LESTRING16	19
200#define				FILE_SEARCH	20
201#define				FILE_MEDATE	21
202#define				FILE_MELDATE	22
203#define				FILE_MELONG	23
204#define				FILE_QUAD	24
205#define				FILE_LEQUAD	25
206#define				FILE_BEQUAD	26
207#define				FILE_QDATE	27
208#define				FILE_LEQDATE	28
209#define				FILE_BEQDATE	29
210#define				FILE_QLDATE	30
211#define				FILE_LEQLDATE	31
212#define				FILE_BEQLDATE	32
213#define				FILE_FLOAT	33
214#define				FILE_BEFLOAT	34
215#define				FILE_LEFLOAT	35
216#define				FILE_DOUBLE	36
217#define				FILE_BEDOUBLE	37
218#define				FILE_LEDOUBLE	38
219#define				FILE_BEID3	39
220#define				FILE_LEID3	40
221#define				FILE_INDIRECT	41
222#define				FILE_QWDATE	42
223#define				FILE_LEQWDATE	43
224#define				FILE_BEQWDATE	44
225#define				FILE_NAME	45
226#define				FILE_USE	46
227#define				FILE_CLEAR	47
228#define				FILE_NAMES_SIZE	48 /* size of array to contain all names */
229
230#define IS_STRING(t) \
231	((t) == FILE_STRING || \
232	 (t) == FILE_PSTRING || \
233	 (t) == FILE_BESTRING16 || \
234	 (t) == FILE_LESTRING16 || \
235	 (t) == FILE_REGEX || \
236	 (t) == FILE_SEARCH || \
237	 (t) == FILE_NAME || \
238	 (t) == FILE_USE)
239
240#define FILE_FMT_NONE 0
241#define FILE_FMT_NUM  1 /* "cduxXi" */
242#define FILE_FMT_STR  2 /* "s" */
243#define FILE_FMT_QUAD 3 /* "ll" */
244#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
245#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */
246
247	/* Word 3 */
248	uint8_t in_op;		/* operator for indirection */
249	uint8_t mask_op;	/* operator for mask */
250#ifdef ENABLE_CONDITIONALS
251	uint8_t cond;		/* conditional type */
252#else
253	uint8_t dummy;
254#endif
255	uint8_t factor_op;
256#define		FILE_FACTOR_OP_PLUS	'+'
257#define		FILE_FACTOR_OP_MINUS	'-'
258#define		FILE_FACTOR_OP_TIMES	'*'
259#define		FILE_FACTOR_OP_DIV	'/'
260#define		FILE_FACTOR_OP_NONE	'\0'
261
262#define				FILE_OPS	"&|^+-*/%"
263#define				FILE_OPAND	0
264#define				FILE_OPOR	1
265#define				FILE_OPXOR	2
266#define				FILE_OPADD	3
267#define				FILE_OPMINUS	4
268#define				FILE_OPMULTIPLY	5
269#define				FILE_OPDIVIDE	6
270#define				FILE_OPMODULO	7
271#define				FILE_OPS_MASK	0x07 /* mask for above ops */
272#define				FILE_UNUSED_1	0x08
273#define				FILE_UNUSED_2	0x10
274#define				FILE_UNUSED_3	0x20
275#define				FILE_OPINVERSE	0x40
276#define				FILE_OPINDIRECT	0x80
277
278#ifdef ENABLE_CONDITIONALS
279#define				COND_NONE	0
280#define				COND_IF		1
281#define				COND_ELIF	2
282#define				COND_ELSE	3
283#endif /* ENABLE_CONDITIONALS */
284
285	/* Word 4 */
286	uint32_t offset;	/* offset to magic number */
287	/* Word 5 */
288	int32_t in_offset;	/* offset from indirection */
289	/* Word 6 */
290	uint32_t lineno;	/* line number in magic file */
291	/* Word 7,8 */
292	union {
293		uint64_t _mask;	/* for use with numeric and date types */
294		struct {
295			uint32_t _count;	/* repeat/line count */
296			uint32_t _flags;	/* modifier flags */
297		} _s;		/* for use with string types */
298	} _u;
299#define num_mask _u._mask
300#define str_range _u._s._count
301#define str_flags _u._s._flags
302	/* Words 9-16 */
303	union VALUETYPE value;	/* either number or string */
304	/* Words 17-32 */
305	char desc[MAXDESC];	/* description */
306	/* Words 33-52 */
307	char mimetype[MAXMIME]; /* MIME type */
308	/* Words 53-54 */
309	char apple[8];
310};
311
/*
 * Modifier flag bits and the modifier characters that go with them.
 *
 * Two pairs of flags deliberately share a bit:
 *   - PSTRING_1_BE and PSTRING_1_LE are both BIT(7), which is why
 *     PSTRING_LEN lists only one of them;
 *   - PSTRING_4_LE and REGEX_LINE_COUNT are both BIT(11).
 *     NOTE(review): presumably safe because the PSTRING_* and REGEX_* flags
 *     apply to different entry types - confirm against the users.
 */
#define BIT(A)   (1 << (A))
#define STRING_COMPACT_WHITESPACE		BIT(0)
#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define STRING_IGNORE_LOWERCASE			BIT(2)
#define STRING_IGNORE_UPPERCASE			BIT(3)
#define REGEX_OFFSET_START			BIT(4)
#define STRING_TEXTTEST				BIT(5)
#define STRING_BINTEST				BIT(6)
#define PSTRING_1_BE				BIT(7)
#define PSTRING_1_LE				BIT(7)
#define PSTRING_2_BE				BIT(8)
#define PSTRING_2_LE				BIT(9)
#define PSTRING_4_BE				BIT(10)
#define PSTRING_4_LE				BIT(11)
#define REGEX_LINE_COUNT			BIT(11)
/* Every PSTRING_<width>_<order> bit. */
#define PSTRING_LEN	\
    (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
#define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
#define	STRING_TRIM				BIT(13)
#define CHAR_COMPACT_WHITESPACE			'W'
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
#define CHAR_IGNORE_LOWERCASE			'c'
#define CHAR_IGNORE_UPPERCASE			'C'
#define CHAR_REGEX_OFFSET_START			's'
#define CHAR_TEXTTEST				't'
#define	CHAR_TRIM				'T'
#define CHAR_BINTEST				'b'
#define CHAR_PSTRING_1_BE			'B'
#define CHAR_PSTRING_1_LE			'B'
#define CHAR_PSTRING_2_BE			'H'
#define CHAR_PSTRING_2_LE			'h'
#define CHAR_PSTRING_4_BE			'L'
#define CHAR_PSTRING_4_LE			'l'
#define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100


/*
 * list of magic entries
 *
 * Each node refers to one array of struct magic and is linked to its
 * neighbours in both directions.
 */
struct mlist {
	struct magic *magic;		/* array of magic entries */
	uint32_t nmagic;		/* number of entries in array */
	void *map;			/* internal resources used by entry */
	struct mlist *next, *prev;	/* neighbouring list nodes */
};

/*
 * Cast helpers: CAST for value conversions, RCAST for reinterpreting
 * conversions.  They map onto static_cast / reinterpret_cast when compiled
 * as C++ and onto a plain cast in C.
 *
 * The C forms are wrapped in an outer pair of parentheses so that a postfix
 * operator applied to the result (->, [], ()) acts on the converted value;
 * without them, CAST(T *, p)->m would parse as (T *)((p)->m).
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#define RCAST(T, b)	reinterpret_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#define RCAST(T, b)	((T)(b))
#endif

/*
 * Per-level bookkeeping record; struct magic_set keeps an array of these
 * (member c.li) together with its length (c.len).
 * The last_* members exist only when ENABLE_CONDITIONALS is defined.
 */
struct level_info {
	int32_t off;
	int got_match;
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;	/* used for error checking by parse() */
#endif
};

375#define MAGIC_SETS	2
376
377struct magic_set {
378	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
379	struct cont {
380		size_t len;
381		struct level_info *li;
382	} c;
383	struct out {
384		char *buf;		/* Accumulation buffer */
385		char *pbuf;		/* Printable buffer */
386	} o;
387	uint32_t offset;
388	int error;
389	int flags;			/* Control magic tests. */
390	int event_flags;		/* Note things that happened. */
391#define 		EVENT_HAD_ERR		0x01
392	const char *file;
393	size_t line;			/* current magic line number */
394
395	/* data for searches */
396	struct {
397		const char *s;		/* start of search in original source */
398		size_t s_len;		/* length of search region */
399		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
400		size_t rm_len;		/* match length */
401	} search;
402
403	/* FIXME: Make the string dynamically allocated so that e.g.
404	   strings matched in files can be longer than MAXstring */
405	union VALUETYPE ms_value;	/* either number or string */
406	uint16_t indir_max;
407	uint16_t name_max;
408	uint16_t elf_shnum_max;
409	uint16_t elf_phnum_max;
410#define	FILE_INDIR_MAX			15
411#define	FILE_NAME_MAX			30
412#define	FILE_ELF_SHNUM_MAX		32768
413#define	FILE_ELF_PHNUM_MAX		128
414};
415
/* Type for Unicode characters (unsigned, at least 32 bits wide) */
typedef unsigned long unichar;

419struct stat;
420#define FILE_T_LOCAL	1
421#define FILE_T_WINDOWS	2
422protected const char *file_fmttime(uint64_t, int, char *);
423protected struct magic_set *file_ms_alloc(int);
424protected void file_ms_free(struct magic_set *);
425protected int file_buffer(struct magic_set *, int, const char *, const void *,
426    size_t);
427protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
428protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
429protected int file_vprintf(struct magic_set *, const char *, va_list)
430    __attribute__((__format__(__printf__, 2, 0)));
431protected size_t file_printedlen(const struct magic_set *);
432protected int file_replace(struct magic_set *, const char *, const char *);
433protected int file_printf(struct magic_set *, const char *, ...)
434    __attribute__((__format__(__printf__, 2, 3)));
435protected int file_reset(struct magic_set *);
436protected int file_tryelf(struct magic_set *, int, const unsigned char *,
437    size_t);
438protected int file_trycdf(struct magic_set *, int, const unsigned char *,
439    size_t);
440#if HAVE_FORK
441protected int file_zmagic(struct magic_set *, int, const char *,
442    const unsigned char *, size_t);
443#endif
444protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
445    int);
446protected int file_ascmagic_with_encoding(struct magic_set *,
447    const unsigned char *, size_t, unichar *, size_t, const char *,
448    const char *, int);
449protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
450    unichar **, size_t *, const char **, const char **, const char **);
451protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
452protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
453    uint16_t, uint16_t *, int, int);
454protected int file_apprentice(struct magic_set *, const char *, int);
455protected int buffer_apprentice(struct magic_set *, struct magic **,
456    size_t *, size_t);
457protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
458protected uint64_t file_signextend(struct magic_set *, struct magic *,
459    uint64_t);
460protected void file_badread(struct magic_set *);
461protected void file_badseek(struct magic_set *);
462protected void file_oomem(struct magic_set *, size_t);
463protected void file_error(struct magic_set *, int, const char *, ...)
464    __attribute__((__format__(__printf__, 3, 4)));
465protected void file_magerror(struct magic_set *, const char *, ...)
466    __attribute__((__format__(__printf__, 2, 3)));
467protected void file_magwarn(struct magic_set *, const char *, ...)
468    __attribute__((__format__(__printf__, 2, 3)));
469protected void file_mdump(struct magic *);
470protected void file_showstr(FILE *, const char *, size_t);
471protected size_t file_mbswidth(const char *);
472protected const char *file_getbuffer(struct magic_set *);
473protected ssize_t sread(int, void *, size_t, int);
474protected int file_check_mem(struct magic_set *, unsigned int);
475protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
476    size_t *);
477protected size_t file_pstring_length_size(const struct magic *);
478protected size_t file_pstring_get_length(const struct magic *, const char *);
479#ifdef __EMX__
480protected int file_os2_apptype(struct magic_set *, const char *, const void *,
481    size_t);
482#endif /* __EMX__ */
483
#if defined(HAVE_LOCALE_H)
#include <locale.h>
#endif
#if defined(HAVE_XLOCALE_H)
#include <xlocale.h>
#endif

/*
 * A regular expression together with the state kept alongside it.
 * When newlocale/uselocale/freelocale are all available, USE_C_LOCALE is
 * defined (as a side effect of this declaration) and two locale handles
 * are stored next to the pattern.
 */
typedef struct {
	const char *pat;	/* pattern source text */
#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
#define USE_C_LOCALE
	locale_t old_lc_ctype;
	locale_t c_lc_ctype;
#endif
	int rc;			/* result code */
	regex_t rx;		/* the regex(3) object */
} file_regex_t;

502protected int file_regcomp(file_regex_t *, const char *, int);
503protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
504    int);
505protected void file_regfree(file_regex_t *);
506protected void file_regerror(file_regex_t *, int, struct magic_set *);
507
/*
 * A buffer pointer paired with an offset, used by file_push_buffer() and
 * file_pop_buffer().
 */
typedef struct {
	char *buf;
	uint32_t offset;
} file_pushbuf_t;

513protected file_pushbuf_t *file_push_buffer(struct magic_set *);
514protected char  *file_pop_buffer(struct magic_set *, file_pushbuf_t *);
515
/* Name table and its element count; hidden when COMPILE_ONLY is defined. */
#ifndef COMPILE_ONLY
extern const char *file_names[];
extern const size_t file_nnames;
#endif

/*
 * Fallback for systems without strerror(3): look the message up in
 * sys_errlist[], bounds-checked against sys_nerr, and yield
 * "Unknown error" for out-of-range values.  The argument is evaluated
 * more than once.
 */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif

/*
 * Fallback for systems without strtoul(3), built on strtol(3).
 * The result is converted to unsigned long so that the expression has the
 * same type as the real function (e.g. "-1" yields ULONG_MAX rather than
 * a negative long).  Values above LONG_MAX still cannot be parsed.
 */
#ifndef HAVE_STRTOUL
#define strtoul(a, b, c)	((unsigned long)strtol((a), (b), (c)))
#endif

/*
 * Prototypes for library functions that may be missing from the host C
 * library.  Each one is declared only when the corresponding HAVE_* macro
 * is not defined.
 */
#ifndef HAVE_PREAD
ssize_t pread(int, void *, size_t, off_t);
#endif
#ifndef HAVE_VASPRINTF
int vasprintf(char **, const char *, va_list);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **, const char *, ...);
#endif

#ifndef HAVE_STRLCPY
size_t strlcpy(char *, const char *, size_t);
#endif
#ifndef HAVE_STRLCAT
size_t strlcat(char *, const char *, size_t);
#endif
#ifndef HAVE_STRCASESTR
char *strcasestr(const char *, const char *);
#endif
#ifndef HAVE_GETLINE
ssize_t getline(char **, size_t *, FILE *);
ssize_t getdelim(char **, size_t *, int, FILE *);
#endif
#ifndef HAVE_CTIME_R
char   *ctime_r(const time_t *, char *);
#endif
#ifndef HAVE_ASCTIME_R
char   *asctime_r(const struct tm *, char *);
#endif
#ifndef HAVE_FMTCHECK
/* __format_arg__(2): the second argument is itself a format string. */
const char *fmtcheck(const char *, const char *)
     __attribute__((__format_arg__(2)));
#endif

/* Define QUICK when both mmap and <sys/mman.h> are available. */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif

/*
 * Where <fcntl.h> provides no O_BINARY, define it as 0 so that it can be
 * OR-ed into open flags unconditionally.
 */
#ifndef O_BINARY
#define O_BINARY	0
#endif

/*
 * FILE_RCSID(id): embed the version string `id' in the translation unit.
 *   - GCC 3 and later: a static array marked __used__, so it is kept even
 *     though nothing references it.
 *   - other C compilers: a static function that refers to itself and to
 *     `id'; it is never meant to be called.
 *   - C++: expands to nothing.
 * The expansion carries its own terminator; write FILE_RCSID("...")
 * without a trailing semicolon.
 */
#ifndef __cplusplus
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
#else
#define FILE_RCSID(id)
#endif

588#endif /* __file_h__ */
589