file.h revision 328875
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * file.h - definitions for file(1) program
30 * @(#)$File: file.h,v 1.183 2017/08/28 13:39:18 christos Exp $
31 */
32
33#ifndef __file_h__
34#define __file_h__
35
/*
 * Build prelude: include the configure results when available, then
 * <stdint.h> (with __STDC_LIMIT_MACROS defined beforehand) when the
 * platform has it.
 */
#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#ifdef HAVE_STDINT_H
#ifndef __STDC_LIMIT_MACROS
#define __STDC_LIMIT_MACROS
#endif
#include <stdint.h>
#endif

/*
 * printf length modifiers for size_t, int64_t and intmax_t arguments.
 * They only depend on WIN32/_WIN64, not on <stdint.h>, so they are defined
 * unconditionally instead of only when HAVE_STDINT_H is set.
 */
#ifdef WIN32
  #ifdef _WIN64
    #define SIZE_T_FORMAT "I64"
  #else
    #define SIZE_T_FORMAT ""
  #endif
  #define INT64_T_FORMAT "I64"
  #define INTMAX_T_FORMAT "I64"
#else
  #define SIZE_T_FORMAT "z"
  #define INT64_T_FORMAT "ll"
  #define INTMAX_T_FORMAT "j"
#endif
59
60#include <stdio.h>	/* Include that here, to make sure __P gets defined */
61#include <errno.h>
62#include <fcntl.h>	/* For open and flags */
63#ifdef HAVE_INTTYPES_H
64#include <inttypes.h>
65#endif
66#include <regex.h>
67#include <time.h>
68#include <sys/types.h>
69#ifndef WIN32
70#include <sys/param.h>
71#endif
72/* Do this here and now, because struct stat gets re-defined on solaris */
73#include <sys/stat.h>
74#include <stdarg.h>
75
/* Selects the `cond' member (instead of `dummy') in struct magic. */
#define ENABLE_CONDITIONALS

/* Fallback magic file name, used only when the build does not define MAGIC. */
#ifndef MAGIC
#define MAGIC "/etc/magic"
#endif

/* Path separator character: ';' on EMX and WIN32, ':' everywhere else. */
#if defined(__EMX__) || defined (WIN32)
#define PATHSEP	';'
#else
#define PATHSEP	':'
#endif

/*
 * Linkage annotations.
 *   private   - file-local (static).
 *   public    - default symbol visibility when HAVE_VISIBILITY is set and
 *               the target is not WIN32; otherwise expands to nothing.
 *   protected - hidden symbol visibility under the same condition; an
 *               existing definition of `protected' is left untouched.
 */
#define private static

#if HAVE_VISIBILITY && !defined(WIN32)
#define public  __attribute__ ((__visibility__("default")))
#ifndef protected
#define protected __attribute__ ((__visibility__("hidden")))
#endif
#else
#define public
#ifndef protected
#define protected
#endif
#endif
101
/*
 * Number of elements in a true array (not a pointer).  The argument is
 * parenthesized before indexing so that any expression may be passed.
 */
#ifndef __arraycount
#define __arraycount(a) (sizeof(a) / sizeof((a)[0]))
#endif

/*
 * __GNUC_PREREQ__(x, y): non-zero when compiling with GCC version x.y or
 * newer; always 0 when __GNUC__ is not defined.
 */
#ifndef __GNUC_PREREQ__
#ifdef __GNUC__
#define	__GNUC_PREREQ__(x, y)						\
	((__GNUC__ == (x) && __GNUC_MINOR__ >= (y)) ||			\
	 (__GNUC__ > (x)))
#else
#define	__GNUC_PREREQ__(x, y)	0
#endif
#endif

/* Make __attribute__(...) vanish on compilers that do not define __GNUC__. */
#ifndef __GNUC__
#ifndef __attribute__
#define __attribute__(a)
#endif
#endif

/*
 * Classic MIN/MAX.  Each argument may be evaluated twice, so do not pass
 * expressions with side effects.
 */
#ifndef MIN
#define	MIN(a,b)	(((a) < (b)) ? (a) : (b))
#endif

#ifndef MAX
#define	MAX(a,b)	(((a) > (b)) ? (a) : (b))
#endif
129
/*
 * Size limits, format constants, the value union and the magic entry
 * record.  FILE_MAGICSIZE equals the size of the record as laid out below:
 * 86 32-bit words = 344 bytes.
 */
#ifndef FILE_BYTES_MAX
# define FILE_BYTES_MAX (1024 * 1024)	/* how much of the file to look at */
#endif
#define MAXMAGIS 8192		/* max entries in any one magic file
				   or directory */
#define MAXDESC	64		/* max len of text description */
#define MAXMIME	80		/* max len of text MIME type */
#define MAXstring 96		/* max len of "string" types */

#define MAGICNO		0xF11E041C
#define VERSIONNO	14
#define FILE_MAGICSIZE	344

#define	FILE_LOAD	0
#define FILE_CHECK	1
#define FILE_COMPILE	2
#define FILE_LIST	3

/*
 * Value carried by a magic entry.  The largest members are the MAXstring
 * byte arrays, so the union occupies MAXstring (96) bytes.
 */
union VALUETYPE {
	uint8_t b;
	uint16_t h;
	uint32_t l;
	uint64_t q;
	uint8_t hs[2];	/* 2 bytes of a fixed-endian "short" */
	uint8_t hl[4];	/* 4 bytes of a fixed-endian "long" */
	uint8_t hq[8];	/* 8 bytes of a fixed-endian "quad" */
	char s[MAXstring];	/* the search string or regex pattern */
	unsigned char us[MAXstring];
	float f;
	double d;
};

/*
 * One magic entry.  The "Word N" markers count 32-bit words from the start
 * of the structure; do not reorder or resize members without updating
 * FILE_MAGICSIZE.
 */
struct magic {
	/* Word 1 */
	uint16_t cont_level;	/* level of ">" */
	uint8_t flag;
#define INDIR		0x01	/* if '(...)' appears */
#define OFFADD		0x02	/* if '>&' or '>...(&' appears */
#define INDIROFFADD	0x04	/* if '>&(' appears */
#define UNSIGNED	0x08	/* comparison is unsigned */
#define NOSPACE		0x10	/* suppress space character before output */
#define BINTEST		0x20	/* test is for a binary type (set only
				   for top-level tests) */
#define TEXTTEST	0x40	/* for passing to file_softmagic */

	uint8_t factor;

	/* Word 2 */
	uint8_t reln;		/* relation (0=eq, '>'=gt, etc) */
	uint8_t vallen;		/* length of string value, if any */
	uint8_t type;		/* comparison type (FILE_*) */
	uint8_t in_type;	/* type of indirection */
#define 			FILE_INVALID	0
#define 			FILE_BYTE	1
#define				FILE_SHORT	2
#define				FILE_DEFAULT	3
#define				FILE_LONG	4
#define				FILE_STRING	5
#define				FILE_DATE	6
#define				FILE_BESHORT	7
#define				FILE_BELONG	8
#define				FILE_BEDATE	9
#define				FILE_LESHORT	10
#define				FILE_LELONG	11
#define				FILE_LEDATE	12
#define				FILE_PSTRING	13
#define				FILE_LDATE	14
#define				FILE_BELDATE	15
#define				FILE_LELDATE	16
#define				FILE_REGEX	17
#define				FILE_BESTRING16	18
#define				FILE_LESTRING16	19
#define				FILE_SEARCH	20
#define				FILE_MEDATE	21
#define				FILE_MELDATE	22
#define				FILE_MELONG	23
#define				FILE_QUAD	24
#define				FILE_LEQUAD	25
#define				FILE_BEQUAD	26
#define				FILE_QDATE	27
#define				FILE_LEQDATE	28
#define				FILE_BEQDATE	29
#define				FILE_QLDATE	30
#define				FILE_LEQLDATE	31
#define				FILE_BEQLDATE	32
#define				FILE_FLOAT	33
#define				FILE_BEFLOAT	34
#define				FILE_LEFLOAT	35
#define				FILE_DOUBLE	36
#define				FILE_BEDOUBLE	37
#define				FILE_LEDOUBLE	38
#define				FILE_BEID3	39
#define				FILE_LEID3	40
#define				FILE_INDIRECT	41
#define				FILE_QWDATE	42
#define				FILE_LEQWDATE	43
#define				FILE_BEQWDATE	44
#define				FILE_NAME	45
#define				FILE_USE	46
#define				FILE_CLEAR	47
#define				FILE_DER	48
#define				FILE_NAMES_SIZE	49 /* size of array to contain all names */

/* True for the types listed here; the argument is evaluated several times. */
#define IS_STRING(t) \
	((t) == FILE_STRING || \
	 (t) == FILE_PSTRING || \
	 (t) == FILE_BESTRING16 || \
	 (t) == FILE_LESTRING16 || \
	 (t) == FILE_REGEX || \
	 (t) == FILE_SEARCH || \
	 (t) == FILE_INDIRECT || \
	 (t) == FILE_NAME || \
	 (t) == FILE_USE)

#define FILE_FMT_NONE 0
#define FILE_FMT_NUM  1 /* "cduxXi" */
#define FILE_FMT_STR  2 /* "s" */
#define FILE_FMT_QUAD 3 /* "ll" */
#define FILE_FMT_FLOAT 4 /* "eEfFgG" */
#define FILE_FMT_DOUBLE 5 /* "eEfFgG" */

	/* Word 3 */
	uint8_t in_op;		/* operator for indirection */
	uint8_t mask_op;	/* operator for mask */
#ifdef ENABLE_CONDITIONALS
	uint8_t cond;		/* conditional type */
#else
	uint8_t dummy;
#endif
	uint8_t factor_op;
#define		FILE_FACTOR_OP_PLUS	'+'
#define		FILE_FACTOR_OP_MINUS	'-'
#define		FILE_FACTOR_OP_TIMES	'*'
#define		FILE_FACTOR_OP_DIV	'/'
#define		FILE_FACTOR_OP_NONE	'\0'

/* FILE_OPxxx values are the index of the operator character in FILE_OPS. */
#define				FILE_OPS	"&|^+-*/%"
#define				FILE_OPAND	0
#define				FILE_OPOR	1
#define				FILE_OPXOR	2
#define				FILE_OPADD	3
#define				FILE_OPMINUS	4
#define				FILE_OPMULTIPLY	5
#define				FILE_OPDIVIDE	6
#define				FILE_OPMODULO	7
#define				FILE_OPS_MASK	0x07 /* mask for above ops */
#define				FILE_UNUSED_1	0x08
#define				FILE_UNUSED_2	0x10
#define				FILE_OPSIGNED	0x20
#define				FILE_OPINVERSE	0x40
#define				FILE_OPINDIRECT	0x80

#ifdef ENABLE_CONDITIONALS
#define				COND_NONE	0
#define				COND_IF		1
#define				COND_ELIF	2
#define				COND_ELSE	3
#endif /* ENABLE_CONDITIONALS */

	/* Word 4 */
	uint32_t offset;	/* offset to magic number */
	/* Word 5 */
	int32_t in_offset;	/* offset from indirection */
	/* Word 6 */
	uint32_t lineno;	/* line number in magic file */
	/* Word 7,8 */
	union {
		uint64_t _mask;	/* for use with numeric and date types */
		struct {
			uint32_t _count;	/* repeat/line count */
			uint32_t _flags;	/* modifier flags */
		} _s;		/* for use with string types */
	} _u;
#define num_mask _u._mask
#define str_range _u._s._count
#define str_flags _u._s._flags
	/* Words 9-32 (MAXstring = 96 bytes) */
	union VALUETYPE value;	/* either number or string */
	/* Words 33-48 (MAXDESC = 64 bytes) */
	char desc[MAXDESC];	/* description */
	/* Words 49-68 (MAXMIME = 80 bytes) */
	char mimetype[MAXMIME]; /* MIME type */
	/* Words 69-70 */
	char apple[8];		/* APPLE CREATOR/TYPE */
	/* Words 71-86 */
	char ext[64];		/* Popular extensions */
};
317
/*
 * Modifier flag bits and the single characters associated with them.
 * BIT(A) is an int with only bit A set.
 *
 * Two pairs deliberately share a bit value in this table:
 *   PSTRING_1_BE and PSTRING_1_LE are both BIT(7) (and both use 'B');
 *   PSTRING_4_LE and REGEX_LINE_COUNT are both BIT(11).
 */
#define BIT(A)   (1 << (A))
#define STRING_COMPACT_WHITESPACE		BIT(0)
#define STRING_COMPACT_OPTIONAL_WHITESPACE	BIT(1)
#define STRING_IGNORE_LOWERCASE			BIT(2)
#define STRING_IGNORE_UPPERCASE			BIT(3)
#define REGEX_OFFSET_START			BIT(4)
#define STRING_TEXTTEST				BIT(5)
#define STRING_BINTEST				BIT(6)
#define PSTRING_1_BE				BIT(7)
#define PSTRING_1_LE				BIT(7)
#define PSTRING_2_BE				BIT(8)
#define PSTRING_2_LE				BIT(9)
#define PSTRING_4_BE				BIT(10)
#define PSTRING_4_LE				BIT(11)
#define REGEX_LINE_COUNT			BIT(11)
/* Union of all pstring length-width bits (bits 7 through 11). */
#define PSTRING_LEN	\
    (PSTRING_1_BE|PSTRING_2_LE|PSTRING_2_BE|PSTRING_4_LE|PSTRING_4_BE)
#define PSTRING_LENGTH_INCLUDES_ITSELF		BIT(12)
#define	STRING_TRIM				BIT(13)
#define CHAR_COMPACT_WHITESPACE			'W'
#define CHAR_COMPACT_OPTIONAL_WHITESPACE	'w'
#define CHAR_IGNORE_LOWERCASE			'c'
#define CHAR_IGNORE_UPPERCASE			'C'
#define CHAR_REGEX_OFFSET_START			's'
#define CHAR_TEXTTEST				't'
#define	CHAR_TRIM				'T'
#define CHAR_BINTEST				'b'
#define CHAR_PSTRING_1_BE			'B'
#define CHAR_PSTRING_1_LE			'B'
#define CHAR_PSTRING_2_BE			'H'
#define CHAR_PSTRING_2_LE			'h'
#define CHAR_PSTRING_4_BE			'L'
#define CHAR_PSTRING_4_LE			'l'
#define CHAR_PSTRING_LENGTH_INCLUDES_ITSELF     'J'
#define STRING_IGNORE_CASE		(STRING_IGNORE_LOWERCASE|STRING_IGNORE_UPPERCASE)
#define STRING_DEFAULT_RANGE		100

#define	INDIRECT_RELATIVE			BIT(0)
#define	CHAR_INDIRECT_RELATIVE			'r'
357
/*
 * list of magic entries: each node carries an array of `nmagic' entries
 * and links to its neighbours in both directions.
 */
struct mlist {
	struct magic *magic;		/* array of magic entries */
	uint32_t nmagic;		/* number of entries in array */
	void *map;			/* internal resources used by entry */
	struct mlist *next, *prev;
};
365
/*
 * Cast helpers.  In C++ they map to the named cast operators; in C they
 * are plain casts, with CCAST going through uintptr_t on the way to the
 * target type.
 */
#ifdef __cplusplus
#define CAST(T, b)	static_cast<T>(b)
#define RCAST(T, b)	reinterpret_cast<T>(b)
#define CCAST(T, b)	const_cast<T>(b)
#else
#define CAST(T, b)	((T)(b))
#define RCAST(T, b)	((T)(b))
#define CCAST(T, b)	((T)(uintptr_t)(b))
#endif
375
/*
 * Per-level state record; struct magic_set holds an array of these in
 * its `c.li' member.  The last two members exist only when
 * ENABLE_CONDITIONALS is defined.
 */
struct level_info {
	int32_t off;
	int got_match;
#ifdef ENABLE_CONDITIONALS
	int last_match;
	int last_cond;	/* used for error checking by parse() */
#endif
};
384
385#define MAGIC_SETS	2
386
387struct magic_set {
388	struct mlist *mlist[MAGIC_SETS];	/* list of regular entries */
389	struct cont {
390		size_t len;
391		struct level_info *li;
392	} c;
393	struct out {
394		char *buf;		/* Accumulation buffer */
395		char *pbuf;		/* Printable buffer */
396	} o;
397	uint32_t offset;
398	int error;
399	int flags;			/* Control magic tests. */
400	int event_flags;		/* Note things that happened. */
401#define 		EVENT_HAD_ERR		0x01
402	const char *file;
403	size_t line;			/* current magic line number */
404
405	/* data for searches */
406	struct {
407		const char *s;		/* start of search in original source */
408		size_t s_len;		/* length of search region */
409		size_t offset;		/* starting offset in source: XXX - should this be off_t? */
410		size_t rm_len;		/* match length */
411	} search;
412
413	/* FIXME: Make the string dynamically allocated so that e.g.
414	   strings matched in files can be longer than MAXstring */
415	union VALUETYPE ms_value;	/* either number or string */
416	uint16_t indir_max;
417	uint16_t name_max;
418	uint16_t elf_shnum_max;
419	uint16_t elf_phnum_max;
420	uint16_t elf_notes_max;
421	uint16_t regex_max;
422	size_t bytes_max;		/* number of bytes to read from file */
423#define	FILE_INDIR_MAX			50
424#define	FILE_NAME_MAX			30
425#define	FILE_ELF_SHNUM_MAX		32768
426#define	FILE_ELF_PHNUM_MAX		2048
427#define	FILE_ELF_NOTES_MAX		256
428#define	FILE_REGEX_MAX			8192
429};
430
431/* Type for Unicode characters */
432typedef unsigned long unichar;
433
434struct stat;
435#define FILE_T_LOCAL	1
436#define FILE_T_WINDOWS	2
437protected const char *file_fmttime(uint64_t, int, char *);
438protected struct magic_set *file_ms_alloc(int);
439protected void file_ms_free(struct magic_set *);
440protected int file_buffer(struct magic_set *, int, const char *, const void *,
441    size_t);
442protected int file_fsmagic(struct magic_set *, const char *, struct stat *);
443protected int file_pipe2file(struct magic_set *, int, const void *, size_t);
444protected int file_vprintf(struct magic_set *, const char *, va_list)
445    __attribute__((__format__(__printf__, 2, 0)));
446protected size_t file_printedlen(const struct magic_set *);
447protected int file_replace(struct magic_set *, const char *, const char *);
448protected int file_printf(struct magic_set *, const char *, ...)
449    __attribute__((__format__(__printf__, 2, 3)));
450protected int file_reset(struct magic_set *, int);
451protected int file_tryelf(struct magic_set *, int, const unsigned char *,
452    size_t);
453protected int file_trycdf(struct magic_set *, int, const unsigned char *,
454    size_t);
455#if HAVE_FORK
456protected int file_zmagic(struct magic_set *, int, const char *,
457    const unsigned char *, size_t);
458#endif
459protected int file_ascmagic(struct magic_set *, const unsigned char *, size_t,
460    int);
461protected int file_ascmagic_with_encoding(struct magic_set *,
462    const unsigned char *, size_t, unichar *, size_t, const char *,
463    const char *, int);
464protected int file_encoding(struct magic_set *, const unsigned char *, size_t,
465    unichar **, size_t *, const char **, const char **, const char **);
466protected int file_is_tar(struct magic_set *, const unsigned char *, size_t);
467protected int file_softmagic(struct magic_set *, const unsigned char *, size_t,
468    uint16_t *, uint16_t *, int, int);
469protected int file_apprentice(struct magic_set *, const char *, int);
470protected int buffer_apprentice(struct magic_set *, struct magic **,
471    size_t *, size_t);
472protected int file_magicfind(struct magic_set *, const char *, struct mlist *);
473protected uint64_t file_signextend(struct magic_set *, struct magic *,
474    uint64_t);
475protected void file_badread(struct magic_set *);
476protected void file_badseek(struct magic_set *);
477protected void file_oomem(struct magic_set *, size_t);
478protected void file_error(struct magic_set *, int, const char *, ...)
479    __attribute__((__format__(__printf__, 3, 4)));
480protected void file_magerror(struct magic_set *, const char *, ...)
481    __attribute__((__format__(__printf__, 2, 3)));
482protected void file_magwarn(struct magic_set *, const char *, ...)
483    __attribute__((__format__(__printf__, 2, 3)));
484protected void file_mdump(struct magic *);
485protected void file_showstr(FILE *, const char *, size_t);
486protected size_t file_mbswidth(const char *);
487protected const char *file_getbuffer(struct magic_set *);
488protected ssize_t sread(int, void *, size_t, int);
489protected int file_check_mem(struct magic_set *, unsigned int);
490protected int file_looks_utf8(const unsigned char *, size_t, unichar *,
491    size_t *);
492protected size_t file_pstring_length_size(const struct magic *);
493protected size_t file_pstring_get_length(const struct magic *, const char *);
494protected char * file_printable(char *, size_t, const char *);
495#ifdef __EMX__
496protected int file_os2_apptype(struct magic_set *, const char *, const void *,
497    size_t);
498#endif /* __EMX__ */
499
500#if defined(HAVE_LOCALE_H)
501#include <locale.h>
502#endif
503#if defined(HAVE_XLOCALE_H)
504#include <xlocale.h>
505#endif
506
507typedef struct {
508	const char *pat;
509#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
510#define USE_C_LOCALE
511	locale_t old_lc_ctype;
512	locale_t c_lc_ctype;
513#else
514	char *old_lc_ctype;
515#endif
516	int rc;
517	regex_t rx;
518} file_regex_t;
519
520protected int file_regcomp(file_regex_t *, const char *, int);
521protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
522    int);
523protected void file_regfree(file_regex_t *);
524protected void file_regerror(file_regex_t *, int, struct magic_set *);
525
526typedef struct {
527	char *buf;
528	uint32_t offset;
529} file_pushbuf_t;
530
531protected file_pushbuf_t *file_push_buffer(struct magic_set *);
532protected char  *file_pop_buffer(struct magic_set *, file_pushbuf_t *);
533
#ifndef COMPILE_ONLY
extern const char *file_names[];
extern const size_t file_nnames;
#endif

/*
 * Portability fallbacks.  Each group below is active only when configure
 * did not define the matching HAVE_* macro.
 */

/* strerror() built on sys_errlist, with a fixed string for bad codes. */
#ifndef HAVE_STRERROR
extern int sys_nerr;
extern char *sys_errlist[];
#define strerror(e) \
	(((e) >= 0 && (e) < sys_nerr) ? sys_errlist[(e)] : "Unknown error")
#endif

/*
 * Without strtoul(), fall back to strtol().  The result is then a signed
 * long, so values above LONG_MAX are not representable.
 */
#ifndef HAVE_STRTOUL
#define strtoul(a, b, c)	strtol(a, b, c)
#endif

/* Prototypes for functions the platform does not declare. */
#ifndef HAVE_PREAD
ssize_t pread(int, void *, size_t, off_t);
#endif
#ifndef HAVE_VASPRINTF
int vasprintf(char **, const char *, va_list);
#endif
#ifndef HAVE_ASPRINTF
int asprintf(char **, const char *, ...);
#endif
#ifndef HAVE_DPRINTF
int dprintf(int, const char *, ...);
#endif

#ifndef HAVE_STRLCPY
size_t strlcpy(char *, const char *, size_t);
#endif
#ifndef HAVE_STRLCAT
size_t strlcat(char *, const char *, size_t);
#endif
#ifndef HAVE_STRCASESTR
char *strcasestr(const char *, const char *);
#endif
#ifndef HAVE_GETLINE
ssize_t getline(char **, size_t *, FILE *);
ssize_t getdelim(char **, size_t *, int, FILE *);
#endif
#ifndef HAVE_CTIME_R
char   *ctime_r(const time_t *, char *);
#endif
#ifndef HAVE_ASCTIME_R
char   *asctime_r(const struct tm *, char *);
#endif
#ifndef HAVE_GMTIME_R
struct tm *gmtime_r(const time_t *, struct tm *);
#endif
#ifndef HAVE_LOCALTIME_R
struct tm *localtime_r(const time_t *, struct tm *);
#endif
#ifndef HAVE_FMTCHECK
const char *fmtcheck(const char *, const char *)
     __attribute__((__format_arg__(2)));
#endif
592
/* Define QUICK when both mmap and <sys/mman.h> are available. */
#if defined(HAVE_MMAP) && defined(HAVE_SYS_MMAN_H) && !defined(QUICK)
#define QUICK
#endif

/* Platforms without O_BINARY get a zero flag so it can be OR-ed in freely. */
#ifndef O_BINARY
#define O_BINARY	0
#endif

/*
 * FILE_RCSID(id): place the revision id string in the current file.
 *   GCC 3 and newer - a static array marked __used__.
 *   other C compilers - a static function named rcsid that refers to
 *     itself and to the string (presumably so neither is reported as
 *     unused).
 *   C++ - expands to nothing.
 */
#ifndef __cplusplus
#if defined(__GNUC__) && (__GNUC__ >= 3)
#define FILE_RCSID(id) \
static const char rcsid[] __attribute__((__used__)) = id;
#else
#define FILE_RCSID(id) \
static const char *rcsid(const char *p) { \
	return rcsid(p = id); \
}
#endif
#else
#define FILE_RCSID(id)
#endif
/* __RCSID(a) expands to nothing unless something defined it earlier. */
#ifndef __RCSID
#define __RCSID(a)
#endif
617
618#endif /* __file_h__ */
619