magic.c revision 299236
1/*
2 * Copyright (c) Christos Zoulas 2003.
3 * All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice immediately at the beginning of the file, without modification,
10 *    this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#ifdef WIN32
29#include <windows.h>
30#include <shlwapi.h>
31#endif
32
33#include "file.h"
34
35#ifndef	lint
36FILE_RCSID("@(#)$File: magic.c,v 1.95 2015/09/11 17:24:09 christos Exp $")
37#endif	/* lint */
38
39#include "magic.h"
40
41#include <stdlib.h>
42#include <unistd.h>
43#include <string.h>
44#ifdef QUICK
45#include <sys/mman.h>
46#endif
47#ifdef HAVE_LIMITS_H
48#include <limits.h>	/* for PIPE_BUF */
49#endif
50
51#if defined(HAVE_UTIMES)
52# include <sys/time.h>
53#elif defined(HAVE_UTIME)
54# if defined(HAVE_SYS_UTIME_H)
55#  include <sys/utime.h>
56# elif defined(HAVE_UTIME_H)
57#  include <utime.h>
58# endif
59#endif
60
61#ifdef HAVE_UNISTD_H
62#include <unistd.h>	/* for read() */
63#endif
64
65#ifndef PIPE_BUF
66/* Get the PIPE_BUF from pathconf */
67#ifdef _PC_PIPE_BUF
68#define PIPE_BUF pathconf(".", _PC_PIPE_BUF)
69#else
70#define PIPE_BUF 512
71#endif
72#endif
73
74private void close_and_restore(const struct magic_set *, const char *, int,
75    const struct stat *);
76private int unreadable_info(struct magic_set *, mode_t, const char *);
77private const char* get_default_magic(void);
78#ifndef COMPILE_ONLY
79private const char *file_or_fd(struct magic_set *, const char *, int);
80#endif
81
82#ifndef	STDIN_FILENO
83#define	STDIN_FILENO	0
84#endif
85
86#ifdef WIN32
87/* HINSTANCE of this shared library. Needed for get_default_magic() */
88static HINSTANCE _w32_dll_instance = NULL;
89
90static void
91_w32_append_path(char **hmagicpath, const char *fmt, ...)
92{
93	char *tmppath;
94        char *newpath;
95	va_list ap;
96
97	va_start(ap, fmt);
98	if (vasprintf(&tmppath, fmt, ap) < 0) {
99		va_end(ap);
100		return;
101	}
102	va_end(ap);
103
104	if (access(tmppath, R_OK) == -1)
105		goto out;
106
107	if (*hmagicpath == NULL) {
108		*hmagicpath = tmppath;
109		return;
110	}
111
112	if (asprintf(&newpath, "%s%c%s", *hmagicpath, PATHSEP, tmppath) < 0)
113		goto out;
114
115	free(*hmagicpath);
116	free(tmppath);
117	*hmagicpath = newpath;
118	return;
119out:
120	free(tmppath);
121}
122
123static void
124_w32_get_magic_relative_to(char **hmagicpath, HINSTANCE module)
125{
126	static const char *trypaths[] = {
127		"%s/share/misc/magic.mgc",
128		"%s/magic.mgc",
129	};
130	LPSTR dllpath;
131	size_t sp;
132
133	dllpath = calloc(MAX_PATH + 1, sizeof(*dllpath));
134
135	if (!GetModuleFileNameA(module, dllpath, MAX_PATH))
136		goto out;
137
138	PathRemoveFileSpecA(dllpath);
139
140	if (module) {
141		char exepath[MAX_PATH];
142		GetModuleFileNameA(NULL, exepath, MAX_PATH);
143		PathRemoveFileSpecA(exepath);
144		if (stricmp(exepath, dllpath) == 0)
145			goto out;
146	}
147
148	sp = strlen(dllpath);
149	if (sp > 3 && stricmp(&dllpath[sp - 3], "bin") == 0) {
150		_w32_append_path(hmagicpath,
151		    "%s/../share/misc/magic.mgc", dllpath);
152		goto out;
153	}
154
155	for (sp = 0; sp < __arraycount(trypaths); sp++)
156		_w32_append_path(hmagicpath, trypaths[sp], dllpath);
157out:
158	free(dllpath);
159}
160
161/* Placate GCC by offering a sacrificial previous prototype */
162BOOL WINAPI DllMain(HINSTANCE, DWORD, LPVOID);
163
164BOOL WINAPI
165DllMain(HINSTANCE hinstDLL, DWORD fdwReason,
166    LPVOID lpvReserved __attribute__((__unused__)))
167{
168	if (fdwReason == DLL_PROCESS_ATTACH)
169		_w32_dll_instance = hinstDLL;
170	return TRUE;
171}
172#endif
173
174private const char *
175get_default_magic(void)
176{
177	static const char hmagic[] = "/.magic/magic.mgc";
178	static char *default_magic;
179	char *home, *hmagicpath;
180
181#ifndef WIN32
182	struct stat st;
183
184	if (default_magic) {
185		free(default_magic);
186		default_magic = NULL;
187	}
188	if ((home = getenv("HOME")) == NULL)
189		return MAGIC;
190
191	if (asprintf(&hmagicpath, "%s/.magic.mgc", home) < 0)
192		return MAGIC;
193	if (stat(hmagicpath, &st) == -1) {
194		free(hmagicpath);
195		if (asprintf(&hmagicpath, "%s/.magic", home) < 0)
196			return MAGIC;
197		if (stat(hmagicpath, &st) == -1)
198			goto out;
199		if (S_ISDIR(st.st_mode)) {
200			free(hmagicpath);
201			if (asprintf(&hmagicpath, "%s/%s", home, hmagic) < 0)
202				return MAGIC;
203			if (access(hmagicpath, R_OK) == -1)
204				goto out;
205		}
206	}
207
208	if (asprintf(&default_magic, "%s:%s", hmagicpath, MAGIC) < 0)
209		goto out;
210	free(hmagicpath);
211	return default_magic;
212out:
213	default_magic = NULL;
214	free(hmagicpath);
215	return MAGIC;
216#else
217	hmagicpath = NULL;
218
219	if (default_magic) {
220		free(default_magic);
221		default_magic = NULL;
222	}
223
224	/* First, try to get a magic file from user-application data */
225	if ((home = getenv("LOCALAPPDATA")) != NULL)
226		_w32_append_path(&hmagicpath, "%s%s", home, hmagic);
227
228	/* Second, try to get a magic file from the user profile data */
229	if ((home = getenv("USERPROFILE")) != NULL)
230		_w32_append_path(&hmagicpath,
231		    "%s/Local Settings/Application Data%s", home, hmagic);
232
233	/* Third, try to get a magic file from Common Files */
234	if ((home = getenv("COMMONPROGRAMFILES")) != NULL)
235		_w32_append_path(&hmagicpath, "%s%s", home, hmagic);
236
237	/* Fourth, try to get magic file relative to exe location */
238        _w32_get_magic_relative_to(&hmagicpath, NULL);
239
240	/* Fifth, try to get magic file relative to dll location */
241        _w32_get_magic_relative_to(&hmagicpath, _w32_dll_instance);
242
243	/* Avoid MAGIC constant - it likely points to a file within MSys tree */
244	default_magic = hmagicpath;
245	return default_magic;
246#endif
247}
248
249public const char *
250magic_getpath(const char *magicfile, int action)
251{
252	if (magicfile != NULL)
253		return magicfile;
254
255	magicfile = getenv("MAGIC");
256	if (magicfile != NULL)
257		return magicfile;
258
259	return action == FILE_LOAD ? get_default_magic() : MAGIC;
260}
261
262public struct magic_set *
263magic_open(int flags)
264{
265	return file_ms_alloc(flags);
266}
267
268private int
269unreadable_info(struct magic_set *ms, mode_t md, const char *file)
270{
271	if (file) {
272		/* We cannot open it, but we were able to stat it. */
273		if (access(file, W_OK) == 0)
274			if (file_printf(ms, "writable, ") == -1)
275				return -1;
276		if (access(file, X_OK) == 0)
277			if (file_printf(ms, "executable, ") == -1)
278				return -1;
279	}
280	if (S_ISREG(md))
281		if (file_printf(ms, "regular file, ") == -1)
282			return -1;
283	if (file_printf(ms, "no read permission") == -1)
284		return -1;
285	return 0;
286}
287
288public void
289magic_close(struct magic_set *ms)
290{
291	if (ms == NULL)
292		return;
293	file_ms_free(ms);
294}
295
296/*
297 * load a magic file
298 */
299public int
300magic_load(struct magic_set *ms, const char *magicfile)
301{
302	if (ms == NULL)
303		return -1;
304	return file_apprentice(ms, magicfile, FILE_LOAD);
305}
306
307#ifndef COMPILE_ONLY
308/*
309 * Install a set of compiled magic buffers.
310 */
311public int
312magic_load_buffers(struct magic_set *ms, void **bufs, size_t *sizes,
313    size_t nbufs)
314{
315	if (ms == NULL)
316		return -1;
317	return buffer_apprentice(ms, (struct magic **)bufs, sizes, nbufs);
318}
319#endif
320
321public int
322magic_compile(struct magic_set *ms, const char *magicfile)
323{
324	if (ms == NULL)
325		return -1;
326	return file_apprentice(ms, magicfile, FILE_COMPILE);
327}
328
329public int
330magic_check(struct magic_set *ms, const char *magicfile)
331{
332	if (ms == NULL)
333		return -1;
334	return file_apprentice(ms, magicfile, FILE_CHECK);
335}
336
337public int
338magic_list(struct magic_set *ms, const char *magicfile)
339{
340	if (ms == NULL)
341		return -1;
342	return file_apprentice(ms, magicfile, FILE_LIST);
343}
344
345private void
346close_and_restore(const struct magic_set *ms, const char *name, int fd,
347    const struct stat *sb)
348{
349	if (fd == STDIN_FILENO || name == NULL)
350		return;
351	(void) close(fd);
352
353	if ((ms->flags & MAGIC_PRESERVE_ATIME) != 0) {
354		/*
355		 * Try to restore access, modification times if read it.
356		 * This is really *bad* because it will modify the status
357		 * time of the file... And of course this will affect
358		 * backup programs
359		 */
360#ifdef HAVE_UTIMES
361		struct timeval  utsbuf[2];
362		(void)memset(utsbuf, 0, sizeof(utsbuf));
363		utsbuf[0].tv_sec = sb->st_atime;
364		utsbuf[1].tv_sec = sb->st_mtime;
365
366		(void) utimes(name, utsbuf); /* don't care if loses */
367#elif defined(HAVE_UTIME_H) || defined(HAVE_SYS_UTIME_H)
368		struct utimbuf  utbuf;
369
370		(void)memset(&utbuf, 0, sizeof(utbuf));
371		utbuf.actime = sb->st_atime;
372		utbuf.modtime = sb->st_mtime;
373		(void) utime(name, &utbuf); /* don't care if loses */
374#endif
375	}
376}
377
378#ifndef COMPILE_ONLY
379
380/*
381 * find type of descriptor
382 */
383public const char *
384magic_descriptor(struct magic_set *ms, int fd)
385{
386	if (ms == NULL)
387		return NULL;
388	return file_or_fd(ms, NULL, fd);
389}
390
391/*
392 * find type of named file
393 */
394public const char *
395magic_file(struct magic_set *ms, const char *inname)
396{
397	if (ms == NULL)
398		return NULL;
399	return file_or_fd(ms, inname, STDIN_FILENO);
400}
401
402private const char *
403file_or_fd(struct magic_set *ms, const char *inname, int fd)
404{
405	int	rv = -1;
406	unsigned char *buf;
407	struct stat	sb;
408	ssize_t nbytes = 0;	/* number of bytes read from a datafile */
409	int	ispipe = 0;
410	off_t	pos = (off_t)-1;
411
412	if (file_reset(ms) == -1)
413		goto out;
414
415	/*
416	 * one extra for terminating '\0', and
417	 * some overlapping space for matches near EOF
418	 */
419#define SLOP (1 + sizeof(union VALUETYPE))
420	if ((buf = CAST(unsigned char *, malloc(HOWMANY + SLOP))) == NULL)
421		return NULL;
422
423	switch (file_fsmagic(ms, inname, &sb)) {
424	case -1:		/* error */
425		goto done;
426	case 0:			/* nothing found */
427		break;
428	default:		/* matched it and printed type */
429		rv = 0;
430		goto done;
431	}
432
433#ifdef WIN32
434	/* Place stdin in binary mode, so EOF (Ctrl+Z) doesn't stop early. */
435	if (fd == STDIN_FILENO)
436		_setmode(STDIN_FILENO, O_BINARY);
437#endif
438
439	if (inname == NULL) {
440		if (fstat(fd, &sb) == 0 && S_ISFIFO(sb.st_mode))
441			ispipe = 1;
442		else
443			pos = lseek(fd, (off_t)0, SEEK_CUR);
444	} else {
445		int flags = O_RDONLY|O_BINARY;
446		int okstat = stat(inname, &sb) == 0;
447
448		if (okstat && S_ISFIFO(sb.st_mode)) {
449#ifdef O_NONBLOCK
450			flags |= O_NONBLOCK;
451#endif
452			ispipe = 1;
453		}
454
455		errno = 0;
456		if ((fd = open(inname, flags)) < 0) {
457#ifdef WIN32
458			/*
459			 * Can't stat, can't open.  It may have been opened in
460			 * fsmagic, so if the user doesn't have read permission,
461			 * allow it to say so; otherwise an error was probably
462			 * displayed in fsmagic.
463			 */
464			if (!okstat && errno == EACCES) {
465				sb.st_mode = S_IFBLK;
466				okstat = 1;
467			}
468#endif
469			if (okstat &&
470			    unreadable_info(ms, sb.st_mode, inname) == -1)
471				goto done;
472			rv = 0;
473			goto done;
474		}
475#ifdef O_NONBLOCK
476		if ((flags = fcntl(fd, F_GETFL)) != -1) {
477			flags &= ~O_NONBLOCK;
478			(void)fcntl(fd, F_SETFL, flags);
479		}
480#endif
481	}
482
483	/*
484	 * try looking at the first HOWMANY bytes
485	 */
486	if (ispipe) {
487		ssize_t r = 0;
488
489		while ((r = sread(fd, (void *)&buf[nbytes],
490		    (size_t)(HOWMANY - nbytes), 1)) > 0) {
491			nbytes += r;
492			if (r < PIPE_BUF) break;
493		}
494
495		if (nbytes == 0) {
496			/* We can not read it, but we were able to stat it. */
497			if (unreadable_info(ms, sb.st_mode, inname) == -1)
498				goto done;
499			rv = 0;
500			goto done;
501		}
502
503	} else {
504		/* Windows refuses to read from a big console buffer. */
505		size_t howmany =
506#if defined(WIN32) && HOWMANY > 8 * 1024
507				_isatty(fd) ? 8 * 1024 :
508#endif
509				HOWMANY;
510		if ((nbytes = read(fd, (char *)buf, howmany)) == -1) {
511			if (inname == NULL && fd != STDIN_FILENO)
512				file_error(ms, errno, "cannot read fd %d", fd);
513			else
514				file_error(ms, errno, "cannot read `%s'",
515				    inname == NULL ? "/dev/stdin" : inname);
516			goto done;
517		}
518	}
519
520	(void)memset(buf + nbytes, 0, SLOP); /* NUL terminate */
521	if (file_buffer(ms, fd, inname, buf, (size_t)nbytes) == -1)
522		goto done;
523	rv = 0;
524done:
525	free(buf);
526	if (pos != (off_t)-1)
527		(void)lseek(fd, pos, SEEK_SET);
528	close_and_restore(ms, inname, fd, &sb);
529out:
530	return rv == 0 ? file_getbuffer(ms) : NULL;
531}
532
533
534public const char *
535magic_buffer(struct magic_set *ms, const void *buf, size_t nb)
536{
537	if (ms == NULL)
538		return NULL;
539	if (file_reset(ms) == -1)
540		return NULL;
541	/*
542	 * The main work is done here!
543	 * We have the file name and/or the data buffer to be identified.
544	 */
545	if (file_buffer(ms, -1, NULL, buf, nb) == -1) {
546		return NULL;
547	}
548	return file_getbuffer(ms);
549}
550#endif
551
552public const char *
553magic_error(struct magic_set *ms)
554{
555	if (ms == NULL)
556		return "Magic database is not open";
557	return (ms->event_flags & EVENT_HAD_ERR) ? ms->o.buf : NULL;
558}
559
560public int
561magic_errno(struct magic_set *ms)
562{
563	if (ms == NULL)
564		return EINVAL;
565	return (ms->event_flags & EVENT_HAD_ERR) ? ms->error : 0;
566}
567
568public int
569magic_setflags(struct magic_set *ms, int flags)
570{
571	if (ms == NULL)
572		return -1;
573#if !defined(HAVE_UTIME) && !defined(HAVE_UTIMES)
574	if (flags & MAGIC_PRESERVE_ATIME)
575		return -1;
576#endif
577	ms->flags = flags;
578	return 0;
579}
580
581public int
582magic_version(void)
583{
584	return MAGIC_VERSION;
585}
586
587public int
588magic_setparam(struct magic_set *ms, int param, const void *val)
589{
590	switch (param) {
591	case MAGIC_PARAM_INDIR_MAX:
592		ms->indir_max = (uint16_t)*(const size_t *)val;
593		return 0;
594	case MAGIC_PARAM_NAME_MAX:
595		ms->name_max = (uint16_t)*(const size_t *)val;
596		return 0;
597	case MAGIC_PARAM_ELF_PHNUM_MAX:
598		ms->elf_phnum_max = (uint16_t)*(const size_t *)val;
599		return 0;
600	case MAGIC_PARAM_ELF_SHNUM_MAX:
601		ms->elf_shnum_max = (uint16_t)*(const size_t *)val;
602		return 0;
603	case MAGIC_PARAM_ELF_NOTES_MAX:
604		ms->elf_notes_max = (uint16_t)*(const size_t *)val;
605		return 0;
606	case MAGIC_PARAM_REGEX_MAX:
607		ms->elf_notes_max = (uint16_t)*(const size_t *)val;
608		return 0;
609	default:
610		errno = EINVAL;
611		return -1;
612	}
613}
614
615public int
616magic_getparam(struct magic_set *ms, int param, void *val)
617{
618	switch (param) {
619	case MAGIC_PARAM_INDIR_MAX:
620		*(size_t *)val = ms->indir_max;
621		return 0;
622	case MAGIC_PARAM_NAME_MAX:
623		*(size_t *)val = ms->name_max;
624		return 0;
625	case MAGIC_PARAM_ELF_PHNUM_MAX:
626		*(size_t *)val = ms->elf_phnum_max;
627		return 0;
628	case MAGIC_PARAM_ELF_SHNUM_MAX:
629		*(size_t *)val = ms->elf_shnum_max;
630		return 0;
631	case MAGIC_PARAM_ELF_NOTES_MAX:
632		*(size_t *)val = ms->elf_notes_max;
633		return 0;
634	case MAGIC_PARAM_REGEX_MAX:
635		*(size_t *)val = ms->regex_max;
636		return 0;
637	default:
638		errno = EINVAL;
639		return -1;
640	}
641}
642