file.c revision 169962
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * file - find type of a file or files - main program.
30 */
31
32#include "file.h"
33#include "magic.h"
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <string.h>
39#include <sys/types.h>
40#include <sys/param.h>	/* for MAXPATHLEN */
41#include <sys/stat.h>
42#ifdef RESTORE_TIME
43# if (__COHERENT__ >= 0x420)
44#  include <sys/utime.h>
45# else
46#  ifdef USE_UTIMES
47#   include <sys/time.h>
48#  else
49#   include <utime.h>
50#  endif
51# endif
52#endif
53#ifdef HAVE_UNISTD_H
54#include <unistd.h>	/* for read() */
55#endif
56#ifdef HAVE_LOCALE_H
57#include <locale.h>
58#endif
59#ifdef HAVE_WCHAR_H
60#include <wchar.h>
61#endif
62
63#ifdef HAVE_GETOPT_H
64#include <getopt.h>	/* for long options (is this portable?)*/
65#else
66#undef HAVE_GETOPT_LONG
67#endif
68
69#include <netinet/in.h>		/* for byte swapping */
70
71#include "patchlevel.h"
72
73#ifndef	lint
74FILE_RCSID("@(#)$File: file.c,v 1.111 2007/05/08 14:44:18 christos Exp $")
75#endif	/* lint */
76
77
78#ifdef S_IFLNK
79#define SYMLINKFLAG "Lh"
80#else
81#define SYMLINKFLAG ""
82#endif
83
84# define USAGE  "Usage: %s [-bcik" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile] [-F separator] [-m magicfiles] file...\n       %s -C -m magicfiles\n"
85
86#ifndef MAXPATHLEN
87#define	MAXPATHLEN	512
88#endif
89
90private int 		/* Global command-line options 		*/
91	bflag = 0,	/* brief output format	 		*/
92	nopad = 0,	/* Don't pad output			*/
93	nobuffer = 0,   /* Do not buffer stdout 		*/
94	nulsep = 0;	/* Append '\0' to the separator		*/
95
96private const char *magicfile = 0;	/* where the magic is	*/
97private const char *default_magicfile = MAGIC;
98private const char *separator = ":";	/* Default field separator	*/
99
100private char *progname;		/* used throughout 		*/
101
102private struct magic_set *magic;
103
104private void unwrap(char *);
105private void usage(void);
106#ifdef HAVE_GETOPT_LONG
107private void help(void);
108#endif
109#if 0
110private int byteconv4(int, int, int);
111private short byteconv2(int, int, int);
112#endif
113
114int main(int, char *[]);
115private void process(const char *, int);
116private void load(const char *, int);
117
118
119/*
120 * main - parse arguments and handle options
121 */
122int
123main(int argc, char *argv[])
124{
125	int c, i;
126	int action = 0, didsomefiles = 0, errflg = 0;
127	int flags = 0;
128	char *home, *usermagic;
129	struct stat sb;
130	static const char hmagic[] = "/.magic";
131#define OPTSTRING	"bcCde:f:F:hikLm:nNprsvz0"
132#ifdef HAVE_GETOPT_LONG
133	int longindex;
134	static const struct option long_options[] =
135	{
136		{"version", 0, 0, 'v'},
137		{"help", 0, 0, 0},
138		{"brief", 0, 0, 'b'},
139		{"checking-printout", 0, 0, 'c'},
140		{"debug", 0, 0, 'd'},
141		{"exclude", 1, 0, 'e' },
142		{"files-from", 1, 0, 'f'},
143		{"separator", 1, 0, 'F'},
144		{"mime", 0, 0, 'i'},
145		{"keep-going", 0, 0, 'k'},
146#ifdef S_IFLNK
147		{"dereference", 0, 0, 'L'},
148		{"no-dereference", 0, 0, 'h'},
149#endif
150		{"magic-file", 1, 0, 'm'},
151#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
152		{"preserve-date", 0, 0, 'p'},
153#endif
154		{"uncompress", 0, 0, 'z'},
155		{"raw", 0, 0, 'r'},
156		{"no-buffer", 0, 0, 'n'},
157		{"no-pad", 0, 0, 'N'},
158		{"special-files", 0, 0, 's'},
159		{"compile", 0, 0, 'C'},
160		{"print0", 0, 0, '0'},
161		{0, 0, 0, 0},
162	};
163#endif
164
165	static const struct {
166		const char *name;
167		int value;
168	} nv[] = {
169		{ "apptype",	MAGIC_NO_CHECK_APPTYPE },
170		{ "ascii",	MAGIC_NO_CHECK_ASCII },
171		{ "compress",	MAGIC_NO_CHECK_COMPRESS },
172		{ "elf",	MAGIC_NO_CHECK_ELF },
173		{ "fortran",	MAGIC_NO_CHECK_FORTRAN },
174		{ "soft",	MAGIC_NO_CHECK_SOFT },
175		{ "tar",	MAGIC_NO_CHECK_TAR },
176		{ "tokens",	MAGIC_NO_CHECK_TOKENS },
177		{ "troff",	MAGIC_NO_CHECK_TROFF },
178	};
179
180#ifdef LC_CTYPE
181	/* makes islower etc work for other langs */
182	(void)setlocale(LC_CTYPE, "");
183#endif
184
185#ifdef __EMX__
186	/* sh-like wildcard expansion! Shouldn't hurt at least ... */
187	_wildcard(&argc, &argv);
188#endif
189
190	if ((progname = strrchr(argv[0], '/')) != NULL)
191		progname++;
192	else
193		progname = argv[0];
194
195	magicfile = default_magicfile;
196	if ((usermagic = getenv("MAGIC")) != NULL)
197		magicfile = usermagic;
198	else
199		if ((home = getenv("HOME")) != NULL) {
200			if ((usermagic = malloc(strlen(home)
201			    + sizeof(hmagic))) != NULL) {
202				(void)strcpy(usermagic, home);
203				(void)strcat(usermagic, hmagic);
204				if (stat(usermagic, &sb)<0)
205					free(usermagic);
206				else
207					magicfile = usermagic;
208			}
209		}
210
211#ifdef S_IFLNK
212	flags |= getenv("POSIXLY_CORRECT") ? MAGIC_SYMLINK : 0;
213#endif
214#ifndef HAVE_GETOPT_LONG
215	while ((c = getopt(argc, argv, OPTSTRING)) != -1)
216#else
217	while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
218	    &longindex)) != -1)
219#endif
220		switch (c) {
221#ifdef HAVE_GETOPT_LONG
222		case 0 :
223			if (longindex == 1)
224				help();
225			break;
226#endif
227		case '0':
228			nulsep = 1;
229			break;
230		case 'b':
231			++bflag;
232			break;
233		case 'c':
234			action = FILE_CHECK;
235			break;
236		case 'C':
237			action = FILE_COMPILE;
238			break;
239		case 'd':
240			flags |= MAGIC_DEBUG|MAGIC_CHECK;
241			break;
242		case 'e':
243			for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++)
244				if (strcmp(nv[i].name, optarg) == 0)
245					break;
246
247			if (i == sizeof(nv) / sizeof(nv[0]))
248				errflg++;
249			else
250				flags |= nv[i].value;
251			break;
252
253		case 'f':
254			if(action)
255				usage();
256			load(magicfile, flags);
257			unwrap(optarg);
258			++didsomefiles;
259			break;
260		case 'F':
261			separator = optarg;
262			break;
263		case 'i':
264			flags |= MAGIC_MIME;
265			break;
266		case 'k':
267			flags |= MAGIC_CONTINUE;
268			break;
269		case 'm':
270			magicfile = optarg;
271			break;
272		case 'n':
273			++nobuffer;
274			break;
275		case 'N':
276			++nopad;
277			break;
278#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
279		case 'p':
280			flags |= MAGIC_PRESERVE_ATIME;
281			break;
282#endif
283		case 'r':
284			flags |= MAGIC_RAW;
285			break;
286		case 's':
287			flags |= MAGIC_DEVICES;
288			break;
289		case 'v':
290			(void)fprintf(stdout, "%s-%d.%.2d\n", progname,
291				       FILE_VERSION_MAJOR, patchlevel);
292			(void)fprintf(stdout, "magic file from %s\n",
293				       magicfile);
294			return 1;
295		case 'z':
296			flags |= MAGIC_COMPRESS;
297			break;
298#ifdef S_IFLNK
299		case 'L':
300			flags |= MAGIC_SYMLINK;
301			break;
302		case 'h':
303			flags &= ~MAGIC_SYMLINK;
304			break;
305#endif
306		case '?':
307		default:
308			errflg++;
309			break;
310		}
311
312	if (errflg) {
313		usage();
314	}
315
316	switch(action) {
317	case FILE_CHECK:
318	case FILE_COMPILE:
319		magic = magic_open(flags|MAGIC_CHECK);
320		if (magic == NULL) {
321			(void)fprintf(stderr, "%s: %s\n", progname,
322			    strerror(errno));
323			return 1;
324		}
325		c = action == FILE_CHECK ? magic_check(magic, magicfile) :
326		    magic_compile(magic, magicfile);
327		if (c == -1) {
328			(void)fprintf(stderr, "%s: %s\n", progname,
329			    magic_error(magic));
330			return -1;
331		}
332		return 0;
333	default:
334		load(magicfile, flags);
335		break;
336	}
337
338	if (optind == argc) {
339		if (!didsomefiles) {
340			usage();
341		}
342	}
343	else {
344		int i, wid, nw;
345		for (wid = 0, i = optind; i < argc; i++) {
346			nw = file_mbswidth(argv[i]);
347			if (nw > wid)
348				wid = nw;
349		}
350		for (; optind < argc; optind++)
351			process(argv[optind], wid);
352	}
353
354	magic_close(magic);
355	return 0;
356}
357
358
359private void
360/*ARGSUSED*/
361load(const char *m, int flags)
362{
363	if (magic || m == NULL)
364		return;
365	magic = magic_open(flags);
366	if (magic == NULL) {
367		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
368		exit(1);
369	}
370	if (magic_load(magic, magicfile) == -1) {
371		(void)fprintf(stderr, "%s: %s\n",
372		    progname, magic_error(magic));
373		exit(1);
374	}
375}
376
377/*
378 * unwrap -- read a file of filenames, do each one.
379 */
380private void
381unwrap(char *fn)
382{
383	char buf[MAXPATHLEN];
384	FILE *f;
385	int wid = 0, cwid;
386	size_t len;
387
388	if (strcmp("-", fn) == 0) {
389		f = stdin;
390		wid = 1;
391	} else {
392		if ((f = fopen(fn, "r")) == NULL) {
393			(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
394			    progname, fn, strerror(errno));
395			exit(1);
396		}
397
398		while (fgets(buf, MAXPATHLEN, f) != NULL) {
399			len = strlen(buf);
400			if (len > 0 && buf[len - 1] == '\n')
401				buf[len - 1] = '\0';
402			cwid = file_mbswidth(buf);
403			if (cwid > wid)
404				wid = cwid;
405		}
406
407		rewind(f);
408	}
409
410	while (fgets(buf, MAXPATHLEN, f) != NULL) {
411		len = strlen(buf);
412		if (len > 0 && buf[len - 1] == '\n')
413			buf[len - 1] = '\0';
414		process(buf, wid);
415		if(nobuffer)
416			(void)fflush(stdout);
417	}
418
419	(void)fclose(f);
420}
421
422/*
423 * Called for each input file on the command line (or in a list of files)
424 */
425private void
426process(const char *inname, int wid)
427{
428	const char *type;
429	int std_in = strcmp(inname, "-") == 0;
430
431	if (wid > 0 && !bflag) {
432		(void)printf("%s", std_in ? "/dev/stdin" : inname);
433		if (nulsep)
434			(void)putc('\0', stdout);
435		else
436			(void)printf("%s", separator);
437		(void)printf("%*s ",
438		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
439	}
440
441	type = magic_file(magic, std_in ? NULL : inname);
442	if (type == NULL)
443		(void)printf("ERROR: %s\n", magic_error(magic));
444	else
445		(void)printf("%s\n", type);
446}
447
448
449#if 0
450/*
451 * byteconv4
452 * Input:
453 *	from		4 byte quantity to convert
454 *	same		whether to perform byte swapping
455 *	big_endian	whether we are a big endian host
456 */
457private int
458byteconv4(int from, int same, int big_endian)
459{
460	if (same)
461		return from;
462	else if (big_endian) {		/* lsb -> msb conversion on msb */
463		union {
464			int i;
465			char c[4];
466		} retval, tmpval;
467
468		tmpval.i = from;
469		retval.c[0] = tmpval.c[3];
470		retval.c[1] = tmpval.c[2];
471		retval.c[2] = tmpval.c[1];
472		retval.c[3] = tmpval.c[0];
473
474		return retval.i;
475	}
476	else
477		return ntohl(from);	/* msb -> lsb conversion on lsb */
478}
479
480/*
481 * byteconv2
482 * Same as byteconv4, but for shorts
483 */
484private short
485byteconv2(int from, int same, int big_endian)
486{
487	if (same)
488		return from;
489	else if (big_endian) {		/* lsb -> msb conversion on msb */
490		union {
491			short s;
492			char c[2];
493		} retval, tmpval;
494
495		tmpval.s = (short) from;
496		retval.c[0] = tmpval.c[1];
497		retval.c[1] = tmpval.c[0];
498
499		return retval.s;
500	}
501	else
502		return ntohs(from);	/* msb -> lsb conversion on lsb */
503}
504#endif
505
/*
 * file_mbswidth - number of columns needed to display string `s'.
 *
 * With wide character support the string is decoded with mbrtowc() and
 * the wcwidth() of every character is summed.  A newline, and any other
 * character for which wcwidth() reports a negative width, counts as one
 * column.  If the string cannot be decoded its length in bytes is
 * returned instead.  Without wide character support this is strlen().
 */
size_t
file_mbswidth(const char *s)
{
#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	size_t bytesconsumed, old_n, n, width = 0;
	mbstate_t state;
	wchar_t nextchar;
	int w;

	(void)memset(&state, 0, sizeof(mbstate_t));
	old_n = n = strlen(s);

	while (n > 0) {
		bytesconsumed = mbrtowc(&nextchar, s, n, &state);
		if (bytesconsumed == (size_t)(-1) ||
		    bytesconsumed == (size_t)(-2)) {
			/* Something went wrong, return something reasonable */
			return old_n;
		}
		if (s[0] == '\n') {
			/*
			 * do what strlen() would do, so that caller
			 * is always right
			 */
			width++;
		} else {
			/*
			 * A negative width must not be added to the
			 * unsigned total, where it would shrink or wrap
			 * the result; count one column instead.
			 */
			w = wcwidth(nextchar);
			width += (w < 0) ? 1 : (size_t)w;
		}

		s += bytesconsumed;
		n -= bytesconsumed;
	}
	return width;
#else
	return strlen(s);
#endif
}
539
540private void
541usage(void)
542{
543	(void)fprintf(stderr, USAGE, progname, progname);
544#ifdef HAVE_GETOPT_LONG
545	(void)fputs("Try `file --help' for more information.\n", stderr);
546#endif
547	exit(1);
548}
549
550#ifdef HAVE_GETOPT_LONG
551private void
552help(void)
553{
554	(void)puts(
555"Usage: file [OPTION]... [FILE]...\n"
556"Determine file type of FILEs.\n"
557"\n"
558"  -m, --magic-file LIST      use LIST as a colon-separated list of magic\n"
559"                               number files\n"
560"  -z, --uncompress           try to look inside compressed files\n"
561"  -b, --brief                do not prepend filenames to output lines\n"
562"  -c, --checking-printout    print the parsed form of the magic file, use in\n"
563"                               conjunction with -m to debug a new magic file\n"
564"                               before installing it\n"
565"  -e, --exclude              exclude test from the list of test to be\n"
566"                               performed for file. Valid tests are:\n"
567"                               ascii, apptype, elf, compress, soft, tar\n"
568"  -f, --files-from FILE      read the filenames to be examined from FILE\n"
569"  -F, --separator string     use string as separator instead of `:'\n"
570"  -i, --mime                 output mime type strings\n"
571"  -k, --keep-going           don't stop at the first match\n"
572"  -L, --dereference          causes symlinks to be followed\n"
573"  -n, --no-buffer            do not buffer output\n"
574"  -N, --no-pad               do not pad output\n"
575"  -p, --preserve-date        preserve access times on files\n"
576"  -r, --raw                  don't translate unprintable chars to \\ooo\n"
577"  -s, --special-files        treat special (block/char devices) files as\n"
578"                             ordinary ones\n"
579"or\n"
580"      --help                 display this help and exit\n"
581"or\n"
582"      --version              output version information and exit\n"
583"or\n"
584"  -C, --compile              compile file specified by -m\n"
585);
586	exit(0);
587}
588#endif
589