file.c revision 186675
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
28/*
29 * file - find type of a file or files - main program.
30 */
31
32#include "file.h"
33#include "magic.h"
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <unistd.h>
38#include <string.h>
39#include <sys/types.h>
40#include <sys/param.h>	/* for MAXPATHLEN */
41#include <sys/stat.h>
42#ifdef RESTORE_TIME
43# if (__COHERENT__ >= 0x420)
44#  include <sys/utime.h>
45# else
46#  ifdef USE_UTIMES
47#   include <sys/time.h>
48#  else
49#   include <utime.h>
50#  endif
51# endif
52#endif
53#ifdef HAVE_UNISTD_H
54#include <unistd.h>	/* for read() */
55#endif
56#ifdef HAVE_LOCALE_H
57#include <locale.h>
58#endif
59#ifdef HAVE_WCHAR_H
60#include <wchar.h>
61#endif
62
63#ifdef HAVE_GETOPT_H
64#include <getopt.h>	/* for long options (is this portable?)*/
65#else
66#undef HAVE_GETOPT_LONG
67#endif
68
69#include <netinet/in.h>		/* for byte swapping */
70
71#include "patchlevel.h"
72
73#ifndef	lint
74FILE_RCSID("@(#)$File: file.c,v 1.117 2007/12/27 16:35:58 christos Exp $")
75#endif	/* lint */
76
77
/*
 * The symlink-related option letters are only advertised in the usage
 * line on systems that define S_IFLNK.
 */
#if defined(S_IFLNK)
# define SYMLINKFLAG "Lh"
#else
# define SYMLINKFLAG ""
#endif

/* printf-style usage text; both %s conversions take the program name. */
#define USAGE \
	"Usage: %s [-bcik" SYMLINKFLAG "nNrsvz0] [-e test] [-f namefile]" \
	" [-F separator] [-m magicfiles] file...\n" \
	"       %s -C -m magicfiles\n"

/* Fallback for systems whose headers do not provide MAXPATHLEN. */
#if !defined(MAXPATHLEN)
# define MAXPATHLEN 512
#endif
89
90private int 		/* Global command-line options 		*/
91	bflag = 0,	/* brief output format	 		*/
92	nopad = 0,	/* Don't pad output			*/
93	nobuffer = 0,   /* Do not buffer stdout 		*/
94	nulsep = 0;	/* Append '\0' to the separator		*/
95
96private const char *magicfile = 0;	/* where the magic is	*/
97private const char *default_magicfile = MAGIC;
98private const char *separator = ":";	/* Default field separator	*/
99
100private char *progname;		/* used throughout 		*/
101
102private struct magic_set *magic;
103
104private void unwrap(char *);
105private void usage(void);
106#ifdef HAVE_GETOPT_LONG
107private void help(void);
108#endif
109#if 0
110private int byteconv4(int, int, int);
111private short byteconv2(int, int, int);
112#endif
113
114int main(int, char *[]);
115private void process(const char *, int);
116private void load(const char *, int);
117
118
119/*
120 * main - parse arguments and handle options
121 */
122int
123main(int argc, char *argv[])
124{
125	int c;
126	size_t i;
127	int action = 0, didsomefiles = 0, errflg = 0;
128	int flags = 0;
129	char *home, *usermagic;
130	struct stat sb;
131	static const char hmagic[] = "/.magic";
132#define OPTSTRING	"bcCde:f:F:hikLm:nNprsvz0"
133#ifdef HAVE_GETOPT_LONG
134	int longindex;
135	static const struct option long_options[] =
136	{
137#define OPT(shortname, longname, opt, doc)      \
138    {longname, opt, NULL, shortname},
139#define OPT_LONGONLY(longname, opt, doc)        \
140    {longname, opt, NULL, 0},
141#include "file_opts.h"
142#undef OPT
143#undef OPT_LONGONLY
144    {0, 0, NULL, 0}
145};
146#endif
147
148	static const struct {
149		const char *name;
150		int value;
151	} nv[] = {
152		{ "apptype",	MAGIC_NO_CHECK_APPTYPE },
153		{ "ascii",	MAGIC_NO_CHECK_ASCII },
154		{ "compress",	MAGIC_NO_CHECK_COMPRESS },
155		{ "elf",	MAGIC_NO_CHECK_ELF },
156		{ "soft",	MAGIC_NO_CHECK_SOFT },
157		{ "tar",	MAGIC_NO_CHECK_TAR },
158		{ "tokens",	MAGIC_NO_CHECK_TOKENS },
159		{ "troff",	MAGIC_NO_CHECK_TROFF },
160	};
161
162#ifdef LC_CTYPE
163	/* makes islower etc work for other langs */
164	(void)setlocale(LC_CTYPE, "");
165#endif
166
167#ifdef __EMX__
168	/* sh-like wildcard expansion! Shouldn't hurt at least ... */
169	_wildcard(&argc, &argv);
170#endif
171
172	if ((progname = strrchr(argv[0], '/')) != NULL)
173		progname++;
174	else
175		progname = argv[0];
176
177	magicfile = default_magicfile;
178	if ((usermagic = getenv("MAGIC")) != NULL)
179		magicfile = usermagic;
180	else
181		if ((home = getenv("HOME")) != NULL) {
182			if ((usermagic = malloc(strlen(home)
183			    + sizeof(hmagic))) != NULL) {
184				(void)strcpy(usermagic, home);
185				(void)strcat(usermagic, hmagic);
186				if (stat(usermagic, &sb)<0)
187					free(usermagic);
188				else
189					magicfile = usermagic;
190			}
191		}
192
193#ifdef S_IFLNK
194	flags |= getenv("POSIXLY_CORRECT") ? MAGIC_SYMLINK : 0;
195#endif
196#ifndef HAVE_GETOPT_LONG
197	while ((c = getopt(argc, argv, OPTSTRING)) != -1)
198#else
199	while ((c = getopt_long(argc, argv, OPTSTRING, long_options,
200	    &longindex)) != -1)
201#endif
202		switch (c) {
203#ifdef HAVE_GETOPT_LONG
204		case 0 :
205			switch (longindex) {
206			case 0:
207				help();
208				break;
209			case 10:
210				flags |= MAGIC_MIME_TYPE;
211				break;
212			case 11:
213				flags |= MAGIC_MIME_ENCODING;
214				break;
215			}
216			break;
217#endif
218		case '0':
219			nulsep = 1;
220			break;
221		case 'b':
222			bflag++;
223			break;
224		case 'c':
225			action = FILE_CHECK;
226			break;
227		case 'C':
228			action = FILE_COMPILE;
229			break;
230		case 'd':
231			flags |= MAGIC_DEBUG|MAGIC_CHECK;
232			break;
233		case 'e':
234			for (i = 0; i < sizeof(nv) / sizeof(nv[0]); i++)
235				if (strcmp(nv[i].name, optarg) == 0)
236					break;
237
238			if (i == sizeof(nv) / sizeof(nv[0]))
239				errflg++;
240			else
241				flags |= nv[i].value;
242			break;
243
244		case 'f':
245			if(action)
246				usage();
247			load(magicfile, flags);
248			unwrap(optarg);
249			++didsomefiles;
250			break;
251		case 'F':
252			separator = optarg;
253			break;
254		case 'i':
255			flags |= MAGIC_MIME;
256			break;
257		case 'k':
258			flags |= MAGIC_CONTINUE;
259			break;
260		case 'm':
261			magicfile = optarg;
262			break;
263		case 'n':
264			++nobuffer;
265			break;
266		case 'N':
267			++nopad;
268			break;
269#if defined(HAVE_UTIME) || defined(HAVE_UTIMES)
270		case 'p':
271			flags |= MAGIC_PRESERVE_ATIME;
272			break;
273#endif
274		case 'r':
275			flags |= MAGIC_RAW;
276			break;
277		case 's':
278			flags |= MAGIC_DEVICES;
279			break;
280		case 'v':
281			(void)fprintf(stderr, "%s-%d.%.2d\n", progname,
282				       FILE_VERSION_MAJOR, patchlevel);
283			(void)fprintf(stderr, "magic file from %s\n",
284				       magicfile);
285			return 1;
286		case 'z':
287			flags |= MAGIC_COMPRESS;
288			break;
289#ifdef S_IFLNK
290		case 'L':
291			flags |= MAGIC_SYMLINK;
292			break;
293		case 'h':
294			flags &= ~MAGIC_SYMLINK;
295			break;
296#endif
297		case '?':
298		default:
299			errflg++;
300			break;
301		}
302
303	if (errflg) {
304		usage();
305	}
306
307	switch(action) {
308	case FILE_CHECK:
309	case FILE_COMPILE:
310		magic = magic_open(flags|MAGIC_CHECK);
311		if (magic == NULL) {
312			(void)fprintf(stderr, "%s: %s\n", progname,
313			    strerror(errno));
314			return 1;
315		}
316		c = action == FILE_CHECK ? magic_check(magic, magicfile) :
317		    magic_compile(magic, magicfile);
318		if (c == -1) {
319			(void)fprintf(stderr, "%s: %s\n", progname,
320			    magic_error(magic));
321			return -1;
322		}
323		return 0;
324	default:
325		load(magicfile, flags);
326		break;
327	}
328
329	if (optind == argc) {
330		if (!didsomefiles) {
331			usage();
332		}
333	}
334	else {
335		size_t j, wid, nw;
336		for (wid = 0, j = (size_t)optind; j < (size_t)argc; j++) {
337			nw = file_mbswidth(argv[j]);
338			if (nw > wid)
339				wid = nw;
340		}
341		/*
342		 * If bflag is only set twice, set it depending on
343		 * number of files [this is undocumented, and subject to change]
344		 */
345		if (bflag == 2) {
346			bflag = optind >= argc - 1;
347		}
348		for (; optind < argc; optind++)
349			process(argv[optind], wid);
350	}
351
352	magic_close(magic);
353	return 0;
354}
355
356
357private void
358/*ARGSUSED*/
359load(const char *m, int flags)
360{
361	if (magic || m == NULL)
362		return;
363	magic = magic_open(flags);
364	if (magic == NULL) {
365		(void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
366		exit(1);
367	}
368	if (magic_load(magic, magicfile) == -1) {
369		(void)fprintf(stderr, "%s: %s\n",
370		    progname, magic_error(magic));
371		exit(1);
372	}
373}
374
375/*
376 * unwrap -- read a file of filenames, do each one.
377 */
378private void
379unwrap(char *fn)
380{
381	char buf[MAXPATHLEN];
382	FILE *f;
383	int wid = 0, cwid;
384
385	if (strcmp("-", fn) == 0) {
386		f = stdin;
387		wid = 1;
388	} else {
389		if ((f = fopen(fn, "r")) == NULL) {
390			(void)fprintf(stderr, "%s: Cannot open `%s' (%s).\n",
391			    progname, fn, strerror(errno));
392			exit(1);
393		}
394
395		while (fgets(buf, MAXPATHLEN, f) != NULL) {
396			buf[strcspn(buf, "\n")] = '\0';
397			cwid = file_mbswidth(buf);
398			if (cwid > wid)
399				wid = cwid;
400		}
401
402		rewind(f);
403	}
404
405	while (fgets(buf, sizeof(buf), f) != NULL) {
406		buf[strcspn(buf, "\n")] = '\0';
407		process(buf, wid);
408		if(nobuffer)
409			(void)fflush(stdout);
410	}
411
412	(void)fclose(f);
413}
414
415/*
416 * Called for each input file on the command line (or in a list of files)
417 */
418private void
419process(const char *inname, int wid)
420{
421	const char *type;
422	int std_in = strcmp(inname, "-") == 0;
423
424	if (wid > 0 && !bflag) {
425		(void)printf("%s", std_in ? "/dev/stdin" : inname);
426		if (nulsep)
427			(void)putc('\0', stdout);
428		else
429			(void)printf("%s", separator);
430		(void)printf("%*s ",
431		    (int) (nopad ? 0 : (wid - file_mbswidth(inname))), "");
432	}
433
434	type = magic_file(magic, std_in ? NULL : inname);
435	if (type == NULL)
436		(void)printf("ERROR: %s\n", magic_error(magic));
437	else
438		(void)printf("%s\n", type);
439}
440
441
442#if 0
/*
 * byteconv4 - convert a 4 byte quantity between byte orders.
 * Input:
 *	from		4 byte quantity to convert
 *	same		non-zero if no swapping is needed; from is
 *			returned unchanged
 *	big_endian	non-zero if we are a big endian host
 * Returns the converted value.
 */
private int
byteconv4(int from, int same, int big_endian)
{
	union {
		int i;
		char c[4];
	} in, out;
	int k;

	if (same)
		return from;

	if (!big_endian)
		return ntohl(from);	/* msb -> lsb conversion on lsb */

	/* lsb -> msb conversion on msb: reverse the four bytes by hand */
	in.i = from;
	for (k = 0; k < 4; k++)
		out.c[k] = in.c[3 - k];

	return out.i;
}
472
/*
 * byteconv2 - convert a 2 byte quantity between byte orders.
 * Takes the same arguments as byteconv4, but only the low-order short
 * of from is converted.
 */
private short
byteconv2(int from, int same, int big_endian)
{
	union {
		short s;
		char c[2];
	} in, out;

	if (same)
		return from;

	if (!big_endian)
		return ntohs(from);	/* msb -> lsb conversion on lsb */

	/* lsb -> msb conversion on msb: exchange the two bytes by hand */
	in.s = (short) from;
	out.c[0] = in.c[1];
	out.c[1] = in.c[0];

	return out.s;
}
497#endif
498
/*
 * file_mbswidth - number of display columns needed to print s.
 *
 * With wide-character support the string is decoded in the current
 * locale and the wcwidth() of each character is summed.  A newline
 * counts as one column, and characters for which wcwidth() reports no
 * positive width (e.g. non-printable ones, where it returns -1) add
 * nothing.  If the string cannot be decoded, or without wide-character
 * support, the result is strlen(s).
 */
size_t
file_mbswidth(const char *s)
{
#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	size_t bytesconsumed, old_n, n, width = 0;
	mbstate_t state;
	wchar_t nextchar;
	(void)memset(&state, 0, sizeof(mbstate_t));
	old_n = n = strlen(s);

	while (n > 0) {
		bytesconsumed = mbrtowc(&nextchar, s, n, &state);
		if (bytesconsumed == (size_t)(-1) ||
		    bytesconsumed == (size_t)(-2) ||
		    bytesconsumed == 0) {
			/*
			 * Something went wrong (or no progress is possible),
			 * return something reasonable
			 */
			return old_n;
		}
		if (s[0] == '\n') {
			/*
			 * do what strlen() would do, so that caller
			 * is always right
			 */
			width++;
		} else {
			int w = wcwidth(nextchar);

			/* adding -1 to a size_t would shrink or wrap width */
			if (w > 0)
				width += (size_t)w;
		}

		s += bytesconsumed;
		n -= bytesconsumed;
	}
	return width;
#else
	return strlen(s);
#endif
}
532
533private void
534usage(void)
535{
536	(void)fprintf(stderr, USAGE, progname, progname);
537#ifdef HAVE_GETOPT_LONG
538	(void)fputs("Try `file --help' for more information.\n", stderr);
539#endif
540	exit(1);
541}
542
#if defined(HAVE_GETOPT_LONG)
/*
 * help - print the long help text on stderr and exit with status 0.
 *
 * After a fixed banner, one line per option is generated by expanding
 * the OPT / OPT_LONGONLY entries of file_opts.h; each entry's long name
 * and doc string are pasted into the fprintf format at compile time.
 */
private void
help(void)
{
	(void)fputs("Usage: file [OPTION...] [FILE...]\n", stderr);
	(void)fputs("Determine type of FILEs.\n", stderr);
	(void)fputs("\n", stderr);

#define OPT(shortname, longname, opt, doc)      \
        fprintf(stderr, "  -%c, --" longname doc, shortname);
#define OPT_LONGONLY(longname, opt, doc)        \
        fprintf(stderr, "      --" longname doc);
#include "file_opts.h"
#undef OPT
#undef OPT_LONGONLY

	exit(0);
}
#endif
561