compress.c revision 277592
1/*
2 * Copyright (c) Ian F. Darwin 1986-1995.
3 * Software written by Ian F. Darwin and others;
4 * maintained 1995-present by Christos Zoulas and others.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice immediately at the beginning of the file, without modification,
11 *    this list of conditions, and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 * SUCH DAMAGE.
27 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *			information if recognized
 *	uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 *			*newch using method, return the size of the new data
 */
35#include "file.h"
36
37#ifndef lint
38FILE_RCSID("@(#)$File: compress.c,v 1.77 2014/12/12 16:33:01 christos Exp $")
39#endif
40
41#include "magic.h"
42#include <stdlib.h>
43#ifdef HAVE_UNISTD_H
44#include <unistd.h>
45#endif
46#include <string.h>
47#include <errno.h>
48#include <signal.h>
49#if !defined(__MINGW32__) && !defined(WIN32)
50#include <sys/ioctl.h>
51#endif
52#ifdef HAVE_SYS_WAIT_H
53#include <sys/wait.h>
54#endif
55#if defined(HAVE_SYS_TIME_H)
56#include <sys/time.h>
57#endif
58#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
59#define BUILTIN_DECOMPRESS
60#include <zlib.h>
61#endif
62
63private const struct {
64	const char magic[8];
65	size_t maglen;
66	const char *argv[3];
67	int silent;
68} compr[] = {
69	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },		/* compressed */
70	/* Uncompress can get stuck; so use gzip first if we have it
71	 * Idea from Damien Clark, thanks! */
72	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },	/* compressed */
73	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },		/* gzipped */
74	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },		/* frozen */
75	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },		/* SCO LZH */
76	/* the standard pack utilities do not accept standard input */
77	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },		/* packed */
78	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },		/* pkzipped, */
79					    /* ...only first file examined */
80	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
81	{ "LZIP",     4, { "lzip", "-cdq", NULL }, 1 },
82 	{ "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 },		/* XZ Utils */
83 	{ "LRZI",     4, { "lrzip", "-dqo-", NULL }, 1 },	/* LRZIP */
84 	{ "\004\"M\030", 4, { "lz4", "-cd", NULL }, 1 },	/* LZ4 */
85};
86
87#define NODATA ((size_t)~0)
88
89private ssize_t swrite(int, const void *, size_t);
90#if HAVE_FORK
91private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
92private size_t uncompressbuf(struct magic_set *, int, size_t,
93    const unsigned char *, unsigned char **, size_t);
94#ifdef BUILTIN_DECOMPRESS
95private size_t uncompressgzipped(struct magic_set *, const unsigned char *,
96    unsigned char **, size_t);
97#endif
98
99protected int
100file_zmagic(struct magic_set *ms, int fd, const char *name,
101    const unsigned char *buf, size_t nbytes)
102{
103	unsigned char *newbuf = NULL;
104	size_t i, nsz;
105	int rv = 0;
106	int mime = ms->flags & MAGIC_MIME;
107	sig_t osigpipe;
108
109	if ((ms->flags & MAGIC_COMPRESS) == 0)
110		return 0;
111
112	osigpipe = signal(SIGPIPE, SIG_IGN);
113	for (i = 0; i < ncompr; i++) {
114		if (nbytes < compr[i].maglen)
115			continue;
116		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
117		    (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
118		    nbytes)) != NODATA) {
119			ms->flags &= ~MAGIC_COMPRESS;
120			rv = -1;
121			if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
122				goto error;
123
124			if (mime == MAGIC_MIME || mime == 0) {
125				if (file_printf(ms, mime ?
126				    " compressed-encoding=" : " (") == -1)
127					goto error;
128				if (file_buffer(ms, -1, NULL, buf, nbytes) == -1)
129					goto error;
130				if (!mime && file_printf(ms, ")") == -1)
131					goto error;
132			}
133
134			rv = 1;
135			break;
136		}
137	}
138error:
139	(void)signal(SIGPIPE, osigpipe);
140	free(newbuf);
141	ms->flags |= MAGIC_COMPRESS;
142	return rv;
143}
144#endif
145/*
146 * `safe' write for sockets and pipes.
147 */
148private ssize_t
149swrite(int fd, const void *buf, size_t n)
150{
151	ssize_t rv;
152	size_t rn = n;
153
154	do
155		switch (rv = write(fd, buf, n)) {
156		case -1:
157			if (errno == EINTR)
158				continue;
159			return -1;
160		default:
161			n -= rv;
162			buf = CAST(const char *, buf) + rv;
163			break;
164		}
165	while (n > 0);
166	return rn;
167}
168
169
170/*
171 * `safe' read for sockets and pipes.
172 */
173protected ssize_t
174sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
175{
176	ssize_t rv;
177#ifdef FIONREAD
178	int t = 0;
179#endif
180	size_t rn = n;
181
182	if (fd == STDIN_FILENO)
183		goto nocheck;
184
185#ifdef FIONREAD
186	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
187#ifdef FD_ZERO
188		ssize_t cnt;
189		for (cnt = 0;; cnt++) {
190			fd_set check;
191			struct timeval tout = {0, 100 * 1000};
192			int selrv;
193
194			FD_ZERO(&check);
195			FD_SET(fd, &check);
196
197			/*
198			 * Avoid soft deadlock: do not read if there
199			 * is nothing to read from sockets and pipes.
200			 */
201			selrv = select(fd + 1, &check, NULL, NULL, &tout);
202			if (selrv == -1) {
203				if (errno == EINTR || errno == EAGAIN)
204					continue;
205			} else if (selrv == 0 && cnt >= 5) {
206				return 0;
207			} else
208				break;
209		}
210#endif
211		(void)ioctl(fd, FIONREAD, &t);
212	}
213
214	if (t > 0 && (size_t)t < n) {
215		n = t;
216		rn = n;
217	}
218#endif
219
220nocheck:
221	do
222		switch ((rv = read(fd, buf, n))) {
223		case -1:
224			if (errno == EINTR)
225				continue;
226			return -1;
227		case 0:
228			return rn - n;
229		default:
230			n -= rv;
231			buf = ((char *)buf) + rv;
232			break;
233		}
234	while (n > 0);
235	return rn;
236}
237
238protected int
239file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
240    size_t nbytes)
241{
242	char buf[4096];
243	ssize_t r;
244	int tfd;
245
246	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf);
247#ifndef HAVE_MKSTEMP
248	{
249		char *ptr = mktemp(buf);
250		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
251		r = errno;
252		(void)unlink(ptr);
253		errno = r;
254	}
255#else
256	{
257		int te;
258		tfd = mkstemp(buf);
259		te = errno;
260		(void)unlink(buf);
261		errno = te;
262	}
263#endif
264	if (tfd == -1) {
265		file_error(ms, errno,
266		    "cannot create temporary file for pipe copy");
267		return -1;
268	}
269
270	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
271		r = 1;
272	else {
273		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
274			if (swrite(tfd, buf, (size_t)r) != r)
275				break;
276	}
277
278	switch (r) {
279	case -1:
280		file_error(ms, errno, "error copying from pipe to temp file");
281		return -1;
282	case 0:
283		break;
284	default:
285		file_error(ms, errno, "error while writing to temp file");
286		return -1;
287	}
288
289	/*
290	 * We duplicate the file descriptor, because fclose on a
291	 * tmpfile will delete the file, but any open descriptors
292	 * can still access the phantom inode.
293	 */
294	if ((fd = dup2(tfd, fd)) == -1) {
295		file_error(ms, errno, "could not dup descriptor for temp file");
296		return -1;
297	}
298	(void)close(tfd);
299	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
300		file_badseek(ms);
301		return -1;
302	}
303	return fd;
304}
305#if HAVE_FORK
306#ifdef BUILTIN_DECOMPRESS
307
308#define FHCRC		(1 << 1)
309#define FEXTRA		(1 << 2)
310#define FNAME		(1 << 3)
311#define FCOMMENT	(1 << 4)
312
313private size_t
314uncompressgzipped(struct magic_set *ms, const unsigned char *old,
315    unsigned char **newch, size_t n)
316{
317	unsigned char flg = old[3];
318	size_t data_start = 10;
319	z_stream z;
320	int rc;
321
322	if (flg & FEXTRA) {
323		if (data_start+1 >= n)
324			return 0;
325		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
326	}
327	if (flg & FNAME) {
328		while(data_start < n && old[data_start])
329			data_start++;
330		data_start++;
331	}
332	if(flg & FCOMMENT) {
333		while(data_start < n && old[data_start])
334			data_start++;
335		data_start++;
336	}
337	if(flg & FHCRC)
338		data_start += 2;
339
340	if (data_start >= n)
341		return 0;
342	if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) {
343		return 0;
344	}
345
346	/* XXX: const castaway, via strchr */
347	z.next_in = (Bytef *)strchr((const char *)old + data_start,
348	    old[data_start]);
349	z.avail_in = CAST(uint32_t, (n - data_start));
350	z.next_out = *newch;
351	z.avail_out = HOWMANY;
352	z.zalloc = Z_NULL;
353	z.zfree = Z_NULL;
354	z.opaque = Z_NULL;
355
356	/* LINTED bug in header macro */
357	rc = inflateInit2(&z, -15);
358	if (rc != Z_OK) {
359		file_error(ms, 0, "zlib: %s", z.msg);
360		return 0;
361	}
362
363	rc = inflate(&z, Z_SYNC_FLUSH);
364	if (rc != Z_OK && rc != Z_STREAM_END) {
365		file_error(ms, 0, "zlib: %s", z.msg);
366		return 0;
367	}
368
369	n = (size_t)z.total_out;
370	(void)inflateEnd(&z);
371
372	/* let's keep the nul-terminate tradition */
373	(*newch)[n] = '\0';
374
375	return n;
376}
377#endif
378
379private size_t
380uncompressbuf(struct magic_set *ms, int fd, size_t method,
381    const unsigned char *old, unsigned char **newch, size_t n)
382{
383	int fdin[2], fdout[2];
384	int status;
385	ssize_t r;
386	pid_t pid;
387
388#ifdef BUILTIN_DECOMPRESS
389        /* FIXME: This doesn't cope with bzip2 */
390	if (method == 2)
391		return uncompressgzipped(ms, old, newch, n);
392#endif
393	(void)fflush(stdout);
394	(void)fflush(stderr);
395
396	if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
397		file_error(ms, errno, "cannot create pipe");
398		return NODATA;
399	}
400	switch (pid = fork()) {
401	case 0:	/* child */
402		(void) close(0);
403		if (fd != -1) {
404		    if (dup(fd) == -1)
405			_exit(1);
406		    (void) lseek(0, (off_t)0, SEEK_SET);
407		} else {
408		    if (dup(fdin[0]) == -1)
409			_exit(1);
410		    (void) close(fdin[0]);
411		    (void) close(fdin[1]);
412		}
413
414		(void) close(1);
415		if (dup(fdout[1]) == -1)
416			_exit(1);
417		(void) close(fdout[0]);
418		(void) close(fdout[1]);
419#ifndef DEBUG
420		if (compr[method].silent)
421			(void)close(2);
422#endif
423
424		(void)execvp(compr[method].argv[0],
425		    (char *const *)(intptr_t)compr[method].argv);
426#ifdef DEBUG
427		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
428		    compr[method].argv[0], strerror(errno));
429#endif
430		exit(1);
431		/*NOTREACHED*/
432	case -1:
433		file_error(ms, errno, "could not fork");
434		return NODATA;
435
436	default: /* parent */
437		(void) close(fdout[1]);
438		if (fd == -1) {
439			(void) close(fdin[0]);
440			/*
441			 * fork again, to avoid blocking because both
442			 * pipes filled
443			 */
444			switch (fork()) {
445			case 0: /* child */
446				(void)close(fdout[0]);
447				if (swrite(fdin[1], old, n) != (ssize_t)n) {
448#ifdef DEBUG
449					(void)fprintf(stderr,
450					    "Write failed (%s)\n",
451					    strerror(errno));
452#endif
453					exit(1);
454				}
455				exit(0);
456				/*NOTREACHED*/
457
458			case -1:
459#ifdef DEBUG
460				(void)fprintf(stderr, "Fork failed (%s)\n",
461				    strerror(errno));
462#endif
463				exit(1);
464				/*NOTREACHED*/
465
466			default:  /* parent */
467				if (wait(&status) == -1) {
468#ifdef DEBUG
469					(void)fprintf(stderr,
470					    "Wait failed (%s)\n",
471					    strerror(errno));
472#endif
473					exit(1);
474				}
475				exit(WIFEXITED(status) ?
476				    WEXITSTATUS(status) : 1);
477				/*NOTREACHED*/
478			}
479			(void) close(fdin[1]);
480			fdin[1] = -1;
481		}
482
483		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
484#ifdef DEBUG
485			(void)fprintf(stderr, "Malloc failed (%s)\n",
486			    strerror(errno));
487#endif
488			n = NODATA;
489			goto err;
490		}
491		if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
492#ifdef DEBUG
493			(void)fprintf(stderr, "Read failed (%s)\n",
494			    strerror(errno));
495#endif
496			free(*newch);
497			n = NODATA;
498			*newch = NULL;
499			goto err;
500		} else {
501			n = r;
502		}
503 		/* NUL terminate, as every buffer is handled here. */
504 		(*newch)[n] = '\0';
505err:
506		if (fdin[1] != -1)
507			(void) close(fdin[1]);
508		(void) close(fdout[0]);
509		if (wait(&status) == -1) {
510#ifdef DEBUG
511			(void)fprintf(stderr, "Wait failed (%s)\n",
512			    strerror(errno));
513#endif
514			n = NODATA;
515		} else if (!WIFEXITED(status)) {
516#ifdef DEBUG
517			(void)fprintf(stderr, "Child not exited (0x%x)\n",
518			    status);
519#endif
520		} else if (WEXITSTATUS(status) != 0) {
521#ifdef DEBUG
522			(void)fprintf(stderr, "Child exited (0x%d)\n",
523			    WEXITSTATUS(status));
524#endif
525		}
526
527		(void) close(fdin[0]);
528
529		return n;
530	}
531}
532#endif
533