compress.c revision 186690
168349Sobrien/*
2133359Sobrien * Copyright (c) Ian F. Darwin 1986-1995.
3133359Sobrien * Software written by Ian F. Darwin and others;
4133359Sobrien * maintained 1995-present by Christos Zoulas and others.
5133359Sobrien *
6133359Sobrien * Redistribution and use in source and binary forms, with or without
7133359Sobrien * modification, are permitted provided that the following conditions
8133359Sobrien * are met:
9133359Sobrien * 1. Redistributions of source code must retain the above copyright
10133359Sobrien *    notice immediately at the beginning of the file, without modification,
11133359Sobrien *    this list of conditions, and the following disclaimer.
12133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright
13133359Sobrien *    notice, this list of conditions and the following disclaimer in the
14133359Sobrien *    documentation and/or other materials provided with the distribution.
15133359Sobrien *
16133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26133359Sobrien * SUCH DAMAGE.
27133359Sobrien */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *		   information if recognized
 *	uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 *		   *newch, using method, return sizeof new
 */
3568349Sobrien#include "file.h"
36133359Sobrien#include "magic.h"
37133359Sobrien#include <stdio.h>
3868349Sobrien#include <stdlib.h>
3968349Sobrien#ifdef HAVE_UNISTD_H
4068349Sobrien#include <unistd.h>
4168349Sobrien#endif
4268349Sobrien#include <string.h>
43133359Sobrien#include <errno.h>
44133359Sobrien#include <sys/types.h>
45169942Sobrien#include <sys/ioctl.h>
4668349Sobrien#ifdef HAVE_SYS_WAIT_H
4768349Sobrien#include <sys/wait.h>
4868349Sobrien#endif
49169962Sobrien#if defined(HAVE_SYS_TIME_H)
50169962Sobrien#include <sys/time.h>
51169962Sobrien#endif
52175296Sobrien#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ)
53175296Sobrien#define BUILTIN_DECOMPRESS
54103373Sobrien#include <zlib.h>
55103373Sobrien#endif
56103373Sobrien
57169962Sobrien
5868349Sobrien#ifndef lint
59186690SobrienFILE_RCSID("@(#)$File: compress.c,v 1.57 2008/07/16 18:00:57 christos Exp $")
6068349Sobrien#endif
6168349Sobrien
/*
 * Table of recognised compressed formats.  For each entry:
 *	magic	- bytes expected at the start of the data
 *	maglen	- how many bytes of magic are compared
 *	argv	- NULL-terminated argument vector of the external program
 *		  that writes the decompressed data to its standard output
 *	silent	- non-zero if the program's stderr is closed before exec
 *		  (unless built with DEBUG)
 *
 * Entries are tried from first to last, so order matters.  Note that
 * uncompressbuf() refers to the gzipped entry by its index (2).
 */
private const struct {
	const char magic[8];
	size_t maglen;
	const char *argv[3];
	int silent;
} compr[] = {
	/* compressed */
	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },
	/*
	 * compressed, again: uncompress can get stuck, so gzip is tried
	 * first if we have it.  Idea from Damien Clark, thanks!
	 */
	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },
	/* gzipped */
	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },
	/* frozen */
	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },
	/* SCO LZH */
	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },
	/* packed; the standard pack utilities do not accept standard input */
	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },
	/* pkzipped, only the first file is examined */
	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },
	/* bzip2-ed */
	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },
};

/* Number of entries in compr[]. */
private size_t ncompr = sizeof(compr) / sizeof(compr[0]);
8368349Sobrien
84169942Sobrien#define NODATA ((size_t)~0)
8568349Sobrien
86169942Sobrien
87133359Sobrienprivate ssize_t swrite(int, const void *, size_t);
88159764Sobrienprivate size_t uncompressbuf(struct magic_set *, int, size_t,
89159764Sobrien    const unsigned char *, unsigned char **, size_t);
90175296Sobrien#ifdef BUILTIN_DECOMPRESS
91133359Sobrienprivate size_t uncompressgzipped(struct magic_set *, const unsigned char *,
92133359Sobrien    unsigned char **, size_t);
93103373Sobrien#endif
9468349Sobrien
95133359Sobrienprotected int
96169962Sobrienfile_zmagic(struct magic_set *ms, int fd, const char *name,
97169962Sobrien    const unsigned char *buf, size_t nbytes)
9868349Sobrien{
99133359Sobrien	unsigned char *newbuf = NULL;
100133359Sobrien	size_t i, nsz;
101133359Sobrien	int rv = 0;
102175296Sobrien	int mime = ms->flags & MAGIC_MIME;
10368349Sobrien
104133359Sobrien	if ((ms->flags & MAGIC_COMPRESS) == 0)
105133359Sobrien		return 0;
106133359Sobrien
10768349Sobrien	for (i = 0; i < ncompr; i++) {
10868349Sobrien		if (nbytes < compr[i].maglen)
10968349Sobrien			continue;
11068349Sobrien		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
111159764Sobrien		    (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
112169942Sobrien		    nbytes)) != NODATA) {
113133359Sobrien			ms->flags &= ~MAGIC_COMPRESS;
114133359Sobrien			rv = -1;
115169962Sobrien			if (file_buffer(ms, -1, name, newbuf, nsz) == -1)
116133359Sobrien				goto error;
117175296Sobrien
118175296Sobrien			if (mime == MAGIC_MIME || mime == 0) {
119175296Sobrien				if (file_printf(ms, mime ?
120175296Sobrien				    " compressed-encoding=" : " (") == -1)
121175296Sobrien					goto error;
122175296Sobrien			}
123175296Sobrien
124175296Sobrien			if ((mime == 0 || mime & MAGIC_MIME_ENCODING) &&
125175296Sobrien			    file_buffer(ms, -1, NULL, buf, nbytes) == -1)
126133359Sobrien				goto error;
127175296Sobrien
128175296Sobrien			if (!mime && file_printf(ms, ")") == -1)
129133359Sobrien				goto error;
130133359Sobrien			rv = 1;
131133359Sobrien			break;
13268349Sobrien		}
13368349Sobrien	}
134133359Sobrienerror:
135133359Sobrien	if (newbuf)
136133359Sobrien		free(newbuf);
137133359Sobrien	ms->flags |= MAGIC_COMPRESS;
138133359Sobrien	return rv;
13968349Sobrien}
14068349Sobrien
/*
 * `safe' write for sockets and pipes.
 *
 * Keeps calling write(2) until all n bytes of buf have been written to
 * fd, resuming after short writes and retrying when interrupted by a
 * signal (EINTR).
 *
 * Returns n on success, or -1 (with errno set by write) on any other
 * error.
 */
private ssize_t
swrite(int fd, const void *buf, size_t n)
{
	const char *pos = buf;
	size_t left = n;
	/* write() returns ssize_t; an int would truncate large counts */
	ssize_t rv;

	do {
		rv = write(fd, pos, left);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		left -= (size_t)rv;
		pos += rv;
	} while (left > 0);

	return (ssize_t)n;
}
16475937Sobrien
16575937Sobrien
/*
 * `safe' read for sockets and pipes.
 *
 * Reads up to n bytes from fd into buf, retrying reads interrupted by a
 * signal (EINTR) and continuing after short reads.
 *
 * Unless fd is standard input, and where FIONREAD is available, the
 * number of bytes currently readable is queried first; if that number
 * is positive and smaller than n, only that many bytes are requested.
 * When nothing is readable yet (or canbepipe is set and the query
 * fails), select(2) is used with a 100ms timeout to wait for data
 * before the count is queried again.
 *
 * Returns the number of bytes read (which is less than requested when
 * end-of-file is hit first), 0 if nothing arrived, or -1 on a read
 * error with errno set.
 */
protected ssize_t
sread(int fd, void *buf, size_t n, int canbepipe)
{
	/* read() returns ssize_t; an int would truncate large counts */
	ssize_t rv;
	int cnt;
#ifdef FIONREAD
	int t = 0;
#endif
	size_t rn = n;

	if (fd == STDIN_FILENO)
		goto nocheck;

#ifdef FIONREAD
	if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) {
#ifdef FD_ZERO
		for (cnt = 0;; cnt++) {
			fd_set check;
			struct timeval tout = {0, 100 * 1000};
			int selrv;

			FD_ZERO(&check);
			FD_SET(fd, &check);

			/*
			 * Avoid soft deadlock: do not read if there
			 * is nothing to read from sockets and pipes.
			 */
			selrv = select(fd + 1, &check, NULL, NULL, &tout);
			if (selrv == -1) {
				if (errno == EINTR || errno == EAGAIN)
					continue;
				/*
				 * Any other failure (e.g. EBADF) will not go
				 * away by retrying; stop waiting and let
				 * read() below report the problem instead of
				 * spinning here forever.
				 */
				break;
			} else if (selrv == 0 && cnt >= 5) {
				return 0;
			} else {
				/*
				 * NOTE(review): a timeout with cnt < 5 also
				 * ends up here, so cnt only grows through
				 * the EINTR/EAGAIN retries above.
				 */
				break;
			}
		}
#endif
		(void)ioctl(fd, FIONREAD, &t);
	}

	/* do not ask for more than is known to be available */
	if (t > 0 && (size_t)t < n) {
		n = t;
		rn = n;
	}
#endif

nocheck:
	do {
		rv = read(fd, buf, n);
		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (rv == 0)	/* end of file: report what we got so far */
			return (ssize_t)(rn - n);
		n -= (size_t)rv;
		buf = ((char *)buf) + rv;
	} while (n > 0);

	return (ssize_t)rn;
}
23275937Sobrien
233133359Sobrienprotected int
234133359Sobrienfile_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
235133359Sobrien    size_t nbytes)
236103373Sobrien{
237103373Sobrien	char buf[4096];
238103373Sobrien	int r, tfd;
239103373Sobrien
240103373Sobrien	(void)strcpy(buf, "/tmp/file.XXXXXX");
241103373Sobrien#ifndef HAVE_MKSTEMP
242103373Sobrien	{
243103373Sobrien		char *ptr = mktemp(buf);
244103373Sobrien		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
245103373Sobrien		r = errno;
246103373Sobrien		(void)unlink(ptr);
247103373Sobrien		errno = r;
248103373Sobrien	}
249103373Sobrien#else
250103373Sobrien	tfd = mkstemp(buf);
251103373Sobrien	r = errno;
252103373Sobrien	(void)unlink(buf);
253103373Sobrien	errno = r;
254103373Sobrien#endif
255103373Sobrien	if (tfd == -1) {
256133359Sobrien		file_error(ms, errno,
257133359Sobrien		    "cannot create temporary file for pipe copy");
258133359Sobrien		return -1;
259103373Sobrien	}
260103373Sobrien
261133359Sobrien	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
262103373Sobrien		r = 1;
263103373Sobrien	else {
264169962Sobrien		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
265133359Sobrien			if (swrite(tfd, buf, (size_t)r) != r)
266103373Sobrien				break;
267103373Sobrien	}
268103373Sobrien
269103373Sobrien	switch (r) {
270103373Sobrien	case -1:
271133359Sobrien		file_error(ms, errno, "error copying from pipe to temp file");
272133359Sobrien		return -1;
273103373Sobrien	case 0:
274103373Sobrien		break;
275103373Sobrien	default:
276133359Sobrien		file_error(ms, errno, "error while writing to temp file");
277133359Sobrien		return -1;
278103373Sobrien	}
279103373Sobrien
280103373Sobrien	/*
281103373Sobrien	 * We duplicate the file descriptor, because fclose on a
282103373Sobrien	 * tmpfile will delete the file, but any open descriptors
283103373Sobrien	 * can still access the phantom inode.
284103373Sobrien	 */
285103373Sobrien	if ((fd = dup2(tfd, fd)) == -1) {
286133359Sobrien		file_error(ms, errno, "could not dup descriptor for temp file");
287133359Sobrien		return -1;
288103373Sobrien	}
289103373Sobrien	(void)close(tfd);
290103373Sobrien	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
291133359Sobrien		file_badseek(ms);
292133359Sobrien		return -1;
293103373Sobrien	}
294103373Sobrien	return fd;
295103373Sobrien}
296103373Sobrien
#ifdef BUILTIN_DECOMPRESS

/* Bits of the gzip header flag byte (offset 3) that add optional fields. */
#define FHCRC		(1 << 1)
#define FEXTRA		(1 << 2)
#define FNAME		(1 << 3)
#define FCOMMENT	(1 << 4)

/*
 * Decompress a gzipped buffer in-process using zlib.
 *
 *	ms	- magic set used for error reporting
 *	old	- the gzipped data, n bytes long
 *	newch	- receives a freshly malloc'ed buffer of HOWMANY + 1 bytes
 *		  holding the NUL-terminated output
 *
 * The optional header fields selected by the flag byte are skipped and
 * the remainder is inflated as a raw deflate stream, producing at most
 * HOWMANY bytes.
 *
 * Returns the number of decompressed bytes, or 0 on any failure.  Once
 * *newch has been allocated it is left to the caller to free, also
 * when 0 is returned afterwards.
 */
private size_t
uncompressgzipped(struct magic_set *ms, const unsigned char *old,
    unsigned char **newch, size_t n)
{
	unsigned char flg;
	size_t data_start = 10;
	z_stream z;
	int rc;

	/* the flag byte must exist before it can be examined */
	if (n < 4)
		return 0;
	flg = old[3];

	if (flg & FEXTRA) {
		if (data_start+1 >= n)
			return 0;
		/* two-byte little-endian length, then that many bytes */
		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
	}
	if (flg & FNAME) {
		/* NUL-terminated original file name */
		while(data_start < n && old[data_start])
			data_start++;
		data_start++;
	}
	if(flg & FCOMMENT) {
		/* NUL-terminated comment */
		while(data_start < n && old[data_start])
			data_start++;
		data_start++;
	}
	if(flg & FHCRC)
		data_start += 2;

	if (data_start >= n)
		return 0;
	if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) {
		return 0;
	}

	/* zlib's next_in is not const-qualified; the input is not modified */
	z.next_in = (Bytef *)(intptr_t)(old + data_start);
	z.avail_in = n - data_start;
	z.next_out = *newch;
	z.avail_out = HOWMANY;
	z.zalloc = Z_NULL;
	z.zfree = Z_NULL;
	z.opaque = Z_NULL;

	/* negative window bits: raw deflate data, header already skipped */
	rc = inflateInit2(&z, -15);
	if (rc != Z_OK) {
		/* z.msg may be NULL, which must not be passed to %s */
		file_error(ms, 0, "zlib: %s", z.msg ? z.msg : "unknown error");
		return 0;
	}

	rc = inflate(&z, Z_SYNC_FLUSH);
	if (rc != Z_OK && rc != Z_STREAM_END) {
		file_error(ms, 0, "zlib: %s", z.msg ? z.msg : "unknown error");
		/* release the inflate state on the failure path as well */
		(void)inflateEnd(&z);
		return 0;
	}

	n = (size_t)z.total_out;
	(void)inflateEnd(&z);

	/* let's keep the nul-terminate tradition */
	(*newch)[n] = '\0';

	return n;
}
#endif
368103373Sobrien
369133359Sobrienprivate size_t
370159764Sobrienuncompressbuf(struct magic_set *ms, int fd, size_t method,
371159764Sobrien    const unsigned char *old, unsigned char **newch, size_t n)
372103373Sobrien{
37368349Sobrien	int fdin[2], fdout[2];
374133359Sobrien	int r;
37568349Sobrien
376175296Sobrien#ifdef BUILTIN_DECOMPRESS
377186690Sobrien        /* FIXME: This doesn't cope with bzip2 */
378103373Sobrien	if (method == 2)
379133359Sobrien		return uncompressgzipped(ms, old, newch, n);
380103373Sobrien#endif
381159764Sobrien	(void)fflush(stdout);
382159764Sobrien	(void)fflush(stderr);
383103373Sobrien
384159764Sobrien	if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
385133359Sobrien		file_error(ms, errno, "cannot create pipe");
386169942Sobrien		return NODATA;
38768349Sobrien	}
38868349Sobrien	switch (fork()) {
38968349Sobrien	case 0:	/* child */
39068349Sobrien		(void) close(0);
391159764Sobrien		if (fd != -1) {
392159764Sobrien		    (void) dup(fd);
393159764Sobrien		    (void) lseek(0, (off_t)0, SEEK_SET);
394159764Sobrien		} else {
395159764Sobrien		    (void) dup(fdin[0]);
396159764Sobrien		    (void) close(fdin[0]);
397159764Sobrien		    (void) close(fdin[1]);
398159764Sobrien		}
39968349Sobrien
40068349Sobrien		(void) close(1);
40168349Sobrien		(void) dup(fdout[1]);
40268349Sobrien		(void) close(fdout[0]);
40368349Sobrien		(void) close(fdout[1]);
404159764Sobrien#ifndef DEBUG
40568349Sobrien		if (compr[method].silent)
406159764Sobrien			(void)close(2);
407159764Sobrien#endif
40868349Sobrien
409169962Sobrien		(void)execvp(compr[method].argv[0],
410169962Sobrien		    (char *const *)(intptr_t)compr[method].argv);
411159764Sobrien#ifdef DEBUG
412159764Sobrien		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
413159764Sobrien		    compr[method].argv[0], strerror(errno));
414159764Sobrien#endif
41568349Sobrien		exit(1);
41668349Sobrien		/*NOTREACHED*/
41768349Sobrien	case -1:
418133359Sobrien		file_error(ms, errno, "could not fork");
419169942Sobrien		return NODATA;
42068349Sobrien
42168349Sobrien	default: /* parent */
42268349Sobrien		(void) close(fdout[1]);
423159764Sobrien		if (fd == -1) {
424159764Sobrien			(void) close(fdin[0]);
425159764Sobrien			/*
426159764Sobrien			 * fork again, to avoid blocking because both
427159764Sobrien			 * pipes filled
428159764Sobrien			 */
429159764Sobrien			switch (fork()) {
430159764Sobrien			case 0: /* child */
431159764Sobrien				(void)close(fdout[0]);
432169962Sobrien				if (swrite(fdin[1], old, n) != (ssize_t)n) {
433159764Sobrien#ifdef DEBUG
434159764Sobrien					(void)fprintf(stderr,
435159764Sobrien					    "Write failed (%s)\n",
436159764Sobrien					    strerror(errno));
437159764Sobrien#endif
438159764Sobrien					exit(1);
439159764Sobrien				}
440159764Sobrien				exit(0);
441159764Sobrien				/*NOTREACHED*/
442159764Sobrien
443159764Sobrien			case -1:
444159764Sobrien#ifdef DEBUG
445159764Sobrien				(void)fprintf(stderr, "Fork failed (%s)\n",
446159764Sobrien				    strerror(errno));
447159764Sobrien#endif
448133359Sobrien				exit(1);
449159764Sobrien				/*NOTREACHED*/
450133359Sobrien
451159764Sobrien			default:  /* parent */
452159764Sobrien				break;
453159764Sobrien			}
454159764Sobrien			(void) close(fdin[1]);
455159764Sobrien			fdin[1] = -1;
456159764Sobrien		}
457133359Sobrien
458103373Sobrien		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
459159764Sobrien#ifdef DEBUG
460159764Sobrien			(void)fprintf(stderr, "Malloc failed (%s)\n",
461159764Sobrien			    strerror(errno));
462159764Sobrien#endif
46375937Sobrien			n = 0;
46475937Sobrien			goto err;
46575937Sobrien		}
466169962Sobrien		if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) {
467159764Sobrien#ifdef DEBUG
468159764Sobrien			(void)fprintf(stderr, "Read failed (%s)\n",
469159764Sobrien			    strerror(errno));
470159764Sobrien#endif
47168349Sobrien			free(*newch);
47275937Sobrien			n = 0;
473133359Sobrien			newch[0] = '\0';
47475937Sobrien			goto err;
475133359Sobrien		} else {
476133359Sobrien			n = r;
47768349Sobrien		}
478103373Sobrien 		/* NUL terminate, as every buffer is handled here. */
479169942Sobrien 		(*newch)[n] = '\0';
48075937Sobrienerr:
48175937Sobrien		if (fdin[1] != -1)
48275937Sobrien			(void) close(fdin[1]);
48368349Sobrien		(void) close(fdout[0]);
484133359Sobrien#ifdef WNOHANG
485133359Sobrien		while (waitpid(-1, NULL, WNOHANG) != -1)
486133359Sobrien			continue;
487133359Sobrien#else
488133359Sobrien		(void)wait(NULL);
489133359Sobrien#endif
49068349Sobrien		return n;
49168349Sobrien	}
49268349Sobrien}
493