compress.c revision 159764
168349Sobrien/*
2133359Sobrien * Copyright (c) Ian F. Darwin 1986-1995.
3133359Sobrien * Software written by Ian F. Darwin and others;
4133359Sobrien * maintained 1995-present by Christos Zoulas and others.
5133359Sobrien *
6133359Sobrien * Redistribution and use in source and binary forms, with or without
7133359Sobrien * modification, are permitted provided that the following conditions
8133359Sobrien * are met:
9133359Sobrien * 1. Redistributions of source code must retain the above copyright
10133359Sobrien *    notice immediately at the beginning of the file, without modification,
11133359Sobrien *    this list of conditions, and the following disclaimer.
12133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright
13133359Sobrien *    notice, this list of conditions and the following disclaimer in the
14133359Sobrien *    documentation and/or other materials provided with the distribution.
15133359Sobrien *
16133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
20133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26133359Sobrien * SUCH DAMAGE.
27133359Sobrien */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if the data is not recognized as
 *		   compressed; otherwise uncompresses it and prints
 *		   information, returning 1 (or -1 on error)
 *	uncompressbuf(ms, fd, method, old, newch, n) - uncompress old into
 *		   *newch using method; returns the size of *newch,
 *		   or 0 on failure
 */
3568349Sobrien#include "file.h"
36133359Sobrien#include "magic.h"
37133359Sobrien#include <stdio.h>
3868349Sobrien#include <stdlib.h>
3968349Sobrien#ifdef HAVE_UNISTD_H
4068349Sobrien#include <unistd.h>
4168349Sobrien#endif
4268349Sobrien#include <string.h>
43133359Sobrien#include <errno.h>
44133359Sobrien#include <sys/types.h>
4568349Sobrien#ifdef HAVE_SYS_WAIT_H
4668349Sobrien#include <sys/wait.h>
4768349Sobrien#endif
48103373Sobrien#ifdef HAVE_LIBZ
49103373Sobrien#include <zlib.h>
50103373Sobrien#endif
51103373Sobrien
5268349Sobrien#ifndef lint
53159764SobrienFILE_RCSID("@(#)$Id: compress.c,v 1.42 2005/03/06 05:58:22 christos Exp $")
5468349Sobrien#endif
5568349Sobrien
5668349Sobrien
/*
 * Table of recognized compressed formats.  Each entry gives the magic
 * bytes that introduce the format and the external command used to
 * decompress it from standard input to standard output.
 *
 * NOTE: entry order matters.  uncompressbuf() special-cases index 2
 * (gzip) when built with HAVE_LIBZ, and file_zmagic() tries entries in
 * order, so the gzip-based handler for "\037\235" is attempted before
 * uncompress(1).
 *
 * The table is read-only; it is declared const so that accidental
 * writes are diagnosed at compile time.
 */
private const struct {
	const char *magic;		/* leading bytes identifying the format */
	size_t maglen;			/* number of significant bytes in magic */
	const char *const argv[3];	/* NULL-terminated decompressor command */
	int silent;			/* nonzero: close the command's stderr */
} compr[] = {
	{ "\037\235", 2, { "gzip", "-cdq", NULL }, 1 },		/* compressed */
	/* Uncompress can get stuck; so use gzip first if we have it
	 * Idea from Damien Clark, thanks! */
	{ "\037\235", 2, { "uncompress", "-c", NULL }, 1 },	/* compressed */
	{ "\037\213", 2, { "gzip", "-cdq", NULL }, 1 },		/* gzipped */
	{ "\037\236", 2, { "gzip", "-cdq", NULL }, 1 },		/* frozen */
	{ "\037\240", 2, { "gzip", "-cdq", NULL }, 1 },		/* SCO LZH */
	/* the standard pack utilities do not accept standard input */
	{ "\037\036", 2, { "gzip", "-cdq", NULL }, 0 },		/* packed */
	{ "PK\3\4",   4, { "gzip", "-cdq", NULL }, 1 },		/* pkzipped, */
					    /* ...only first file examined */
	{ "BZh",      3, { "bzip2", "-cd", NULL }, 1 },		/* bzip2-ed */
};

/* Number of entries in compr[]; size_t so it compares cleanly with indices. */
private const size_t ncompr = sizeof(compr) / sizeof(compr[0]);
7868349Sobrien
7968349Sobrien
80133359Sobrienprivate ssize_t swrite(int, const void *, size_t);
81133359Sobrienprivate ssize_t sread(int, void *, size_t);
82159764Sobrienprivate size_t uncompressbuf(struct magic_set *, int, size_t,
83159764Sobrien    const unsigned char *, unsigned char **, size_t);
84103373Sobrien#ifdef HAVE_LIBZ
85133359Sobrienprivate size_t uncompressgzipped(struct magic_set *, const unsigned char *,
86133359Sobrien    unsigned char **, size_t);
87103373Sobrien#endif
8868349Sobrien
89133359Sobrienprotected int
90159764Sobrienfile_zmagic(struct magic_set *ms, int fd, const unsigned char *buf,
91159764Sobrien    size_t nbytes)
9268349Sobrien{
93133359Sobrien	unsigned char *newbuf = NULL;
94133359Sobrien	size_t i, nsz;
95133359Sobrien	int rv = 0;
9668349Sobrien
97133359Sobrien	if ((ms->flags & MAGIC_COMPRESS) == 0)
98133359Sobrien		return 0;
99133359Sobrien
10068349Sobrien	for (i = 0; i < ncompr; i++) {
10168349Sobrien		if (nbytes < compr[i].maglen)
10268349Sobrien			continue;
10368349Sobrien		if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 &&
104159764Sobrien		    (nsz = uncompressbuf(ms, fd, i, buf, &newbuf,
105159764Sobrien		    nbytes)) != 0) {
106133359Sobrien			ms->flags &= ~MAGIC_COMPRESS;
107133359Sobrien			rv = -1;
108159764Sobrien			if (file_buffer(ms, -1, newbuf, nsz) == -1)
109133359Sobrien				goto error;
110133359Sobrien			if (file_printf(ms, " (") == -1)
111133359Sobrien				goto error;
112159764Sobrien			if (file_buffer(ms, -1, buf, nbytes) == -1)
113133359Sobrien				goto error;
114133359Sobrien			if (file_printf(ms, ")") == -1)
115133359Sobrien				goto error;
116133359Sobrien			rv = 1;
117133359Sobrien			break;
11868349Sobrien		}
11968349Sobrien	}
120133359Sobrienerror:
121133359Sobrien	if (newbuf)
122133359Sobrien		free(newbuf);
123133359Sobrien	ms->flags |= MAGIC_COMPRESS;
124133359Sobrien	return rv;
12568349Sobrien}
12668349Sobrien
/*
 * `safe' write for sockets and pipes.
 *
 * Writes all n bytes of buf to fd, retrying after short writes and
 * after EINTR.  write() is always called at least once, even for n == 0.
 * Returns n on success, or -1 if write() fails with any error other
 * than EINTR (errno is left as set by write()).
 */
private ssize_t
swrite(int fd, const void *buf, size_t n)
{
	const char *p = buf;
	size_t left = n;

	do {
		/* ssize_t, not int: write() may return more than INT_MAX */
		ssize_t rv = write(fd, p, left);

		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		p += rv;
		left -= (size_t)rv;
	} while (left > 0);

	return (ssize_t)n;
}
15075937Sobrien
15175937Sobrien
/*
 * `safe' read for sockets and pipes.
 *
 * Reads from fd into buf until n bytes have been read or end of file is
 * reached, retrying after short reads and after EINTR.
 * Returns the number of bytes read (less than n only at end of file),
 * or -1 if read() fails with any error other than EINTR.
 */
private ssize_t
sread(int fd, void *buf, size_t n)
{
	char *p = buf;
	size_t left = n;

	do {
		/* ssize_t, not int: read() may return more than INT_MAX */
		ssize_t rv = read(fd, p, left);

		if (rv == -1) {
			if (errno == EINTR)
				continue;
			return -1;
		}
		if (rv == 0)		/* end of file: report what we got */
			return (ssize_t)(n - left);
		p += rv;
		left -= (size_t)rv;
	} while (left > 0);

	return (ssize_t)n;
}
17775937Sobrien
178133359Sobrienprotected int
179133359Sobrienfile_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
180133359Sobrien    size_t nbytes)
181103373Sobrien{
182103373Sobrien	char buf[4096];
183103373Sobrien	int r, tfd;
184103373Sobrien
185103373Sobrien	(void)strcpy(buf, "/tmp/file.XXXXXX");
186103373Sobrien#ifndef HAVE_MKSTEMP
187103373Sobrien	{
188103373Sobrien		char *ptr = mktemp(buf);
189103373Sobrien		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
190103373Sobrien		r = errno;
191103373Sobrien		(void)unlink(ptr);
192103373Sobrien		errno = r;
193103373Sobrien	}
194103373Sobrien#else
195103373Sobrien	tfd = mkstemp(buf);
196103373Sobrien	r = errno;
197103373Sobrien	(void)unlink(buf);
198103373Sobrien	errno = r;
199103373Sobrien#endif
200103373Sobrien	if (tfd == -1) {
201133359Sobrien		file_error(ms, errno,
202133359Sobrien		    "cannot create temporary file for pipe copy");
203133359Sobrien		return -1;
204103373Sobrien	}
205103373Sobrien
206133359Sobrien	if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes)
207103373Sobrien		r = 1;
208103373Sobrien	else {
209103373Sobrien		while ((r = sread(fd, buf, sizeof(buf))) > 0)
210133359Sobrien			if (swrite(tfd, buf, (size_t)r) != r)
211103373Sobrien				break;
212103373Sobrien	}
213103373Sobrien
214103373Sobrien	switch (r) {
215103373Sobrien	case -1:
216133359Sobrien		file_error(ms, errno, "error copying from pipe to temp file");
217133359Sobrien		return -1;
218103373Sobrien	case 0:
219103373Sobrien		break;
220103373Sobrien	default:
221133359Sobrien		file_error(ms, errno, "error while writing to temp file");
222133359Sobrien		return -1;
223103373Sobrien	}
224103373Sobrien
225103373Sobrien	/*
226103373Sobrien	 * We duplicate the file descriptor, because fclose on a
227103373Sobrien	 * tmpfile will delete the file, but any open descriptors
228103373Sobrien	 * can still access the phantom inode.
229103373Sobrien	 */
230103373Sobrien	if ((fd = dup2(tfd, fd)) == -1) {
231133359Sobrien		file_error(ms, errno, "could not dup descriptor for temp file");
232133359Sobrien		return -1;
233103373Sobrien	}
234103373Sobrien	(void)close(tfd);
235103373Sobrien	if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) {
236133359Sobrien		file_badseek(ms);
237133359Sobrien		return -1;
238103373Sobrien	}
239103373Sobrien	return fd;
240103373Sobrien}
241103373Sobrien
242103373Sobrien#ifdef HAVE_LIBZ
243103373Sobrien
244103373Sobrien#define FHCRC		(1 << 1)
245103373Sobrien#define FEXTRA		(1 << 2)
246103373Sobrien#define FNAME		(1 << 3)
247103373Sobrien#define FCOMMENT	(1 << 4)
248103373Sobrien
249133359Sobrienprivate size_t
250133359Sobrienuncompressgzipped(struct magic_set *ms, const unsigned char *old,
251133359Sobrien    unsigned char **newch, size_t n)
25268349Sobrien{
253103373Sobrien	unsigned char flg = old[3];
254133359Sobrien	size_t data_start = 10;
255103373Sobrien	z_stream z;
256103373Sobrien	int rc;
257103373Sobrien
258133359Sobrien	if (flg & FEXTRA) {
259133359Sobrien		if (data_start+1 >= n)
260133359Sobrien			return 0;
261103373Sobrien		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
262133359Sobrien	}
263103373Sobrien	if (flg & FNAME) {
264133359Sobrien		while(data_start < n && old[data_start])
265103373Sobrien			data_start++;
266103373Sobrien		data_start++;
267103373Sobrien	}
268103373Sobrien	if(flg & FCOMMENT) {
269133359Sobrien		while(data_start < n && old[data_start])
270103373Sobrien			data_start++;
271103373Sobrien		data_start++;
272103373Sobrien	}
273103373Sobrien	if(flg & FHCRC)
274103373Sobrien		data_start += 2;
275103373Sobrien
276133359Sobrien	if (data_start >= n)
277133359Sobrien		return 0;
278103373Sobrien	if ((*newch = (unsigned char *)malloc(HOWMANY + 1)) == NULL) {
279103373Sobrien		return 0;
280103373Sobrien	}
281103373Sobrien
282133359Sobrien	/* XXX: const castaway, via strchr */
283133359Sobrien	z.next_in = (Bytef *)strchr((const char *)old + data_start,
284133359Sobrien	    old[data_start]);
285103373Sobrien	z.avail_in = n - data_start;
286103373Sobrien	z.next_out = *newch;
287103373Sobrien	z.avail_out = HOWMANY;
288103373Sobrien	z.zalloc = Z_NULL;
289103373Sobrien	z.zfree = Z_NULL;
290103373Sobrien	z.opaque = Z_NULL;
291103373Sobrien
292103373Sobrien	rc = inflateInit2(&z, -15);
293103373Sobrien	if (rc != Z_OK) {
294133359Sobrien		file_error(ms, 0, "zlib: %s", z.msg);
295103373Sobrien		return 0;
296103373Sobrien	}
297103373Sobrien
298103373Sobrien	rc = inflate(&z, Z_SYNC_FLUSH);
299103373Sobrien	if (rc != Z_OK && rc != Z_STREAM_END) {
300133359Sobrien		file_error(ms, 0, "zlib: %s", z.msg);
301103373Sobrien		return 0;
302103373Sobrien	}
303103373Sobrien
304133359Sobrien	n = (size_t)z.total_out;
305103373Sobrien	inflateEnd(&z);
306103373Sobrien
307103373Sobrien	/* let's keep the nul-terminate tradition */
308103373Sobrien	(*newch)[n++] = '\0';
309103373Sobrien
310103373Sobrien	return n;
311103373Sobrien}
312103373Sobrien#endif
313103373Sobrien
314133359Sobrienprivate size_t
315159764Sobrienuncompressbuf(struct magic_set *ms, int fd, size_t method,
316159764Sobrien    const unsigned char *old, unsigned char **newch, size_t n)
317103373Sobrien{
31868349Sobrien	int fdin[2], fdout[2];
319133359Sobrien	int r;
32068349Sobrien
321103373Sobrien#ifdef HAVE_LIBZ
322103373Sobrien	if (method == 2)
323133359Sobrien		return uncompressgzipped(ms, old, newch, n);
324103373Sobrien#endif
325159764Sobrien	(void)fflush(stdout);
326159764Sobrien	(void)fflush(stderr);
327103373Sobrien
328159764Sobrien	if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) {
329133359Sobrien		file_error(ms, errno, "cannot create pipe");
330133359Sobrien		return 0;
33168349Sobrien	}
33268349Sobrien	switch (fork()) {
33368349Sobrien	case 0:	/* child */
33468349Sobrien		(void) close(0);
335159764Sobrien		if (fd != -1) {
336159764Sobrien		    (void) dup(fd);
337159764Sobrien		    (void) lseek(0, (off_t)0, SEEK_SET);
338159764Sobrien		} else {
339159764Sobrien		    (void) dup(fdin[0]);
340159764Sobrien		    (void) close(fdin[0]);
341159764Sobrien		    (void) close(fdin[1]);
342159764Sobrien		}
34368349Sobrien
34468349Sobrien		(void) close(1);
34568349Sobrien		(void) dup(fdout[1]);
34668349Sobrien		(void) close(fdout[0]);
34768349Sobrien		(void) close(fdout[1]);
348159764Sobrien#ifndef DEBUG
34968349Sobrien		if (compr[method].silent)
350159764Sobrien			(void)close(2);
351159764Sobrien#endif
35268349Sobrien
35368349Sobrien		execvp(compr[method].argv[0],
354159764Sobrien		       (char *const *)(intptr_t)compr[method].argv);
355159764Sobrien#ifdef DEBUG
356159764Sobrien		(void)fprintf(stderr, "exec `%s' failed (%s)\n",
357159764Sobrien		    compr[method].argv[0], strerror(errno));
358159764Sobrien#endif
35968349Sobrien		exit(1);
36068349Sobrien		/*NOTREACHED*/
36168349Sobrien	case -1:
362133359Sobrien		file_error(ms, errno, "could not fork");
363133359Sobrien		return 0;
36468349Sobrien
36568349Sobrien	default: /* parent */
36668349Sobrien		(void) close(fdout[1]);
367159764Sobrien		if (fd == -1) {
368159764Sobrien			(void) close(fdin[0]);
369159764Sobrien			/*
370159764Sobrien			 * fork again, to avoid blocking because both
371159764Sobrien			 * pipes filled
372159764Sobrien			 */
373159764Sobrien			switch (fork()) {
374159764Sobrien			case 0: /* child */
375159764Sobrien				(void)close(fdout[0]);
376159764Sobrien				if (swrite(fdin[1], old, n) != n) {
377159764Sobrien#ifdef DEBUG
378159764Sobrien					(void)fprintf(stderr,
379159764Sobrien					    "Write failed (%s)\n",
380159764Sobrien					    strerror(errno));
381159764Sobrien#endif
382159764Sobrien					exit(1);
383159764Sobrien				}
384159764Sobrien				exit(0);
385159764Sobrien				/*NOTREACHED*/
386159764Sobrien
387159764Sobrien			case -1:
388159764Sobrien#ifdef DEBUG
389159764Sobrien				(void)fprintf(stderr, "Fork failed (%s)\n",
390159764Sobrien				    strerror(errno));
391159764Sobrien#endif
392133359Sobrien				exit(1);
393159764Sobrien				/*NOTREACHED*/
394133359Sobrien
395159764Sobrien			default:  /* parent */
396159764Sobrien				break;
397159764Sobrien			}
398159764Sobrien			(void) close(fdin[1]);
399159764Sobrien			fdin[1] = -1;
400159764Sobrien		}
401133359Sobrien
402103373Sobrien		if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) {
403159764Sobrien#ifdef DEBUG
404159764Sobrien			(void)fprintf(stderr, "Malloc failed (%s)\n",
405159764Sobrien			    strerror(errno));
406159764Sobrien#endif
40775937Sobrien			n = 0;
40875937Sobrien			goto err;
40975937Sobrien		}
410133359Sobrien		if ((r = sread(fdout[0], *newch, HOWMANY)) <= 0) {
411159764Sobrien#ifdef DEBUG
412159764Sobrien			(void)fprintf(stderr, "Read failed (%s)\n",
413159764Sobrien			    strerror(errno));
414159764Sobrien#endif
41568349Sobrien			free(*newch);
41675937Sobrien			n = 0;
417133359Sobrien			newch[0] = '\0';
41875937Sobrien			goto err;
419133359Sobrien		} else {
420133359Sobrien			n = r;
42168349Sobrien		}
422103373Sobrien 		/* NUL terminate, as every buffer is handled here. */
423103373Sobrien 		(*newch)[n++] = '\0';
42475937Sobrienerr:
42575937Sobrien		if (fdin[1] != -1)
42675937Sobrien			(void) close(fdin[1]);
42768349Sobrien		(void) close(fdout[0]);
428133359Sobrien#ifdef WNOHANG
429133359Sobrien		while (waitpid(-1, NULL, WNOHANG) != -1)
430133359Sobrien			continue;
431133359Sobrien#else
432133359Sobrien		(void)wait(NULL);
433133359Sobrien#endif
43468349Sobrien		return n;
43568349Sobrien	}
43668349Sobrien}
437