1/*	$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $	*/
2
3/*
4 * Copyright (c) Ian F. Darwin 1986-1995.
5 * Software written by Ian F. Darwin and others;
6 * maintained 1995-present by Christos Zoulas and others.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice immediately at the beginning of the file, without modification,
13 *    this list of conditions, and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in the
16 *    documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
/*
 * compress routines:
 *	file_zmagic() - returns 0 if not recognized, uncompresses and prints
 *			information if recognized
 *	uncompressbuf(fd, bytes_max, method, nofork, old, newch, n) -
 *			uncompress old into *newch using method; returns
 *			OKDATA, NODATA or ERRDATA and stores the length in *n
 */
37#include "file.h"
38
39#ifndef lint
40#if 0
41FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $")
42#else
43__RCSID("$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $");
44#endif
45#endif
46
47#include "magic.h"
48#include <stdlib.h>
49#ifdef HAVE_UNISTD_H
50#include <unistd.h>
51#endif
52#ifdef HAVE_SPAWN_H
53#include <spawn.h>
54#endif
55#include <string.h>
56#include <errno.h>
57#include <ctype.h>
58#include <stdarg.h>
59#include <signal.h>
60#ifndef HAVE_SIG_T
61typedef void (*sig_t)(int);
62#endif /* HAVE_SIG_T */
63#ifdef HAVE_SYS_IOCTL_H
64#include <sys/ioctl.h>
65#endif
66#ifdef HAVE_SYS_WAIT_H
67#include <sys/wait.h>
68#endif
69#if defined(HAVE_SYS_TIME_H)
70#include <sys/time.h>
71#endif
72
73#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT)
74#define BUILTIN_DECOMPRESS
75#include <zlib.h>
76#endif
77
78#if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT)
79#define BUILTIN_BZLIB
80#include <bzlib.h>
81#endif
82
83#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT)
84#define BUILTIN_XZLIB
85#include <lzma.h>
86#endif
87
88#if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT)
89#define BUILTIN_ZSTDLIB
90#include <zstd.h>
91#include <zstd_errors.h>
92#endif
93
94#if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT)
95#define BUILTIN_LZLIB
96#include <lzlib.h>
97#endif
98
/*
 * Debug tracing.  When DEBUG is defined, DPRINTF() formats its arguments
 * with dprintf() onto /dev/tty, which is opened on first use and cached in
 * `tty'; the process aborts if the terminal cannot be opened.  Without
 * DEBUG the macro expands to nothing, so its arguments are never evaluated.
 */
#ifdef DEBUG
int tty = -1;
#define DPRINTF(...)	do { \
	if (tty == -1) \
		tty = open("/dev/tty", O_RDWR); \
	if (tty == -1) \
		abort(); \
	dprintf(tty, __VA_ARGS__); \
} while (/*CONSTCOND*/0)
#else
#define DPRINTF(...)
#endif
111
112#ifdef ZLIBSUPPORT
113/*
114 * The following python code is not really used because ZLIBSUPPORT is only
115 * defined if we have a built-in zlib, and the built-in zlib handles that.
116 * That is not true for android where we have zlib.h and not -lz.
117 */
/*
 * One-line python program that inflates a zlib stream read from stdin and
 * writes the result to stdout.
 * NOTE(review): reads/writes through the text-mode sys.stdin/sys.stdout
 * objects; presumably written for python 2 -- confirm before relying on it.
 */
static const char zlibcode[] =
    "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))";

/* argv used to run zlibcode through the external "python" interpreter. */
static const char *zlib_args[] = { "python", "-c", zlibcode, NULL };
122
/*
 * Check whether buf starts with a plausible zlib stream header.
 *
 * buf must hold at least two bytes.  The low nibble of the first byte must
 * be 8 and its top bit must be clear; in addition the 16-bit value
 * buf[0] * 256 + buf[1] has to be a multiple of 31.  That value is defined
 * arithmetically on the byte stream, so it is computed the same way on
 * every host; no byte-order test is needed (the previous one swapped the
 * two bytes on big-endian machines and rejected valid headers there).
 *
 * Returns 1 if the header looks like zlib, 0 otherwise.
 */
static int
zlibcmp(const unsigned char *buf)
{
	unsigned int check;

	if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0)
		return 0;

	/* first byte is the most significant half of the check value */
	check = buf[0];
	check = (check << 8) | buf[1];
	if (check % 31)
		return 0;
	return 1;
}
139#endif
140
/*
 * Heuristic signature test used for the lzma table entry.
 *
 * buf must hold at least 13 bytes.  The first three bytes have to be
 * 0x5d, 0, 0 and byte 12 has to be either 0 or 0xff.
 *
 * Returns 1 on a match, 0 otherwise.
 */
static int
lzmacmp(const unsigned char *buf)
{
	int head_ok = buf[0] == 0x5d && buf[1] == 0 && buf[2] == 0;

	if (!head_ok)
		return 0;

	return buf[12] == 0 || buf[12] == 0xff;
}
150
/*
 * Command lines (NULL-terminated argv vectors) of the external programs
 * referenced from the compr[] table below.  argv[0] is also what
 * methodname() reports for methods without a builtin decompressor.
 */
#define gzip_flags "-cd"
#define lzip_flags gzip_flags

static const char *gzip_args[] = {
	"gzip", gzip_flags, NULL
};
static const char *uncompress_args[] = {
	"uncompress", "-c", NULL
};
static const char *bzip2_args[] = {
	"bzip2", "-cd", NULL
};
static const char *lzip_args[] = {
	"lzip", lzip_flags, NULL
};
static const char *xz_args[] = {
	"xz", "-cd", NULL
};
static const char *lrzip_args[] = {
	"lrzip", "-qdf", "-", NULL
};
static const char *lz4_args[] = {
	"lz4", "-cd", NULL
};
static const char *zstd_args[] = {
	"zstd", "-cd", NULL
};
178
/* Placeholders stored in the table's `unused' slot; both expand to NULL. */
#define	do_zlib		NULL
#define	do_bzlib	NULL

/*
 * Table of recognized compression formats, scanned in order by
 * file_zmagic().
 *
 * The sign of maglen selects the union member: when it is non-negative,
 * u.magic is compared against the first maglen bytes of the input; when it
 * is negative, u.func is called on the input, and |maglen| is the minimum
 * number of input bytes required before it is called.  argv is the external
 * command used when no builtin decompressor handles the entry.
 *
 * The METH_* constants are indices into this table and must be kept in
 * sync with the entry order.  Note that METH_FROZEN is 2, which is the
 * gzip entry according to the per-row comments, not the "frozen" one.
 */
file_private const struct {
	union {
		const char *magic;
		int (*func)(const unsigned char *);
	} u;
	int maglen;
	const char **argv;
	void *unused;
} compr[] = {
#define METH_FROZEN	2
#define METH_BZIP	7
#define METH_XZ		9
#define METH_LZIP	8
#define METH_ZSTD	12
#define METH_LZMA	13
#define METH_ZLIB	14
    { { .magic = "\037\235" },	2, gzip_args, NULL },	/* 0, compressed */
    /* Uncompress can get stuck; so use gzip first if we have it
     * Idea from Damien Clark, thanks! */
    { { .magic = "\037\235" },	2, uncompress_args, NULL },/* 1, compressed */
    { { .magic = "\037\213" },	2, gzip_args, do_zlib },/* 2, gzipped */
    { { .magic = "\037\236" },	2, gzip_args, NULL },	/* 3, frozen */
    { { .magic = "\037\240" },	2, gzip_args, NULL },	/* 4, SCO LZH */
    /* the standard pack utilities do not accept standard input */
    { { .magic = "\037\036" },	2, gzip_args, NULL },	/* 5, packed */
    { { .magic = "PK\3\4" },	4, gzip_args, NULL },	/* 6, pkziped */
    /* ...only first file examined */
    { { .magic = "BZh" },	3, bzip2_args, do_bzlib },/* 7, bzip2-ed */
    { { .magic = "LZIP" },	4, lzip_args, NULL },	/* 8, lzip-ed */
    { { .magic = "\3757zXZ\0" },6, xz_args, NULL },	/* 9, XZ Util */
    { { .magic = "LRZI" },	4, lrzip_args, NULL },	/* 10, LRZIP */
    { { .magic = "\004\"M\030" },4, lz4_args, NULL },	/* 11, LZ4 */
    { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */
    { { .func = lzmacmp },	-13, xz_args, NULL },	/* 13, lzma */
#ifdef ZLIBSUPPORT
    { { .func = zlibcmp },	-2, zlib_args, NULL },	/* 14, zlib */
#endif
};
220
/*
 * Result codes of uncompressbuf() and the builtin decompressors:
 * OKDATA  - the output buffer holds decompressed data,
 * NODATA  - no buffer is returned (makeerror() could not format a message),
 * ERRDATA - the output buffer holds an error message instead of data.
 */
#define OKDATA 	0
#define NODATA	1
#define ERRDATA	2
224
/* Forward declarations for the helpers defined later in this file. */
file_private ssize_t swrite(int, const void *, size_t);
#if HAVE_FORK
/* Number of entries in compr[]. */
file_private size_t ncompr = __arraycount(compr);
file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *,
    unsigned char **, size_t *);
/*
 * Builtin decompressors; each is only declared when the matching library
 * support macro is defined.  All share one signature so getdecompressor()
 * can return any of them.
 */
#ifdef BUILTIN_DECOMPRESS
file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_BZLIB
file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_XZLIB
file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_ZSTDLIB
file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif
#ifdef BUILTIN_LZLIB
file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t,
    size_t *, int);
#endif

/* printf-style error formatter; the format string is argument 3. */
static int makeerror(unsigned char **, size_t *, const char *, ...)
    __attribute__((__format__(__printf__, 3, 4)));
file_private const char *methodname(size_t);
256
257file_private int
258format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf)
259{
260	unsigned char *p;
261	int mime = ms->flags & MAGIC_MIME;
262
263	if (!mime)
264		return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf);
265
266	for (p = buf; *p; p++)
267		if (!isalnum(*p))
268			*p = '-';
269
270	return file_printf(ms, "application/x-decompression-error-%s-%s",
271	    methodname(i), buf);
272}
273
/*
 * Try to recognize the buffer b as compressed data and, if so, describe
 * the decompressed contents.
 *
 * ms   - magic set; nothing is done unless MAGIC_COMPRESS is set in its
 *        flags.  The flag is cleared while the decompressed data is being
 *        examined and set again before returning.
 * b    - input buffer (b->fbuf, b->flen) and its descriptor (b->fd)
 * name - passed through to file_buffer() for the decompressed data
 *
 * Returns 0 if MAGIC_COMPRESS is off or no table entry produced output,
 * 1 if a description (or a decompression error) was printed, and -1 if
 * printing failed.
 *
 * NOTE(review): neither a successful match nor the `error' label leaves
 * the for loop (the `break's only leave the switch), so later table
 * entries whose signature also matches are attempted as well and the
 * returned value is that of the last attempt -- confirm this is intended.
 */
file_protected int
file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name)
{
	unsigned char *newbuf = NULL;
	size_t i, nsz;
	char *rbuf;
	file_pushbuf_t *pb;
	int urv, prv, rv = 0;
	int mime = ms->flags & MAGIC_MIME;
	int fd = b->fd;
	const unsigned char *buf = CAST(const unsigned char *, b->fbuf);
	size_t nbytes = b->flen;
	int sa_saved = 0;
	struct sigaction sig_act;

	if ((ms->flags & MAGIC_COMPRESS) == 0)
		return 0;

	for (i = 0; i < ncompr; i++) {
		int zm;
		/* not enough input for this entry's signature */
		if (nbytes < CAST(size_t, abs(compr[i].maglen)))
			continue;
		/* negative maglen: matcher function, otherwise literal magic */
		if (compr[i].maglen < 0) {
			zm = (*compr[i].u.func)(buf);
		} else {
			zm = memcmp(buf, compr[i].u.magic,
			    CAST(size_t, compr[i].maglen)) == 0;
		}

		if (!zm)
			continue;

		/* Prevent SIGPIPE death if child dies unexpectedly */
		if (!sa_saved) {
			/*
			 * A separate new_act is used so that sig_act keeps
			 * the previous disposition for restoring at `out'.
			 */
			struct sigaction new_act;
			memset(&new_act, 0, sizeof(new_act));
			new_act.sa_handler = SIG_IGN;
			sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1;
		}

		nsz = nbytes;
		/* drop the buffer of a previous attempt, if any */
		free(newbuf);
		urv = uncompressbuf(fd, ms->bytes_max, i,
		    (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz);
		DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv,
		    (char *)newbuf, nsz);
		switch (urv) {
		case OKDATA:
		case ERRDATA:
			/* avoid recursing into decompression again */
			ms->flags &= ~MAGIC_COMPRESS;
			if (urv == ERRDATA)
				prv = format_decompression_error(ms, i, newbuf);
			else
				prv = file_buffer(ms, -1, NULL, name, newbuf,
				    nsz);
			if (prv == -1)
				goto error;
			rv = 1;
			/* transparent mode: do not describe the container */
			if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0)
				goto out;
			/* only some (not all) MAGIC_MIME bits requested */
			if (mime != MAGIC_MIME && mime != 0)
				goto out;
			if ((file_printf(ms,
			    mime ? " compressed-encoding=" : " (")) == -1)
				goto error;
			/* describe the compressed data itself into a side buffer */
			if ((pb = file_push_buffer(ms)) == NULL)
				goto error;
			/*
			 * XXX: If file_buffer fails here, we overwrite
			 * the compressed text. FIXME.
			 */
			if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1)
			{
				if (file_pop_buffer(ms, pb) != NULL)
					abort();
				goto error;
			}
			if ((rbuf = file_pop_buffer(ms, pb)) != NULL) {
				if (file_printf(ms, "%s", rbuf) == -1) {
					free(rbuf);
					goto error;
				}
				free(rbuf);
			}
			if (!mime && file_printf(ms, ")") == -1)
				goto error;
			/*FALLTHROUGH*/
		case NODATA:
			break;
		default:
			abort();
			/*NOTREACHED*/
		error:
			/* reached only via goto from the cases above */
			rv = -1;
			break;
		}
	}
out:
	DPRINTF("rv = %d\n", rv);

	/* restore the previous SIGPIPE disposition unless it was SIG_IGN */
	if (sa_saved && sig_act.sa_handler != SIG_IGN)
		(void)sigaction(SIGPIPE, &sig_act, NULL);

	free(newbuf);
	ms->flags |= MAGIC_COMPRESS;
	DPRINTF("Zmagic returns %d\n", rv);
	return rv;
}
383#endif
384/*
385 * `safe' write for sockets and pipes.
386 */
387file_private ssize_t
388swrite(int fd, const void *buf, size_t n)
389{
390	ssize_t rv;
391	size_t rn = n;
392
393	do
394		switch (rv = write(fd, buf, n)) {
395		case -1:
396			if (errno == EINTR)
397				continue;
398			return -1;
399		default:
400			n -= rv;
401			buf = CAST(const char *, buf) + rv;
402			break;
403		}
404	while (n > 0);
405	return rn;
406}
407
408
409/*
410 * `safe' read for sockets and pipes.
411 */
412file_protected ssize_t
413sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__)))
414{
415	ssize_t rv;
416#if defined(FIONREAD) && !defined(__MINGW32__)
417	int t = 0;
418#endif
419	size_t rn = n;
420
421	if (fd == STDIN_FILENO)
422		goto nocheck;
423
424#if defined(FIONREAD) && !defined(__MINGW32__)
425	if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) {
426#ifdef FD_ZERO
427		ssize_t cnt;
428		for (cnt = 0;; cnt++) {
429			fd_set check;
430			struct timeval tout = {0, 100 * 1000};
431			int selrv;
432
433			FD_ZERO(&check);
434			FD_SET(fd, &check);
435
436			/*
437			 * Avoid soft deadlock: do not read if there
438			 * is nothing to read from sockets and pipes.
439			 */
440			selrv = select(fd + 1, &check, NULL, NULL, &tout);
441			if (selrv == -1) {
442				if (errno == EINTR || errno == EAGAIN)
443					continue;
444			} else if (selrv == 0 && cnt >= 5) {
445				return 0;
446			} else
447				break;
448		}
449#endif
450		(void)ioctl(fd, FIONREAD, &t);
451	}
452
453	if (t > 0 && CAST(size_t, t) < n) {
454		n = t;
455		rn = n;
456	}
457#endif
458
459nocheck:
460	do
461		switch ((rv = read(fd, buf, n))) {
462		case -1:
463			if (errno == EINTR)
464				continue;
465			return -1;
466		case 0:
467			return rn - n;
468		default:
469			n -= rv;
470			buf = CAST(char *, CCAST(void *, buf)) + rv;
471			break;
472		}
473	while (n > 0);
474	return rn;
475}
476
477file_protected int
478file_pipe2file(struct magic_set *ms, int fd, const void *startbuf,
479    size_t nbytes)
480{
481	char buf[4096];
482	ssize_t r;
483	int tfd;
484
485#ifdef WIN32
486	const char *t;
487	buf[0] = '\0';
488	if ((t = getenv("TEMP")) != NULL)
489		(void)strlcpy(buf, t, sizeof(buf));
490	else if ((t = getenv("TMP")) != NULL)
491		(void)strlcpy(buf, t, sizeof(buf));
492	else if ((t = getenv("TMPDIR")) != NULL)
493		(void)strlcpy(buf, t, sizeof(buf));
494	if (buf[0] != '\0')
495		(void)strlcat(buf, "/", sizeof(buf));
496	(void)strlcat(buf, "file.XXXXXX", sizeof(buf));
497#else
498	(void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf));
499#endif
500#ifndef HAVE_MKSTEMP
501	{
502		char *ptr = mktemp(buf);
503		tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600);
504		r = errno;
505		(void)unlink(ptr);
506		errno = r;
507	}
508#else
509	{
510		int te;
511		mode_t ou = umask(0);
512		tfd = mkstemp(buf);
513		(void)umask(ou);
514		te = errno;
515		(void)unlink(buf);
516		errno = te;
517	}
518#endif
519	if (tfd == -1) {
520		file_error(ms, errno,
521		    "cannot create temporary file for pipe copy");
522		return -1;
523	}
524
525	if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes))
526		r = 1;
527	else {
528		while ((r = sread(fd, buf, sizeof(buf), 1)) > 0)
529			if (swrite(tfd, buf, CAST(size_t, r)) != r)
530				break;
531	}
532
533	switch (r) {
534	case -1:
535		file_error(ms, errno, "error copying from pipe to temp file");
536		return -1;
537	case 0:
538		break;
539	default:
540		file_error(ms, errno, "error while writing to temp file");
541		return -1;
542	}
543
544	/*
545	 * We duplicate the file descriptor, because fclose on a
546	 * tmpfile will delete the file, but any open descriptors
547	 * can still access the phantom inode.
548	 */
549	if ((fd = dup2(tfd, fd)) == -1) {
550		file_error(ms, errno, "could not dup descriptor for temp file");
551		return -1;
552	}
553	(void)close(tfd);
554	if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) {
555		file_badseek(ms);
556		return -1;
557	}
558	return fd;
559}
560#if HAVE_FORK
561#ifdef BUILTIN_DECOMPRESS
562
563#define FHCRC		(1 << 1)
564#define FEXTRA		(1 << 2)
565#define FNAME		(1 << 3)
566#define FCOMMENT	(1 << 4)
567
568
569file_private int
570uncompressgzipped(const unsigned char *old, unsigned char **newch,
571    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
572{
573	unsigned char flg;
574	size_t data_start = 10;
575
576	if (*n < 4) {
577		goto err;
578	}
579
580	flg = old[3];
581
582	if (flg & FEXTRA) {
583		if (data_start + 1 >= *n)
584			goto err;
585		data_start += 2 + old[data_start] + old[data_start + 1] * 256;
586	}
587	if (flg & FNAME) {
588		while(data_start < *n && old[data_start])
589			data_start++;
590		data_start++;
591	}
592	if (flg & FCOMMENT) {
593		while(data_start < *n && old[data_start])
594			data_start++;
595		data_start++;
596	}
597	if (flg & FHCRC)
598		data_start += 2;
599
600	if (data_start >= *n)
601		goto err;
602
603	*n -= data_start;
604	old += data_start;
605	return uncompresszlib(old, newch, bytes_max, n, 0);
606err:
607	return makeerror(newch, n, "File too short");
608}
609
610file_private int
611uncompresszlib(const unsigned char *old, unsigned char **newch,
612    size_t bytes_max, size_t *n, int zlib)
613{
614	int rc;
615	z_stream z;
616
617	DPRINTF("builtin zlib decompression\n");
618	z.next_in = CCAST(Bytef *, old);
619	z.avail_in = CAST(uint32_t, *n);
620	z.next_out = *newch;
621	z.avail_out = CAST(unsigned int, bytes_max);
622	z.zalloc = Z_NULL;
623	z.zfree = Z_NULL;
624	z.opaque = Z_NULL;
625
626	/* LINTED bug in header macro */
627	rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15);
628	if (rc != Z_OK)
629		goto err;
630
631	rc = inflate(&z, Z_SYNC_FLUSH);
632	if (rc != Z_OK && rc != Z_STREAM_END) {
633		inflateEnd(&z);
634		goto err;
635	}
636
637	*n = CAST(size_t, z.total_out);
638	rc = inflateEnd(&z);
639	if (rc != Z_OK)
640		goto err;
641
642	/* let's keep the nul-terminate tradition */
643	(*newch)[*n] = '\0';
644
645	return OKDATA;
646err:
647	return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc));
648}
649#endif
650
651#ifdef BUILTIN_BZLIB
652file_private int
653uncompressbzlib(const unsigned char *old, unsigned char **newch,
654    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
655{
656	int rc;
657	bz_stream bz;
658
659	DPRINTF("builtin bzlib decompression\n");
660	memset(&bz, 0, sizeof(bz));
661	rc = BZ2_bzDecompressInit(&bz, 0, 0);
662	if (rc != BZ_OK)
663		goto err;
664
665	bz.next_in = CCAST(char *, RCAST(const char *, old));
666	bz.avail_in = CAST(uint32_t, *n);
667	bz.next_out = RCAST(char *, *newch);
668	bz.avail_out = CAST(unsigned int, bytes_max);
669
670	rc = BZ2_bzDecompress(&bz);
671	if (rc != BZ_OK && rc != BZ_STREAM_END) {
672		BZ2_bzDecompressEnd(&bz);
673		goto err;
674	}
675
676	/* Assume byte_max is within 32bit */
677	/* assert(bz.total_out_hi32 == 0); */
678	*n = CAST(size_t, bz.total_out_lo32);
679	rc = BZ2_bzDecompressEnd(&bz);
680	if (rc != BZ_OK)
681		goto err;
682
683	/* let's keep the nul-terminate tradition */
684	(*newch)[*n] = '\0';
685
686	return OKDATA;
687err:
688	return makeerror(newch, n, "bunzip error %d", rc);
689}
690#endif
691
692#ifdef BUILTIN_XZLIB
693file_private int
694uncompressxzlib(const unsigned char *old, unsigned char **newch,
695    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
696{
697	int rc;
698	lzma_stream xz;
699
700	DPRINTF("builtin xzlib decompression\n");
701	memset(&xz, 0, sizeof(xz));
702	rc = lzma_auto_decoder(&xz, UINT64_MAX, 0);
703	if (rc != LZMA_OK)
704		goto err;
705
706	xz.next_in = CCAST(const uint8_t *, old);
707	xz.avail_in = CAST(uint32_t, *n);
708	xz.next_out = RCAST(uint8_t *, *newch);
709	xz.avail_out = CAST(unsigned int, bytes_max);
710
711	rc = lzma_code(&xz, LZMA_RUN);
712	if (rc != LZMA_OK && rc != LZMA_STREAM_END) {
713		lzma_end(&xz);
714		goto err;
715	}
716
717	*n = CAST(size_t, xz.total_out);
718
719	lzma_end(&xz);
720
721	/* let's keep the nul-terminate tradition */
722	(*newch)[*n] = '\0';
723
724	return OKDATA;
725err:
726	return makeerror(newch, n, "unxz error %d", rc);
727}
728#endif
729
730#ifdef BUILTIN_ZSTDLIB
731file_private int
732uncompresszstd(const unsigned char *old, unsigned char **newch,
733    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
734{
735	size_t rc;
736	ZSTD_DStream *zstd;
737	ZSTD_inBuffer in;
738	ZSTD_outBuffer out;
739
740	DPRINTF("builtin zstd decompression\n");
741	if ((zstd = ZSTD_createDStream()) == NULL) {
742		return makeerror(newch, n, "No ZSTD decompression stream, %s",
743		    strerror(errno));
744	}
745
746	rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only);
747	if (ZSTD_isError(rc))
748		goto err;
749
750	in.src = CCAST(const void *, old);
751	in.size = *n;
752	in.pos = 0;
753	out.dst = RCAST(void *, *newch);
754	out.size = bytes_max;
755	out.pos = 0;
756
757	rc = ZSTD_decompressStream(zstd, &out, &in);
758	if (ZSTD_isError(rc))
759		goto err;
760
761	*n = out.pos;
762
763	ZSTD_freeDStream(zstd);
764
765	/* let's keep the nul-terminate tradition */
766	(*newch)[*n] = '\0';
767
768	return OKDATA;
769err:
770	ZSTD_freeDStream(zstd);
771	return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc));
772}
773#endif
774
775#ifdef BUILTIN_LZLIB
/*
 * Builtin lzip decompressor (lzlib).
 *
 * old       - compressed input
 * newch     - output buffer of at least bytes_max + 1 bytes; replaced by an
 *             error string on failure
 * bytes_max - maximum number of bytes to produce
 * n         - in: input length; out: number of bytes produced
 *
 * Input is fed to the decoder and output drained from it in alternation
 * until the output buffer is full, or all input has been written and a
 * read yields nothing more.
 *
 * Returns OKDATA with a NUL-terminated result, or the result of
 * makeerror() carrying lzlib's error text.
 */
file_private int
uncompresslzlib(const unsigned char *old, unsigned char **newch,
    size_t bytes_max, size_t *n, int extra __attribute__((__unused__)))
{
	enum LZ_Errno err;
	size_t old_remaining = *n;
	size_t new_remaining = bytes_max;
	size_t total_read = 0;
	unsigned char *bufp;	/* next free position in *newch */
	struct LZ_Decoder *dec;

	bufp = *newch;

	DPRINTF("builtin lzlib decompression\n");
	dec = LZ_decompress_open();
	if (!dec) {
		return makeerror(newch, n, "unable to allocate LZ_Decoder");
	}
	if (LZ_decompress_errno(dec) != LZ_ok)
		goto err;

	for (;;) {
		// LZ_decompress_read() stops at member boundaries, so we may
		// have more than one successful read after writing all data
		// we have.
		if (old_remaining > 0) {
			/* the decoder may accept only part of the input */
			int wr = LZ_decompress_write(dec, old, old_remaining);
			if (wr < 0)
				goto err;
			old_remaining -= wr;
			old += wr;
		}

		int rd = LZ_decompress_read(dec, bufp, new_remaining);
		if (rd > 0) {
			new_remaining -= rd;
			bufp += rd;
			total_read += rd;
		}

		if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok)
			goto err;
		/* output buffer full */
		if (new_remaining == 0)
			break;
		/* all input consumed and nothing more came out */
		if (old_remaining == 0 && rd == 0)
			break;
	}

	LZ_decompress_close(dec);
	*n = total_read;

	/* let's keep the nul-terminate tradition */
	*bufp = '\0';

	return OKDATA;
err:
	/* fetch the error code before the decoder is destroyed */
	err = LZ_decompress_errno(dec);
	LZ_decompress_close(dec);
	return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err));
}
836#endif
837
838
839static int
840makeerror(unsigned char **buf, size_t *len, const char *fmt, ...)
841{
842	char *msg;
843	va_list ap;
844	int rv;
845
846	DPRINTF("Makeerror %s\n", fmt);
847	free(*buf);
848	va_start(ap, fmt);
849	rv = vasprintf(&msg, fmt, ap);
850	va_end(ap);
851	if (rv < 0) {
852		DPRINTF("Makeerror failed");
853		*buf = NULL;
854		*len = 0;
855		return NODATA;
856	}
857	*buf = RCAST(unsigned char *, msg);
858	*len = strlen(msg);
859	return ERRDATA;
860}
861
/*
 * Close descriptor slot i of the array fd and mark it as closed (-1).
 * Slots that already hold -1 are left alone, so the call is idempotent.
 */
static void
closefd(int *fd, size_t i)
{
	if (fd[i] != -1) {
		(void) close(fd[i]);
		fd[i] = -1;
	}
}
870
/* Close both ends of the pipe pair fd[0]/fd[1] via closefd(). */
static void
closep(int *fd)
{
	closefd(fd, 0);
	closefd(fd, 1);
}
878
/*
 * Arrange for descriptor fd to become descriptor i in the child.
 *
 * v  - with HAVE_POSIX_SPAWNP: the posix_spawn_file_actions_t to which the
 *      dup2 and close actions are appended; otherwise unused
 * i  - target descriptor number
 * fd - source descriptor; closed after duplication
 *
 * Nothing is done when fd already equals i.  Without HAVE_POSIX_SPAWNP the
 * dup2()/close() are performed immediately; uncompressbuf() calls this
 * from its vfork() child, so a failure terminates with _exit() rather than
 * exit(), which would run atexit handlers and flush stdio buffers shared
 * with the parent.
 */
static void
movedesc(void *v, int i, int fd)
{
	if (fd == i)
		return; /* "no dup was necessary" */
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v);
	posix_spawn_file_actions_adddup2(fa, fd, i);
	posix_spawn_file_actions_addclose(fa, fd);
#else
	(void)v;
	if (dup2(fd, i) == -1) {
		DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno));
		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
	}
	(void)close(fd);
#endif
}
896
/*
 * Close descriptor fd in the child: with HAVE_POSIX_SPAWNP by appending a
 * close action to the file-actions object v, otherwise by calling close()
 * right away (v is then unused).
 */
static void
closedesc(void *v, int fd)
{
#ifdef HAVE_POSIX_SPAWNP
	posix_spawn_file_actions_addclose(
	    RCAST(posix_spawn_file_actions_t *, v), fd);
#else
	(void)v;
	(void)close(fd);
#endif
}
907
908static void
909handledesc(void *v, int fd, int fdp[3][2])
910{
911	if (fd != -1) {
912		(void) lseek(fd, CAST(off_t, 0), SEEK_SET);
913		movedesc(v, STDIN_FILENO, fd);
914	} else {
915		movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]);
916		if (fdp[STDIN_FILENO][1] > 2)
917		    closedesc(v, fdp[STDIN_FILENO][1]);
918	}
919
920	file_clear_closexec(STDIN_FILENO);
921
922///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly
923	movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]);
924	if (fdp[STDOUT_FILENO][0] > 2)
925		closedesc(v, fdp[STDOUT_FILENO][0]);
926
927	file_clear_closexec(STDOUT_FILENO);
928
929	movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]);
930	if (fdp[STDERR_FILENO][0] > 2)
931		closedesc(v, fdp[STDERR_FILENO][0]);
932
933	file_clear_closexec(STDERR_FILENO);
934}
935
936static pid_t
937writechild(int fd, const void *old, size_t n)
938{
939	pid_t pid;
940
941	/*
942	 * fork again, to avoid blocking because both
943	 * pipes filled
944	 */
945	pid = fork();
946	if (pid == -1) {
947		DPRINTF("Fork failed (%s)\n", strerror(errno));
948		return -1;
949	}
950	if (pid == 0) {
951		/* child */
952		if (swrite(fd, old, n) != CAST(ssize_t, n)) {
953			DPRINTF("Write failed (%s)\n", strerror(errno));
954			exit(EXIT_FAILURE);
955		}
956		exit(EXIT_SUCCESS);
957	}
958	/* parent */
959	return pid;
960}
961
962static ssize_t
963filter_error(unsigned char *ubuf, ssize_t n)
964{
965	char *p;
966	char *buf;
967
968	ubuf[n] = '\0';
969	buf = RCAST(char *, ubuf);
970	while (isspace(CAST(unsigned char, *buf)))
971		buf++;
972	DPRINTF("Filter error[[[%s]]]\n", buf);
973	if ((p = strchr(CAST(char *, buf), '\n')) != NULL)
974		*p = '\0';
975	if ((p = strchr(CAST(char *, buf), ';')) != NULL)
976		*p = '\0';
977	if ((p = strrchr(CAST(char *, buf), ':')) != NULL) {
978		++p;
979		while (isspace(CAST(unsigned char, *p)))
980			p++;
981		n = strlen(p);
982		memmove(ubuf, p, CAST(size_t, n + 1));
983	}
984	DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf);
985	if (islower(*ubuf))
986		*ubuf = toupper(*ubuf);
987	return n;
988}
989
990file_private const char *
991methodname(size_t method)
992{
993	switch (method) {
994#ifdef BUILTIN_DECOMPRESS
995	case METH_FROZEN:
996	case METH_ZLIB:
997		return "zlib";
998#endif
999#ifdef BUILTIN_BZLIB
1000	case METH_BZIP:
1001		return "bzlib";
1002#endif
1003#ifdef BUILTIN_XZLIB
1004	case METH_XZ:
1005	case METH_LZMA:
1006		return "xzlib";
1007#endif
1008#ifdef BUILTIN_ZSTDLIB
1009	case METH_ZSTD:
1010		return "zstd";
1011#endif
1012#ifdef BUILTIN_LZLIB
1013	case METH_LZIP:
1014		return "lzlib";
1015#endif
1016	default:
1017		return compr[method].argv[0];
1018	}
1019}
1020
1021file_private int (*
1022getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t,
1023    size_t *, int)
1024{
1025	switch (method) {
1026#ifdef BUILTIN_DECOMPRESS
1027	case METH_FROZEN:
1028		return uncompressgzipped;
1029	case METH_ZLIB:
1030		return uncompresszlib;
1031#endif
1032#ifdef BUILTIN_BZLIB
1033	case METH_BZIP:
1034		return uncompressbzlib;
1035#endif
1036#ifdef BUILTIN_XZLIB
1037	case METH_XZ:
1038	case METH_LZMA:
1039		return uncompressxzlib;
1040#endif
1041#ifdef BUILTIN_ZSTDLIB
1042	case METH_ZSTD:
1043		return uncompresszstd;
1044#endif
1045#ifdef BUILTIN_LZLIB
1046	case METH_LZIP:
1047		return uncompresslzlib;
1048#endif
1049	default:
1050		return NULL;
1051	}
1052}
1053
1054file_private int
1055uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork,
1056    const unsigned char *old, unsigned char **newch, size_t* n)
1057{
1058	int fdp[3][2];
1059	int status, rv, w;
1060	pid_t pid;
1061	pid_t writepid = -1;
1062	size_t i;
1063	ssize_t r, re;
1064	char *const *args;
1065#ifdef HAVE_POSIX_SPAWNP
1066	posix_spawn_file_actions_t fa;
1067#endif
1068	int (*decompress)(const unsigned char *, unsigned char **,
1069	    size_t, size_t *, int) = getdecompressor(method);
1070
1071	*newch = CAST(unsigned char *, malloc(bytes_max + 1));
1072	if (*newch == NULL)
1073		return makeerror(newch, n, "No buffer, %s", strerror(errno));
1074
1075	if (decompress) {
1076		if (nofork) {
1077			return makeerror(newch, n,
1078			    "Fork is required to uncompress, but disabled");
1079		}
1080		return (*decompress)(old, newch, bytes_max, n, 1);
1081	}
1082
1083	(void)fflush(stdout);
1084	(void)fflush(stderr);
1085
1086	for (i = 0; i < __arraycount(fdp); i++)
1087		fdp[i][0] = fdp[i][1] = -1;
1088
1089	/*
1090	 * There are multithreaded users who run magic_file()
1091	 * from dozens of threads. If two parallel magic_file() calls
1092	 * analyze two large compressed files, both will spawn
1093	 * an uncompressing child here, which writes out uncompressed data.
1094	 * We read some portion, then close the pipe, then waitpid() the child.
1095	 * If uncompressed data is larger, child should get EPIPE and exit.
1096	 * However, with *parallel* calls OTHER child may unintentionally
1097	 * inherit pipe fds, thus keeping pipe open and making writes in
1098	 * our child block instead of failing with EPIPE!
1099	 * (For the bug to occur, two threads must mutually inherit their pipes,
1100	 * and both must have large outputs. Thus it happens not that often).
1101	 * To avoid this, be sure to create pipes with O_CLOEXEC.
1102	 */
1103	if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) ||
1104	    file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 ||
1105	    file_pipe_closexec(fdp[STDERR_FILENO]) == -1) {
1106		closep(fdp[STDIN_FILENO]);
1107		closep(fdp[STDOUT_FILENO]);
1108		return makeerror(newch, n, "Cannot create pipe, %s",
1109		    strerror(errno));
1110	}
1111
1112	args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv));
1113#ifdef HAVE_POSIX_SPAWNP
1114	posix_spawn_file_actions_init(&fa);
1115
1116	handledesc(&fa, fd, fdp);
1117
1118	DPRINTF("Executing %s\n", compr[method].argv[0]);
1119	status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL,
1120	    args, NULL);
1121
1122	posix_spawn_file_actions_destroy(&fa);
1123
1124	if (status == -1) {
1125		return makeerror(newch, n, "Cannot posix_spawn `%s', %s",
1126		    compr[method].argv[0], strerror(errno));
1127	}
1128#else
1129	/* For processes with large mapped virtual sizes, vfork
1130	 * may be _much_ faster (10-100 times) than fork.
1131	 */
1132	pid = vfork();
1133	if (pid == -1) {
1134		return makeerror(newch, n, "Cannot vfork, %s",
1135		    strerror(errno));
1136	}
1137	if (pid == 0) {
1138		/* child */
1139		/* Note: we are after vfork, do not modify memory
1140		 * in a way which confuses parent. In particular,
1141		 * do not modify fdp[i][j].
1142		 */
1143		handledesc(NULL, fd, fdp);
1144		DPRINTF("Executing %s\n", compr[method].argv[0]);
1145
1146		(void)execvp(compr[method].argv[0], args);
1147		dprintf(STDERR_FILENO, "exec `%s' failed, %s",
1148		    compr[method].argv[0], strerror(errno));
1149		_exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */
1150	}
1151#endif
1152	/* parent */
1153	/* Close write sides of child stdout/err pipes */
1154	for (i = 1; i < __arraycount(fdp); i++)
1155		closefd(fdp[i], 1);
1156	/* Write the buffer data to child stdin, if we don't have fd */
1157	if (fd == -1) {
1158		closefd(fdp[STDIN_FILENO], 0);
1159		writepid = writechild(fdp[STDIN_FILENO][1], old, *n);
1160		if (writepid == (pid_t)-1) {
1161			rv = makeerror(newch, n, "Write to child failed, %s",
1162			    strerror(errno));
1163			DPRINTF("Write to child failed\n");
1164			goto err;
1165		}
1166		closefd(fdp[STDIN_FILENO], 1);
1167	}
1168
1169	rv = OKDATA;
1170	r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0);
1171	DPRINTF("read got %zd\n", r);
1172	if (r < 0) {
1173		rv = ERRDATA;
1174		DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0],
1175		        strerror(errno));
1176		goto err;
1177	}
1178	if (CAST(size_t, r) == bytes_max) {
1179		/*
1180		 * close fd so that the child exits with sigpipe and ignore
1181		 * errors, otherwise we risk the child blocking and never
1182		 * exiting.
1183		 */
1184		DPRINTF("Closing stdout for bytes_max\n");
1185		closefd(fdp[STDOUT_FILENO], 0);
1186		goto ok;
1187	}
1188	if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) {
1189		DPRINTF("Got stuff from stderr %s\n", *newch);
1190		rv = ERRDATA;
1191		r = filter_error(*newch, r);
1192		goto ok;
1193	}
1194	if  (re == 0)
1195		goto ok;
1196	rv = makeerror(newch, n, "Read stderr failed, %s",
1197	    strerror(errno));
1198	goto err;
1199ok:
1200	*n = r;
1201	/* NUL terminate, as every buffer is handled here. */
1202	(*newch)[*n] = '\0';
1203err:
1204	closefd(fdp[STDIN_FILENO], 1);
1205	closefd(fdp[STDOUT_FILENO], 0);
1206	closefd(fdp[STDERR_FILENO], 0);
1207
1208	w = waitpid(pid, &status, 0);
1209wait_err:
1210	if (w == -1) {
1211		rv = makeerror(newch, n, "Wait failed, %s", strerror(errno));
1212		DPRINTF("Child wait return %#x\n", status);
1213	} else if (!WIFEXITED(status)) {
1214		DPRINTF("Child not exited (%#x)\n", status);
1215	} else if (WEXITSTATUS(status) != 0) {
1216		DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status));
1217	}
1218	if (writepid > 0) {
1219		/* _After_ we know decompressor has exited, our input writer
1220		 * definitely will exit now (at worst, writing fails in it,
1221		 * since output fd is closed now on the reading size).
1222		 */
1223		w = waitpid(writepid, &status, 0);
1224		writepid = -1;
1225		goto wait_err;
1226	}
1227
1228	closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here!
1229	DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv);
1230
1231	return rv;
1232}
1233#endif
1234