1/* $NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $ */ 2 3/* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30/* 31 * compress routines: 32 * zmagic() - returns 0 if not recognized, uncompresses and prints 33 * information if recognized 34 * uncompress(method, old, n, newch) - uncompress old into new, 35 * using method, return sizeof new 36 */ 37#include "file.h" 38 39#ifndef lint 40#if 0 41FILE_RCSID("@(#)$File: compress.c,v 1.157 2023/05/21 15:59:58 christos Exp $") 42#else 43__RCSID("$NetBSD: compress.c,v 1.23 2023/08/18 19:00:11 christos Exp $"); 44#endif 45#endif 46 47#include "magic.h" 48#include <stdlib.h> 49#ifdef HAVE_UNISTD_H 50#include <unistd.h> 51#endif 52#ifdef HAVE_SPAWN_H 53#include <spawn.h> 54#endif 55#include <string.h> 56#include <errno.h> 57#include <ctype.h> 58#include <stdarg.h> 59#include <signal.h> 60#ifndef HAVE_SIG_T 61typedef void (*sig_t)(int); 62#endif /* HAVE_SIG_T */ 63#ifdef HAVE_SYS_IOCTL_H 64#include <sys/ioctl.h> 65#endif 66#ifdef HAVE_SYS_WAIT_H 67#include <sys/wait.h> 68#endif 69#if defined(HAVE_SYS_TIME_H) 70#include <sys/time.h> 71#endif 72 73#if defined(HAVE_ZLIB_H) && defined(ZLIBSUPPORT) 74#define BUILTIN_DECOMPRESS 75#include <zlib.h> 76#endif 77 78#if defined(HAVE_BZLIB_H) && defined(BZLIBSUPPORT) 79#define BUILTIN_BZLIB 80#include <bzlib.h> 81#endif 82 83#if defined(HAVE_LZMA_H) && defined(XZLIBSUPPORT) 84#define BUILTIN_XZLIB 85#include <lzma.h> 86#endif 87 88#if defined(HAVE_ZSTD_H) && defined(ZSTDLIBSUPPORT) 89#define BUILTIN_ZSTDLIB 90#include <zstd.h> 91#include <zstd_errors.h> 92#endif 93 94#if defined(HAVE_LZLIB_H) && defined(LZLIBSUPPORT) 95#define BUILTIN_LZLIB 96#include <lzlib.h> 97#endif 98 99#ifdef DEBUG 100int tty = -1; 101#define DPRINTF(...) do { \ 102 if (tty == -1) \ 103 tty = open("/dev/tty", O_RDWR); \ 104 if (tty == -1) \ 105 abort(); \ 106 dprintf(tty, __VA_ARGS__); \ 107} while (/*CONSTCOND*/0) 108#else 109#define DPRINTF(...) 110#endif 111 112#ifdef ZLIBSUPPORT 113/* 114 * The following python code is not really used because ZLIBSUPPORT is only 115 * defined if we have a built-in zlib, and the built-in zlib handles that. 116 * That is not true for android where we have zlib.h and not -lz. 117 */ 118static const char zlibcode[] = 119 "import sys, zlib; sys.stdout.write(zlib.decompress(sys.stdin.read()))"; 120 121static const char *zlib_args[] = { "python", "-c", zlibcode, NULL }; 122 123static int 124zlibcmp(const unsigned char *buf) 125{ 126 unsigned short x = 1; 127 unsigned char *s = CAST(unsigned char *, CAST(void *, &x)); 128 129 if ((buf[0] & 0xf) != 8 || (buf[0] & 0x80) != 0) 130 return 0; 131 if (s[0] != 1) /* endianness test */ 132 x = buf[0] | (buf[1] << 8); 133 else 134 x = buf[1] | (buf[0] << 8); 135 if (x % 31) 136 return 0; 137 return 1; 138} 139#endif 140 141static int 142lzmacmp(const unsigned char *buf) 143{ 144 if (buf[0] != 0x5d || buf[1] || buf[2]) 145 return 0; 146 if (buf[12] && buf[12] != 0xff) 147 return 0; 148 return 1; 149} 150 151#define gzip_flags "-cd" 152#define lzip_flags gzip_flags 153 154static const char *gzip_args[] = { 155 "gzip", gzip_flags, NULL 156}; 157static const char *uncompress_args[] = { 158 "uncompress", "-c", NULL 159}; 160static const char *bzip2_args[] = { 161 "bzip2", "-cd", NULL 162}; 163static const char *lzip_args[] = { 164 "lzip", lzip_flags, NULL 165}; 166static const char *xz_args[] = { 167 "xz", "-cd", NULL 168}; 169static const char *lrzip_args[] = { 170 "lrzip", "-qdf", "-", NULL 171}; 172static const char *lz4_args[] = { 173 "lz4", "-cd", NULL 174}; 175static const char *zstd_args[] = { 176 "zstd", "-cd", NULL 177}; 178 179#define do_zlib NULL 180#define do_bzlib NULL 181 182file_private const struct { 183 union { 184 const char *magic; 185 int (*func)(const unsigned char *); 186 } u; 187 int maglen; 188 const char **argv; 189 void *unused; 190} compr[] = { 191#define METH_FROZEN 2 192#define METH_BZIP 7 193#define METH_XZ 9 194#define METH_LZIP 8 195#define METH_ZSTD 12 196#define METH_LZMA 13 197#define METH_ZLIB 14 198 { { .magic = "\037\235" }, 2, gzip_args, NULL }, /* 0, compressed */ 199 /* Uncompress can get stuck; so use gzip first if we have it 200 * Idea from Damien Clark, thanks! */ 201 { { .magic = "\037\235" }, 2, uncompress_args, NULL },/* 1, compressed */ 202 { { .magic = "\037\213" }, 2, gzip_args, do_zlib },/* 2, gzipped */ 203 { { .magic = "\037\236" }, 2, gzip_args, NULL }, /* 3, frozen */ 204 { { .magic = "\037\240" }, 2, gzip_args, NULL }, /* 4, SCO LZH */ 205 /* the standard pack utilities do not accept standard input */ 206 { { .magic = "\037\036" }, 2, gzip_args, NULL }, /* 5, packed */ 207 { { .magic = "PK\3\4" }, 4, gzip_args, NULL }, /* 6, pkziped */ 208 /* ...only first file examined */ 209 { { .magic = "BZh" }, 3, bzip2_args, do_bzlib },/* 7, bzip2-ed */ 210 { { .magic = "LZIP" }, 4, lzip_args, NULL }, /* 8, lzip-ed */ 211 { { .magic = "\3757zXZ\0" },6, xz_args, NULL }, /* 9, XZ Util */ 212 { { .magic = "LRZI" }, 4, lrzip_args, NULL }, /* 10, LRZIP */ 213 { { .magic = "\004\"M\030" },4, lz4_args, NULL }, /* 11, LZ4 */ 214 { { .magic = "\x28\xB5\x2F\xFD" }, 4, zstd_args, NULL },/* 12, zstd */ 215 { { .func = lzmacmp }, -13, xz_args, NULL }, /* 13, lzma */ 216#ifdef ZLIBSUPPORT 217 { { .func = zlibcmp }, -2, zlib_args, NULL }, /* 14, zlib */ 218#endif 219}; 220 221#define OKDATA 0 222#define NODATA 1 223#define ERRDATA 2 224 225file_private ssize_t swrite(int, const void *, size_t); 226#if HAVE_FORK 227file_private size_t ncompr = __arraycount(compr); 228file_private int uncompressbuf(int, size_t, size_t, int, const unsigned char *, 229 unsigned char **, size_t *); 230#ifdef BUILTIN_DECOMPRESS 231file_private int uncompresszlib(const unsigned char *, unsigned char **, size_t, 232 size_t *, int); 233file_private int uncompressgzipped(const unsigned char *, unsigned char **, size_t, 234 size_t *, int); 235#endif 236#ifdef BUILTIN_BZLIB 237file_private int uncompressbzlib(const unsigned char *, unsigned char **, size_t, 238 size_t *, int); 239#endif 240#ifdef BUILTIN_XZLIB 241file_private int uncompressxzlib(const unsigned char *, unsigned char **, size_t, 242 size_t *, int); 243#endif 244#ifdef BUILTIN_ZSTDLIB 245file_private int uncompresszstd(const unsigned char *, unsigned char **, size_t, 246 size_t *, int); 247#endif 248#ifdef BUILTIN_LZLIB 249file_private int uncompresslzlib(const unsigned char *, unsigned char **, size_t, 250 size_t *, int); 251#endif 252 253static int makeerror(unsigned char **, size_t *, const char *, ...) 254 __attribute__((__format__(__printf__, 3, 4))); 255file_private const char *methodname(size_t); 256 257file_private int 258format_decompression_error(struct magic_set *ms, size_t i, unsigned char *buf) 259{ 260 unsigned char *p; 261 int mime = ms->flags & MAGIC_MIME; 262 263 if (!mime) 264 return file_printf(ms, "ERROR:[%s: %s]", methodname(i), buf); 265 266 for (p = buf; *p; p++) 267 if (!isalnum(*p)) 268 *p = '-'; 269 270 return file_printf(ms, "application/x-decompression-error-%s-%s", 271 methodname(i), buf); 272} 273 274file_protected int 275file_zmagic(struct magic_set *ms, const struct buffer *b, const char *name) 276{ 277 unsigned char *newbuf = NULL; 278 size_t i, nsz; 279 char *rbuf; 280 file_pushbuf_t *pb; 281 int urv, prv, rv = 0; 282 int mime = ms->flags & MAGIC_MIME; 283 int fd = b->fd; 284 const unsigned char *buf = CAST(const unsigned char *, b->fbuf); 285 size_t nbytes = b->flen; 286 int sa_saved = 0; 287 struct sigaction sig_act; 288 289 if ((ms->flags & MAGIC_COMPRESS) == 0) 290 return 0; 291 292 for (i = 0; i < ncompr; i++) { 293 int zm; 294 if (nbytes < CAST(size_t, abs(compr[i].maglen))) 295 continue; 296 if (compr[i].maglen < 0) { 297 zm = (*compr[i].u.func)(buf); 298 } else { 299 zm = memcmp(buf, compr[i].u.magic, 300 CAST(size_t, compr[i].maglen)) == 0; 301 } 302 303 if (!zm) 304 continue; 305 306 /* Prevent SIGPIPE death if child dies unexpectedly */ 307 if (!sa_saved) { 308 //We can use sig_act for both new and old, but 309 struct sigaction new_act; 310 memset(&new_act, 0, sizeof(new_act)); 311 new_act.sa_handler = SIG_IGN; 312 sa_saved = sigaction(SIGPIPE, &new_act, &sig_act) != -1; 313 } 314 315 nsz = nbytes; 316 free(newbuf); 317 urv = uncompressbuf(fd, ms->bytes_max, i, 318 (ms->flags & MAGIC_NO_COMPRESS_FORK), buf, &newbuf, &nsz); 319 DPRINTF("uncompressbuf = %d, %s, %" SIZE_T_FORMAT "u\n", urv, 320 (char *)newbuf, nsz); 321 switch (urv) { 322 case OKDATA: 323 case ERRDATA: 324 ms->flags &= ~MAGIC_COMPRESS; 325 if (urv == ERRDATA) 326 prv = format_decompression_error(ms, i, newbuf); 327 else 328 prv = file_buffer(ms, -1, NULL, name, newbuf, 329 nsz); 330 if (prv == -1) 331 goto error; 332 rv = 1; 333 if ((ms->flags & MAGIC_COMPRESS_TRANSP) != 0) 334 goto out; 335 if (mime != MAGIC_MIME && mime != 0) 336 goto out; 337 if ((file_printf(ms, 338 mime ? " compressed-encoding=" : " (")) == -1) 339 goto error; 340 if ((pb = file_push_buffer(ms)) == NULL) 341 goto error; 342 /* 343 * XXX: If file_buffer fails here, we overwrite 344 * the compressed text. FIXME. 345 */ 346 if (file_buffer(ms, -1, NULL, NULL, buf, nbytes) == -1) 347 { 348 if (file_pop_buffer(ms, pb) != NULL) 349 abort(); 350 goto error; 351 } 352 if ((rbuf = file_pop_buffer(ms, pb)) != NULL) { 353 if (file_printf(ms, "%s", rbuf) == -1) { 354 free(rbuf); 355 goto error; 356 } 357 free(rbuf); 358 } 359 if (!mime && file_printf(ms, ")") == -1) 360 goto error; 361 /*FALLTHROUGH*/ 362 case NODATA: 363 break; 364 default: 365 abort(); 366 /*NOTREACHED*/ 367 error: 368 rv = -1; 369 break; 370 } 371 } 372out: 373 DPRINTF("rv = %d\n", rv); 374 375 if (sa_saved && sig_act.sa_handler != SIG_IGN) 376 (void)sigaction(SIGPIPE, &sig_act, NULL); 377 378 free(newbuf); 379 ms->flags |= MAGIC_COMPRESS; 380 DPRINTF("Zmagic returns %d\n", rv); 381 return rv; 382} 383#endif 384/* 385 * `safe' write for sockets and pipes. 386 */ 387file_private ssize_t 388swrite(int fd, const void *buf, size_t n) 389{ 390 ssize_t rv; 391 size_t rn = n; 392 393 do 394 switch (rv = write(fd, buf, n)) { 395 case -1: 396 if (errno == EINTR) 397 continue; 398 return -1; 399 default: 400 n -= rv; 401 buf = CAST(const char *, buf) + rv; 402 break; 403 } 404 while (n > 0); 405 return rn; 406} 407 408 409/* 410 * `safe' read for sockets and pipes. 411 */ 412file_protected ssize_t 413sread(int fd, void *buf, size_t n, int canbepipe __attribute__((__unused__))) 414{ 415 ssize_t rv; 416#if defined(FIONREAD) && !defined(__MINGW32__) 417 int t = 0; 418#endif 419 size_t rn = n; 420 421 if (fd == STDIN_FILENO) 422 goto nocheck; 423 424#if defined(FIONREAD) && !defined(__MINGW32__) 425 if (canbepipe && (ioctl(fd, FIONREAD, &t) == -1 || t == 0)) { 426#ifdef FD_ZERO 427 ssize_t cnt; 428 for (cnt = 0;; cnt++) { 429 fd_set check; 430 struct timeval tout = {0, 100 * 1000}; 431 int selrv; 432 433 FD_ZERO(&check); 434 FD_SET(fd, &check); 435 436 /* 437 * Avoid soft deadlock: do not read if there 438 * is nothing to read from sockets and pipes. 439 */ 440 selrv = select(fd + 1, &check, NULL, NULL, &tout); 441 if (selrv == -1) { 442 if (errno == EINTR || errno == EAGAIN) 443 continue; 444 } else if (selrv == 0 && cnt >= 5) { 445 return 0; 446 } else 447 break; 448 } 449#endif 450 (void)ioctl(fd, FIONREAD, &t); 451 } 452 453 if (t > 0 && CAST(size_t, t) < n) { 454 n = t; 455 rn = n; 456 } 457#endif 458 459nocheck: 460 do 461 switch ((rv = read(fd, buf, n))) { 462 case -1: 463 if (errno == EINTR) 464 continue; 465 return -1; 466 case 0: 467 return rn - n; 468 default: 469 n -= rv; 470 buf = CAST(char *, CCAST(void *, buf)) + rv; 471 break; 472 } 473 while (n > 0); 474 return rn; 475} 476 477file_protected int 478file_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 479 size_t nbytes) 480{ 481 char buf[4096]; 482 ssize_t r; 483 int tfd; 484 485#ifdef WIN32 486 const char *t; 487 buf[0] = '\0'; 488 if ((t = getenv("TEMP")) != NULL) 489 (void)strlcpy(buf, t, sizeof(buf)); 490 else if ((t = getenv("TMP")) != NULL) 491 (void)strlcpy(buf, t, sizeof(buf)); 492 else if ((t = getenv("TMPDIR")) != NULL) 493 (void)strlcpy(buf, t, sizeof(buf)); 494 if (buf[0] != '\0') 495 (void)strlcat(buf, "/", sizeof(buf)); 496 (void)strlcat(buf, "file.XXXXXX", sizeof(buf)); 497#else 498 (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof(buf)); 499#endif 500#ifndef HAVE_MKSTEMP 501 { 502 char *ptr = mktemp(buf); 503 tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 504 r = errno; 505 (void)unlink(ptr); 506 errno = r; 507 } 508#else 509 { 510 int te; 511 mode_t ou = umask(0); 512 tfd = mkstemp(buf); 513 (void)umask(ou); 514 te = errno; 515 (void)unlink(buf); 516 errno = te; 517 } 518#endif 519 if (tfd == -1) { 520 file_error(ms, errno, 521 "cannot create temporary file for pipe copy"); 522 return -1; 523 } 524 525 if (swrite(tfd, startbuf, nbytes) != CAST(ssize_t, nbytes)) 526 r = 1; 527 else { 528 while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 529 if (swrite(tfd, buf, CAST(size_t, r)) != r) 530 break; 531 } 532 533 switch (r) { 534 case -1: 535 file_error(ms, errno, "error copying from pipe to temp file"); 536 return -1; 537 case 0: 538 break; 539 default: 540 file_error(ms, errno, "error while writing to temp file"); 541 return -1; 542 } 543 544 /* 545 * We duplicate the file descriptor, because fclose on a 546 * tmpfile will delete the file, but any open descriptors 547 * can still access the phantom inode. 548 */ 549 if ((fd = dup2(tfd, fd)) == -1) { 550 file_error(ms, errno, "could not dup descriptor for temp file"); 551 return -1; 552 } 553 (void)close(tfd); 554 if (lseek(fd, CAST(off_t, 0), SEEK_SET) == CAST(off_t, -1)) { 555 file_badseek(ms); 556 return -1; 557 } 558 return fd; 559} 560#if HAVE_FORK 561#ifdef BUILTIN_DECOMPRESS 562 563#define FHCRC (1 << 1) 564#define FEXTRA (1 << 2) 565#define FNAME (1 << 3) 566#define FCOMMENT (1 << 4) 567 568 569file_private int 570uncompressgzipped(const unsigned char *old, unsigned char **newch, 571 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 572{ 573 unsigned char flg; 574 size_t data_start = 10; 575 576 if (*n < 4) { 577 goto err; 578 } 579 580 flg = old[3]; 581 582 if (flg & FEXTRA) { 583 if (data_start + 1 >= *n) 584 goto err; 585 data_start += 2 + old[data_start] + old[data_start + 1] * 256; 586 } 587 if (flg & FNAME) { 588 while(data_start < *n && old[data_start]) 589 data_start++; 590 data_start++; 591 } 592 if (flg & FCOMMENT) { 593 while(data_start < *n && old[data_start]) 594 data_start++; 595 data_start++; 596 } 597 if (flg & FHCRC) 598 data_start += 2; 599 600 if (data_start >= *n) 601 goto err; 602 603 *n -= data_start; 604 old += data_start; 605 return uncompresszlib(old, newch, bytes_max, n, 0); 606err: 607 return makeerror(newch, n, "File too short"); 608} 609 610file_private int 611uncompresszlib(const unsigned char *old, unsigned char **newch, 612 size_t bytes_max, size_t *n, int zlib) 613{ 614 int rc; 615 z_stream z; 616 617 DPRINTF("builtin zlib decompression\n"); 618 z.next_in = CCAST(Bytef *, old); 619 z.avail_in = CAST(uint32_t, *n); 620 z.next_out = *newch; 621 z.avail_out = CAST(unsigned int, bytes_max); 622 z.zalloc = Z_NULL; 623 z.zfree = Z_NULL; 624 z.opaque = Z_NULL; 625 626 /* LINTED bug in header macro */ 627 rc = zlib ? inflateInit(&z) : inflateInit2(&z, -15); 628 if (rc != Z_OK) 629 goto err; 630 631 rc = inflate(&z, Z_SYNC_FLUSH); 632 if (rc != Z_OK && rc != Z_STREAM_END) { 633 inflateEnd(&z); 634 goto err; 635 } 636 637 *n = CAST(size_t, z.total_out); 638 rc = inflateEnd(&z); 639 if (rc != Z_OK) 640 goto err; 641 642 /* let's keep the nul-terminate tradition */ 643 (*newch)[*n] = '\0'; 644 645 return OKDATA; 646err: 647 return makeerror(newch, n, "%s", z.msg ? z.msg : zError(rc)); 648} 649#endif 650 651#ifdef BUILTIN_BZLIB 652file_private int 653uncompressbzlib(const unsigned char *old, unsigned char **newch, 654 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 655{ 656 int rc; 657 bz_stream bz; 658 659 DPRINTF("builtin bzlib decompression\n"); 660 memset(&bz, 0, sizeof(bz)); 661 rc = BZ2_bzDecompressInit(&bz, 0, 0); 662 if (rc != BZ_OK) 663 goto err; 664 665 bz.next_in = CCAST(char *, RCAST(const char *, old)); 666 bz.avail_in = CAST(uint32_t, *n); 667 bz.next_out = RCAST(char *, *newch); 668 bz.avail_out = CAST(unsigned int, bytes_max); 669 670 rc = BZ2_bzDecompress(&bz); 671 if (rc != BZ_OK && rc != BZ_STREAM_END) { 672 BZ2_bzDecompressEnd(&bz); 673 goto err; 674 } 675 676 /* Assume byte_max is within 32bit */ 677 /* assert(bz.total_out_hi32 == 0); */ 678 *n = CAST(size_t, bz.total_out_lo32); 679 rc = BZ2_bzDecompressEnd(&bz); 680 if (rc != BZ_OK) 681 goto err; 682 683 /* let's keep the nul-terminate tradition */ 684 (*newch)[*n] = '\0'; 685 686 return OKDATA; 687err: 688 return makeerror(newch, n, "bunzip error %d", rc); 689} 690#endif 691 692#ifdef BUILTIN_XZLIB 693file_private int 694uncompressxzlib(const unsigned char *old, unsigned char **newch, 695 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 696{ 697 int rc; 698 lzma_stream xz; 699 700 DPRINTF("builtin xzlib decompression\n"); 701 memset(&xz, 0, sizeof(xz)); 702 rc = lzma_auto_decoder(&xz, UINT64_MAX, 0); 703 if (rc != LZMA_OK) 704 goto err; 705 706 xz.next_in = CCAST(const uint8_t *, old); 707 xz.avail_in = CAST(uint32_t, *n); 708 xz.next_out = RCAST(uint8_t *, *newch); 709 xz.avail_out = CAST(unsigned int, bytes_max); 710 711 rc = lzma_code(&xz, LZMA_RUN); 712 if (rc != LZMA_OK && rc != LZMA_STREAM_END) { 713 lzma_end(&xz); 714 goto err; 715 } 716 717 *n = CAST(size_t, xz.total_out); 718 719 lzma_end(&xz); 720 721 /* let's keep the nul-terminate tradition */ 722 (*newch)[*n] = '\0'; 723 724 return OKDATA; 725err: 726 return makeerror(newch, n, "unxz error %d", rc); 727} 728#endif 729 730#ifdef BUILTIN_ZSTDLIB 731file_private int 732uncompresszstd(const unsigned char *old, unsigned char **newch, 733 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 734{ 735 size_t rc; 736 ZSTD_DStream *zstd; 737 ZSTD_inBuffer in; 738 ZSTD_outBuffer out; 739 740 DPRINTF("builtin zstd decompression\n"); 741 if ((zstd = ZSTD_createDStream()) == NULL) { 742 return makeerror(newch, n, "No ZSTD decompression stream, %s", 743 strerror(errno)); 744 } 745 746 rc = ZSTD_DCtx_reset(zstd, ZSTD_reset_session_only); 747 if (ZSTD_isError(rc)) 748 goto err; 749 750 in.src = CCAST(const void *, old); 751 in.size = *n; 752 in.pos = 0; 753 out.dst = RCAST(void *, *newch); 754 out.size = bytes_max; 755 out.pos = 0; 756 757 rc = ZSTD_decompressStream(zstd, &out, &in); 758 if (ZSTD_isError(rc)) 759 goto err; 760 761 *n = out.pos; 762 763 ZSTD_freeDStream(zstd); 764 765 /* let's keep the nul-terminate tradition */ 766 (*newch)[*n] = '\0'; 767 768 return OKDATA; 769err: 770 ZSTD_freeDStream(zstd); 771 return makeerror(newch, n, "zstd error %d", ZSTD_getErrorCode(rc)); 772} 773#endif 774 775#ifdef BUILTIN_LZLIB 776file_private int 777uncompresslzlib(const unsigned char *old, unsigned char **newch, 778 size_t bytes_max, size_t *n, int extra __attribute__((__unused__))) 779{ 780 enum LZ_Errno err; 781 size_t old_remaining = *n; 782 size_t new_remaining = bytes_max; 783 size_t total_read = 0; 784 unsigned char *bufp; 785 struct LZ_Decoder *dec; 786 787 bufp = *newch; 788 789 DPRINTF("builtin lzlib decompression\n"); 790 dec = LZ_decompress_open(); 791 if (!dec) { 792 return makeerror(newch, n, "unable to allocate LZ_Decoder"); 793 } 794 if (LZ_decompress_errno(dec) != LZ_ok) 795 goto err; 796 797 for (;;) { 798 // LZ_decompress_read() stops at member boundaries, so we may 799 // have more than one successful read after writing all data 800 // we have. 801 if (old_remaining > 0) { 802 int wr = LZ_decompress_write(dec, old, old_remaining); 803 if (wr < 0) 804 goto err; 805 old_remaining -= wr; 806 old += wr; 807 } 808 809 int rd = LZ_decompress_read(dec, bufp, new_remaining); 810 if (rd > 0) { 811 new_remaining -= rd; 812 bufp += rd; 813 total_read += rd; 814 } 815 816 if (rd < 0 || LZ_decompress_errno(dec) != LZ_ok) 817 goto err; 818 if (new_remaining == 0) 819 break; 820 if (old_remaining == 0 && rd == 0) 821 break; 822 } 823 824 LZ_decompress_close(dec); 825 *n = total_read; 826 827 /* let's keep the nul-terminate tradition */ 828 *bufp = '\0'; 829 830 return OKDATA; 831err: 832 err = LZ_decompress_errno(dec); 833 LZ_decompress_close(dec); 834 return makeerror(newch, n, "lzlib error: %s", LZ_strerror(err)); 835} 836#endif 837 838 839static int 840makeerror(unsigned char **buf, size_t *len, const char *fmt, ...) 841{ 842 char *msg; 843 va_list ap; 844 int rv; 845 846 DPRINTF("Makeerror %s\n", fmt); 847 free(*buf); 848 va_start(ap, fmt); 849 rv = vasprintf(&msg, fmt, ap); 850 va_end(ap); 851 if (rv < 0) { 852 DPRINTF("Makeerror failed"); 853 *buf = NULL; 854 *len = 0; 855 return NODATA; 856 } 857 *buf = RCAST(unsigned char *, msg); 858 *len = strlen(msg); 859 return ERRDATA; 860} 861 862static void 863closefd(int *fd, size_t i) 864{ 865 if (fd[i] == -1) 866 return; 867 (void) close(fd[i]); 868 fd[i] = -1; 869} 870 871static void 872closep(int *fd) 873{ 874 size_t i; 875 for (i = 0; i < 2; i++) 876 closefd(fd, i); 877} 878 879static void 880movedesc(void *v, int i, int fd) 881{ 882 if (fd == i) 883 return; /* "no dup was necessary" */ 884#ifdef HAVE_POSIX_SPAWNP 885 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 886 posix_spawn_file_actions_adddup2(fa, fd, i); 887 posix_spawn_file_actions_addclose(fa, fd); 888#else 889 if (dup2(fd, i) == -1) { 890 DPRINTF("dup(%d, %d) failed (%s)\n", fd, i, strerror(errno)); 891 exit(EXIT_FAILURE); 892 } 893 close(v ? fd : fd); 894#endif 895} 896 897static void 898closedesc(void *v, int fd) 899{ 900#ifdef HAVE_POSIX_SPAWNP 901 posix_spawn_file_actions_t *fa = RCAST(posix_spawn_file_actions_t *, v); 902 posix_spawn_file_actions_addclose(fa, fd); 903#else 904 close(v ? fd : fd); 905#endif 906} 907 908static void 909handledesc(void *v, int fd, int fdp[3][2]) 910{ 911 if (fd != -1) { 912 (void) lseek(fd, CAST(off_t, 0), SEEK_SET); 913 movedesc(v, STDIN_FILENO, fd); 914 } else { 915 movedesc(v, STDIN_FILENO, fdp[STDIN_FILENO][0]); 916 if (fdp[STDIN_FILENO][1] > 2) 917 closedesc(v, fdp[STDIN_FILENO][1]); 918 } 919 920 file_clear_closexec(STDIN_FILENO); 921 922///FIXME: if one of the fdp[i][j] is 0 or 1, this can bomb spectacularly 923 movedesc(v, STDOUT_FILENO, fdp[STDOUT_FILENO][1]); 924 if (fdp[STDOUT_FILENO][0] > 2) 925 closedesc(v, fdp[STDOUT_FILENO][0]); 926 927 file_clear_closexec(STDOUT_FILENO); 928 929 movedesc(v, STDERR_FILENO, fdp[STDERR_FILENO][1]); 930 if (fdp[STDERR_FILENO][0] > 2) 931 closedesc(v, fdp[STDERR_FILENO][0]); 932 933 file_clear_closexec(STDERR_FILENO); 934} 935 936static pid_t 937writechild(int fd, const void *old, size_t n) 938{ 939 pid_t pid; 940 941 /* 942 * fork again, to avoid blocking because both 943 * pipes filled 944 */ 945 pid = fork(); 946 if (pid == -1) { 947 DPRINTF("Fork failed (%s)\n", strerror(errno)); 948 return -1; 949 } 950 if (pid == 0) { 951 /* child */ 952 if (swrite(fd, old, n) != CAST(ssize_t, n)) { 953 DPRINTF("Write failed (%s)\n", strerror(errno)); 954 exit(EXIT_FAILURE); 955 } 956 exit(EXIT_SUCCESS); 957 } 958 /* parent */ 959 return pid; 960} 961 962static ssize_t 963filter_error(unsigned char *ubuf, ssize_t n) 964{ 965 char *p; 966 char *buf; 967 968 ubuf[n] = '\0'; 969 buf = RCAST(char *, ubuf); 970 while (isspace(CAST(unsigned char, *buf))) 971 buf++; 972 DPRINTF("Filter error[[[%s]]]\n", buf); 973 if ((p = strchr(CAST(char *, buf), '\n')) != NULL) 974 *p = '\0'; 975 if ((p = strchr(CAST(char *, buf), ';')) != NULL) 976 *p = '\0'; 977 if ((p = strrchr(CAST(char *, buf), ':')) != NULL) { 978 ++p; 979 while (isspace(CAST(unsigned char, *p))) 980 p++; 981 n = strlen(p); 982 memmove(ubuf, p, CAST(size_t, n + 1)); 983 } 984 DPRINTF("Filter error after[[[%s]]]\n", (char *)ubuf); 985 if (islower(*ubuf)) 986 *ubuf = toupper(*ubuf); 987 return n; 988} 989 990file_private const char * 991methodname(size_t method) 992{ 993 switch (method) { 994#ifdef BUILTIN_DECOMPRESS 995 case METH_FROZEN: 996 case METH_ZLIB: 997 return "zlib"; 998#endif 999#ifdef BUILTIN_BZLIB 1000 case METH_BZIP: 1001 return "bzlib"; 1002#endif 1003#ifdef BUILTIN_XZLIB 1004 case METH_XZ: 1005 case METH_LZMA: 1006 return "xzlib"; 1007#endif 1008#ifdef BUILTIN_ZSTDLIB 1009 case METH_ZSTD: 1010 return "zstd"; 1011#endif 1012#ifdef BUILTIN_LZLIB 1013 case METH_LZIP: 1014 return "lzlib"; 1015#endif 1016 default: 1017 return compr[method].argv[0]; 1018 } 1019} 1020 1021file_private int (* 1022getdecompressor(size_t method))(const unsigned char *, unsigned char **, size_t, 1023 size_t *, int) 1024{ 1025 switch (method) { 1026#ifdef BUILTIN_DECOMPRESS 1027 case METH_FROZEN: 1028 return uncompressgzipped; 1029 case METH_ZLIB: 1030 return uncompresszlib; 1031#endif 1032#ifdef BUILTIN_BZLIB 1033 case METH_BZIP: 1034 return uncompressbzlib; 1035#endif 1036#ifdef BUILTIN_XZLIB 1037 case METH_XZ: 1038 case METH_LZMA: 1039 return uncompressxzlib; 1040#endif 1041#ifdef BUILTIN_ZSTDLIB 1042 case METH_ZSTD: 1043 return uncompresszstd; 1044#endif 1045#ifdef BUILTIN_LZLIB 1046 case METH_LZIP: 1047 return uncompresslzlib; 1048#endif 1049 default: 1050 return NULL; 1051 } 1052} 1053 1054file_private int 1055uncompressbuf(int fd, size_t bytes_max, size_t method, int nofork, 1056 const unsigned char *old, unsigned char **newch, size_t* n) 1057{ 1058 int fdp[3][2]; 1059 int status, rv, w; 1060 pid_t pid; 1061 pid_t writepid = -1; 1062 size_t i; 1063 ssize_t r, re; 1064 char *const *args; 1065#ifdef HAVE_POSIX_SPAWNP 1066 posix_spawn_file_actions_t fa; 1067#endif 1068 int (*decompress)(const unsigned char *, unsigned char **, 1069 size_t, size_t *, int) = getdecompressor(method); 1070 1071 *newch = CAST(unsigned char *, malloc(bytes_max + 1)); 1072 if (*newch == NULL) 1073 return makeerror(newch, n, "No buffer, %s", strerror(errno)); 1074 1075 if (decompress) { 1076 if (nofork) { 1077 return makeerror(newch, n, 1078 "Fork is required to uncompress, but disabled"); 1079 } 1080 return (*decompress)(old, newch, bytes_max, n, 1); 1081 } 1082 1083 (void)fflush(stdout); 1084 (void)fflush(stderr); 1085 1086 for (i = 0; i < __arraycount(fdp); i++) 1087 fdp[i][0] = fdp[i][1] = -1; 1088 1089 /* 1090 * There are multithreaded users who run magic_file() 1091 * from dozens of threads. If two parallel magic_file() calls 1092 * analyze two large compressed files, both will spawn 1093 * an uncompressing child here, which writes out uncompressed data. 1094 * We read some portion, then close the pipe, then waitpid() the child. 1095 * If uncompressed data is larger, child should get EPIPE and exit. 1096 * However, with *parallel* calls OTHER child may unintentionally 1097 * inherit pipe fds, thus keeping pipe open and making writes in 1098 * our child block instead of failing with EPIPE! 1099 * (For the bug to occur, two threads must mutually inherit their pipes, 1100 * and both must have large outputs. Thus it happens not that often). 1101 * To avoid this, be sure to create pipes with O_CLOEXEC. 1102 */ 1103 if ((fd == -1 && file_pipe_closexec(fdp[STDIN_FILENO]) == -1) || 1104 file_pipe_closexec(fdp[STDOUT_FILENO]) == -1 || 1105 file_pipe_closexec(fdp[STDERR_FILENO]) == -1) { 1106 closep(fdp[STDIN_FILENO]); 1107 closep(fdp[STDOUT_FILENO]); 1108 return makeerror(newch, n, "Cannot create pipe, %s", 1109 strerror(errno)); 1110 } 1111 1112 args = RCAST(char *const *, RCAST(intptr_t, compr[method].argv)); 1113#ifdef HAVE_POSIX_SPAWNP 1114 posix_spawn_file_actions_init(&fa); 1115 1116 handledesc(&fa, fd, fdp); 1117 1118 DPRINTF("Executing %s\n", compr[method].argv[0]); 1119 status = posix_spawnp(&pid, compr[method].argv[0], &fa, NULL, 1120 args, NULL); 1121 1122 posix_spawn_file_actions_destroy(&fa); 1123 1124 if (status == -1) { 1125 return makeerror(newch, n, "Cannot posix_spawn `%s', %s", 1126 compr[method].argv[0], strerror(errno)); 1127 } 1128#else 1129 /* For processes with large mapped virtual sizes, vfork 1130 * may be _much_ faster (10-100 times) than fork. 1131 */ 1132 pid = vfork(); 1133 if (pid == -1) { 1134 return makeerror(newch, n, "Cannot vfork, %s", 1135 strerror(errno)); 1136 } 1137 if (pid == 0) { 1138 /* child */ 1139 /* Note: we are after vfork, do not modify memory 1140 * in a way which confuses parent. In particular, 1141 * do not modify fdp[i][j]. 1142 */ 1143 handledesc(NULL, fd, fdp); 1144 DPRINTF("Executing %s\n", compr[method].argv[0]); 1145 1146 (void)execvp(compr[method].argv[0], args); 1147 dprintf(STDERR_FILENO, "exec `%s' failed, %s", 1148 compr[method].argv[0], strerror(errno)); 1149 _exit(EXIT_FAILURE); /* _exit(), not exit(), because of vfork */ 1150 } 1151#endif 1152 /* parent */ 1153 /* Close write sides of child stdout/err pipes */ 1154 for (i = 1; i < __arraycount(fdp); i++) 1155 closefd(fdp[i], 1); 1156 /* Write the buffer data to child stdin, if we don't have fd */ 1157 if (fd == -1) { 1158 closefd(fdp[STDIN_FILENO], 0); 1159 writepid = writechild(fdp[STDIN_FILENO][1], old, *n); 1160 if (writepid == (pid_t)-1) { 1161 rv = makeerror(newch, n, "Write to child failed, %s", 1162 strerror(errno)); 1163 DPRINTF("Write to child failed\n"); 1164 goto err; 1165 } 1166 closefd(fdp[STDIN_FILENO], 1); 1167 } 1168 1169 rv = OKDATA; 1170 r = sread(fdp[STDOUT_FILENO][0], *newch, bytes_max, 0); 1171 DPRINTF("read got %zd\n", r); 1172 if (r < 0) { 1173 rv = ERRDATA; 1174 DPRINTF("Read stdout failed %d (%s)\n", fdp[STDOUT_FILENO][0], 1175 strerror(errno)); 1176 goto err; 1177 } 1178 if (CAST(size_t, r) == bytes_max) { 1179 /* 1180 * close fd so that the child exits with sigpipe and ignore 1181 * errors, otherwise we risk the child blocking and never 1182 * exiting. 1183 */ 1184 DPRINTF("Closing stdout for bytes_max\n"); 1185 closefd(fdp[STDOUT_FILENO], 0); 1186 goto ok; 1187 } 1188 if ((re = sread(fdp[STDERR_FILENO][0], *newch, bytes_max, 0)) > 0) { 1189 DPRINTF("Got stuff from stderr %s\n", *newch); 1190 rv = ERRDATA; 1191 r = filter_error(*newch, r); 1192 goto ok; 1193 } 1194 if (re == 0) 1195 goto ok; 1196 rv = makeerror(newch, n, "Read stderr failed, %s", 1197 strerror(errno)); 1198 goto err; 1199ok: 1200 *n = r; 1201 /* NUL terminate, as every buffer is handled here. */ 1202 (*newch)[*n] = '\0'; 1203err: 1204 closefd(fdp[STDIN_FILENO], 1); 1205 closefd(fdp[STDOUT_FILENO], 0); 1206 closefd(fdp[STDERR_FILENO], 0); 1207 1208 w = waitpid(pid, &status, 0); 1209wait_err: 1210 if (w == -1) { 1211 rv = makeerror(newch, n, "Wait failed, %s", strerror(errno)); 1212 DPRINTF("Child wait return %#x\n", status); 1213 } else if (!WIFEXITED(status)) { 1214 DPRINTF("Child not exited (%#x)\n", status); 1215 } else if (WEXITSTATUS(status) != 0) { 1216 DPRINTF("Child exited (%#x)\n", WEXITSTATUS(status)); 1217 } 1218 if (writepid > 0) { 1219 /* _After_ we know decompressor has exited, our input writer 1220 * definitely will exit now (at worst, writing fails in it, 1221 * since output fd is closed now on the reading size). 1222 */ 1223 w = waitpid(writepid, &status, 0); 1224 writepid = -1; 1225 goto wait_err; 1226 } 1227 1228 closefd(fdp[STDIN_FILENO], 0); //why? it is already closed here! 1229 DPRINTF("Returning %p n=%" SIZE_T_FORMAT "u rv=%d\n", *newch, *n, rv); 1230 1231 return rv; 1232} 1233#endif 1234