/* compress.c revision 226048 */
168349Sobrien/* 2133359Sobrien * Copyright (c) Ian F. Darwin 1986-1995. 3133359Sobrien * Software written by Ian F. Darwin and others; 4133359Sobrien * maintained 1995-present by Christos Zoulas and others. 5133359Sobrien * 6133359Sobrien * Redistribution and use in source and binary forms, with or without 7133359Sobrien * modification, are permitted provided that the following conditions 8133359Sobrien * are met: 9133359Sobrien * 1. Redistributions of source code must retain the above copyright 10133359Sobrien * notice immediately at the beginning of the file, without modification, 11133359Sobrien * this list of conditions, and the following disclaimer. 12133359Sobrien * 2. Redistributions in binary form must reproduce the above copyright 13133359Sobrien * notice, this list of conditions and the following disclaimer in the 14133359Sobrien * documentation and/or other materials provided with the distribution. 15133359Sobrien * 16133359Sobrien * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17133359Sobrien * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18133359Sobrien * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19133359Sobrien * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 20133359Sobrien * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21133359Sobrien * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22133359Sobrien * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23133359Sobrien * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24133359Sobrien * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25133359Sobrien * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26133359Sobrien * SUCH DAMAGE. 
27133359Sobrien */ 28133359Sobrien/* 2968349Sobrien * compress routines: 3068349Sobrien * zmagic() - returns 0 if not recognized, uncompresses and prints 3168349Sobrien * information if recognized 3268349Sobrien * uncompress(method, old, n, newch) - uncompress old into new, 3368349Sobrien * using method, return sizeof new 3468349Sobrien */ 3568349Sobrien#include "file.h" 36191736Sobrien 37191736Sobrien#ifndef lint 38226048SobrienFILE_RCSID("@(#)$File: compress.c,v 1.67 2011/09/01 12:12:37 christos Exp $") 39191736Sobrien#endif 40191736Sobrien 41133359Sobrien#include "magic.h" 4268349Sobrien#include <stdlib.h> 4368349Sobrien#ifdef HAVE_UNISTD_H 4468349Sobrien#include <unistd.h> 4568349Sobrien#endif 4668349Sobrien#include <string.h> 47133359Sobrien#include <errno.h> 48226048Sobrien#ifndef __MINGW32__ 49169942Sobrien#include <sys/ioctl.h> 50226048Sobrien#endif 5168349Sobrien#ifdef HAVE_SYS_WAIT_H 5268349Sobrien#include <sys/wait.h> 5368349Sobrien#endif 54169962Sobrien#if defined(HAVE_SYS_TIME_H) 55169962Sobrien#include <sys/time.h> 56169962Sobrien#endif 57175296Sobrien#if defined(HAVE_ZLIB_H) && defined(HAVE_LIBZ) 58175296Sobrien#define BUILTIN_DECOMPRESS 59103373Sobrien#include <zlib.h> 60103373Sobrien#endif 61103373Sobrien 62186690Sobrienprivate const struct { 63186690Sobrien const char magic[8]; 64133359Sobrien size_t maglen; 65186690Sobrien const char *argv[3]; 66133359Sobrien int silent; 6768349Sobrien} compr[] = { 6868349Sobrien { "\037\235", 2, { "gzip", "-cdq", NULL }, 1 }, /* compressed */ 6975937Sobrien /* Uncompress can get stuck; so use gzip first if we have it 7075937Sobrien * Idea from Damien Clark, thanks! 
*/ 7175937Sobrien { "\037\235", 2, { "uncompress", "-c", NULL }, 1 }, /* compressed */ 7268349Sobrien { "\037\213", 2, { "gzip", "-cdq", NULL }, 1 }, /* gzipped */ 7368349Sobrien { "\037\236", 2, { "gzip", "-cdq", NULL }, 1 }, /* frozen */ 7468349Sobrien { "\037\240", 2, { "gzip", "-cdq", NULL }, 1 }, /* SCO LZH */ 7568349Sobrien /* the standard pack utilities do not accept standard input */ 7668349Sobrien { "\037\036", 2, { "gzip", "-cdq", NULL }, 0 }, /* packed */ 77159764Sobrien { "PK\3\4", 4, { "gzip", "-cdq", NULL }, 1 }, /* pkzipped, */ 78159764Sobrien /* ...only first file examined */ 7980588Sobrien { "BZh", 3, { "bzip2", "-cd", NULL }, 1 }, /* bzip2-ed */ 80191736Sobrien { "LZIP", 4, { "lzip", "-cdq", NULL }, 1 }, 81192348Sdelphij { "\3757zXZ\0",6,{ "xz", "-cd", NULL }, 1 }, /* XZ Utils */ 82226048Sobrien { "LRZI", 4, { "lrzip", "-dqo-", NULL }, 1 }, /* LRZIP */ 8368349Sobrien}; 8468349Sobrien 85169942Sobrien#define NODATA ((size_t)~0) 8668349Sobrien 87133359Sobrienprivate ssize_t swrite(int, const void *, size_t); 88226048Sobrien#if HAVE_FORK 89226048Sobrienprivate size_t ncompr = sizeof(compr) / sizeof(compr[0]); 90159764Sobrienprivate size_t uncompressbuf(struct magic_set *, int, size_t, 91159764Sobrien const unsigned char *, unsigned char **, size_t); 92175296Sobrien#ifdef BUILTIN_DECOMPRESS 93133359Sobrienprivate size_t uncompressgzipped(struct magic_set *, const unsigned char *, 94133359Sobrien unsigned char **, size_t); 95103373Sobrien#endif 9668349Sobrien 97133359Sobrienprotected int 98169962Sobrienfile_zmagic(struct magic_set *ms, int fd, const char *name, 99169962Sobrien const unsigned char *buf, size_t nbytes) 10068349Sobrien{ 101133359Sobrien unsigned char *newbuf = NULL; 102133359Sobrien size_t i, nsz; 103133359Sobrien int rv = 0; 104175296Sobrien int mime = ms->flags & MAGIC_MIME; 10568349Sobrien 106133359Sobrien if ((ms->flags & MAGIC_COMPRESS) == 0) 107133359Sobrien return 0; 108133359Sobrien 10968349Sobrien for (i = 0; i < ncompr; i++) { 
11068349Sobrien if (nbytes < compr[i].maglen) 11168349Sobrien continue; 11268349Sobrien if (memcmp(buf, compr[i].magic, compr[i].maglen) == 0 && 113159764Sobrien (nsz = uncompressbuf(ms, fd, i, buf, &newbuf, 114169942Sobrien nbytes)) != NODATA) { 115133359Sobrien ms->flags &= ~MAGIC_COMPRESS; 116133359Sobrien rv = -1; 117169962Sobrien if (file_buffer(ms, -1, name, newbuf, nsz) == -1) 118133359Sobrien goto error; 119175296Sobrien 120175296Sobrien if (mime == MAGIC_MIME || mime == 0) { 121175296Sobrien if (file_printf(ms, mime ? 122175296Sobrien " compressed-encoding=" : " (") == -1) 123175296Sobrien goto error; 124175296Sobrien } 125175296Sobrien 126175296Sobrien if ((mime == 0 || mime & MAGIC_MIME_ENCODING) && 127175296Sobrien file_buffer(ms, -1, NULL, buf, nbytes) == -1) 128133359Sobrien goto error; 129175296Sobrien 130175296Sobrien if (!mime && file_printf(ms, ")") == -1) 131133359Sobrien goto error; 132133359Sobrien rv = 1; 133133359Sobrien break; 13468349Sobrien } 13568349Sobrien } 136133359Sobrienerror: 137133359Sobrien if (newbuf) 138133359Sobrien free(newbuf); 139133359Sobrien ms->flags |= MAGIC_COMPRESS; 140133359Sobrien return rv; 14168349Sobrien} 142226048Sobrien#endif 14375937Sobrien/* 14475937Sobrien * `safe' write for sockets and pipes. 14575937Sobrien */ 146133359Sobrienprivate ssize_t 147103373Sobrienswrite(int fd, const void *buf, size_t n) 14875937Sobrien{ 149226048Sobrien ssize_t rv; 15075937Sobrien size_t rn = n; 15168349Sobrien 15275937Sobrien do 15375937Sobrien switch (rv = write(fd, buf, n)) { 15475937Sobrien case -1: 15575937Sobrien if (errno == EINTR) 15675937Sobrien continue; 15775937Sobrien return -1; 15875937Sobrien default: 15975937Sobrien n -= rv; 160226048Sobrien buf = CAST(const char *, buf) + rv; 16175937Sobrien break; 16275937Sobrien } 16375937Sobrien while (n > 0); 16475937Sobrien return rn; 16575937Sobrien} 16675937Sobrien 16775937Sobrien 16875937Sobrien/* 16975937Sobrien * `safe' read for sockets and pipes. 
17075937Sobrien */ 171169942Sobrienprotected ssize_t 172226048Sobriensread(int fd, void *buf, size_t n, int canbepipe __attribute__ ((unused))) 17375937Sobrien{ 174226048Sobrien ssize_t rv; 175226048Sobrien#ifdef FD_ZERO 176226048Sobrien ssize_t cnt; 177226048Sobrien#endif 178169942Sobrien#ifdef FIONREAD 179169942Sobrien int t = 0; 180169942Sobrien#endif 18175937Sobrien size_t rn = n; 18275937Sobrien 183169942Sobrien if (fd == STDIN_FILENO) 184169942Sobrien goto nocheck; 185169942Sobrien 186169942Sobrien#ifdef FIONREAD 187169962Sobrien if ((canbepipe && (ioctl(fd, FIONREAD, &t) == -1)) || (t == 0)) { 188169942Sobrien#ifdef FD_ZERO 189169962Sobrien for (cnt = 0;; cnt++) { 190169942Sobrien fd_set check; 191169942Sobrien struct timeval tout = {0, 100 * 1000}; 192169962Sobrien int selrv; 193169942Sobrien 194169942Sobrien FD_ZERO(&check); 195169942Sobrien FD_SET(fd, &check); 196169942Sobrien 197169942Sobrien /* 198169942Sobrien * Avoid soft deadlock: do not read if there 199169942Sobrien * is nothing to read from sockets and pipes. 
200169942Sobrien */ 201169962Sobrien selrv = select(fd + 1, &check, NULL, NULL, &tout); 202169962Sobrien if (selrv == -1) { 203169942Sobrien if (errno == EINTR || errno == EAGAIN) 204169942Sobrien continue; 205169962Sobrien } else if (selrv == 0 && cnt >= 5) { 206169942Sobrien return 0; 207169962Sobrien } else 208169962Sobrien break; 209169942Sobrien } 210169942Sobrien#endif 211169942Sobrien (void)ioctl(fd, FIONREAD, &t); 212169942Sobrien } 213169942Sobrien 214169942Sobrien if (t > 0 && (size_t)t < n) { 215169942Sobrien n = t; 216169942Sobrien rn = n; 217169942Sobrien } 218169942Sobrien#endif 219169942Sobrien 220169942Sobriennocheck: 22175937Sobrien do 222169942Sobrien switch ((rv = read(fd, buf, n))) { 22375937Sobrien case -1: 22475937Sobrien if (errno == EINTR) 22575937Sobrien continue; 22675937Sobrien return -1; 227103373Sobrien case 0: 228103373Sobrien return rn - n; 22975937Sobrien default: 23075937Sobrien n -= rv; 23175937Sobrien buf = ((char *)buf) + rv; 23275937Sobrien break; 23375937Sobrien } 23475937Sobrien while (n > 0); 23575937Sobrien return rn; 23675937Sobrien} 23775937Sobrien 238133359Sobrienprotected int 239133359Sobrienfile_pipe2file(struct magic_set *ms, int fd, const void *startbuf, 240133359Sobrien size_t nbytes) 241103373Sobrien{ 242103373Sobrien char buf[4096]; 243226048Sobrien ssize_t r; 244226048Sobrien int tfd; 245226048Sobrien#ifdef HAVE_MKSTEMP 246226048Sobrien int te; 247226048Sobrien#endif 248103373Sobrien 249191736Sobrien (void)strlcpy(buf, "/tmp/file.XXXXXX", sizeof buf); 250103373Sobrien#ifndef HAVE_MKSTEMP 251103373Sobrien { 252103373Sobrien char *ptr = mktemp(buf); 253103373Sobrien tfd = open(ptr, O_RDWR|O_TRUNC|O_EXCL|O_CREAT, 0600); 254103373Sobrien r = errno; 255103373Sobrien (void)unlink(ptr); 256103373Sobrien errno = r; 257103373Sobrien } 258103373Sobrien#else 259103373Sobrien tfd = mkstemp(buf); 260226048Sobrien te = errno; 261103373Sobrien (void)unlink(buf); 262226048Sobrien errno = te; 263103373Sobrien#endif 
264103373Sobrien if (tfd == -1) { 265133359Sobrien file_error(ms, errno, 266133359Sobrien "cannot create temporary file for pipe copy"); 267133359Sobrien return -1; 268103373Sobrien } 269103373Sobrien 270133359Sobrien if (swrite(tfd, startbuf, nbytes) != (ssize_t)nbytes) 271103373Sobrien r = 1; 272103373Sobrien else { 273169962Sobrien while ((r = sread(fd, buf, sizeof(buf), 1)) > 0) 274133359Sobrien if (swrite(tfd, buf, (size_t)r) != r) 275103373Sobrien break; 276103373Sobrien } 277103373Sobrien 278103373Sobrien switch (r) { 279103373Sobrien case -1: 280133359Sobrien file_error(ms, errno, "error copying from pipe to temp file"); 281133359Sobrien return -1; 282103373Sobrien case 0: 283103373Sobrien break; 284103373Sobrien default: 285133359Sobrien file_error(ms, errno, "error while writing to temp file"); 286133359Sobrien return -1; 287103373Sobrien } 288103373Sobrien 289103373Sobrien /* 290103373Sobrien * We duplicate the file descriptor, because fclose on a 291103373Sobrien * tmpfile will delete the file, but any open descriptors 292103373Sobrien * can still access the phantom inode. 
293103373Sobrien */ 294103373Sobrien if ((fd = dup2(tfd, fd)) == -1) { 295133359Sobrien file_error(ms, errno, "could not dup descriptor for temp file"); 296133359Sobrien return -1; 297103373Sobrien } 298103373Sobrien (void)close(tfd); 299103373Sobrien if (lseek(fd, (off_t)0, SEEK_SET) == (off_t)-1) { 300133359Sobrien file_badseek(ms); 301133359Sobrien return -1; 302103373Sobrien } 303103373Sobrien return fd; 304103373Sobrien} 305226048Sobrien#if HAVE_FORK 306175296Sobrien#ifdef BUILTIN_DECOMPRESS 307103373Sobrien 308103373Sobrien#define FHCRC (1 << 1) 309103373Sobrien#define FEXTRA (1 << 2) 310103373Sobrien#define FNAME (1 << 3) 311103373Sobrien#define FCOMMENT (1 << 4) 312103373Sobrien 313133359Sobrienprivate size_t 314133359Sobrienuncompressgzipped(struct magic_set *ms, const unsigned char *old, 315133359Sobrien unsigned char **newch, size_t n) 31668349Sobrien{ 317103373Sobrien unsigned char flg = old[3]; 318133359Sobrien size_t data_start = 10; 319103373Sobrien z_stream z; 320103373Sobrien int rc; 321103373Sobrien 322133359Sobrien if (flg & FEXTRA) { 323133359Sobrien if (data_start+1 >= n) 324133359Sobrien return 0; 325103373Sobrien data_start += 2 + old[data_start] + old[data_start + 1] * 256; 326133359Sobrien } 327103373Sobrien if (flg & FNAME) { 328133359Sobrien while(data_start < n && old[data_start]) 329103373Sobrien data_start++; 330103373Sobrien data_start++; 331103373Sobrien } 332103373Sobrien if(flg & FCOMMENT) { 333133359Sobrien while(data_start < n && old[data_start]) 334103373Sobrien data_start++; 335103373Sobrien data_start++; 336103373Sobrien } 337103373Sobrien if(flg & FHCRC) 338103373Sobrien data_start += 2; 339103373Sobrien 340133359Sobrien if (data_start >= n) 341133359Sobrien return 0; 342186690Sobrien if ((*newch = CAST(unsigned char *, malloc(HOWMANY + 1))) == NULL) { 343103373Sobrien return 0; 344103373Sobrien } 345103373Sobrien 346133359Sobrien /* XXX: const castaway, via strchr */ 347133359Sobrien z.next_in = (Bytef *)strchr((const char 
*)old + data_start, 348133359Sobrien old[data_start]); 349226048Sobrien z.avail_in = CAST(uint32_t, (n - data_start)); 350103373Sobrien z.next_out = *newch; 351103373Sobrien z.avail_out = HOWMANY; 352103373Sobrien z.zalloc = Z_NULL; 353103373Sobrien z.zfree = Z_NULL; 354103373Sobrien z.opaque = Z_NULL; 355103373Sobrien 356226048Sobrien /* LINTED bug in header macro */ 357103373Sobrien rc = inflateInit2(&z, -15); 358103373Sobrien if (rc != Z_OK) { 359133359Sobrien file_error(ms, 0, "zlib: %s", z.msg); 360103373Sobrien return 0; 361103373Sobrien } 362103373Sobrien 363103373Sobrien rc = inflate(&z, Z_SYNC_FLUSH); 364103373Sobrien if (rc != Z_OK && rc != Z_STREAM_END) { 365133359Sobrien file_error(ms, 0, "zlib: %s", z.msg); 366103373Sobrien return 0; 367103373Sobrien } 368103373Sobrien 369133359Sobrien n = (size_t)z.total_out; 370169962Sobrien (void)inflateEnd(&z); 371103373Sobrien 372103373Sobrien /* let's keep the nul-terminate tradition */ 373169942Sobrien (*newch)[n] = '\0'; 374103373Sobrien 375103373Sobrien return n; 376103373Sobrien} 377103373Sobrien#endif 378103373Sobrien 379133359Sobrienprivate size_t 380159764Sobrienuncompressbuf(struct magic_set *ms, int fd, size_t method, 381159764Sobrien const unsigned char *old, unsigned char **newch, size_t n) 382103373Sobrien{ 38368349Sobrien int fdin[2], fdout[2]; 384226048Sobrien ssize_t r; 385226048Sobrien pid_t pid; 38668349Sobrien 387175296Sobrien#ifdef BUILTIN_DECOMPRESS 388186690Sobrien /* FIXME: This doesn't cope with bzip2 */ 389103373Sobrien if (method == 2) 390133359Sobrien return uncompressgzipped(ms, old, newch, n); 391103373Sobrien#endif 392159764Sobrien (void)fflush(stdout); 393159764Sobrien (void)fflush(stderr); 394103373Sobrien 395159764Sobrien if ((fd != -1 && pipe(fdin) == -1) || pipe(fdout) == -1) { 396133359Sobrien file_error(ms, errno, "cannot create pipe"); 397169942Sobrien return NODATA; 39868349Sobrien } 399226048Sobrien switch (pid = fork()) { 40068349Sobrien case 0: /* child */ 40168349Sobrien 
(void) close(0); 402159764Sobrien if (fd != -1) { 403159764Sobrien (void) dup(fd); 404159764Sobrien (void) lseek(0, (off_t)0, SEEK_SET); 405159764Sobrien } else { 406159764Sobrien (void) dup(fdin[0]); 407159764Sobrien (void) close(fdin[0]); 408159764Sobrien (void) close(fdin[1]); 409159764Sobrien } 41068349Sobrien 41168349Sobrien (void) close(1); 41268349Sobrien (void) dup(fdout[1]); 41368349Sobrien (void) close(fdout[0]); 41468349Sobrien (void) close(fdout[1]); 415159764Sobrien#ifndef DEBUG 41668349Sobrien if (compr[method].silent) 417159764Sobrien (void)close(2); 418159764Sobrien#endif 41968349Sobrien 420169962Sobrien (void)execvp(compr[method].argv[0], 421169962Sobrien (char *const *)(intptr_t)compr[method].argv); 422159764Sobrien#ifdef DEBUG 423159764Sobrien (void)fprintf(stderr, "exec `%s' failed (%s)\n", 424159764Sobrien compr[method].argv[0], strerror(errno)); 425159764Sobrien#endif 42668349Sobrien exit(1); 42768349Sobrien /*NOTREACHED*/ 42868349Sobrien case -1: 429133359Sobrien file_error(ms, errno, "could not fork"); 430169942Sobrien return NODATA; 43168349Sobrien 43268349Sobrien default: /* parent */ 43368349Sobrien (void) close(fdout[1]); 434159764Sobrien if (fd == -1) { 435159764Sobrien (void) close(fdin[0]); 436159764Sobrien /* 437159764Sobrien * fork again, to avoid blocking because both 438159764Sobrien * pipes filled 439159764Sobrien */ 440159764Sobrien switch (fork()) { 441159764Sobrien case 0: /* child */ 442159764Sobrien (void)close(fdout[0]); 443169962Sobrien if (swrite(fdin[1], old, n) != (ssize_t)n) { 444159764Sobrien#ifdef DEBUG 445159764Sobrien (void)fprintf(stderr, 446159764Sobrien "Write failed (%s)\n", 447159764Sobrien strerror(errno)); 448159764Sobrien#endif 449159764Sobrien exit(1); 450159764Sobrien } 451159764Sobrien exit(0); 452159764Sobrien /*NOTREACHED*/ 453159764Sobrien 454159764Sobrien case -1: 455159764Sobrien#ifdef DEBUG 456159764Sobrien (void)fprintf(stderr, "Fork failed (%s)\n", 457159764Sobrien strerror(errno)); 
458159764Sobrien#endif 459133359Sobrien exit(1); 460159764Sobrien /*NOTREACHED*/ 461133359Sobrien 462159764Sobrien default: /* parent */ 463159764Sobrien break; 464159764Sobrien } 465159764Sobrien (void) close(fdin[1]); 466159764Sobrien fdin[1] = -1; 467159764Sobrien } 468133359Sobrien 469103373Sobrien if ((*newch = (unsigned char *) malloc(HOWMANY + 1)) == NULL) { 470159764Sobrien#ifdef DEBUG 471159764Sobrien (void)fprintf(stderr, "Malloc failed (%s)\n", 472159764Sobrien strerror(errno)); 473159764Sobrien#endif 47475937Sobrien n = 0; 47575937Sobrien goto err; 47675937Sobrien } 477169962Sobrien if ((r = sread(fdout[0], *newch, HOWMANY, 0)) <= 0) { 478159764Sobrien#ifdef DEBUG 479159764Sobrien (void)fprintf(stderr, "Read failed (%s)\n", 480159764Sobrien strerror(errno)); 481159764Sobrien#endif 48268349Sobrien free(*newch); 48375937Sobrien n = 0; 484133359Sobrien newch[0] = '\0'; 48575937Sobrien goto err; 486133359Sobrien } else { 487133359Sobrien n = r; 48868349Sobrien } 489103373Sobrien /* NUL terminate, as every buffer is handled here. */ 490169942Sobrien (*newch)[n] = '\0'; 49175937Sobrienerr: 49275937Sobrien if (fdin[1] != -1) 49375937Sobrien (void) close(fdin[1]); 49468349Sobrien (void) close(fdout[0]); 495133359Sobrien#ifdef WNOHANG 496226048Sobrien while (waitpid(pid, NULL, WNOHANG) != -1) 497133359Sobrien continue; 498133359Sobrien#else 499133359Sobrien (void)wait(NULL); 500133359Sobrien#endif 501192348Sdelphij (void) close(fdin[0]); 502192348Sdelphij 50368349Sobrien return n; 50468349Sobrien } 50568349Sobrien} 506226048Sobrien#endif 507