1/* pigz.c -- parallel implementation of gzip 2 * Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013 Mark Adler 3 * Version 2.3.1 9 Oct 2013 Mark Adler 4 */ 5 6/* 7 This software is provided 'as-is', without any express or implied 8 warranty. In no event will the author be held liable for any damages 9 arising from the use of this software. 10 11 Permission is granted to anyone to use this software for any purpose, 12 including commercial applications, and to alter it and redistribute it 13 freely, subject to the following restrictions: 14 15 1. The origin of this software must not be misrepresented; you must not 16 claim that you wrote the original software. If you use this software 17 in a product, an acknowledgment in the product documentation would be 18 appreciated but is not required. 19 2. Altered source versions must be plainly marked as such, and must not be 20 misrepresented as being the original software. 21 3. This notice may not be removed or altered from any source distribution. 22 23 Mark Adler 24 madler@alumni.caltech.edu 25 26 Mark accepts donations for providing this software. Donations are not 27 required or expected. Any amount that you feel is appropriate would be 28 appreciated. 
You can use this link: 29 30 https://www.paypal.com/cgi-bin/webscr?cmd=_s-xclick&hosted_button_id=536055 31 32 */ 33 34/* Version history: 35 1.0 17 Jan 2007 First version, pipe only 36 1.1 28 Jan 2007 Avoid void * arithmetic (some compilers don't get that) 37 Add note about requiring zlib 1.2.3 38 Allow compression level 0 (no compression) 39 Completely rewrite parallelism -- add a write thread 40 Use deflateSetDictionary() to make use of history 41 Tune argument defaults to best performance on four cores 42 1.2.1 1 Feb 2007 Add long command line options, add all gzip options 43 Add debugging options 44 1.2.2 19 Feb 2007 Add list (--list) function 45 Process file names on command line, write .gz output 46 Write name and time in gzip header, set output file time 47 Implement all command line options except --recursive 48 Add --keep option to prevent deleting input files 49 Add thread tracing information with -vv used 50 Copy crc32_combine() from zlib (shared libraries issue) 51 1.3 25 Feb 2007 Implement --recursive 52 Expand help to show all options 53 Show help if no arguments or output piping are provided 54 Process options in GZIP environment variable 55 Add progress indicator to write thread if --verbose 56 1.4 4 Mar 2007 Add --independent to facilitate damaged file recovery 57 Reallocate jobs for new --blocksize or --processes 58 Do not delete original if writing to stdout 59 Allow --processes 1, which does no threading 60 Add NOTHREAD define to compile without threads 61 Incorporate license text from zlib in source code 62 1.5 25 Mar 2007 Reinitialize jobs for new compression level 63 Copy attributes and owner from input file to output file 64 Add decompression and testing 65 Add -lt (or -ltv) to show all entries and proper lengths 66 Add decompression, testing, listing of LZW (.Z) files 67 Only generate and show trace log if DEBUG defined 68 Take "-" argument to mean read file from stdin 69 1.6 30 Mar 2007 Add zlib stream compression (--zlib), and 
decompression 70 1.7 29 Apr 2007 Decompress first entry of a zip file (if deflated) 71 Avoid empty deflate blocks at end of deflate stream 72 Show zlib check value (Adler-32) when listing 73 Don't complain when decompressing empty file 74 Warn about trailing junk for gzip and zlib streams 75 Make listings consistent, ignore gzip extra flags 76 Add zip stream compression (--zip) 77 1.8 13 May 2007 Document --zip option in help output 78 2.0 19 Oct 2008 Complete rewrite of thread usage and synchronization 79 Use polling threads and a pool of memory buffers 80 Remove direct pthread library use, hide in yarn.c 81 2.0.1 20 Oct 2008 Check version of zlib at compile time, need >= 1.2.3 82 2.1 24 Oct 2008 Decompress with read, write, inflate, and check threads 83 Remove spurious use of ctime_r(), ctime() more portable 84 Change application of job->calc lock to be a semaphore 85 Detect size of off_t at run time to select %lu vs. %llu 86 #define large file support macro even if not __linux__ 87 Remove _LARGEFILE64_SOURCE, _FILE_OFFSET_BITS is enough 88 Detect file-too-large error and report, blame build 89 Replace check combination routines with those from zlib 90 2.1.1 28 Oct 2008 Fix a leak for files with an integer number of blocks 91 Update for yarn 1.1 (yarn_prefix and yarn_abort) 92 2.1.2 30 Oct 2008 Work around use of beta zlib in production systems 93 2.1.3 8 Nov 2008 Don't use zlib combination routines, put back in pigz 94 2.1.4 9 Nov 2008 Fix bug when decompressing very short files 95 2.1.5 20 Jul 2009 Added 2008, 2009 to --license statement 96 Allow numeric parameter immediately after -p or -b 97 Enforce parameter after -p, -b, -s, before other options 98 Enforce numeric parameters to have only numeric digits 99 Try to determine the number of processors for -p default 100 Fix --suffix short option to be -S to match gzip [Bloch] 101 Decompress if executable named "unpigz" [Amundsen] 102 Add a little bit of testing to Makefile 103 2.1.6 17 Jan 2010 Added pigz.spec 
to distribution for RPM systems [Brown] 104 Avoid some compiler warnings 105 Process symbolic links if piping to stdout [Hoffstätte] 106 Decompress if executable named "gunzip" [Hoffstätte] 107 Allow ".tgz" suffix [Chernookiy] 108 Fix adler32 comparison on .zz files 109 2.1.7 17 Dec 2011 Avoid unused parameter warning in reenter() 110 Don't assume 2's complement ints in compress_thread() 111 Replicate gzip -cdf cat-like behavior 112 Replicate gzip -- option to suppress option decoding 113 Test output from make test instead of showing it 114 Updated pigz.spec to install unpigz, pigz.1 [Obermaier] 115 Add PIGZ environment variable [Mueller] 116 Replicate gzip suffix search when decoding or listing 117 Fix bug in load() to set in_left to zero on end of file 118 Do not check suffix when input file won't be modified 119 Decompress to stdout if name is "*cat" [Hayasaka] 120 Write data descriptor signature to be like Info-ZIP 121 Update and sort options list in help 122 Use CC variable for compiler in Makefile 123 Exit with code 2 if a warning has been issued 124 Fix thread synchronization problem when tracing 125 Change macro name MAX to MAX2 to avoid library conflicts 126 Determine number of processors on HP-UX [Lloyd] 127 2.2 31 Dec 2011 Check for expansion bound busting (e.g. 
modified zlib) 128 Make the "threads" list head global variable volatile 129 Fix construction and printing of 32-bit check values 130 Add --rsyncable functionality 131 2.2.1 1 Jan 2012 Fix bug in --rsyncable buffer management 132 2.2.2 1 Jan 2012 Fix another bug in --rsyncable buffer management 133 2.2.3 15 Jan 2012 Remove volatile in yarn.c 134 Reduce the number of input buffers 135 Change initial rsyncable hash to comparison value 136 Improve the efficiency of arriving at a byte boundary 137 Add thread portability #defines from yarn.c 138 Have rsyncable compression be independent of threading 139 Fix bug where constructed dictionaries not being used 140 2.2.4 11 Mar 2012 Avoid some return value warnings 141 Improve the portability of printing the off_t type 142 Check for existence of compress binary before using 143 Update zlib version checking to 1.2.6 for new functions 144 Fix bug in zip (-K) output 145 Fix license in pigz.spec 146 Remove thread portability #defines in pigz.c 147 2.2.5 28 Jul 2012 Avoid race condition in free_pool() 148 Change suffix to .tar when decompressing or listing .tgz 149 Print name of executable in error messages 150 Show help properly when the name is unpigz or gunzip 151 Fix permissions security problem before output is closed 152 2.3 3 Mar 2013 Don't complain about missing suffix on stdout 153 Put all global variables in a structure for readability 154 Do not decompress concatenated zlib streams (just gzip) 155 Add option for compression level 11 to use zopfli 156 Fix handling of junk after compressed data 157 2.3.1 9 Oct 2013 Fix builds of pigzt and pigzn to include zopfli 158 Add -lm, needed to link log function on some systems 159 Respect LDFLAGS in Makefile, use CFLAGS consistently 160 Add memory allocation tracking 161 Fix casting error in uncompressed length calculation 162 Update zopfli to Mar 10, 2013 Google state 163 Support zopfli in single thread case 164 Add -F, -I, -M, and -O options for zopfli tuning 165 */ 166 
167#define VERSION "pigz 2.3.1\n" 168 169/* To-do: 170 - make source portable for Windows, VMS, etc. (see gzip source code) 171 - make build portable (currently good for Unixish) 172 */ 173 174/* 175 pigz compresses using threads to make use of multiple processors and cores. 176 The input is broken up into 128 KB chunks with each compressed in parallel. 177 The individual check value for each chunk is also calculated in parallel. 178 The compressed data is written in order to the output, and a combined check 179 value is calculated from the individual check values. 180 181 The compressed data format generated is in the gzip, zlib, or single-entry 182 zip format using the deflate compression method. The compression produces 183 partial raw deflate streams which are concatenated by a single write thread 184 and wrapped with the appropriate header and trailer, where the trailer 185 contains the combined check value. 186 187 Each partial raw deflate stream is terminated by an empty stored block 188 (using the Z_SYNC_FLUSH option of zlib), in order to end that partial bit 189 stream at a byte boundary, unless that partial stream happens to already end 190 at a byte boundary (the latter requires zlib 1.2.6 or later). Ending on a 191 byte boundary allows the partial streams to be concatenated simply as 192 sequences of bytes. This adds a very small four to five byte overhead 193 (average 3.75 bytes) to the output for each input chunk. 194 195 The default input block size is 128K, but can be changed with the -b option. 196 The number of compress threads is set by default to 8, which can be changed 197 using the -p option. Specifying -p 1 avoids the use of threads entirely. 198 pigz will try to determine the number of processors in the machine, in which 199 case if that number is two or greater, pigz will use that as the default for 200 -p instead of 8. 
201 202 The input blocks, while compressed independently, have the last 32K of the 203 previous block loaded as a preset dictionary to preserve the compression 204 effectiveness of deflating in a single thread. This can be turned off using 205 the --independent or -i option, so that the blocks can be decompressed 206 independently for partial error recovery or for random access. 207 208 Decompression can't be parallelized, at least not without specially prepared 209 deflate streams for that purpose. As a result, pigz uses a single thread 210 (the main thread) for decompression, but will create three other threads for 211 reading, writing, and check calculation, which can speed up decompression 212 under some circumstances. Parallel decompression can be turned off by 213 specifying one process (-dp 1 or -tp 1). 214 215 pigz requires zlib 1.2.1 or later to allow setting the dictionary when doing 216 raw deflate. Since zlib 1.2.3 corrects security vulnerabilities in zlib 217 version 1.2.1 and 1.2.2, conditionals check for zlib 1.2.3 or later during 218 the compilation of pigz.c. zlib 1.2.4 includes some improvements to 219 Z_FULL_FLUSH and deflateSetDictionary() that permit identical output for 220 pigz with and without threads, which is not possible with zlib 1.2.3. This 221 may be important for uses of pigz -R where small changes in the contents 222 should result in small changes in the archive for rsync. Note that due to 223 the details of how the lower levels of compression result in greater speed, 224 compression level 3 and below does not permit identical pigz output with 225 and without threads. 226 227 pigz uses the POSIX pthread library for thread control and communication, 228 through the yarn.h interface to yarn.c. yarn.c can be replaced with 229 equivalent implementations using other thread libraries. pigz can be 230 compiled with NOTHREAD #defined to not use threads at all (in which case 231 pigz will not be able to live up to the "parallel" in its name). 
232 */ 233 234/* 235 Details of parallel compression implementation: 236 237 When doing parallel compression, pigz uses the main thread to read the input 238 in 'size' sized chunks (see -b), and puts those in a compression job list, 239 each with a sequence number to keep track of the ordering. If it is not the 240 first chunk, then that job also points to the previous input buffer, from 241 which the last 32K will be used as a dictionary (unless -i is specified). 242 This sets a lower limit of 32K on 'size'. 243 244 pigz launches up to 'procs' compression threads (see -p). Each compression 245 thread continues to look for jobs in the compression list and perform those 246 jobs until instructed to return. When a job is pulled, the dictionary, if 247 provided, will be loaded into the deflate engine and then that input buffer 248 is dropped for reuse. Then the input data is compressed into an output 249 buffer that grows in size if necessary to hold the compressed data. The job 250 is then put into the write job list, sorted by the sequence number. The 251 compress thread however continues to calculate the check value on the input 252 data, either a CRC-32 or Adler-32, possibly in parallel with the write 253 thread writing the output data. Once that's done, the compress thread drops 254 the input buffer and also releases the lock on the check value so that the 255 write thread can combine it with the previous check values. The compress 256 thread has then completed that job, and goes to look for another. 257 258 All of the compress threads are left running and waiting even after the last 259 chunk is processed, so that they can support the next input to be compressed 260 (more than one input file on the command line). Once pigz is done, it will 261 call all the compress threads home (that'll do pig, that'll do). 262 263 Before starting to read the input, the main thread launches the write thread 264 so that it is ready pick up jobs immediately. 
The compress thread puts the 265 write jobs in the list in sequence sorted order, so that the first job in 266 the list is always has the lowest sequence number. The write thread waits 267 for the next write job in sequence, and then gets that job. The job still 268 holds its input buffer, from which the write thread gets the input buffer 269 length for use in check value combination. Then the write thread drops that 270 input buffer to allow its reuse. Holding on to the input buffer until the 271 write thread starts also has the benefit that the read and compress threads 272 can't get way ahead of the write thread and build up a large backlog of 273 unwritten compressed data. The write thread will write the compressed data, 274 drop the output buffer, and then wait for the check value to be unlocked 275 by the compress thread. Then the write thread combines the check value for 276 this chunk with the total check value for eventual use in the trailer. If 277 this is not the last chunk, the write thread then goes back to look for the 278 next output chunk in sequence. After the last chunk, the write thread 279 returns and joins the main thread. Unlike the compress threads, a new write 280 thread is launched for each input stream. The write thread writes the 281 appropriate header and trailer around the compressed data. 282 283 The input and output buffers are reused through their collection in pools. 284 Each buffer has a use count, which when decremented to zero returns the 285 buffer to the respective pool. Each input buffer has up to three parallel 286 uses: as the input for compression, as the data for the check value 287 calculation, and as a dictionary for compression. Each output buffer has 288 only one use, which is as the output of compression followed serially as 289 data to be written. The input pool is limited in the number of buffers, so 290 that reading does not get way ahead of compression and eat up memory with 291 more input than can be used. 
The limit is approximately two times the 292 number of compression threads. In the case that reading is fast as compared 293 to compression, that number allows a second set of buffers to be read while 294 the first set of compressions are being performed. The number of output 295 buffers is not directly limited, but is indirectly limited by the release of 296 input buffers to about the same number. 297 */ 298 299/* use large file functions if available */ 300#define _FILE_OFFSET_BITS 64 301 302/* included headers and what is expected from each */ 303#include <stdio.h> /* fflush(), fprintf(), fputs(), getchar(), putc(), */ 304 /* puts(), printf(), vasprintf(), stderr, EOF, NULL, 305 SEEK_END, size_t, off_t */ 306#include <stdlib.h> /* exit(), malloc(), free(), realloc(), atol(), */ 307 /* atoi(), getenv() */ 308#include <stdarg.h> /* va_start(), va_end(), va_list */ 309#include <string.h> /* memset(), memchr(), memcpy(), strcmp(), strcpy() */ 310 /* strncpy(), strlen(), strcat(), strrchr() */ 311#include <errno.h> /* errno, EEXIST */ 312#include <assert.h> /* assert() */ 313#include <time.h> /* ctime(), time(), time_t, mktime() */ 314#include <signal.h> /* signal(), SIGINT */ 315#include <sys/types.h> /* ssize_t */ 316#include <sys/stat.h> /* chmod(), stat(), fstat(), lstat(), struct stat, */ 317 /* S_IFDIR, S_IFLNK, S_IFMT, S_IFREG */ 318#include <sys/time.h> /* utimes(), gettimeofday(), struct timeval */ 319#include <unistd.h> /* unlink(), _exit(), read(), write(), close(), */ 320 /* lseek(), isatty(), chown() */ 321#include <fcntl.h> /* open(), O_CREAT, O_EXCL, O_RDONLY, O_TRUNC, */ 322 /* O_WRONLY */ 323#include <dirent.h> /* opendir(), readdir(), closedir(), DIR, */ 324 /* struct dirent */ 325#include <limits.h> /* PATH_MAX, UINT_MAX, INT_MAX */ 326#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 327# include <inttypes.h> /* intmax_t */ 328#endif 329 330#ifdef DEBUG 331# if defined(__APPLE__) 332# include <malloc/malloc.h> 333# define MALLOC_SIZE(p) 
malloc_size(p) 334# elif defined (__linux) 335# include <malloc.h> 336# define MALLOC_SIZE(p) malloc_usable_size(p) 337# elif defined (_WIN32) || defined(_WIN64) 338# include <malloc.h> 339# define MALLOC_SIZE(p) _msize(p) 340# else 341# define MALLOC_SIZE(p) (0) 342# endif 343#endif 344 345#ifdef __hpux 346# include <sys/param.h> 347# include <sys/pstat.h> 348#endif 349 350#include "zlib.h" /* deflateInit2(), deflateReset(), deflate(), */ 351 /* deflateEnd(), deflateSetDictionary(), crc32(), 352 inflateBackInit(), inflateBack(), inflateBackEnd(), 353 Z_DEFAULT_COMPRESSION, Z_DEFAULT_STRATEGY, 354 Z_DEFLATED, Z_NO_FLUSH, Z_NULL, Z_OK, 355 Z_SYNC_FLUSH, z_stream */ 356#if !defined(ZLIB_VERNUM) || ZLIB_VERNUM < 0x1230 357# error Need zlib version 1.2.3 or later 358#endif 359 360#ifndef NOTHREAD 361# include "yarn.h" /* thread, launch(), join(), join_all(), */ 362 /* lock, new_lock(), possess(), twist(), wait_for(), 363 release(), peek_lock(), free_lock(), yarn_name */ 364#endif 365#include "zopfli/deflate.h" /* ZopfliDeflatePart(), ZopfliInitOptions(), 366 ZopfliOptions */ 367 368/* for local functions and globals */ 369#define local static 370 371/* prevent end-of-line conversions on MSDOSish operating systems */ 372#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 373# include <io.h> /* setmode(), O_BINARY */ 374# define SET_BINARY_MODE(fd) setmode(fd, O_BINARY) 375#else 376# define SET_BINARY_MODE(fd) 377#endif 378 379/* release an allocated pointer, if allocated, and mark as unallocated */ 380#define RELEASE(ptr) \ 381 do { \ 382 if ((ptr) != NULL) { \ 383 FREE(ptr); \ 384 ptr = NULL; \ 385 } \ 386 } while (0) 387 388/* sliding dictionary size for deflate */ 389#define DICT 32768U 390 391/* largest power of 2 that fits in an unsigned int -- used to limit requests 392 to zlib functions that use unsigned int lengths */ 393#define MAXP2 (UINT_MAX - (UINT_MAX >> 1)) 394 395/* rsyncable constants -- RSYNCBITS is the number of bits in the mask 
for 396 comparison. For random input data, there will be a hit on average every 397 1<<RSYNCBITS bytes. So for an RSYNCBITS of 12, there will be an average of 398 one hit every 4096 bytes, resulting in a mean block size of 4096. RSYNCMASK 399 is the resulting bit mask. RSYNCHIT is what the hash value is compared to 400 after applying the mask. 401 402 The choice of 12 for RSYNCBITS is consistent with the original rsyncable 403 patch for gzip which also uses a 12-bit mask. This results in a relatively 404 small hit to compression, on the order of 1.5% to 3%. A mask of 13 bits can 405 be used instead if a hit of less than 1% to the compression is desired, at 406 the expense of more blocks transmitted for rsync updates. (Your mileage may 407 vary.) 408 409 This implementation of rsyncable uses a different hash algorithm than what 410 the gzip rsyncable patch uses in order to provide better performance in 411 several regards. The algorithm is simply to shift the hash value left one 412 bit and exclusive-or that with the next byte. This is masked to the number 413 of hash bits (RSYNCMASK) and compared to all ones except for a zero in the 414 top bit (RSYNCHIT). This rolling hash has a very small window of 19 bytes 415 (RSYNCBITS+7). The small window provides the benefit of much more rapid 416 resynchronization after a change, than does the 4096-byte window of the gzip 417 rsyncable patch. 418 419 The comparison value is chosen to avoid matching any repeated bytes or short 420 sequences. The gzip rsyncable patch on the other hand uses a sum and zero 421 for comparison, which results in certain bad behaviors, such as always 422 matching everywhere in a long sequence of zeros. Such sequences occur 423 frequently in tar files. 424 425 This hash efficiently discards history older than 19 bytes simply by 426 shifting that data past the top of the mask -- no history needs to be 427 retained to undo its impact on the hash value, as is needed for a sum. 
428 429 The choice of the comparison value (RSYNCHIT) has the virtue of avoiding 430 extremely short blocks. The shortest block is five bytes (RSYNCBITS-7) from 431 hit to hit, and is unlikely. Whereas with the gzip rsyncable algorithm, 432 blocks of one byte are not only possible, but in fact are the most likely 433 block size. 434 435 Thanks and acknowledgement to Kevin Day for his experimentation and insights 436 on rsyncable hash characteristics that led to some of the choices here. 437 */ 438#define RSYNCBITS 12 439#define RSYNCMASK ((1U << RSYNCBITS) - 1) 440#define RSYNCHIT (RSYNCMASK >> 1) 441 442/* initial pool counts and sizes -- INBUFS is the limit on the number of input 443 spaces as a function of the number of processors (used to throttle the 444 creation of compression jobs), OUTPOOL is the initial size of the output 445 data buffer, chosen to make resizing of the buffer very unlikely and to 446 allow prepending with a dictionary for use as an input buffer for zopfli */ 447#define INBUFS(p) (((p)<<1)+3) 448#define OUTPOOL(s) ((s)+((s)>>4)+DICT) 449 450/* input buffer size */ 451#define BUF 32768U 452 453/* globals (modified by main thread only when it's the only thread) */ 454local struct { 455 char *prog; /* name by which pigz was invoked */ 456 int ind; /* input file descriptor */ 457 int outd; /* output file descriptor */ 458 char inf[PATH_MAX+1]; /* input file name (accommodate recursion) */ 459 char *outf; /* output file name (allocated if not NULL) */ 460 int verbosity; /* 0 = quiet, 1 = normal, 2 = verbose, 3 = trace */ 461 int headis; /* 1 to store name, 2 to store date, 3 both */ 462 int pipeout; /* write output to stdout even if file */ 463 int keep; /* true to prevent deletion of input file */ 464 int force; /* true to overwrite, compress links, cat */ 465 int form; /* gzip = 0, zlib = 1, zip = 2 or 3 */ 466 unsigned char magic1; /* first byte of possible header when decoding */ 467 int recurse; /* true to dive down into directory structure 
*/ 468 char *sufx; /* suffix to use (".gz" or user supplied) */ 469 char *name; /* name for gzip header */ 470 time_t mtime; /* time stamp from input file for gzip header */ 471 int list; /* true to list files instead of compress */ 472 int first; /* true if we need to print listing header */ 473 int decode; /* 0 to compress, 1 to decompress, 2 to test */ 474 int level; /* compression level */ 475 ZopfliOptions zopts; /* zopfli compression options */ 476 int rsync; /* true for rsync blocking */ 477 int procs; /* maximum number of compression threads (>= 1) */ 478 int setdict; /* true to initialize dictionary in each thread */ 479 size_t block; /* uncompressed input size per thread (>= 32K) */ 480 int warned; /* true if a warning has been given */ 481 482 /* saved gzip/zip header data for decompression, testing, and listing */ 483 time_t stamp; /* time stamp from gzip header */ 484 char *hname; /* name from header (allocated) */ 485 unsigned long zip_crc; /* local header crc */ 486 unsigned long zip_clen; /* local header compressed length */ 487 unsigned long zip_ulen; /* local header uncompressed length */ 488 489 /* globals for decompression and listing buffered reading */ 490 unsigned char in_buf[BUF]; /* input buffer */ 491 unsigned char *in_next; /* next unused byte in buffer */ 492 size_t in_left; /* number of unused bytes in buffer */ 493 int in_eof; /* true if reached end of file on input */ 494 int in_short; /* true if last read didn't fill buffer */ 495 off_t in_tot; /* total bytes read from input */ 496 off_t out_tot; /* total bytes written to output */ 497 unsigned long out_check; /* check value of output */ 498 499#ifndef NOTHREAD 500 /* globals for decompression parallel reading */ 501 unsigned char in_buf2[BUF]; /* second buffer for parallel reads */ 502 size_t in_len; /* data waiting in next buffer */ 503 int in_which; /* -1: start, 0: in_buf2, 1: in_buf */ 504 lock *load_state; /* value = 0 to wait, 1 to read a buffer */ 505 thread *load_thread; /* 
load_read() thread for joining */ 506#endif 507} g; 508 509/* display a complaint with the program name on stderr */ 510local int complain(char *fmt, ...) 511{ 512 va_list ap; 513 514 if (g.verbosity > 0) { 515 fprintf(stderr, "%s: ", g.prog); 516 va_start(ap, fmt); 517 vfprintf(stderr, fmt, ap); 518 va_end(ap); 519 putc('\n', stderr); 520 fflush(stderr); 521 g.warned = 1; 522 } 523 return 0; 524} 525 526/* exit with error, delete output file if in the middle of writing it */ 527local int bail(char *why, char *what) 528{ 529 if (g.outd != -1 && g.outf != NULL) 530 unlink(g.outf); 531 complain("abort: %s%s", why, what); 532 exit(1); 533 return 0; 534} 535 536#ifdef DEBUG 537 538/* memory tracking */ 539 540local struct mem_track_s { 541 size_t num; /* current number of allocations */ 542 size_t size; /* total size of current allocations */ 543 size_t max; /* maximum size of allocations */ 544#ifndef NOTHREAD 545 lock *lock; /* lock for access across threads */ 546#endif 547} mem_track; 548 549#ifndef NOTHREAD 550# define mem_track_grab(m) possess((m)->lock) 551# define mem_track_drop(m) release((m)->lock) 552#else 553# define mem_track_grab(m) 554# define mem_track_drop(m) 555#endif 556 557local void *malloc_track(struct mem_track_s *mem, size_t size) 558{ 559 void *ptr; 560 561 ptr = malloc(size); 562 if (ptr != NULL) { 563 size = MALLOC_SIZE(ptr); 564 mem_track_grab(mem); 565 mem->num++; 566 mem->size += size; 567 if (mem->size > mem->max) 568 mem->max = mem->size; 569 mem_track_drop(mem); 570 } 571 return ptr; 572} 573 574local void *realloc_track(struct mem_track_s *mem, void *ptr, size_t size) 575{ 576 size_t was; 577 578 if (ptr == NULL) 579 return malloc_track(mem, size); 580 was = MALLOC_SIZE(ptr); 581 ptr = realloc(ptr, size); 582 if (ptr != NULL) { 583 size = MALLOC_SIZE(ptr); 584 mem_track_grab(mem); 585 mem->size -= was; 586 mem->size += size; 587 if (mem->size > mem->max) 588 mem->max = mem->size; 589 mem_track_drop(mem); 590 } 591 return ptr; 592} 593 
594local void free_track(struct mem_track_s *mem, void *ptr) 595{ 596 size_t size; 597 598 if (ptr != NULL) { 599 size = MALLOC_SIZE(ptr); 600 mem_track_grab(mem); 601 mem->num--; 602 mem->size -= size; 603 mem_track_drop(mem); 604 free(ptr); 605 } 606} 607 608#ifndef NOTHREAD 609local void *yarn_malloc(size_t size) 610{ 611 return malloc_track(&mem_track, size); 612} 613 614local void yarn_free(void *ptr) 615{ 616 return free_track(&mem_track, ptr); 617} 618#endif 619 620local voidpf zlib_alloc(voidpf opaque, uInt items, uInt size) 621{ 622 return malloc_track(opaque, items * (size_t)size); 623} 624 625local void zlib_free(voidpf opaque, voidpf address) 626{ 627 free_track(opaque, address); 628} 629 630#define MALLOC(s) malloc_track(&mem_track, s) 631#define REALLOC(p, s) realloc_track(&mem_track, p, s) 632#define FREE(p) free_track(&mem_track, p) 633#define OPAQUE (&mem_track) 634#define ZALLOC zlib_alloc 635#define ZFREE zlib_free 636 637/* starting time of day for tracing */ 638local struct timeval start; 639 640/* trace log */ 641local struct log { 642 struct timeval when; /* time of entry */ 643 char *msg; /* message */ 644 struct log *next; /* next entry */ 645} *log_head, **log_tail = NULL; 646#ifndef NOTHREAD 647 local lock *log_lock = NULL; 648#endif 649 650/* maximum log entry length */ 651#define MAXMSG 256 652 653/* set up log (call from main thread before other threads launched) */ 654local void log_init(void) 655{ 656 if (log_tail == NULL) { 657 mem_track.num = 0; 658 mem_track.size = 0; 659 mem_track.max = 0; 660#ifndef NOTHREAD 661 mem_track.lock = new_lock(0); 662 yarn_mem(yarn_malloc, yarn_free); 663 log_lock = new_lock(0); 664#endif 665 log_head = NULL; 666 log_tail = &log_head; 667 } 668} 669 670/* add entry to trace log */ 671local void log_add(char *fmt, ...) 
672{ 673 struct timeval now; 674 struct log *me; 675 va_list ap; 676 char msg[MAXMSG]; 677 678 gettimeofday(&now, NULL); 679 me = MALLOC(sizeof(struct log)); 680 if (me == NULL) 681 bail("not enough memory", ""); 682 me->when = now; 683 va_start(ap, fmt); 684 vsnprintf(msg, MAXMSG, fmt, ap); 685 va_end(ap); 686 me->msg = MALLOC(strlen(msg) + 1); 687 if (me->msg == NULL) { 688 FREE(me); 689 bail("not enough memory", ""); 690 } 691 strcpy(me->msg, msg); 692 me->next = NULL; 693#ifndef NOTHREAD 694 assert(log_lock != NULL); 695 possess(log_lock); 696#endif 697 *log_tail = me; 698 log_tail = &(me->next); 699#ifndef NOTHREAD 700 twist(log_lock, BY, +1); 701#endif 702} 703 704/* pull entry from trace log and print it, return false if empty */ 705local int log_show(void) 706{ 707 struct log *me; 708 struct timeval diff; 709 710 if (log_tail == NULL) 711 return 0; 712#ifndef NOTHREAD 713 possess(log_lock); 714#endif 715 me = log_head; 716 if (me == NULL) { 717#ifndef NOTHREAD 718 release(log_lock); 719#endif 720 return 0; 721 } 722 log_head = me->next; 723 if (me->next == NULL) 724 log_tail = &log_head; 725#ifndef NOTHREAD 726 twist(log_lock, BY, -1); 727#endif 728 diff.tv_usec = me->when.tv_usec - start.tv_usec; 729 diff.tv_sec = me->when.tv_sec - start.tv_sec; 730 if (diff.tv_usec < 0) { 731 diff.tv_usec += 1000000L; 732 diff.tv_sec--; 733 } 734 fprintf(stderr, "trace %ld.%06ld %s\n", 735 (long)diff.tv_sec, (long)diff.tv_usec, me->msg); 736 fflush(stderr); 737 FREE(me->msg); 738 FREE(me); 739 return 1; 740} 741 742/* release log resources (need to do log_init() to use again) */ 743local void log_free(void) 744{ 745 struct log *me; 746 747 if (log_tail != NULL) { 748#ifndef NOTHREAD 749 possess(log_lock); 750#endif 751 while ((me = log_head) != NULL) { 752 log_head = me->next; 753 FREE(me->msg); 754 FREE(me); 755 } 756#ifndef NOTHREAD 757 twist(log_lock, TO, 0); 758 free_lock(log_lock); 759 log_lock = NULL; 760 yarn_mem(malloc, free); 761 free_lock(mem_track.lock); 
762#endif 763 log_tail = NULL; 764 } 765} 766 767/* show entries until no more, free log */ 768local void log_dump(void) 769{ 770 if (log_tail == NULL) 771 return; 772 while (log_show()) 773 ; 774 log_free(); 775 if (mem_track.num || mem_track.size) 776 complain("memory leak: %lu allocs of %lu bytes total", 777 mem_track.num, mem_track.size); 778 if (mem_track.max) 779 fprintf(stderr, "%lu bytes of memory used\n", mem_track.max); 780} 781 782/* debugging macro */ 783#define Trace(x) \ 784 do { \ 785 if (g.verbosity > 2) { \ 786 log_add x; \ 787 } \ 788 } while (0) 789 790#else /* !DEBUG */ 791 792#define MALLOC malloc 793#define REALLOC realloc 794#define FREE free 795#define OPAQUE Z_NULL 796#define ZALLOC Z_NULL 797#define ZFREE Z_NULL 798 799#define log_dump() 800#define Trace(x) 801 802#endif 803 804/* read up to len bytes into buf, repeating read() calls as needed */ 805local size_t readn(int desc, unsigned char *buf, size_t len) 806{ 807 ssize_t ret; 808 size_t got; 809 810 got = 0; 811 while (len) { 812 ret = read(desc, buf, len); 813 if (ret < 0) 814 bail("read error on ", g.inf); 815 if (ret == 0) 816 break; 817 buf += ret; 818 len -= ret; 819 got += ret; 820 } 821 return got; 822} 823 824/* write len bytes, repeating write() calls as needed */ 825local void writen(int desc, unsigned char *buf, size_t len) 826{ 827 ssize_t ret; 828 829 while (len) { 830 ret = write(desc, buf, len); 831 if (ret < 1) { 832 complain("write error code %d", errno); 833 bail("write error on ", g.outf); 834 } 835 buf += ret; 836 len -= ret; 837 } 838} 839 840/* convert Unix time to MS-DOS date and time, assuming current timezone 841 (you got a better idea?) 
*/ 842local unsigned long time2dos(time_t t) 843{ 844 struct tm *tm; 845 unsigned long dos; 846 847 if (t == 0) 848 t = time(NULL); 849 tm = localtime(&t); 850 if (tm->tm_year < 80 || tm->tm_year > 207) 851 return 0; 852 dos = (tm->tm_year - 80) << 25; 853 dos += (tm->tm_mon + 1) << 21; 854 dos += tm->tm_mday << 16; 855 dos += tm->tm_hour << 11; 856 dos += tm->tm_min << 5; 857 dos += (tm->tm_sec + 1) >> 1; /* round to double-seconds */ 858 return dos; 859} 860 861/* put a 4-byte integer into a byte array in LSB order or MSB order */ 862#define PUT2L(a,b) (*(a)=(b)&0xff,(a)[1]=(b)>>8) 863#define PUT4L(a,b) (PUT2L(a,(b)&0xffff),PUT2L((a)+2,(b)>>16)) 864#define PUT4M(a,b) (*(a)=(b)>>24,(a)[1]=(b)>>16,(a)[2]=(b)>>8,(a)[3]=(b)) 865 866/* write a gzip, zlib, or zip header using the information in the globals */ 867local unsigned long put_header(void) 868{ 869 unsigned long len; 870 unsigned char head[30]; 871 872 if (g.form > 1) { /* zip */ 873 /* write local header */ 874 PUT4L(head, 0x04034b50UL); /* local header signature */ 875 PUT2L(head + 4, 20); /* version needed to extract (2.0) */ 876 PUT2L(head + 6, 8); /* flags: data descriptor follows data */ 877 PUT2L(head + 8, 8); /* deflate */ 878 PUT4L(head + 10, time2dos(g.mtime)); 879 PUT4L(head + 14, 0); /* crc (not here) */ 880 PUT4L(head + 18, 0); /* compressed length (not here) */ 881 PUT4L(head + 22, 0); /* uncompressed length (not here) */ 882 PUT2L(head + 26, g.name == NULL ? 1 : /* length of name */ 883 strlen(g.name)); 884 PUT2L(head + 28, 9); /* length of extra field (see below) */ 885 writen(g.outd, head, 30); /* write local header */ 886 len = 30; 887 888 /* write file name (use "-" for stdin) */ 889 if (g.name == NULL) 890 writen(g.outd, (unsigned char *)"-", 1); 891 else 892 writen(g.outd, (unsigned char *)g.name, strlen(g.name)); 893 len += g.name == NULL ? 
1 : strlen(g.name); 894 895 /* write extended timestamp extra field block (9 bytes) */ 896 PUT2L(head, 0x5455); /* extended timestamp signature */ 897 PUT2L(head + 2, 5); /* number of data bytes in this block */ 898 head[4] = 1; /* flag presence of mod time */ 899 PUT4L(head + 5, g.mtime); /* mod time */ 900 writen(g.outd, head, 9); /* write extra field block */ 901 len += 9; 902 } 903 else if (g.form) { /* zlib */ 904 head[0] = 0x78; /* deflate, 32K window */ 905 head[1] = (g.level >= 9 ? 3 : 906 (g.level == 1 ? 0 : 907 (g.level >= 6 || g.level == Z_DEFAULT_COMPRESSION ? 908 1 : 2))) << 6; 909 head[1] += 31 - (((head[0] << 8) + head[1]) % 31); 910 writen(g.outd, head, 2); 911 len = 2; 912 } 913 else { /* gzip */ 914 head[0] = 31; 915 head[1] = 139; 916 head[2] = 8; /* deflate */ 917 head[3] = g.name != NULL ? 8 : 0; 918 PUT4L(head + 4, g.mtime); 919 head[8] = g.level >= 9 ? 2 : (g.level == 1 ? 4 : 0); 920 head[9] = 3; /* unix */ 921 writen(g.outd, head, 10); 922 len = 10; 923 if (g.name != NULL) 924 writen(g.outd, (unsigned char *)g.name, strlen(g.name) + 1); 925 if (g.name != NULL) 926 len += strlen(g.name) + 1; 927 } 928 return len; 929} 930 931/* write a gzip, zlib, or zip trailer */ 932local void put_trailer(unsigned long ulen, unsigned long clen, 933 unsigned long check, unsigned long head) 934{ 935 unsigned char tail[46]; 936 937 if (g.form > 1) { /* zip */ 938 unsigned long cent; 939 940 /* write data descriptor (as promised in local header) */ 941 PUT4L(tail, 0x08074b50UL); 942 PUT4L(tail + 4, check); 943 PUT4L(tail + 8, clen); 944 PUT4L(tail + 12, ulen); 945 writen(g.outd, tail, 16); 946 947 /* write central file header */ 948 PUT4L(tail, 0x02014b50UL); /* central header signature */ 949 tail[4] = 63; /* obeyed version 6.3 of the zip spec */ 950 tail[5] = 255; /* ignore external attributes */ 951 PUT2L(tail + 6, 20); /* version needed to extract (2.0) */ 952 PUT2L(tail + 8, 8); /* data descriptor is present */ 953 PUT2L(tail + 10, 8); /* deflate */ 954 
PUT4L(tail + 12, time2dos(g.mtime)); 955 PUT4L(tail + 16, check); /* crc */ 956 PUT4L(tail + 20, clen); /* compressed length */ 957 PUT4L(tail + 24, ulen); /* uncompressed length */ 958 PUT2L(tail + 28, g.name == NULL ? 1 : /* length of name */ 959 strlen(g.name)); 960 PUT2L(tail + 30, 9); /* length of extra field (see below) */ 961 PUT2L(tail + 32, 0); /* no file comment */ 962 PUT2L(tail + 34, 0); /* disk number 0 */ 963 PUT2L(tail + 36, 0); /* internal file attributes */ 964 PUT4L(tail + 38, 0); /* external file attributes (ignored) */ 965 PUT4L(tail + 42, 0); /* offset of local header */ 966 writen(g.outd, tail, 46); /* write central file header */ 967 cent = 46; 968 969 /* write file name (use "-" for stdin) */ 970 if (g.name == NULL) 971 writen(g.outd, (unsigned char *)"-", 1); 972 else 973 writen(g.outd, (unsigned char *)g.name, strlen(g.name)); 974 cent += g.name == NULL ? 1 : strlen(g.name); 975 976 /* write extended timestamp extra field block (9 bytes) */ 977 PUT2L(tail, 0x5455); /* extended timestamp signature */ 978 PUT2L(tail + 2, 5); /* number of data bytes in this block */ 979 tail[4] = 1; /* flag presence of mod time */ 980 PUT4L(tail + 5, g.mtime); /* mod time */ 981 writen(g.outd, tail, 9); /* write extra field block */ 982 cent += 9; 983 984 /* write end of central directory record */ 985 PUT4L(tail, 0x06054b50UL); /* end of central directory signature */ 986 PUT2L(tail + 4, 0); /* number of this disk */ 987 PUT2L(tail + 6, 0); /* disk with start of central directory */ 988 PUT2L(tail + 8, 1); /* number of entries on this disk */ 989 PUT2L(tail + 10, 1); /* total number of entries */ 990 PUT4L(tail + 12, cent); /* size of central directory */ 991 PUT4L(tail + 16, head + clen + 16); /* offset of central directory */ 992 PUT2L(tail + 20, 0); /* no zip file comment */ 993 writen(g.outd, tail, 22); /* write end of central directory record */ 994 } 995 else if (g.form) { /* zlib */ 996 PUT4M(tail, check); 997 writen(g.outd, tail, 4); 998 } 999 else { 
/* gzip */ 1000 PUT4L(tail, check); 1001 PUT4L(tail + 4, ulen); 1002 writen(g.outd, tail, 8); 1003 } 1004} 1005 1006/* compute check value depending on format */ 1007#define CHECK(a,b,c) (g.form == 1 ? adler32(a,b,c) : crc32(a,b,c)) 1008 1009#ifndef NOTHREAD 1010/* -- threaded portions of pigz -- */ 1011 1012/* -- check value combination routines for parallel calculation -- */ 1013 1014#define COMB(a,b,c) (g.form == 1 ? adler32_comb(a,b,c) : crc32_comb(a,b,c)) 1015/* combine two crc-32's or two adler-32's (copied from zlib 1.2.3 so that pigz 1016 can be compatible with older versions of zlib) */ 1017 1018/* we copy the combination routines from zlib here, in order to avoid 1019 linkage issues with the zlib 1.2.3 builds on Sun, Ubuntu, and others */ 1020 1021local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec) 1022{ 1023 unsigned long sum; 1024 1025 sum = 0; 1026 while (vec) { 1027 if (vec & 1) 1028 sum ^= *mat; 1029 vec >>= 1; 1030 mat++; 1031 } 1032 return sum; 1033} 1034 1035local void gf2_matrix_square(unsigned long *square, unsigned long *mat) 1036{ 1037 int n; 1038 1039 for (n = 0; n < 32; n++) 1040 square[n] = gf2_matrix_times(mat, mat[n]); 1041} 1042 1043local unsigned long crc32_comb(unsigned long crc1, unsigned long crc2, 1044 size_t len2) 1045{ 1046 int n; 1047 unsigned long row; 1048 unsigned long even[32]; /* even-power-of-two zeros operator */ 1049 unsigned long odd[32]; /* odd-power-of-two zeros operator */ 1050 1051 /* degenerate case */ 1052 if (len2 == 0) 1053 return crc1; 1054 1055 /* put operator for one zero bit in odd */ 1056 odd[0] = 0xedb88320UL; /* CRC-32 polynomial */ 1057 row = 1; 1058 for (n = 1; n < 32; n++) { 1059 odd[n] = row; 1060 row <<= 1; 1061 } 1062 1063 /* put operator for two zero bits in even */ 1064 gf2_matrix_square(even, odd); 1065 1066 /* put operator for four zero bits in odd */ 1067 gf2_matrix_square(odd, even); 1068 1069 /* apply len2 zeros to crc1 (first square will put the operator for one 1070 
zero byte, eight zero bits, in even) */ 1071 do { 1072 /* apply zeros operator for this bit of len2 */ 1073 gf2_matrix_square(even, odd); 1074 if (len2 & 1) 1075 crc1 = gf2_matrix_times(even, crc1); 1076 len2 >>= 1; 1077 1078 /* if no more bits set, then done */ 1079 if (len2 == 0) 1080 break; 1081 1082 /* another iteration of the loop with odd and even swapped */ 1083 gf2_matrix_square(odd, even); 1084 if (len2 & 1) 1085 crc1 = gf2_matrix_times(odd, crc1); 1086 len2 >>= 1; 1087 1088 /* if no more bits set, then done */ 1089 } while (len2 != 0); 1090 1091 /* return combined crc */ 1092 crc1 ^= crc2; 1093 return crc1; 1094} 1095 1096#define BASE 65521U /* largest prime smaller than 65536 */ 1097#define LOW16 0xffff /* mask lower 16 bits */ 1098 1099local unsigned long adler32_comb(unsigned long adler1, unsigned long adler2, 1100 size_t len2) 1101{ 1102 unsigned long sum1; 1103 unsigned long sum2; 1104 unsigned rem; 1105 1106 /* the derivation of this formula is left as an exercise for the reader */ 1107 rem = (unsigned)(len2 % BASE); 1108 sum1 = adler1 & LOW16; 1109 sum2 = (rem * sum1) % BASE; 1110 sum1 += (adler2 & LOW16) + BASE - 1; 1111 sum2 += ((adler1 >> 16) & LOW16) + ((adler2 >> 16) & LOW16) + BASE - rem; 1112 if (sum1 >= BASE) sum1 -= BASE; 1113 if (sum1 >= BASE) sum1 -= BASE; 1114 if (sum2 >= (BASE << 1)) sum2 -= (BASE << 1); 1115 if (sum2 >= BASE) sum2 -= BASE; 1116 return sum1 | (sum2 << 16); 1117} 1118 1119/* -- pool of spaces for buffer management -- */ 1120 1121/* These routines manage a pool of spaces. Each pool specifies a fixed size 1122 buffer to be contained in each space. Each space has a use count, which 1123 when decremented to zero returns the space to the pool. If a space is 1124 requested from the pool and the pool is empty, a space is immediately 1125 created unless a specified limit on the number of spaces has been reached. 1126 Only if the limit is reached will it wait for a space to be returned to the 1127 pool. 
Each space knows what pool it belongs to, so that it can be returned. 1128 */ 1129 1130/* a space (one buffer for each space) */ 1131struct space { 1132 lock *use; /* use count -- return to pool when zero */ 1133 unsigned char *buf; /* buffer of size size */ 1134 size_t size; /* current size of this buffer */ 1135 size_t len; /* for application usage (initially zero) */ 1136 struct pool *pool; /* pool to return to */ 1137 struct space *next; /* for pool linked list */ 1138}; 1139 1140/* pool of spaces (one pool for each type needed) */ 1141struct pool { 1142 lock *have; /* unused spaces available, lock for list */ 1143 struct space *head; /* linked list of available buffers */ 1144 size_t size; /* size of new buffers in this pool */ 1145 int limit; /* number of new spaces allowed, or -1 */ 1146 int made; /* number of buffers made */ 1147}; 1148 1149/* initialize a pool (pool structure itself provided, not allocated) -- the 1150 limit is the maximum number of spaces in the pool, or -1 to indicate no 1151 limit, i.e., to never wait for a buffer to return to the pool */ 1152local void new_pool(struct pool *pool, size_t size, int limit) 1153{ 1154 pool->have = new_lock(0); 1155 pool->head = NULL; 1156 pool->size = size; 1157 pool->limit = limit; 1158 pool->made = 0; 1159} 1160 1161/* get a space from a pool -- the use count is initially set to one, so there 1162 is no need to call use_space() for the first use */ 1163local struct space *get_space(struct pool *pool) 1164{ 1165 struct space *space; 1166 1167 /* if can't create any more, wait for a space to show up */ 1168 possess(pool->have); 1169 if (pool->limit == 0) 1170 wait_for(pool->have, NOT_TO_BE, 0); 1171 1172 /* if a space is available, pull it from the list and return it */ 1173 if (pool->head != NULL) { 1174 space = pool->head; 1175 possess(space->use); 1176 pool->head = space->next; 1177 twist(pool->have, BY, -1); /* one less in pool */ 1178 twist(space->use, TO, 1); /* initially one user */ 1179 space->len 
= 0; 1180 return space; 1181 } 1182 1183 /* nothing available, don't want to wait, make a new space */ 1184 assert(pool->limit != 0); 1185 if (pool->limit > 0) 1186 pool->limit--; 1187 pool->made++; 1188 release(pool->have); 1189 space = MALLOC(sizeof(struct space)); 1190 if (space == NULL) 1191 bail("not enough memory", ""); 1192 space->use = new_lock(1); /* initially one user */ 1193 space->buf = MALLOC(pool->size); 1194 if (space->buf == NULL) 1195 bail("not enough memory", ""); 1196 space->size = pool->size; 1197 space->len = 0; 1198 space->pool = pool; /* remember the pool this belongs to */ 1199 return space; 1200} 1201 1202/* compute next size up by multiplying by about 2**(1/3) and round to the next 1203 power of 2 if we're close (so three applications results in doubling) -- if 1204 small, go up to at least 16, if overflow, go to max size_t value */ 1205local size_t grow(size_t size) 1206{ 1207 size_t was, top; 1208 int shift; 1209 1210 was = size; 1211 size += size >> 2; 1212 top = size; 1213 for (shift = 0; top > 7; shift++) 1214 top >>= 1; 1215 if (top == 7) 1216 size = (size_t)1 << (shift + 3); 1217 if (size < 16) 1218 size = 16; 1219 if (size <= was) 1220 size = (size_t)0 - 1; 1221 return size; 1222} 1223 1224/* increase the size of the buffer in space */ 1225local void grow_space(struct space *space) 1226{ 1227 size_t more; 1228 1229 /* compute next size up */ 1230 more = grow(space->size); 1231 if (more == space->size) 1232 bail("not enough memory", ""); 1233 1234 /* reallocate the buffer */ 1235 space->buf = REALLOC(space->buf, more); 1236 if (space->buf == NULL) 1237 bail("not enough memory", ""); 1238 space->size = more; 1239} 1240 1241/* increment the use count to require one more drop before returning this space 1242 to the pool */ 1243local void use_space(struct space *space) 1244{ 1245 possess(space->use); 1246 twist(space->use, BY, +1); 1247} 1248 1249/* drop a space, returning it to the pool if the use count is zero */ 1250local void 
drop_space(struct space *space) 1251{ 1252 int use; 1253 struct pool *pool; 1254 1255 possess(space->use); 1256 use = peek_lock(space->use); 1257 assert(use != 0); 1258 if (use == 1) { 1259 pool = space->pool; 1260 possess(pool->have); 1261 space->next = pool->head; 1262 pool->head = space; 1263 twist(pool->have, BY, +1); 1264 } 1265 twist(space->use, BY, -1); 1266} 1267 1268/* free the memory and lock resources of a pool -- return number of spaces for 1269 debugging and resource usage measurement */ 1270local int free_pool(struct pool *pool) 1271{ 1272 int count; 1273 struct space *space; 1274 1275 possess(pool->have); 1276 count = 0; 1277 while ((space = pool->head) != NULL) { 1278 pool->head = space->next; 1279 FREE(space->buf); 1280 free_lock(space->use); 1281 FREE(space); 1282 count++; 1283 } 1284 assert(count == pool->made); 1285 release(pool->have); 1286 free_lock(pool->have); 1287 return count; 1288} 1289 1290/* input and output buffer pools */ 1291local struct pool in_pool; 1292local struct pool out_pool; 1293local struct pool dict_pool; 1294local struct pool lens_pool; 1295 1296/* -- parallel compression -- */ 1297 1298/* compress or write job (passed from compress list to write list) -- if seq is 1299 equal to -1, compress_thread is instructed to return; if more is false then 1300 this is the last chunk, which after writing tells write_thread to return */ 1301struct job { 1302 long seq; /* sequence number */ 1303 int more; /* true if this is not the last chunk */ 1304 struct space *in; /* input data to compress */ 1305 struct space *out; /* dictionary or resulting compressed data */ 1306 struct space *lens; /* coded list of flush block lengths */ 1307 unsigned long check; /* check value for input data */ 1308 lock *calc; /* released when check calculation complete */ 1309 struct job *next; /* next job in the list (either list) */ 1310}; 1311 1312/* list of compress jobs (with tail for appending to list) */ 1313local lock *compress_have = NULL; /* number 
of compress jobs waiting */ 1314local struct job *compress_head, **compress_tail; 1315 1316/* list of write jobs */ 1317local lock *write_first; /* lowest sequence number in list */ 1318local struct job *write_head; 1319 1320/* number of compression threads running */ 1321local int cthreads = 0; 1322 1323/* write thread if running */ 1324local thread *writeth = NULL; 1325 1326/* setup job lists (call from main thread) */ 1327local void setup_jobs(void) 1328{ 1329 /* set up only if not already set up*/ 1330 if (compress_have != NULL) 1331 return; 1332 1333 /* allocate locks and initialize lists */ 1334 compress_have = new_lock(0); 1335 compress_head = NULL; 1336 compress_tail = &compress_head; 1337 write_first = new_lock(-1); 1338 write_head = NULL; 1339 1340 /* initialize buffer pools (initial size for out_pool not critical, since 1341 buffers will be grown in size if needed -- initial size chosen to make 1342 this unlikely -- same for lens_pool) */ 1343 new_pool(&in_pool, g.block, INBUFS(g.procs)); 1344 new_pool(&out_pool, OUTPOOL(g.block), -1); 1345 new_pool(&dict_pool, DICT, -1); 1346 new_pool(&lens_pool, g.block >> (RSYNCBITS - 1), -1); 1347} 1348 1349/* command the compress threads to all return, then join them all (call from 1350 main thread), free all the thread-related resources */ 1351local void finish_jobs(void) 1352{ 1353 struct job job; 1354 int caught; 1355 1356 /* only do this once */ 1357 if (compress_have == NULL) 1358 return; 1359 1360 /* command all of the extant compress threads to return */ 1361 possess(compress_have); 1362 job.seq = -1; 1363 job.next = NULL; 1364 compress_head = &job; 1365 compress_tail = &(job.next); 1366 twist(compress_have, BY, +1); /* will wake them all up */ 1367 1368 /* join all of the compress threads, verify they all came back */ 1369 caught = join_all(); 1370 Trace(("-- joined %d compress threads", caught)); 1371 assert(caught == cthreads); 1372 cthreads = 0; 1373 1374 /* free the resources */ 1375 caught = 
free_pool(&lens_pool); 1376 Trace(("-- freed %d block lengths buffers", caught)); 1377 caught = free_pool(&dict_pool); 1378 Trace(("-- freed %d dictionary buffers", caught)); 1379 caught = free_pool(&out_pool); 1380 Trace(("-- freed %d output buffers", caught)); 1381 caught = free_pool(&in_pool); 1382 Trace(("-- freed %d input buffers", caught)); 1383 free_lock(write_first); 1384 free_lock(compress_have); 1385 compress_have = NULL; 1386} 1387 1388/* compress all strm->avail_in bytes at strm->next_in to out->buf, updating 1389 out->len, grow the size of the buffer (out->size) if necessary -- respect 1390 the size limitations of the zlib stream data types (size_t may be larger 1391 than unsigned) */ 1392local void deflate_engine(z_stream *strm, struct space *out, int flush) 1393{ 1394 size_t room; 1395 1396 do { 1397 room = out->size - out->len; 1398 if (room == 0) { 1399 grow_space(out); 1400 room = out->size - out->len; 1401 } 1402 strm->next_out = out->buf + out->len; 1403 strm->avail_out = room < UINT_MAX ? 
(unsigned)room : UINT_MAX; 1404 (void)deflate(strm, flush); 1405 out->len = strm->next_out - out->buf; 1406 } while (strm->avail_out == 0); 1407 assert(strm->avail_in == 0); 1408} 1409 1410/* get the next compression job from the head of the list, compress and compute 1411 the check value on the input, and put a job in the write list with the 1412 results -- keep looking for more jobs, returning when a job is found with a 1413 sequence number of -1 (leave that job in the list for other incarnations to 1414 find) */ 1415local void compress_thread(void *dummy) 1416{ 1417 struct job *job; /* job pulled and working on */ 1418 struct job *here, **prior; /* pointers for inserting in write list */ 1419 unsigned long check; /* check value of input */ 1420 unsigned char *next; /* pointer for blocks, check value data */ 1421 size_t left; /* input left to process */ 1422 size_t len; /* remaining bytes to compress/check */ 1423#if ZLIB_VERNUM >= 0x1260 1424 int bits; /* deflate pending bits */ 1425#endif 1426 struct space *temp; /* temporary space for zopfli input */ 1427 z_stream strm; /* deflate stream */ 1428 1429 (void)dummy; 1430 1431 /* initialize the deflate stream for this thread */ 1432 strm.zfree = ZFREE; 1433 strm.zalloc = ZALLOC; 1434 strm.opaque = OPAQUE; 1435 if (deflateInit2(&strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != Z_OK) 1436 bail("not enough memory", ""); 1437 1438 /* keep looking for work */ 1439 for (;;) { 1440 /* get a job (like I tell my son) */ 1441 possess(compress_have); 1442 wait_for(compress_have, NOT_TO_BE, 0); 1443 job = compress_head; 1444 assert(job != NULL); 1445 if (job->seq == -1) 1446 break; 1447 compress_head = job->next; 1448 if (job->next == NULL) 1449 compress_tail = &compress_head; 1450 twist(compress_have, BY, -1); 1451 1452 /* got a job -- initialize and set the compression level (note that if 1453 deflateParams() is called immediately after deflateReset(), there is 1454 no need to initialize the input/output for the stream) 
*/ 1455 Trace(("-- compressing #%ld", job->seq)); 1456 if (g.level <= 9) { 1457 (void)deflateReset(&strm); 1458 (void)deflateParams(&strm, g.level, Z_DEFAULT_STRATEGY); 1459 } 1460 else { 1461 temp = get_space(&out_pool); 1462 temp->len = 0; 1463 } 1464 1465 /* set dictionary if provided, release that input or dictionary buffer 1466 (not NULL if g.setdict is true and if this is not the first work 1467 unit) */ 1468 if (job->out != NULL) { 1469 len = job->out->len; 1470 left = len < DICT ? len : DICT; 1471 if (g.level <= 9) 1472 deflateSetDictionary(&strm, job->out->buf + (len - left), 1473 left); 1474 else { 1475 memcpy(temp->buf, job->out->buf + (len - left), left); 1476 temp->len = left; 1477 } 1478 drop_space(job->out); 1479 } 1480 1481 /* set up input and output */ 1482 job->out = get_space(&out_pool); 1483 if (g.level <= 9) { 1484 strm.next_in = job->in->buf; 1485 strm.next_out = job->out->buf; 1486 } 1487 else 1488 memcpy(temp->buf + temp->len, job->in->buf, job->in->len); 1489 1490 /* compress each block, either flushing or finishing */ 1491 next = job->lens == NULL ? NULL : job->lens->buf; 1492 left = job->in->len; 1493 job->out->len = 0; 1494 do { 1495 /* decode next block length from blocks list */ 1496 len = next == NULL ? 
128 : *next++; 1497 if (len < 128) /* 64..32831 */ 1498 len = (len << 8) + (*next++) + 64; 1499 else if (len == 128) /* end of list */ 1500 len = left; 1501 else if (len < 192) /* 1..63 */ 1502 len &= 0x3f; 1503 else { /* 32832..4227135 */ 1504 len = ((len & 0x3f) << 16) + (*next++ << 8) + 32832U; 1505 len += *next++; 1506 } 1507 left -= len; 1508 1509 if (g.level <= 9) { 1510 /* run MAXP2-sized amounts of input through deflate -- this 1511 loop is needed for those cases where the unsigned type is 1512 smaller than the size_t type, or when len is close to the 1513 limit of the size_t type */ 1514 while (len > MAXP2) { 1515 strm.avail_in = MAXP2; 1516 deflate_engine(&strm, job->out, Z_NO_FLUSH); 1517 len -= MAXP2; 1518 } 1519 1520 /* run the last piece through deflate -- end on a byte 1521 boundary, using a sync marker if necessary, or finish the 1522 deflate stream if this is the last block */ 1523 strm.avail_in = (unsigned)len; 1524 if (left || job->more) { 1525#if ZLIB_VERNUM >= 0x1260 1526 deflate_engine(&strm, job->out, Z_BLOCK); 1527 1528 /* add enough empty blocks to get to a byte boundary */ 1529 (void)deflatePending(&strm, Z_NULL, &bits); 1530 if (bits & 1) 1531 deflate_engine(&strm, job->out, Z_SYNC_FLUSH); 1532 else if (bits & 7) { 1533 do { /* add static empty blocks */ 1534 bits = deflatePrime(&strm, 10, 2); 1535 assert(bits == Z_OK); 1536 (void)deflatePending(&strm, Z_NULL, &bits); 1537 } while (bits & 7); 1538 deflate_engine(&strm, job->out, Z_BLOCK); 1539 } 1540#else 1541 deflate_engine(&strm, job->out, Z_SYNC_FLUSH); 1542#endif 1543 } 1544 else 1545 deflate_engine(&strm, job->out, Z_FINISH); 1546 } 1547 else { 1548 /* compress len bytes using zopfli, bring to byte boundary */ 1549 unsigned char bits, *out; 1550 size_t outsize; 1551 1552 out = NULL; 1553 outsize = 0; 1554 bits = 0; 1555 ZopfliDeflatePart(&g.zopts, 2, !(left || job->more), 1556 temp->buf, temp->len, temp->len + len, 1557 &bits, &out, &outsize); 1558 assert(job->out->len + outsize + 5 
<= job->out->size); 1559 memcpy(job->out->buf + job->out->len, out, outsize); 1560 free(out); 1561 job->out->len += outsize; 1562 if (left || job->more) { 1563 bits &= 7; 1564 if (bits & 1) { 1565 if (bits == 7) 1566 job->out->buf[job->out->len++] = 0; 1567 job->out->buf[job->out->len++] = 0; 1568 job->out->buf[job->out->len++] = 0; 1569 job->out->buf[job->out->len++] = 0xff; 1570 job->out->buf[job->out->len++] = 0xff; 1571 } 1572 else if (bits) { 1573 do { 1574 job->out->buf[job->out->len - 1] += 2 << bits; 1575 job->out->buf[job->out->len++] = 0; 1576 bits += 2; 1577 } while (bits < 8); 1578 } 1579 } 1580 temp->len += len; 1581 } 1582 } while (left); 1583 if (g.level > 9) 1584 drop_space(temp); 1585 if (job->lens != NULL) { 1586 drop_space(job->lens); 1587 job->lens = NULL; 1588 } 1589 Trace(("-- compressed #%ld%s", job->seq, job->more ? "" : " (last)")); 1590 1591 /* reserve input buffer until check value has been calculated */ 1592 use_space(job->in); 1593 1594 /* insert write job in list in sorted order, alert write thread */ 1595 possess(write_first); 1596 prior = &write_head; 1597 while ((here = *prior) != NULL) { 1598 if (here->seq > job->seq) 1599 break; 1600 prior = &(here->next); 1601 } 1602 job->next = here; 1603 *prior = job; 1604 twist(write_first, TO, write_head->seq); 1605 1606 /* calculate the check value in parallel with writing, alert the write 1607 thread that the calculation is complete, and drop this usage of the 1608 input buffer */ 1609 len = job->in->len; 1610 next = job->in->buf; 1611 check = CHECK(0L, Z_NULL, 0); 1612 while (len > MAXP2) { 1613 check = CHECK(check, next, MAXP2); 1614 len -= MAXP2; 1615 next += MAXP2; 1616 } 1617 check = CHECK(check, next, (unsigned)len); 1618 drop_space(job->in); 1619 job->check = check; 1620 Trace(("-- checked #%ld%s", job->seq, job->more ? 
"" : " (last)")); 1621 possess(job->calc); 1622 twist(job->calc, TO, 1); 1623 1624 /* done with that one -- go find another job */ 1625 } 1626 1627 /* found job with seq == -1 -- free deflate memory and return to join */ 1628 release(compress_have); 1629 (void)deflateEnd(&strm); 1630} 1631 1632/* collect the write jobs off of the list in sequence order and write out the 1633 compressed data until the last chunk is written -- also write the header and 1634 trailer and combine the individual check values of the input buffers */ 1635local void write_thread(void *dummy) 1636{ 1637 long seq; /* next sequence number looking for */ 1638 struct job *job; /* job pulled and working on */ 1639 size_t len; /* input length */ 1640 int more; /* true if more chunks to write */ 1641 unsigned long head; /* header length */ 1642 unsigned long ulen; /* total uncompressed size (overflow ok) */ 1643 unsigned long clen; /* total compressed size (overflow ok) */ 1644 unsigned long check; /* check value of uncompressed data */ 1645 1646 (void)dummy; 1647 1648 /* build and write header */ 1649 Trace(("-- write thread running")); 1650 head = put_header(); 1651 1652 /* process output of compress threads until end of input */ 1653 ulen = clen = 0; 1654 check = CHECK(0L, Z_NULL, 0); 1655 seq = 0; 1656 do { 1657 /* get next write job in order */ 1658 possess(write_first); 1659 wait_for(write_first, TO_BE, seq); 1660 job = write_head; 1661 write_head = job->next; 1662 twist(write_first, TO, write_head == NULL ? -1 : write_head->seq); 1663 1664 /* update lengths, save uncompressed length for COMB */ 1665 more = job->more; 1666 len = job->in->len; 1667 drop_space(job->in); 1668 ulen += (unsigned long)len; 1669 clen += (unsigned long)(job->out->len); 1670 1671 /* write the compressed data and drop the output buffer */ 1672 Trace(("-- writing #%ld", seq)); 1673 writen(g.outd, job->out->buf, job->out->len); 1674 drop_space(job->out); 1675 Trace(("-- wrote #%ld%s", seq, more ? 
"" : " (last)")); 1676 1677 /* wait for check calculation to complete, then combine, once 1678 the compress thread is done with the input, release it */ 1679 possess(job->calc); 1680 wait_for(job->calc, TO_BE, 1); 1681 release(job->calc); 1682 check = COMB(check, job->check, len); 1683 1684 /* free the job */ 1685 free_lock(job->calc); 1686 FREE(job); 1687 1688 /* get the next buffer in sequence */ 1689 seq++; 1690 } while (more); 1691 1692 /* write trailer */ 1693 put_trailer(ulen, clen, check, head); 1694 1695 /* verify no more jobs, prepare for next use */ 1696 possess(compress_have); 1697 assert(compress_head == NULL && peek_lock(compress_have) == 0); 1698 release(compress_have); 1699 possess(write_first); 1700 assert(write_head == NULL); 1701 twist(write_first, TO, -1); 1702} 1703 1704/* encode a hash hit to the block lengths list -- hit == 0 ends the list */ 1705local void append_len(struct job *job, size_t len) 1706{ 1707 struct space *lens; 1708 1709 assert(len < 4227136UL); 1710 if (job->lens == NULL) 1711 job->lens = get_space(&lens_pool); 1712 lens = job->lens; 1713 if (lens->size < lens->len + 3) 1714 grow_space(lens); 1715 if (len < 64) 1716 lens->buf[lens->len++] = len + 128; 1717 else if (len < 32832U) { 1718 len -= 64; 1719 lens->buf[lens->len++] = len >> 8; 1720 lens->buf[lens->len++] = len; 1721 } 1722 else { 1723 len -= 32832U; 1724 lens->buf[lens->len++] = (len >> 16) + 192; 1725 lens->buf[lens->len++] = len >> 8; 1726 lens->buf[lens->len++] = len; 1727 } 1728} 1729 1730/* compress ind to outd, using multiple threads for the compression and check 1731 value calculations and one other thread for writing the output -- compress 1732 threads will be launched and left running (waiting actually) to support 1733 subsequent calls of parallel_compress() */ 1734local void parallel_compress(void) 1735{ 1736 long seq; /* sequence number */ 1737 struct space *curr; /* input data to compress */ 1738 struct space *next; /* input data that follows curr */ 1739 
struct space *hold; /* input data that follows next */ 1740 struct space *dict; /* dictionary for next compression */ 1741 struct job *job; /* job for compress, then write */ 1742 int more; /* true if more input to read */ 1743 unsigned hash; /* hash for rsyncable */ 1744 unsigned char *scan; /* next byte to compute hash on */ 1745 unsigned char *end; /* after end of data to compute hash on */ 1746 unsigned char *last; /* position after last hit */ 1747 size_t left; /* last hit in curr to end of curr */ 1748 size_t len; /* for various length computations */ 1749 1750 /* if first time or after an option change, setup the job lists */ 1751 setup_jobs(); 1752 1753 /* start write thread */ 1754 writeth = launch(write_thread, NULL); 1755 1756 /* read from input and start compress threads (write thread will pick up 1757 the output of the compress threads) */ 1758 seq = 0; 1759 next = get_space(&in_pool); 1760 next->len = readn(g.ind, next->buf, next->size); 1761 hold = NULL; 1762 dict = NULL; 1763 scan = next->buf; 1764 hash = RSYNCHIT; 1765 left = 0; 1766 do { 1767 /* create a new job */ 1768 job = MALLOC(sizeof(struct job)); 1769 if (job == NULL) 1770 bail("not enough memory", ""); 1771 job->calc = new_lock(0); 1772 1773 /* update input spaces */ 1774 curr = next; 1775 next = hold; 1776 hold = NULL; 1777 1778 /* get more input if we don't already have some */ 1779 if (next == NULL) { 1780 next = get_space(&in_pool); 1781 next->len = readn(g.ind, next->buf, next->size); 1782 } 1783 1784 /* if rsyncable, generate block lengths and prepare curr for job to 1785 likely have less than size bytes (up to the last hash hit) */ 1786 job->lens = NULL; 1787 if (g.rsync && curr->len) { 1788 /* compute the hash function starting where we last left off to 1789 cover either size bytes or to EOF, whichever is less, through 1790 the data in curr (and in the next loop, through next) -- save 1791 the block lengths resulting from the hash hits in the job->lens 1792 list */ 1793 if (left == 
0) { 1794 /* scan is in curr */ 1795 last = curr->buf; 1796 end = curr->buf + curr->len; 1797 while (scan < end) { 1798 hash = ((hash << 1) ^ *scan++) & RSYNCMASK; 1799 if (hash == RSYNCHIT) { 1800 len = scan - last; 1801 append_len(job, len); 1802 last = scan; 1803 } 1804 } 1805 1806 /* continue scan in next */ 1807 left = scan - last; 1808 scan = next->buf; 1809 } 1810 1811 /* scan in next for enough bytes to fill curr, or what is available 1812 in next, whichever is less (if next isn't full, then we're at 1813 the end of the file) -- the bytes in curr since the last hit, 1814 stored in left, counts towards the size of the first block */ 1815 last = next->buf; 1816 len = curr->size - curr->len; 1817 if (len > next->len) 1818 len = next->len; 1819 end = next->buf + len; 1820 while (scan < end) { 1821 hash = ((hash << 1) ^ *scan++) & RSYNCMASK; 1822 if (hash == RSYNCHIT) { 1823 len = (scan - last) + left; 1824 left = 0; 1825 append_len(job, len); 1826 last = scan; 1827 } 1828 } 1829 append_len(job, 0); 1830 1831 /* create input in curr for job up to last hit or entire buffer if 1832 no hits at all -- save remainder in next and possibly hold */ 1833 len = (job->lens->len == 1 ? 
scan : last) - next->buf; 1834 if (len) { 1835 /* got hits in next, or no hits in either -- copy to curr */ 1836 memcpy(curr->buf + curr->len, next->buf, len); 1837 curr->len += len; 1838 memmove(next->buf, next->buf + len, next->len - len); 1839 next->len -= len; 1840 scan -= len; 1841 left = 0; 1842 } 1843 else if (job->lens->len != 1 && left && next->len) { 1844 /* had hits in curr, but none in next, and last hit in curr 1845 wasn't right at the end, so we have input there to save -- 1846 use curr up to the last hit, save the rest, moving next to 1847 hold */ 1848 hold = next; 1849 next = get_space(&in_pool); 1850 memcpy(next->buf, curr->buf + (curr->len - left), left); 1851 next->len = left; 1852 curr->len -= left; 1853 } 1854 else { 1855 /* else, last match happened to be right at the end of curr, 1856 or we're at the end of the input compressing the rest */ 1857 left = 0; 1858 } 1859 } 1860 1861 /* compress curr->buf to curr->len -- compress thread will drop curr */ 1862 job->in = curr; 1863 1864 /* set job->more if there is more to compress after curr */ 1865 more = next->len != 0; 1866 job->more = more; 1867 1868 /* provide dictionary for this job, prepare dictionary for next job */ 1869 job->out = dict; 1870 if (more && g.setdict) { 1871 if (curr->len >= DICT || job->out == NULL) { 1872 dict = curr; 1873 use_space(dict); 1874 } 1875 else { 1876 dict = get_space(&dict_pool); 1877 len = DICT - curr->len; 1878 memcpy(dict->buf, job->out->buf + (job->out->len - len), len); 1879 memcpy(dict->buf + len, curr->buf, curr->len); 1880 dict->len = DICT; 1881 } 1882 } 1883 1884 /* preparation of job is complete */ 1885 job->seq = seq; 1886 Trace(("-- read #%ld%s", seq, more ? 
"" : " (last)")); 1887 if (++seq < 1) 1888 bail("input too long: ", g.inf); 1889 1890 /* start another compress thread if needed */ 1891 if (cthreads < seq && cthreads < g.procs) { 1892 (void)launch(compress_thread, NULL); 1893 cthreads++; 1894 } 1895 1896 /* put job at end of compress list, let all the compressors know */ 1897 possess(compress_have); 1898 job->next = NULL; 1899 *compress_tail = job; 1900 compress_tail = &(job->next); 1901 twist(compress_have, BY, +1); 1902 } while (more); 1903 drop_space(next); 1904 1905 /* wait for the write thread to complete (we leave the compress threads out 1906 there and waiting in case there is another stream to compress) */ 1907 join(writeth); 1908 writeth = NULL; 1909 Trace(("-- write thread joined")); 1910} 1911 1912#endif 1913 1914/* repeated code in single_compress to compress available input and write it */ 1915#define DEFLATE_WRITE(flush) \ 1916 do { \ 1917 do { \ 1918 strm->avail_out = out_size; \ 1919 strm->next_out = out; \ 1920 (void)deflate(strm, flush); \ 1921 writen(g.outd, out, out_size - strm->avail_out); \ 1922 clen += out_size - strm->avail_out; \ 1923 } while (strm->avail_out == 0); \ 1924 assert(strm->avail_in == 0); \ 1925 } while (0) 1926 1927/* do a simple compression in a single thread from ind to outd -- if reset is 1928 true, instead free the memory that was allocated and retained for input, 1929 output, and deflate */ 1930local void single_compress(int reset) 1931{ 1932 size_t got; /* amount of data in in[] */ 1933 size_t more; /* amount of data in next[] (0 if eof) */ 1934 size_t start; /* start of data in next[] */ 1935 size_t have; /* bytes in current block for -i */ 1936 size_t hist; /* offset of permitted history */ 1937 int fresh; /* if true, reset compression history */ 1938 unsigned hash; /* hash for rsyncable */ 1939 unsigned char *scan; /* pointer for hash computation */ 1940 size_t left; /* bytes left to compress after hash hit */ 1941 unsigned long head; /* header length */ 1942 
unsigned long ulen; /* total uncompressed size (overflow ok) */ 1943 unsigned long clen; /* total compressed size (overflow ok) */ 1944 unsigned long check; /* check value of uncompressed data */ 1945 static unsigned out_size; /* size of output buffer */ 1946 static unsigned char *in, *next, *out; /* reused i/o buffers */ 1947 static z_stream *strm = NULL; /* reused deflate structure */ 1948 1949 /* if requested, just release the allocations and return */ 1950 if (reset) { 1951 if (strm != NULL) { 1952 (void)deflateEnd(strm); 1953 FREE(strm); 1954 FREE(out); 1955 FREE(next); 1956 FREE(in); 1957 strm = NULL; 1958 } 1959 return; 1960 } 1961 1962 /* initialize the deflate structure if this is the first time */ 1963 if (strm == NULL) { 1964 out_size = g.block > MAXP2 ? MAXP2 : (unsigned)g.block; 1965 if ((in = MALLOC(g.block + DICT)) == NULL || 1966 (next = MALLOC(g.block + DICT)) == NULL || 1967 (out = MALLOC(out_size)) == NULL || 1968 (strm = MALLOC(sizeof(z_stream))) == NULL) 1969 bail("not enough memory", ""); 1970 strm->zfree = ZFREE; 1971 strm->zalloc = ZALLOC; 1972 strm->opaque = OPAQUE; 1973 if (deflateInit2(strm, 6, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY) != 1974 Z_OK) 1975 bail("not enough memory", ""); 1976 } 1977 1978 /* write header */ 1979 head = put_header(); 1980 1981 /* set compression level in case it changed */ 1982 if (g.level <= 9) { 1983 (void)deflateReset(strm); 1984 (void)deflateParams(strm, g.level, Z_DEFAULT_STRATEGY); 1985 } 1986 1987 /* do raw deflate and calculate check value */ 1988 got = 0; 1989 more = readn(g.ind, next, g.block); 1990 ulen = (unsigned long)more; 1991 start = 0; 1992 hist = 0; 1993 clen = 0; 1994 have = 0; 1995 check = CHECK(0L, Z_NULL, 0); 1996 hash = RSYNCHIT; 1997 do { 1998 /* get data to compress, see if there is any more input */ 1999 if (got == 0) { 2000 scan = in; in = next; next = scan; 2001 strm->next_in = in + start; 2002 got = more; 2003 if (g.level > 9) { 2004 left = start + more - hist; 2005 if (left > DICT) 
2006 left = DICT; 2007 memcpy(next, in + ((start + more) - left), left); 2008 start = left; 2009 hist = 0; 2010 } 2011 else 2012 start = 0; 2013 more = readn(g.ind, next + start, g.block); 2014 ulen += (unsigned long)more; 2015 } 2016 2017 /* if rsyncable, compute hash until a hit or the end of the block */ 2018 left = 0; 2019 if (g.rsync && got) { 2020 scan = strm->next_in; 2021 left = got; 2022 do { 2023 if (left == 0) { 2024 /* went to the end -- if no more or no hit in size bytes, 2025 then proceed to do a flush or finish with got bytes */ 2026 if (more == 0 || got == g.block) 2027 break; 2028 2029 /* fill in[] with what's left there and as much as possible 2030 from next[] -- set up to continue hash hit search */ 2031 if (g.level > 9) { 2032 left = (strm->next_in - in) - hist; 2033 if (left > DICT) 2034 left = DICT; 2035 } 2036 memmove(in, strm->next_in - left, left + got); 2037 hist = 0; 2038 strm->next_in = in + left; 2039 scan = in + left + got; 2040 left = more > g.block - got ? 
g.block - got : more; 2041 memcpy(scan, next + start, left); 2042 got += left; 2043 more -= left; 2044 start += left; 2045 2046 /* if that emptied the next buffer, try to refill it */ 2047 if (more == 0) { 2048 more = readn(g.ind, next, g.block); 2049 ulen += (unsigned long)more; 2050 start = 0; 2051 } 2052 } 2053 left--; 2054 hash = ((hash << 1) ^ *scan++) & RSYNCMASK; 2055 } while (hash != RSYNCHIT); 2056 got -= left; 2057 } 2058 2059 /* clear history for --independent option */ 2060 fresh = 0; 2061 if (!g.setdict) { 2062 have += got; 2063 if (have > g.block) { 2064 fresh = 1; 2065 have = got; 2066 } 2067 } 2068 2069 if (g.level <= 9) { 2070 /* clear history if requested */ 2071 if (fresh) 2072 (void)deflateReset(strm); 2073 2074 /* compress MAXP2-size chunks in case unsigned type is small */ 2075 while (got > MAXP2) { 2076 strm->avail_in = MAXP2; 2077 check = CHECK(check, strm->next_in, strm->avail_in); 2078 DEFLATE_WRITE(Z_NO_FLUSH); 2079 got -= MAXP2; 2080 } 2081 2082 /* compress the remainder, emit a block, finish if end of input */ 2083 strm->avail_in = (unsigned)got; 2084 got = left; 2085 check = CHECK(check, strm->next_in, strm->avail_in); 2086 if (more || got) { 2087#if ZLIB_VERNUM >= 0x1260 2088 int bits; 2089 2090 DEFLATE_WRITE(Z_BLOCK); 2091 (void)deflatePending(strm, Z_NULL, &bits); 2092 if (bits & 1) 2093 DEFLATE_WRITE(Z_SYNC_FLUSH); 2094 else if (bits & 7) { 2095 do { 2096 bits = deflatePrime(strm, 10, 2); 2097 assert(bits == Z_OK); 2098 (void)deflatePending(strm, Z_NULL, &bits); 2099 } while (bits & 7); 2100 DEFLATE_WRITE(Z_NO_FLUSH); 2101 } 2102#else 2103 DEFLATE_WRITE(Z_SYNC_FLUSH); 2104#endif 2105 } 2106 else 2107 DEFLATE_WRITE(Z_FINISH); 2108 } 2109 else { 2110 /* compress got bytes using zopfli, bring to byte boundary */ 2111 unsigned char bits, *out; 2112 size_t outsize, off; 2113 2114 /* discard history if requested */ 2115 off = strm->next_in - in; 2116 if (fresh) 2117 hist = off; 2118 2119 out = NULL; 2120 outsize = 0; 2121 bits = 0; 2122 
ZopfliDeflatePart(&g.zopts, 2, !(more || left), 2123 in + hist, off - hist, (off - hist) + got, 2124 &bits, &out, &outsize); 2125 bits &= 7; 2126 if ((more || left) && bits) { 2127 if (bits & 1) { 2128 writen(g.outd, out, outsize); 2129 if (bits == 7) 2130 writen(g.outd, (unsigned char *)"\0", 1); 2131 writen(g.outd, (unsigned char *)"\0\0\xff\xff", 4); 2132 } 2133 else { 2134 assert(outsize > 0); 2135 writen(g.outd, out, outsize - 1); 2136 do { 2137 out[outsize - 1] += 2 << bits; 2138 writen(g.outd, out + outsize - 1, 1); 2139 out[outsize - 1] = 0; 2140 bits += 2; 2141 } while (bits < 8); 2142 writen(g.outd, out + outsize - 1, 1); 2143 } 2144 } 2145 else 2146 writen(g.outd, out, outsize); 2147 free(out); 2148 while (got > MAXP2) { 2149 check = CHECK(check, strm->next_in, MAXP2); 2150 strm->next_in += MAXP2; 2151 got -= MAXP2; 2152 } 2153 check = CHECK(check, strm->next_in, (unsigned)got); 2154 strm->next_in += got; 2155 got = left; 2156 } 2157 2158 /* do until no more input */ 2159 } while (more || got); 2160 2161 /* write trailer */ 2162 put_trailer(ulen, clen, check, head); 2163} 2164 2165/* --- decompression --- */ 2166 2167#ifndef NOTHREAD 2168/* parallel read thread */ 2169local void load_read(void *dummy) 2170{ 2171 size_t len; 2172 2173 (void)dummy; 2174 2175 Trace(("-- launched decompress read thread")); 2176 do { 2177 possess(g.load_state); 2178 wait_for(g.load_state, TO_BE, 1); 2179 g.in_len = len = readn(g.ind, g.in_which ? 
g.in_buf : g.in_buf2, BUF); 2180 Trace(("-- decompress read thread read %lu bytes", len)); 2181 twist(g.load_state, TO, 0); 2182 } while (len == BUF); 2183 Trace(("-- exited decompress read thread")); 2184} 2185#endif 2186 2187/* load() is called when the input has been consumed in order to provide more 2188 input data: load the input buffer with BUF or fewer bytes (fewer if at end 2189 of file) from the file g.ind, set g.in_next to point to the g.in_left bytes 2190 read, update g.in_tot, and return g.in_left -- g.in_eof is set to true when 2191 g.in_left has gone to zero and there is no more data left to read */ 2192local size_t load(void) 2193{ 2194 /* if already detected end of file, do nothing */ 2195 if (g.in_short) { 2196 g.in_eof = 1; 2197 g.in_left = 0; 2198 return 0; 2199 } 2200 2201#ifndef NOTHREAD 2202 /* if first time in or procs == 1, read a buffer to have something to 2203 return, otherwise wait for the previous read job to complete */ 2204 if (g.procs > 1) { 2205 /* if first time, fire up the read thread, ask for a read */ 2206 if (g.in_which == -1) { 2207 g.in_which = 1; 2208 g.load_state = new_lock(1); 2209 g.load_thread = launch(load_read, NULL); 2210 } 2211 2212 /* wait for the previously requested read to complete */ 2213 possess(g.load_state); 2214 wait_for(g.load_state, TO_BE, 0); 2215 release(g.load_state); 2216 2217 /* set up input buffer with the data just read */ 2218 g.in_next = g.in_which ? 
g.in_buf : g.in_buf2; 2219 g.in_left = g.in_len; 2220 2221 /* if not at end of file, alert read thread to load next buffer, 2222 alternate between g.in_buf and g.in_buf2 */ 2223 if (g.in_len == BUF) { 2224 g.in_which = 1 - g.in_which; 2225 possess(g.load_state); 2226 twist(g.load_state, TO, 1); 2227 } 2228 2229 /* at end of file -- join read thread (already exited), clean up */ 2230 else { 2231 join(g.load_thread); 2232 free_lock(g.load_state); 2233 g.in_which = -1; 2234 } 2235 } 2236 else 2237#endif 2238 { 2239 /* don't use threads -- simply read a buffer into g.in_buf */ 2240 g.in_left = readn(g.ind, g.in_next = g.in_buf, BUF); 2241 } 2242 2243 /* note end of file */ 2244 if (g.in_left < BUF) { 2245 g.in_short = 1; 2246 2247 /* if we got bupkis, now is the time to mark eof */ 2248 if (g.in_left == 0) 2249 g.in_eof = 1; 2250 } 2251 2252 /* update the total and return the available bytes */ 2253 g.in_tot += g.in_left; 2254 return g.in_left; 2255} 2256 2257/* initialize for reading new input */ 2258local void in_init(void) 2259{ 2260 g.in_left = 0; 2261 g.in_eof = 0; 2262 g.in_short = 0; 2263 g.in_tot = 0; 2264#ifndef NOTHREAD 2265 g.in_which = -1; 2266#endif 2267} 2268 2269/* buffered reading macros for decompression and listing */ 2270#define GET() (g.in_eof || (g.in_left == 0 && load() == 0) ? 
EOF : \ 2271 (g.in_left--, *g.in_next++)) 2272#define GET2() (tmp2 = GET(), tmp2 + ((unsigned)(GET()) << 8)) 2273#define GET4() (tmp4 = GET2(), tmp4 + ((unsigned long)(GET2()) << 16)) 2274#define SKIP(dist) \ 2275 do { \ 2276 size_t togo = (dist); \ 2277 while (togo > g.in_left) { \ 2278 togo -= g.in_left; \ 2279 if (load() == 0) \ 2280 return -1; \ 2281 } \ 2282 g.in_left -= togo; \ 2283 g.in_next += togo; \ 2284 } while (0) 2285 2286/* pull LSB order or MSB order integers from an unsigned char buffer */ 2287#define PULL2L(p) ((p)[0] + ((unsigned)((p)[1]) << 8)) 2288#define PULL4L(p) (PULL2L(p) + ((unsigned long)(PULL2L((p) + 2)) << 16)) 2289#define PULL2M(p) (((unsigned)((p)[0]) << 8) + (p)[1]) 2290#define PULL4M(p) (((unsigned long)(PULL2M(p)) << 16) + PULL2M((p) + 2)) 2291 2292/* convert MS-DOS date and time to a Unix time, assuming current timezone 2293 (you got a better idea?) */ 2294local time_t dos2time(unsigned long dos) 2295{ 2296 struct tm tm; 2297 2298 if (dos == 0) 2299 return time(NULL); 2300 tm.tm_year = ((int)(dos >> 25) & 0x7f) + 80; 2301 tm.tm_mon = ((int)(dos >> 21) & 0xf) - 1; 2302 tm.tm_mday = (int)(dos >> 16) & 0x1f; 2303 tm.tm_hour = (int)(dos >> 11) & 0x1f; 2304 tm.tm_min = (int)(dos >> 5) & 0x3f; 2305 tm.tm_sec = (int)(dos << 1) & 0x3e; 2306 tm.tm_isdst = -1; /* figure out if DST or not */ 2307 return mktime(&tm); 2308} 2309 2310/* convert an unsigned 32-bit integer to signed, even if long > 32 bits */ 2311local long tolong(unsigned long val) 2312{ 2313 return (long)(val & 0x7fffffffUL) - (long)(val & 0x80000000UL); 2314} 2315 2316#define LOW32 0xffffffffUL 2317 2318/* process zip extra field to extract zip64 lengths and Unix mod time */ 2319local int read_extra(unsigned len, int save) 2320{ 2321 unsigned id, size, tmp2; 2322 unsigned long tmp4; 2323 2324 /* process extra blocks */ 2325 while (len >= 4) { 2326 id = GET2(); 2327 size = GET2(); 2328 if (g.in_eof) 2329 return -1; 2330 len -= 4; 2331 if (size > len) 2332 break; 2333 len -= 
size; 2334 if (id == 0x0001) { 2335 /* Zip64 Extended Information Extra Field */ 2336 if (g.zip_ulen == LOW32 && size >= 8) { 2337 g.zip_ulen = GET4(); 2338 SKIP(4); 2339 size -= 8; 2340 } 2341 if (g.zip_clen == LOW32 && size >= 8) { 2342 g.zip_clen = GET4(); 2343 SKIP(4); 2344 size -= 8; 2345 } 2346 } 2347 if (save) { 2348 if ((id == 0x000d || id == 0x5855) && size >= 8) { 2349 /* PKWare Unix or Info-ZIP Type 1 Unix block */ 2350 SKIP(4); 2351 g.stamp = tolong(GET4()); 2352 size -= 8; 2353 } 2354 if (id == 0x5455 && size >= 5) { 2355 /* Extended Timestamp block */ 2356 size--; 2357 if (GET() & 1) { 2358 g.stamp = tolong(GET4()); 2359 size -= 4; 2360 } 2361 } 2362 } 2363 SKIP(size); 2364 } 2365 SKIP(len); 2366 return 0; 2367} 2368 2369/* read a gzip, zip, zlib, or lzw header from ind and return the method in the 2370 range 0..256 (256 implies a zip method greater than 255), or on error return 2371 negative: -1 is immediate EOF, -2 is not a recognized compressed format, -3 2372 is premature EOF within the header, -4 is unexpected header flag values, -5 2373 is the zip central directory; a method of 257 is lzw -- if the return value 2374 is not negative, then get_header() sets g.form to indicate gzip (0), zlib 2375 (1), or zip (2, or 3 if the entry is followed by a data descriptor) */ 2376local int get_header(int save) 2377{ 2378 unsigned magic; /* magic header */ 2379 int method; /* compression method */ 2380 int flags; /* header flags */ 2381 unsigned fname, extra; /* name and extra field lengths */ 2382 unsigned tmp2; /* for macro */ 2383 unsigned long tmp4; /* for macro */ 2384 2385 /* clear return information */ 2386 if (save) { 2387 g.stamp = 0; 2388 RELEASE(g.hname); 2389 } 2390 2391 /* see if it's a gzip, zlib, or lzw file */ 2392 g.form = -1; 2393 g.magic1 = GET(); 2394 if (g.in_eof) 2395 return -1; 2396 magic = g.magic1 << 8; 2397 magic += GET(); 2398 if (g.in_eof) 2399 return -2; 2400 if (magic % 31 == 0) { /* it's zlib */ 2401 g.form = 1; 2402 return 
(int)((magic >> 8) & 0xf); 2403 } 2404 if (magic == 0x1f9d) /* it's lzw */ 2405 return 257; 2406 if (magic == 0x504b) { /* it's zip */ 2407 magic = GET2(); /* the rest of the signature */ 2408 if (g.in_eof) 2409 return -3; 2410 if (magic == 0x0201 || magic == 0x0806) 2411 return -5; /* central header or archive extra */ 2412 if (magic != 0x0403) 2413 return -4; /* not a local header */ 2414 SKIP(2); 2415 flags = GET2(); 2416 if (g.in_eof) 2417 return -3; 2418 if (flags & 0xfff0) 2419 return -4; 2420 method = GET(); /* return low byte of method or 256 */ 2421 if (GET() != 0 || flags & 1) 2422 method = 256; /* unknown or encrypted */ 2423 if (g.in_eof) 2424 return -3; 2425 if (save) 2426 g.stamp = dos2time(GET4()); 2427 else 2428 SKIP(4); 2429 g.zip_crc = GET4(); 2430 g.zip_clen = GET4(); 2431 g.zip_ulen = GET4(); 2432 fname = GET2(); 2433 extra = GET2(); 2434 if (save) { 2435 char *next = g.hname = MALLOC(fname + 1); 2436 if (g.hname == NULL) 2437 bail("not enough memory", ""); 2438 while (fname > g.in_left) { 2439 memcpy(next, g.in_next, g.in_left); 2440 fname -= g.in_left; 2441 next += g.in_left; 2442 if (load() == 0) 2443 return -3; 2444 } 2445 memcpy(next, g.in_next, fname); 2446 g.in_left -= fname; 2447 g.in_next += fname; 2448 next += fname; 2449 *next = 0; 2450 } 2451 else 2452 SKIP(fname); 2453 read_extra(extra, save); 2454 g.form = 2 + ((flags & 8) >> 3); 2455 return g.in_eof ? 
-3 : method; 2456 } 2457 if (magic != 0x1f8b) { /* not gzip */ 2458 g.in_left++; /* unget second magic byte */ 2459 g.in_next--; 2460 return -2; 2461 } 2462 2463 /* it's gzip -- get method and flags */ 2464 method = GET(); 2465 flags = GET(); 2466 if (g.in_eof) 2467 return -1; 2468 if (flags & 0xe0) 2469 return -4; 2470 2471 /* get time stamp */ 2472 if (save) 2473 g.stamp = tolong(GET4()); 2474 else 2475 SKIP(4); 2476 2477 /* skip extra field and OS */ 2478 SKIP(2); 2479 2480 /* skip extra field, if present */ 2481 if (flags & 4) { 2482 extra = GET2(); 2483 if (g.in_eof) 2484 return -3; 2485 SKIP(extra); 2486 } 2487 2488 /* read file name, if present, into allocated memory */ 2489 if ((flags & 8) && save) { 2490 unsigned char *end; 2491 size_t copy, have, size = 128; 2492 g.hname = MALLOC(size); 2493 if (g.hname == NULL) 2494 bail("not enough memory", ""); 2495 have = 0; 2496 do { 2497 if (g.in_left == 0 && load() == 0) 2498 return -3; 2499 end = memchr(g.in_next, 0, g.in_left); 2500 copy = end == NULL ? 
g.in_left : (size_t)(end - g.in_next) + 1; 2501 if (have + copy > size) { 2502 while (have + copy > (size <<= 1)) 2503 ; 2504 g.hname = REALLOC(g.hname, size); 2505 if (g.hname == NULL) 2506 bail("not enough memory", ""); 2507 } 2508 memcpy(g.hname + have, g.in_next, copy); 2509 have += copy; 2510 g.in_left -= copy; 2511 g.in_next += copy; 2512 } while (end == NULL); 2513 } 2514 else if (flags & 8) 2515 while (GET() != 0) 2516 if (g.in_eof) 2517 return -3; 2518 2519 /* skip comment */ 2520 if (flags & 16) 2521 while (GET() != 0) 2522 if (g.in_eof) 2523 return -3; 2524 2525 /* skip header crc */ 2526 if (flags & 2) 2527 SKIP(2); 2528 2529 /* return gzip compression method */ 2530 g.form = 0; 2531 return method; 2532} 2533 2534/* --- list contents of compressed input (gzip, zlib, or lzw) */ 2535 2536/* find standard compressed file suffix, return length of suffix */ 2537local size_t compressed_suffix(char *nm) 2538{ 2539 size_t len; 2540 2541 len = strlen(nm); 2542 if (len > 4) { 2543 nm += len - 4; 2544 len = 4; 2545 if (strcmp(nm, ".zip") == 0 || strcmp(nm, ".ZIP") == 0 || 2546 strcmp(nm, ".tgz") == 0) 2547 return 4; 2548 } 2549 if (len > 3) { 2550 nm += len - 3; 2551 len = 3; 2552 if (strcmp(nm, ".gz") == 0 || strcmp(nm, "-gz") == 0 || 2553 strcmp(nm, ".zz") == 0 || strcmp(nm, "-zz") == 0) 2554 return 3; 2555 } 2556 if (len > 2) { 2557 nm += len - 2; 2558 if (strcmp(nm, ".z") == 0 || strcmp(nm, "-z") == 0 || 2559 strcmp(nm, "_z") == 0 || strcmp(nm, ".Z") == 0) 2560 return 2; 2561 } 2562 return 0; 2563} 2564 2565/* listing file name lengths for -l and -lv */ 2566#define NAMEMAX1 48 /* name display limit at verbosity 1 */ 2567#define NAMEMAX2 16 /* name display limit at verbosity 2 */ 2568 2569/* print gzip or lzw file information */ 2570local void show_info(int method, unsigned long check, off_t len, int cont) 2571{ 2572 size_t max; /* maximum name length for current verbosity */ 2573 size_t n; /* name length without suffix */ 2574 time_t now; /* for getting 
current year */ 2575 char mod[26]; /* modification time in text */ 2576 char tag[NAMEMAX1+1]; /* header or file name, possibly truncated */ 2577 2578 /* create abbreviated name from header file name or actual file name */ 2579 max = g.verbosity > 1 ? NAMEMAX2 : NAMEMAX1; 2580 memset(tag, 0, max + 1); 2581 if (cont) 2582 strncpy(tag, "<...>", max + 1); 2583 else if (g.hname == NULL) { 2584 n = strlen(g.inf) - compressed_suffix(g.inf); 2585 strncpy(tag, g.inf, n > max + 1 ? max + 1 : n); 2586 if (strcmp(g.inf + n, ".tgz") == 0 && n < max + 1) 2587 strncpy(tag + n, ".tar", max + 1 - n); 2588 } 2589 else 2590 strncpy(tag, g.hname, max + 1); 2591 if (tag[max]) 2592 strcpy(tag + max - 3, "..."); 2593 2594 /* convert time stamp to text */ 2595 if (g.stamp) { 2596 strcpy(mod, ctime(&g.stamp)); 2597 now = time(NULL); 2598 if (strcmp(mod + 20, ctime(&now) + 20) != 0) 2599 strcpy(mod + 11, mod + 19); 2600 } 2601 else 2602 strcpy(mod + 4, "------ -----"); 2603 mod[16] = 0; 2604 2605 /* if first time, print header */ 2606 if (g.first) { 2607 if (g.verbosity > 1) 2608 fputs("method check timestamp ", stdout); 2609 if (g.verbosity > 0) 2610 puts("compressed original reduced name"); 2611 g.first = 0; 2612 } 2613 2614 /* print information */ 2615 if (g.verbosity > 1) { 2616 if (g.form == 3 && !g.decode) 2617 printf("zip%3d -------- %s ", method, mod + 4); 2618 else if (g.form > 1) 2619 printf("zip%3d %08lx %s ", method, check, mod + 4); 2620 else if (g.form == 1) 2621 printf("zlib%2d %08lx %s ", method, check, mod + 4); 2622 else if (method == 257) 2623 printf("lzw -------- %s ", mod + 4); 2624 else 2625 printf("gzip%2d %08lx %s ", method, check, mod + 4); 2626 } 2627 if (g.verbosity > 0) { 2628 if ((g.form == 3 && !g.decode) || 2629 (method == 8 && g.in_tot > (len + (len >> 10) + 12)) || 2630 (method == 257 && g.in_tot > len + (len >> 1) + 3)) 2631#if __STDC_VERSION__-0 >= 199901L || __GNUC__-0 >= 3 2632 printf("%10jd %10jd? 
unk %s\n", 2633 (intmax_t)g.in_tot, (intmax_t)len, tag); 2634 else 2635 printf("%10jd %10jd %6.1f%% %s\n", 2636 (intmax_t)g.in_tot, (intmax_t)len, 2637 len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len, 2638 tag); 2639#else 2640 printf(sizeof(off_t) == sizeof(long) ? 2641 "%10ld %10ld? unk %s\n" : "%10lld %10lld? unk %s\n", 2642 g.in_tot, len, tag); 2643 else 2644 printf(sizeof(off_t) == sizeof(long) ? 2645 "%10ld %10ld %6.1f%% %s\n" : "%10lld %10lld %6.1f%% %s\n", 2646 g.in_tot, len, 2647 len == 0 ? 0 : 100 * (len - g.in_tot)/(double)len, 2648 tag); 2649#endif 2650 } 2651} 2652 2653/* list content information about the gzip file at ind (only works if the gzip 2654 file contains a single gzip stream with no junk at the end, and only works 2655 well if the uncompressed length is less than 4 GB) */ 2656local void list_info(void) 2657{ 2658 int method; /* get_header() return value */ 2659 size_t n; /* available trailer bytes */ 2660 off_t at; /* used to calculate compressed length */ 2661 unsigned char tail[8]; /* trailer containing check and length */ 2662 unsigned long check, len; /* check value and length from trailer */ 2663 2664 /* initialize input buffer */ 2665 in_init(); 2666 2667 /* read header information and position input after header */ 2668 method = get_header(1); 2669 if (method < 0) { 2670 RELEASE(g.hname); 2671 if (method != -1 && g.verbosity > 1) 2672 complain("%s not a compressed file -- skipping", g.inf); 2673 return; 2674 } 2675 2676 /* list zip file */ 2677 if (g.form > 1) { 2678 g.in_tot = g.zip_clen; 2679 show_info(method, g.zip_crc, g.zip_ulen, 0); 2680 return; 2681 } 2682 2683 /* list zlib file */ 2684 if (g.form == 1) { 2685 at = lseek(g.ind, 0, SEEK_END); 2686 if (at == -1) { 2687 check = 0; 2688 do { 2689 len = g.in_left < 4 ? 
g.in_left : 4; 2690 g.in_next += g.in_left - len; 2691 while (len--) 2692 check = (check << 8) + *g.in_next++; 2693 } while (load() != 0); 2694 check &= LOW32; 2695 } 2696 else { 2697 g.in_tot = at; 2698 lseek(g.ind, -4, SEEK_END); 2699 readn(g.ind, tail, 4); 2700 check = PULL4M(tail); 2701 } 2702 g.in_tot -= 6; 2703 show_info(method, check, 0, 0); 2704 return; 2705 } 2706 2707 /* list lzw file */ 2708 if (method == 257) { 2709 at = lseek(g.ind, 0, SEEK_END); 2710 if (at == -1) 2711 while (load() != 0) 2712 ; 2713 else 2714 g.in_tot = at; 2715 g.in_tot -= 3; 2716 show_info(method, 0, 0, 0); 2717 return; 2718 } 2719 2720 /* skip to end to get trailer (8 bytes), compute compressed length */ 2721 if (g.in_short) { /* whole thing already read */ 2722 if (g.in_left < 8) { 2723 complain("%s not a valid gzip file -- skipping", g.inf); 2724 return; 2725 } 2726 g.in_tot = g.in_left - 8; /* compressed size */ 2727 memcpy(tail, g.in_next + (g.in_left - 8), 8); 2728 } 2729 else if ((at = lseek(g.ind, -8, SEEK_END)) != -1) { 2730 g.in_tot = at - g.in_tot + g.in_left; /* compressed size */ 2731 readn(g.ind, tail, 8); /* get trailer */ 2732 } 2733 else { /* can't seek */ 2734 at = g.in_tot - g.in_left; /* save header size */ 2735 do { 2736 n = g.in_left < 8 ? 
g.in_left : 8; 2737 memcpy(tail, g.in_next + (g.in_left - n), n); 2738 load(); 2739 } while (g.in_left == BUF); /* read until end */ 2740 if (g.in_left < 8) { 2741 if (n + g.in_left < 8) { 2742 complain("%s not a valid gzip file -- skipping", g.inf); 2743 return; 2744 } 2745 if (g.in_left) { 2746 if (n + g.in_left > 8) 2747 memcpy(tail, tail + n - (8 - g.in_left), 8 - g.in_left); 2748 memcpy(tail + 8 - g.in_left, g.in_next, g.in_left); 2749 } 2750 } 2751 else 2752 memcpy(tail, g.in_next + (g.in_left - 8), 8); 2753 g.in_tot -= at + 8; 2754 } 2755 if (g.in_tot < 2) { 2756 complain("%s not a valid gzip file -- skipping", g.inf); 2757 return; 2758 } 2759 2760 /* convert trailer to check and uncompressed length (modulo 2^32) */ 2761 check = PULL4L(tail); 2762 len = PULL4L(tail + 4); 2763 2764 /* list information about contents */ 2765 show_info(method, check, len, 0); 2766 RELEASE(g.hname); 2767} 2768 2769/* --- copy input to output (when acting like cat) --- */ 2770 2771local void cat(void) 2772{ 2773 /* write first magic byte (if we're here, there's at least one byte) */ 2774 writen(g.outd, &g.magic1, 1); 2775 g.out_tot = 1; 2776 2777 /* copy the remainder of the input to the output (if there were any more 2778 bytes of input, then g.in_left is non-zero and g.in_next is pointing to 2779 the second magic byte) */ 2780 while (g.in_left) { 2781 writen(g.outd, g.in_next, g.in_left); 2782 g.out_tot += g.in_left; 2783 g.in_left = 0; 2784 load(); 2785 } 2786} 2787 2788/* --- decompress deflate input --- */ 2789 2790/* call-back input function for inflateBack() */ 2791local unsigned inb(void *desc, unsigned char **buf) 2792{ 2793 (void)desc; 2794 load(); 2795 *buf = g.in_next; 2796 return g.in_left; 2797} 2798 2799/* output buffers and window for infchk() and unlzw() */ 2800#define OUTSIZE 32768U /* must be at least 32K for inflateBack() window */ 2801local unsigned char out_buf[OUTSIZE]; 2802 2803#ifndef NOTHREAD 2804/* output data for parallel write and check */ 2805local 
unsigned char out_copy[OUTSIZE]; 2806local size_t out_len; 2807 2808/* outb threads states */ 2809local lock *outb_write_more = NULL; 2810local lock *outb_check_more; 2811 2812/* output write thread */ 2813local void outb_write(void *dummy) 2814{ 2815 size_t len; 2816 2817 (void)dummy; 2818 2819 Trace(("-- launched decompress write thread")); 2820 do { 2821 possess(outb_write_more); 2822 wait_for(outb_write_more, TO_BE, 1); 2823 len = out_len; 2824 if (len && g.decode == 1) 2825 writen(g.outd, out_copy, len); 2826 Trace(("-- decompress wrote %lu bytes", len)); 2827 twist(outb_write_more, TO, 0); 2828 } while (len); 2829 Trace(("-- exited decompress write thread")); 2830} 2831 2832/* output check thread */ 2833local void outb_check(void *dummy) 2834{ 2835 size_t len; 2836 2837 (void)dummy; 2838 2839 Trace(("-- launched decompress check thread")); 2840 do { 2841 possess(outb_check_more); 2842 wait_for(outb_check_more, TO_BE, 1); 2843 len = out_len; 2844 g.out_check = CHECK(g.out_check, out_copy, len); 2845 Trace(("-- decompress checked %lu bytes", len)); 2846 twist(outb_check_more, TO, 0); 2847 } while (len); 2848 Trace(("-- exited decompress check thread")); 2849} 2850#endif 2851 2852/* call-back output function for inflateBack() -- wait for the last write and 2853 check calculation to complete, copy the write buffer, and then alert the 2854 write and check threads and return for more decompression while that's 2855 going on (or just write and check if no threads or if proc == 1) */ 2856local int outb(void *desc, unsigned char *buf, unsigned len) 2857{ 2858#ifndef NOTHREAD 2859 static thread *wr, *ch; 2860 2861 if (g.procs > 1) { 2862 /* if first time, initialize state and launch threads */ 2863 if (outb_write_more == NULL) { 2864 outb_write_more = new_lock(0); 2865 outb_check_more = new_lock(0); 2866 wr = launch(outb_write, NULL); 2867 ch = launch(outb_check, NULL); 2868 } 2869 2870 /* wait for previous write and check threads to complete */ 2871 
possess(outb_check_more); 2872 wait_for(outb_check_more, TO_BE, 0); 2873 possess(outb_write_more); 2874 wait_for(outb_write_more, TO_BE, 0); 2875 2876 /* copy the output and alert the worker bees */ 2877 out_len = len; 2878 g.out_tot += len; 2879 memcpy(out_copy, buf, len); 2880 twist(outb_write_more, TO, 1); 2881 twist(outb_check_more, TO, 1); 2882 2883 /* if requested with len == 0, clean up -- terminate and join write and 2884 check threads, free lock */ 2885 if (len == 0) { 2886 join(ch); 2887 join(wr); 2888 free_lock(outb_check_more); 2889 free_lock(outb_write_more); 2890 outb_write_more = NULL; 2891 } 2892 2893 /* return for more decompression while last buffer is being written 2894 and having its check value calculated -- we wait for those to finish 2895 the next time this function is called */ 2896 return 0; 2897 } 2898#endif 2899 2900 (void)desc; 2901 2902 /* if just one process or no threads, then do it without threads */ 2903 if (len) { 2904 if (g.decode == 1) 2905 writen(g.outd, buf, len); 2906 g.out_check = CHECK(g.out_check, buf, len); 2907 g.out_tot += len; 2908 } 2909 return 0; 2910} 2911 2912/* inflate for decompression or testing -- decompress from ind to outd unless 2913 decode != 1, in which case just test ind, and then also list if list != 0; 2914 look for and decode multiple, concatenated gzip and/or zlib streams; 2915 read and check the gzip, zlib, or zip trailer */ 2916local void infchk(void) 2917{ 2918 int ret, cont, was; 2919 unsigned long check, len; 2920 z_stream strm; 2921 unsigned tmp2; 2922 unsigned long tmp4; 2923 off_t clen; 2924 2925 cont = 0; 2926 do { 2927 /* header already read -- set up for decompression */ 2928 g.in_tot = g.in_left; /* track compressed data length */ 2929 g.out_tot = 0; 2930 g.out_check = CHECK(0L, Z_NULL, 0); 2931 strm.zalloc = ZALLOC; 2932 strm.zfree = ZFREE; 2933 strm.opaque = OPAQUE; 2934 ret = inflateBackInit(&strm, 15, out_buf); 2935 if (ret != Z_OK) 2936 bail("not enough memory", ""); 2937 2938 /* 
decompress, compute lengths and check value */ 2939 strm.avail_in = g.in_left; 2940 strm.next_in = g.in_next; 2941 ret = inflateBack(&strm, inb, NULL, outb, NULL); 2942 if (ret != Z_STREAM_END) 2943 bail("corrupted input -- invalid deflate data: ", g.inf); 2944 g.in_left = strm.avail_in; 2945 g.in_next = strm.next_in; 2946 inflateBackEnd(&strm); 2947 outb(NULL, NULL, 0); /* finish off final write and check */ 2948 2949 /* compute compressed data length */ 2950 clen = g.in_tot - g.in_left; 2951 2952 /* read and check trailer */ 2953 if (g.form > 1) { /* zip local trailer (if any) */ 2954 if (g.form == 3) { /* data descriptor follows */ 2955 /* read original version of data descriptor */ 2956 g.zip_crc = GET4(); 2957 g.zip_clen = GET4(); 2958 g.zip_ulen = GET4(); 2959 if (g.in_eof) 2960 bail("corrupted zip entry -- missing trailer: ", g.inf); 2961 2962 /* if crc doesn't match, try info-zip variant with sig */ 2963 if (g.zip_crc != g.out_check) { 2964 if (g.zip_crc != 0x08074b50UL || g.zip_clen != g.out_check) 2965 bail("corrupted zip entry -- crc32 mismatch: ", g.inf); 2966 g.zip_crc = g.zip_clen; 2967 g.zip_clen = g.zip_ulen; 2968 g.zip_ulen = GET4(); 2969 } 2970 2971 /* handle incredibly rare cases where crc equals signature */ 2972 else if (g.zip_crc == 0x08074b50UL && 2973 g.zip_clen == g.zip_crc && 2974 ((clen & LOW32) != g.zip_crc || 2975 g.zip_ulen == g.zip_crc)) { 2976 g.zip_crc = g.zip_clen; 2977 g.zip_clen = g.zip_ulen; 2978 g.zip_ulen = GET4(); 2979 } 2980 2981 /* if second length doesn't match, try 64-bit lengths */ 2982 if (g.zip_ulen != (g.out_tot & LOW32)) { 2983 g.zip_ulen = GET4(); 2984 (void)GET4(); 2985 } 2986 if (g.in_eof) 2987 bail("corrupted zip entry -- missing trailer: ", g.inf); 2988 } 2989 if (g.zip_clen != (clen & LOW32) || 2990 g.zip_ulen != (g.out_tot & LOW32)) 2991 bail("corrupted zip entry -- length mismatch: ", g.inf); 2992 check = g.zip_crc; 2993 } 2994 else if (g.form == 1) { /* zlib (big-endian) trailer */ 2995 check = (unsigned 
long)(GET()) << 24; 2996 check += (unsigned long)(GET()) << 16; 2997 check += (unsigned)(GET()) << 8; 2998 check += GET(); 2999 if (g.in_eof) 3000 bail("corrupted zlib stream -- missing trailer: ", g.inf); 3001 if (check != g.out_check) 3002 bail("corrupted zlib stream -- adler32 mismatch: ", g.inf); 3003 } 3004 else { /* gzip trailer */ 3005 check = GET4(); 3006 len = GET4(); 3007 if (g.in_eof) 3008 bail("corrupted gzip stream -- missing trailer: ", g.inf); 3009 if (check != g.out_check) 3010 bail("corrupted gzip stream -- crc32 mismatch: ", g.inf); 3011 if (len != (g.out_tot & LOW32)) 3012 bail("corrupted gzip stream -- length mismatch: ", g.inf); 3013 } 3014 3015 /* show file information if requested */ 3016 if (g.list) { 3017 g.in_tot = clen; 3018 show_info(8, check, g.out_tot, cont); 3019 cont = 1; 3020 } 3021 3022 /* if a gzip entry follows a gzip entry, decompress it (don't replace 3023 saved header information from first entry) */ 3024 was = g.form; 3025 } while (was == 0 && (ret = get_header(0)) == 8 && g.form == 0); 3026 3027 /* gzip -cdf copies junk after gzip stream directly to output */ 3028 if (was == 0 && ret == -2 && g.force && g.pipeout && g.decode != 2 && 3029 !g.list) 3030 cat(); 3031 else if (was > 1 && get_header(0) != -5) 3032 complain("entries after the first in %s were ignored", g.inf); 3033 else if ((was == 0 && ret != -1) || (was == 1 && GET() != EOF)) 3034 complain("%s OK, has trailing junk which was ignored", g.inf); 3035} 3036 3037/* --- decompress Unix compress (LZW) input --- */ 3038 3039/* memory for unlzw() -- 3040 the first 256 entries of prefix[] and suffix[] are never used, could 3041 have offset the index, but it's faster to waste the memory */ 3042unsigned short prefix[65536]; /* index to LZW prefix string */ 3043unsigned char suffix[65536]; /* one-character LZW suffix */ 3044unsigned char match[65280 + 2]; /* buffer for reversed match */ 3045 3046/* throw out what's left in the current bits byte buffer (this is a vestigial 
aspect of the compressed data format derived from an implementation that
   made use of a special VAX machine instruction!) -- clears the bit buffer
   (left, rem) and skips the chunk input bytes that remain in the current
   group of codes, loading more input if the skip runs past the buffer; uses
   the local variables left, rem and chunk of unlzw() */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > g.in_left) { \
            chunk -= g.in_left; \
            if (load() == 0) \
                break; \
            if (chunk > g.in_left) { \
                chunk = g.in_left = 0; \
                break; \
            } \
        } \
        g.in_left -= chunk; \
        g.in_next += chunk; \
        chunk = 0; \
    } while (0)

/* Decompress a compress (LZW) file from ind to outd.  The compress magic
   header (two bytes) has already been read and verified.  Output is collected
   in out_buf and written to g.outd only when g.decode == 1; g.out_tot counts
   the decompressed bytes either way.  Invalid input ends the program through
   bail(). */
local void unlzw(void)
{
    int got;                    /* byte just read by GET() */
    unsigned chunk;             /* bytes left in current chunk */
    int left;                   /* bits left in rem */
    unsigned rem;               /* unused bits from input */
    int bits;                   /* current bits per code */
    unsigned code;              /* code, table traversal index */
    unsigned mask;              /* mask for current bits codes */
    int max;                    /* maximum bits per code for this stream */
    int flags;                  /* compress flags, then block compress flag */
    unsigned end;               /* last valid entry in prefix/suffix tables */
    unsigned temp;              /* current code */
    unsigned prev;              /* previous code */
    unsigned final;             /* last character written for previous code */
    unsigned stack;             /* next position for reversed string */
    unsigned outcnt;            /* bytes in output buffer */
    unsigned char *p;

    /* process remainder of compress header -- a flags byte */
    g.out_tot = 0;
    flags = GET();
    if (g.in_eof)
        bail("missing lzw data: ", g.inf);
    if (flags & 0x60)
        bail("unknown lzw flags set: ", g.inf);
    max = flags & 0x1f;
    if (max < 9 || max > 16)
        bail("lzw bits out of range: ", g.inf);
    if (max == 9)                           /* 9 doesn't really mean 9 */
        max = 10;
    flags &= 0x80;                          /* true if block compress */

    /* clear table */
    bits = 9;
    mask = 0x1ff;
    end = flags ? 256 : 255;

    /* set up: get first 9-bit code, which is the first decompressed byte, but
       don't create a table entry until the next code */
    got = GET();
    if (g.in_eof)                           /* no compressed data is ok */
        return;
    final = prev = (unsigned)got;           /* low 8 bits of code */
    got = GET();
    if (g.in_eof || (got & 1) != 0)         /* missing a bit or code >= 256 */
        bail("invalid lzw code: ", g.inf);
    rem = (unsigned)got >> 1;               /* remaining 7 bits */
    left = 7;
    chunk = bits - 2;                       /* 7 bytes left in this chunk */
    out_buf[0] = (unsigned char)final;      /* write first decompressed byte */
    outcnt = 1;

    /* decode codes */
    stack = 0;
    for (;;) {
        /* if the table will be full after this, increment the code size */
        if (end >= mask && bits < max) {
            FLUSHCODE();
            bits++;
            mask <<= 1;
            mask++;
        }

        /* get a code of length bits */
        if (chunk == 0)                     /* decrement chunk modulo bits */
            chunk = bits;
        code = rem;                         /* low bits of code */
        got = GET();
        if (g.in_eof) {                     /* EOF is end of compressed data */
            /* write remaining buffered output */
            g.out_tot += outcnt;
            if (outcnt && g.decode == 1)
                writen(g.outd, out_buf, outcnt);
            return;
        }
        code += (unsigned)got << left;      /* middle (or high) bits of code */
        left += 8;
        chunk--;
        if (bits > left) {                  /* need more bits */
            got = GET();
            if (g.in_eof)                   /* can't end in middle of code */
                bail("invalid lzw code: ", g.inf);
            code += (unsigned)got << left;  /* high bits of code */
            left += 8;
            chunk--;
        }
        code &= mask;                       /* mask to current code length */
        left -= bits;                       /* number of unused bits */
        rem = (unsigned)got >> (8 - left);  /* unused bits from last byte */

        /* process clear code (256) */
        if (code == 256 && flags) {
            FLUSHCODE();
            bits = 9;                       /* initialize bits and mask */
            mask = 0x1ff;
            end = 255;                      /* empty table */
            continue;                       /* get next code */
        }

        /* special code to reuse last match */
        temp = code;                        /* save the current code */
        if (code > end) {
            /* Be picky on the allowed code here, and make sure that the code
               we drop through (prev) will be a valid index so that random
               input does not cause an exception.  The code != end + 1 check is
               empirically derived, and not checked in the original uncompress
               code.  If this ever causes a problem, that check could be safely
               removed.  Leaving this check in greatly improves pigz's ability
               to detect random or corrupted input after a compress header.
               In any case, the prev > end check must be retained. */
            if (code != end + 1 || prev > end)
                bail("invalid lzw code: ", g.inf);
            match[stack++] = (unsigned char)final;
            code = prev;
        }

        /* walk through linked list to generate output in reverse order */
        p = match + stack;
        while (code >= 256) {
            *p++ = suffix[code];
            code = prefix[code];
        }
        stack = p - match;
        match[stack++] = (unsigned char)code;
        final = code;

        /* link new table entry */
        if (end < mask) {
            end++;
            prefix[end] = (unsigned short)prev;
            suffix[end] = (unsigned char)final;
        }

        /* set previous code for next iteration */
        prev = temp;

        /* write output in forward order -- whenever the reversed match does
           not fit in what is left of out_buf, fill out_buf completely, flush
           it, and continue with the rest of the match */
        while (stack > OUTSIZE - outcnt) {
            while (outcnt < OUTSIZE)
                out_buf[outcnt++] = match[--stack];
            g.out_tot += outcnt;
            if (g.decode == 1)
                writen(g.outd, out_buf, outcnt);
            outcnt = 0;
        }
        p = match + stack;
        do {
            out_buf[outcnt++] = *--p;
        } while (p > match);
        stack = 0;

        /* loop for next code with final and prev as the last match, rem and
           left provide the first 0..7 bits of the next code, end is the last
           valid table entry */
    }
}

/* --- file processing --- */

/* extract file name from path */
3230local char *justname(char *path) 3231{ 3232 char *p; 3233 3234 p = path + strlen(path); 3235 while (--p >= path) 3236 if (*p == '/') 3237 break; 3238 return p + 1; 3239} 3240 3241/* Copy file attributes, from -> to, as best we can. This is best effort, so 3242 no errors are reported. The mode bits, including suid, sgid, and the sticky 3243 bit are copied (if allowed), the owner's user id and group id are copied 3244 (again if allowed), and the access and modify times are copied. */ 3245local void copymeta(char *from, char *to) 3246{ 3247 struct stat st; 3248 struct timeval times[2]; 3249 3250 /* get all of from's Unix meta data, return if not a regular file */ 3251 if (stat(from, &st) != 0 || (st.st_mode & S_IFMT) != S_IFREG) 3252 return; 3253 3254 /* set to's mode bits, ignore errors */ 3255 (void)chmod(to, st.st_mode & 07777); 3256 3257 /* copy owner's user and group, ignore errors */ 3258 (void)chown(to, st.st_uid, st.st_gid); 3259 3260 /* copy access and modify times, ignore errors */ 3261 times[0].tv_sec = st.st_atime; 3262 times[0].tv_usec = 0; 3263 times[1].tv_sec = st.st_mtime; 3264 times[1].tv_usec = 0; 3265 (void)utimes(to, times); 3266} 3267 3268/* set the access and modify times of fd to t */ 3269local void touch(char *path, time_t t) 3270{ 3271 struct timeval times[2]; 3272 3273 times[0].tv_sec = t; 3274 times[0].tv_usec = 0; 3275 times[1].tv_sec = t; 3276 times[1].tv_usec = 0; 3277 (void)utimes(path, times); 3278} 3279 3280/* process provided input file, or stdin if path is NULL -- process() can 3281 call itself for recursive directory processing */ 3282local void process(char *path) 3283{ 3284 int method = -1; /* get_header() return value */ 3285 size_t len; /* length of base name (minus suffix) */ 3286 struct stat st; /* to get file type and mod time */ 3287 /* all compressed suffixes for decoding search, in length order */ 3288 static char *sufs[] = {".z", "-z", "_z", ".Z", ".gz", "-gz", ".zz", "-zz", 3289 ".zip", ".ZIP", ".tgz", NULL}; 3290 
3291 /* open input file with name in, descriptor ind -- set name and mtime */ 3292 if (path == NULL) { 3293 strcpy(g.inf, "<stdin>"); 3294 g.ind = 0; 3295 g.name = NULL; 3296 g.mtime = g.headis & 2 ? 3297 (fstat(g.ind, &st) ? time(NULL) : st.st_mtime) : 0; 3298 len = 0; 3299 } 3300 else { 3301 /* set input file name (already set if recursed here) */ 3302 if (path != g.inf) { 3303 strncpy(g.inf, path, sizeof(g.inf)); 3304 if (g.inf[sizeof(g.inf) - 1]) 3305 bail("name too long: ", path); 3306 } 3307 len = strlen(g.inf); 3308 3309 /* try to stat input file -- if not there and decoding, look for that 3310 name with compressed suffixes */ 3311 if (lstat(g.inf, &st)) { 3312 if (errno == ENOENT && (g.list || g.decode)) { 3313 char **try = sufs; 3314 do { 3315 if (*try == NULL || len + strlen(*try) >= sizeof(g.inf)) 3316 break; 3317 strcpy(g.inf + len, *try++); 3318 errno = 0; 3319 } while (lstat(g.inf, &st) && errno == ENOENT); 3320 } 3321#ifdef EOVERFLOW 3322 if (errno == EOVERFLOW || errno == EFBIG) 3323 bail(g.inf, 3324 " too large -- not compiled with large file support"); 3325#endif 3326 if (errno) { 3327 g.inf[len] = 0; 3328 complain("%s does not exist -- skipping", g.inf); 3329 return; 3330 } 3331 len = strlen(g.inf); 3332 } 3333 3334 /* only process regular files, but allow symbolic links if -f, 3335 recurse into directory if -r */ 3336 if ((st.st_mode & S_IFMT) != S_IFREG && 3337 (st.st_mode & S_IFMT) != S_IFLNK && 3338 (st.st_mode & S_IFMT) != S_IFDIR) { 3339 complain("%s is a special file or device -- skipping", g.inf); 3340 return; 3341 } 3342 if ((st.st_mode & S_IFMT) == S_IFLNK && !g.force && !g.pipeout) { 3343 complain("%s is a symbolic link -- skipping", g.inf); 3344 return; 3345 } 3346 if ((st.st_mode & S_IFMT) == S_IFDIR && !g.recurse) { 3347 complain("%s is a directory -- skipping", g.inf); 3348 return; 3349 } 3350 3351 /* recurse into directory (assumes Unix) */ 3352 if ((st.st_mode & S_IFMT) == S_IFDIR) { 3353 char *roll, *item, *cut, *base, *bigger; 
3354 size_t len, hold; 3355 DIR *here; 3356 struct dirent *next; 3357 3358 /* accumulate list of entries (need to do this, since readdir() 3359 behavior not defined if directory modified between calls) */ 3360 here = opendir(g.inf); 3361 if (here == NULL) 3362 return; 3363 hold = 512; 3364 roll = MALLOC(hold); 3365 if (roll == NULL) 3366 bail("not enough memory", ""); 3367 *roll = 0; 3368 item = roll; 3369 while ((next = readdir(here)) != NULL) { 3370 if (next->d_name[0] == 0 || 3371 (next->d_name[0] == '.' && (next->d_name[1] == 0 || 3372 (next->d_name[1] == '.' && next->d_name[2] == 0)))) 3373 continue; 3374 len = strlen(next->d_name) + 1; 3375 if (item + len + 1 > roll + hold) { 3376 do { /* make roll bigger */ 3377 hold <<= 1; 3378 } while (item + len + 1 > roll + hold); 3379 bigger = REALLOC(roll, hold); 3380 if (bigger == NULL) { 3381 FREE(roll); 3382 bail("not enough memory", ""); 3383 } 3384 item = bigger + (item - roll); 3385 roll = bigger; 3386 } 3387 strcpy(item, next->d_name); 3388 item += len; 3389 *item = 0; 3390 } 3391 closedir(here); 3392 3393 /* run process() for each entry in the directory */ 3394 cut = base = g.inf + strlen(g.inf); 3395 if (base > g.inf && base[-1] != (unsigned char)'/') { 3396 if ((size_t)(base - g.inf) >= sizeof(g.inf)) 3397 bail("path too long", g.inf); 3398 *base++ = '/'; 3399 } 3400 item = roll; 3401 while (*item) { 3402 strncpy(base, item, sizeof(g.inf) - (base - g.inf)); 3403 if (g.inf[sizeof(g.inf) - 1]) { 3404 strcpy(g.inf + (sizeof(g.inf) - 4), "..."); 3405 bail("path too long: ", g.inf); 3406 } 3407 process(g.inf); 3408 item += strlen(item) + 1; 3409 } 3410 *cut = 0; 3411 3412 /* release list of entries */ 3413 FREE(roll); 3414 return; 3415 } 3416 3417 /* don't compress .gz (or provided suffix) files, unless -f */ 3418 if (!(g.force || g.list || g.decode) && len >= strlen(g.sufx) && 3419 strcmp(g.inf + len - strlen(g.sufx), g.sufx) == 0) { 3420 complain("%s ends with %s -- skipping", g.inf, g.sufx); 3421 return; 3422 } 
3423 3424 /* create output file only if input file has compressed suffix */ 3425 if (g.decode == 1 && !g.pipeout && !g.list) { 3426 int suf = compressed_suffix(g.inf); 3427 if (suf == 0) { 3428 complain("%s does not have compressed suffix -- skipping", 3429 g.inf); 3430 return; 3431 } 3432 len -= suf; 3433 } 3434 3435 /* open input file */ 3436 g.ind = open(g.inf, O_RDONLY, 0); 3437 if (g.ind < 0) 3438 bail("read error on ", g.inf); 3439 3440 /* prepare gzip header information for compression */ 3441 g.name = g.headis & 1 ? justname(g.inf) : NULL; 3442 g.mtime = g.headis & 2 ? st.st_mtime : 0; 3443 } 3444 SET_BINARY_MODE(g.ind); 3445 3446 /* if decoding or testing, try to read gzip header */ 3447 g.hname = NULL; 3448 if (g.decode) { 3449 in_init(); 3450 method = get_header(1); 3451 if (method != 8 && method != 257 && 3452 /* gzip -cdf acts like cat on uncompressed input */ 3453 !(method == -2 && g.force && g.pipeout && g.decode != 2 && 3454 !g.list)) { 3455 RELEASE(g.hname); 3456 if (g.ind != 0) 3457 close(g.ind); 3458 if (method != -1) 3459 complain(method < 0 ? 
"%s is not compressed -- skipping" : 3460 "%s has unknown compression method -- skipping", 3461 g.inf); 3462 return; 3463 } 3464 3465 /* if requested, test input file (possibly a special list) */ 3466 if (g.decode == 2) { 3467 if (method == 8) 3468 infchk(); 3469 else { 3470 unlzw(); 3471 if (g.list) { 3472 g.in_tot -= 3; 3473 show_info(method, 0, g.out_tot, 0); 3474 } 3475 } 3476 RELEASE(g.hname); 3477 if (g.ind != 0) 3478 close(g.ind); 3479 return; 3480 } 3481 } 3482 3483 /* if requested, just list information about input file */ 3484 if (g.list) { 3485 list_info(); 3486 RELEASE(g.hname); 3487 if (g.ind != 0) 3488 close(g.ind); 3489 return; 3490 } 3491 3492 /* create output file out, descriptor outd */ 3493 if (path == NULL || g.pipeout) { 3494 /* write to stdout */ 3495 g.outf = MALLOC(strlen("<stdout>") + 1); 3496 if (g.outf == NULL) 3497 bail("not enough memory", ""); 3498 strcpy(g.outf, "<stdout>"); 3499 g.outd = 1; 3500 if (!g.decode && !g.force && isatty(g.outd)) 3501 bail("trying to write compressed data to a terminal", 3502 " (use -f to force)"); 3503 } 3504 else { 3505 char *to = g.inf, *sufx = ""; 3506 size_t pre = 0; 3507 3508 /* select parts of the output file name */ 3509 if (g.decode) { 3510 /* for -dN or -dNT, use the path from the input file and the name 3511 from the header, stripping any path in the header name */ 3512 if ((g.headis & 1) != 0 && g.hname != NULL) { 3513 pre = justname(g.inf) - g.inf; 3514 to = justname(g.hname); 3515 len = strlen(to); 3516 } 3517 /* for -d or -dNn, replace abbreviated suffixes */ 3518 else if (strcmp(to + len, ".tgz") == 0) 3519 sufx = ".tar"; 3520 } 3521 else 3522 /* add appropriate suffix when compressing */ 3523 sufx = g.sufx; 3524 3525 /* create output file and open to write */ 3526 g.outf = MALLOC(pre + len + strlen(sufx) + 1); 3527 if (g.outf == NULL) 3528 bail("not enough memory", ""); 3529 memcpy(g.outf, g.inf, pre); 3530 memcpy(g.outf + pre, to, len); 3531 strcpy(g.outf + pre + len, sufx); 3532 g.outd = 
open(g.outf, O_CREAT | O_TRUNC | O_WRONLY | 3533 (g.force ? 0 : O_EXCL), 0600); 3534 3535 /* if exists and not -f, give user a chance to overwrite */ 3536 if (g.outd < 0 && errno == EEXIST && isatty(0) && g.verbosity) { 3537 int ch, reply; 3538 3539 fprintf(stderr, "%s exists -- overwrite (y/n)? ", g.outf); 3540 fflush(stderr); 3541 reply = -1; 3542 do { 3543 ch = getchar(); 3544 if (reply < 0 && ch != ' ' && ch != '\t') 3545 reply = ch == 'y' || ch == 'Y' ? 1 : 0; 3546 } while (ch != EOF && ch != '\n' && ch != '\r'); 3547 if (reply == 1) 3548 g.outd = open(g.outf, O_CREAT | O_TRUNC | O_WRONLY, 3549 0600); 3550 } 3551 3552 /* if exists and no overwrite, report and go on to next */ 3553 if (g.outd < 0 && errno == EEXIST) { 3554 complain("%s exists -- skipping", g.outf); 3555 RELEASE(g.outf); 3556 RELEASE(g.hname); 3557 if (g.ind != 0) 3558 close(g.ind); 3559 return; 3560 } 3561 3562 /* if some other error, give up */ 3563 if (g.outd < 0) 3564 bail("write error on ", g.outf); 3565 } 3566 SET_BINARY_MODE(g.outd); 3567 RELEASE(g.hname); 3568 3569 /* process ind to outd */ 3570 if (g.verbosity > 1) 3571 fprintf(stderr, "%s to %s ", g.inf, g.outf); 3572 if (g.decode) { 3573 if (method == 8) 3574 infchk(); 3575 else if (method == 257) 3576 unlzw(); 3577 else 3578 cat(); 3579 } 3580#ifndef NOTHREAD 3581 else if (g.procs > 1) 3582 parallel_compress(); 3583#endif 3584 else 3585 single_compress(0); 3586 if (g.verbosity > 1) { 3587 putc('\n', stderr); 3588 fflush(stderr); 3589 } 3590 3591 /* finish up, copy attributes, set times, delete original */ 3592 if (g.ind != 0) 3593 close(g.ind); 3594 if (g.outd != 1) { 3595 if (close(g.outd)) 3596 bail("write error on ", g.outf); 3597 g.outd = -1; /* now prevent deletion on interrupt */ 3598 if (g.ind != 0) { 3599 copymeta(g.inf, g.outf); 3600 if (!g.keep) 3601 unlink(g.inf); 3602 } 3603 if (g.decode && (g.headis & 2) != 0 && g.stamp) 3604 touch(g.outf, g.stamp); 3605 } 3606 RELEASE(g.outf); 3607} 3608 3609local char *helptext[] = { 
3610"Usage: pigz [options] [files ...]", 3611" will compress files in place, adding the suffix '.gz'. If no files are", 3612#ifdef NOTHREAD 3613" specified, stdin will be compressed to stdout. pigz does what gzip does.", 3614#else 3615" specified, stdin will be compressed to stdout. pigz does what gzip does,", 3616" but spreads the work over multiple processors and cores when compressing.", 3617#endif 3618"", 3619"Options:", 3620" -0 to -9, -11 Compression level (11 is much slower, a few % better)", 3621" --fast, --best Compression levels 1 and 9 respectively", 3622" -b, --blocksize mmm Set compression block size to mmmK (default 128K)", 3623" -c, --stdout Write all processed output to stdout (won't delete)", 3624" -d, --decompress Decompress the compressed input", 3625" -f, --force Force overwrite, compress .gz, links, and to terminal", 3626" -F --first Do iterations first, before block split for -11", 3627" -h, --help Display a help screen and quit", 3628" -i, --independent Compress blocks independently for damage recovery", 3629" -I, --iterations n Number of iterations for -11 optimization", 3630" -k, --keep Do not delete original file after processing", 3631" -K, --zip Compress to PKWare zip (.zip) single entry format", 3632" -l, --list List the contents of the compressed input", 3633" -L, --license Display the pigz license and quit", 3634" -M, --maxsplits n Maximum number of split blocks for -11", 3635" -n, --no-name Do not store or restore file name in/from header", 3636" -N, --name Store/restore file name and mod time in/from header", 3637" -O --oneblock Do not split into smaller blocks for -11", 3638#ifndef NOTHREAD 3639" -p, --processes n Allow up to n compression threads (default is the", 3640" number of online processors, or 8 if unknown)", 3641#endif 3642" -q, --quiet Print no messages, even on error", 3643" -r, --recursive Process the contents of all subdirectories", 3644" -R, --rsyncable Input-determined block locations for rsync", 3645" -S, --suffix 
.sss Use suffix .sss instead of .gz (for compression)", 3646" -t, --test Test the integrity of the compressed input", 3647" -T, --no-time Do not store or restore mod time in/from header", 3648#ifdef DEBUG 3649" -v, --verbose Provide more verbose output (-vv to debug)", 3650#else 3651" -v, --verbose Provide more verbose output", 3652#endif 3653" -V --version Show the version of pigz", 3654" -z, --zlib Compress to zlib (.zz) instead of gzip format", 3655" -- All arguments after \"--\" are treated as files" 3656}; 3657 3658/* display the help text above */ 3659local void help(void) 3660{ 3661 int n; 3662 3663 if (g.verbosity == 0) 3664 return; 3665 for (n = 0; n < (int)(sizeof(helptext) / sizeof(char *)); n++) 3666 fprintf(stderr, "%s\n", helptext[n]); 3667 fflush(stderr); 3668 exit(0); 3669} 3670 3671#ifndef NOTHREAD 3672 3673/* try to determine the number of processors */ 3674local int nprocs(int n) 3675{ 3676# ifdef _SC_NPROCESSORS_ONLN 3677 n = (int)sysconf(_SC_NPROCESSORS_ONLN); 3678# else 3679# ifdef _SC_NPROC_ONLN 3680 n = (int)sysconf(_SC_NPROC_ONLN); 3681# else 3682# ifdef __hpux 3683 struct pst_dynamic psd; 3684 3685 if (pstat_getdynamic(&psd, sizeof(psd), (size_t)1, 0) != -1) 3686 n = psd.psd_proc_cnt; 3687# endif 3688# endif 3689# endif 3690 return n; 3691} 3692 3693#endif 3694 3695/* set option defaults */ 3696local void defaults(void) 3697{ 3698 g.level = Z_DEFAULT_COMPRESSION; 3699 /* default zopfli options as set by ZopfliInitOptions(): 3700 verbose = 0 3701 numiterations = 15 3702 blocksplitting = 1 3703 blocksplittinglast = 0 3704 blocksplittingmax = 15 3705 */ 3706 ZopfliInitOptions(&g.zopts); 3707#ifdef NOTHREAD 3708 g.procs = 1; 3709#else 3710 g.procs = nprocs(8); 3711#endif 3712 g.block = 131072UL; /* 128K */ 3713 g.rsync = 0; /* don't do rsync blocking */ 3714 g.setdict = 1; /* initialize dictionary each thread */ 3715 g.verbosity = 1; /* normal message level */ 3716 g.headis = 3; /* store/restore name and timestamp */ 3717 g.pipeout = 0; /* 
don't force output to stdout */ 3718 g.sufx = ".gz"; /* compressed file suffix */ 3719 g.decode = 0; /* compress */ 3720 g.list = 0; /* compress */ 3721 g.keep = 0; /* delete input file once compressed */ 3722 g.force = 0; /* don't overwrite, don't compress links */ 3723 g.recurse = 0; /* don't go into directories */ 3724 g.form = 0; /* use gzip format */ 3725} 3726 3727/* long options conversion to short options */ 3728local char *longopts[][2] = { 3729 {"LZW", "Z"}, {"ascii", "a"}, {"best", "9"}, {"bits", "Z"}, 3730 {"blocksize", "b"}, {"decompress", "d"}, {"fast", "1"}, {"first", "F"}, 3731 {"force", "f"}, {"help", "h"}, {"independent", "i"}, {"iterations", "I"}, 3732 {"keep", "k"}, {"license", "L"}, {"list", "l"}, {"maxsplits", "M"}, 3733 {"name", "N"}, {"no-name", "n"}, {"no-time", "T"}, {"oneblock", "O"}, 3734 {"processes", "p"}, {"quiet", "q"}, {"recursive", "r"}, {"rsyncable", "R"}, 3735 {"silent", "q"}, {"stdout", "c"}, {"suffix", "S"}, {"test", "t"}, 3736 {"to-stdout", "c"}, {"uncompress", "d"}, {"verbose", "v"}, 3737 {"version", "V"}, {"zip", "K"}, {"zlib", "z"}}; 3738#define NLOPTS (sizeof(longopts) / (sizeof(char *) << 1)) 3739 3740/* either new buffer size, new compression level, or new number of processes -- 3741 get rid of old buffers and threads to force the creation of new ones with 3742 the new settings */ 3743local void new_opts(void) 3744{ 3745 single_compress(1); 3746#ifndef NOTHREAD 3747 finish_jobs(); 3748#endif 3749} 3750 3751/* verify that arg is only digits, and if so, return the decimal value */ 3752local size_t num(char *arg) 3753{ 3754 char *str = arg; 3755 size_t val = 0; 3756 3757 if (*str == 0) 3758 bail("internal error: empty parameter", ""); 3759 do { 3760 if (*str < '0' || *str > '9' || 3761 (val && ((~(size_t)0) - (*str - '0')) / val < 10)) 3762 bail("invalid numeric parameter: ", arg); 3763 val = val * 10 + (*str - '0'); 3764 } while (*++str); 3765 return val; 3766} 3767 3768/* process an option, return true if a file name and 
not an option */ 3769local int option(char *arg) 3770{ 3771 static int get = 0; /* if not zero, look for option parameter */ 3772 char bad[3] = "-X"; /* for error messages (X is replaced) */ 3773 3774 /* if no argument or dash option, check status of get */ 3775 if (get && (arg == NULL || *arg == '-')) { 3776 bad[1] = "bpSIM"[get - 1]; 3777 bail("missing parameter after ", bad); 3778 } 3779 if (arg == NULL) 3780 return 0; 3781 3782 /* process long option or short options */ 3783 if (*arg == '-') { 3784 /* a single dash will be interpreted as stdin */ 3785 if (*++arg == 0) 3786 return 1; 3787 3788 /* process long option (fall through with equivalent short option) */ 3789 if (*arg == '-') { 3790 int j; 3791 3792 arg++; 3793 for (j = NLOPTS - 1; j >= 0; j--) 3794 if (strcmp(arg, longopts[j][0]) == 0) { 3795 arg = longopts[j][1]; 3796 break; 3797 } 3798 if (j < 0) 3799 bail("invalid option: ", arg - 2); 3800 } 3801 3802 /* process short options (more than one allowed after dash) */ 3803 do { 3804 /* if looking for a parameter, don't process more single character 3805 options until we have the parameter */ 3806 if (get) { 3807 if (get == 3) 3808 bail("invalid usage: -s must be followed by space", ""); 3809 break; /* allow -pnnn and -bnnn, fall to parameter code */ 3810 } 3811 3812 /* process next single character option or compression level */ 3813 bad[1] = *arg; 3814 switch (*arg) { 3815 case '0': case '1': case '2': case '3': case '4': 3816 case '5': case '6': case '7': case '8': case '9': 3817 g.level = *arg - '0'; 3818 while (arg[1] >= '0' && arg[1] <= '9') { 3819 if (g.level && (INT_MAX - (arg[1] - '0')) / g.level < 10) 3820 bail("only levels 0..9 and 11 are allowed", ""); 3821 g.level = g.level * 10 + *++arg - '0'; 3822 } 3823 if (g.level == 10 || g.level > 11) 3824 bail("only levels 0..9 and 11 are allowed", ""); 3825 new_opts(); 3826 break; 3827 case 'F': g.zopts.blocksplittinglast = 1; break; 3828 case 'I': get = 4; break; 3829 case 'K': g.form = 2; g.sufx = 
".zip"; break; 3830 case 'L': 3831 fputs(VERSION, stderr); 3832 fputs("Copyright (C) 2007, 2008, 2009, 2010, 2011, 2012, 2013" 3833 " Mark Adler\n", 3834 stderr); 3835 fputs("Subject to the terms of the zlib license.\n", 3836 stderr); 3837 fputs("No warranty is provided or implied.\n", stderr); 3838 exit(0); 3839 case 'M': get = 5; break; 3840 case 'N': g.headis = 3; break; 3841 case 'O': g.zopts.blocksplitting = 0; break; 3842 case 'R': g.rsync = 1; break; 3843 case 'S': get = 3; break; 3844 case 'T': g.headis &= ~2; break; 3845 case 'V': fputs(VERSION, stderr); exit(0); 3846 case 'Z': 3847 bail("invalid option: LZW output not supported: ", bad); 3848 case 'a': 3849 bail("invalid option: ascii conversion not supported: ", bad); 3850 case 'b': get = 1; break; 3851 case 'c': g.pipeout = 1; break; 3852 case 'd': g.decode = 1; g.headis = 0; break; 3853 case 'f': g.force = 1; break; 3854 case 'h': help(); break; 3855 case 'i': g.setdict = 0; break; 3856 case 'k': g.keep = 1; break; 3857 case 'l': g.list = 1; break; 3858 case 'n': g.headis &= ~1; break; 3859 case 'p': get = 2; break; 3860 case 'q': g.verbosity = 0; break; 3861 case 'r': g.recurse = 1; break; 3862 case 't': g.decode = 2; break; 3863 case 'v': g.verbosity++; break; 3864 case 'z': g.form = 1; g.sufx = ".zz"; break; 3865 default: 3866 bail("invalid option: ", bad); 3867 } 3868 } while (*++arg); 3869 if (*arg == 0) 3870 return 0; 3871 } 3872 3873 /* process option parameter for -b, -p, -S, -I, or -M */ 3874 if (get) { 3875 size_t n; 3876 3877 if (get == 1) { 3878 n = num(arg); 3879 g.block = n << 10; /* chunk size */ 3880 if (g.block < DICT) 3881 bail("block size too small (must be >= 32K)", ""); 3882 if (n != g.block >> 10 || 3883 OUTPOOL(g.block) < g.block || 3884 (ssize_t)OUTPOOL(g.block) < 0 || 3885 g.block > (1UL << 22)) 3886 bail("block size too large: ", arg); 3887 new_opts(); 3888 } 3889 else if (get == 2) { 3890 n = num(arg); 3891 g.procs = (int)n; /* # processes */ 3892 if (g.procs < 1) 3893 
bail("invalid number of processes: ", arg); 3894 if ((size_t)g.procs != n || INBUFS(g.procs) < 1) 3895 bail("too many processes: ", arg); 3896#ifdef NOTHREAD 3897 if (g.procs > 1) 3898 bail("compiled without threads", ""); 3899#endif 3900 new_opts(); 3901 } 3902 else if (get == 3) 3903 g.sufx = arg; /* gz suffix */ 3904 else if (get == 4) 3905 g.zopts.numiterations = num(arg); /* optimization iterations */ 3906 else if (get == 5) 3907 g.zopts.blocksplittingmax = num(arg); /* max block splits */ 3908 get = 0; 3909 return 0; 3910 } 3911 3912 /* neither an option nor parameter */ 3913 return 1; 3914} 3915 3916/* catch termination signal */ 3917local void cut_short(int sig) 3918{ 3919 (void)sig; 3920 Trace(("termination by user")); 3921 if (g.outd != -1 && g.outf != NULL) 3922 unlink(g.outf); 3923 log_dump(); 3924 _exit(1); 3925} 3926 3927/* Process arguments, compress in the gzip format. Note that procs must be at 3928 least two in order to provide a dictionary in one work unit for the other 3929 work unit, and that size must be at least 32K to store a full dictionary. */ 3930int main(int argc, char **argv) 3931{ 3932 int n; /* general index */ 3933 int noop; /* true to suppress option decoding */ 3934 unsigned long done; /* number of named files processed */ 3935 char *opts, *p; /* environment default options, marker */ 3936 3937 /* initialize globals */ 3938 g.outf = NULL; 3939 g.first = 1; 3940 g.warned = 0; 3941 g.hname = NULL; 3942 3943 /* save pointer to program name for error messages */ 3944 p = strrchr(argv[0], '/'); 3945 p = p == NULL ? argv[0] : p + 1; 3946 g.prog = *p ? 
p : "pigz"; 3947 3948 /* prepare for interrupts and logging */ 3949 signal(SIGINT, cut_short); 3950#ifndef NOTHREAD 3951 yarn_prefix = g.prog; /* prefix for yarn error messages */ 3952 yarn_abort = cut_short; /* call on thread error */ 3953#endif 3954#ifdef DEBUG 3955 gettimeofday(&start, NULL); /* starting time for log entries */ 3956 log_init(); /* initialize logging */ 3957#endif 3958 3959 /* set all options to defaults */ 3960 defaults(); 3961 3962 /* process user environment variable defaults in GZIP */ 3963 opts = getenv("GZIP"); 3964 if (opts != NULL) { 3965 while (*opts) { 3966 while (*opts == ' ' || *opts == '\t') 3967 opts++; 3968 p = opts; 3969 while (*p && *p != ' ' && *p != '\t') 3970 p++; 3971 n = *p; 3972 *p = 0; 3973 if (option(opts)) 3974 bail("cannot provide files in GZIP environment variable", ""); 3975 opts = p + (n ? 1 : 0); 3976 } 3977 option(NULL); 3978 } 3979 3980 /* process user environment variable defaults in PIGZ as well */ 3981 opts = getenv("PIGZ"); 3982 if (opts != NULL) { 3983 while (*opts) { 3984 while (*opts == ' ' || *opts == '\t') 3985 opts++; 3986 p = opts; 3987 while (*p && *p != ' ' && *p != '\t') 3988 p++; 3989 n = *p; 3990 *p = 0; 3991 if (option(opts)) 3992 bail("cannot provide files in PIGZ environment variable", ""); 3993 opts = p + (n ? 1 : 0); 3994 } 3995 option(NULL); 3996 } 3997 3998 /* decompress if named "unpigz" or "gunzip", to stdout if "*cat" */ 3999 if (strcmp(g.prog, "unpigz") == 0 || strcmp(g.prog, "gunzip") == 0) 4000 g.decode = 1, g.headis = 0; 4001 if ((n = strlen(g.prog)) > 2 && strcmp(g.prog + n - 3, "cat") == 0) 4002 g.decode = 1, g.headis = 0, g.pipeout = 1; 4003 4004 /* if no arguments and compressed data to or from a terminal, show help */ 4005 if (argc < 2 && isatty(g.decode ? 
0 : 1)) 4006 help(); 4007 4008 /* process command-line arguments, no options after "--" */ 4009 done = noop = 0; 4010 for (n = 1; n < argc; n++) 4011 if (noop == 0 && strcmp(argv[n], "--") == 0) { 4012 noop = 1; 4013 option(NULL); 4014 } 4015 else if (noop || option(argv[n])) { /* true if file name, process it */ 4016 if (done == 1 && g.pipeout && !g.decode && !g.list && g.form > 1) 4017 complain("warning: output will be concatenated zip files -- " 4018 "will not be able to extract"); 4019 process(strcmp(argv[n], "-") ? argv[n] : NULL); 4020 done++; 4021 } 4022 option(NULL); 4023 4024 /* list stdin or compress stdin to stdout if no file names provided */ 4025 if (done == 0) 4026 process(NULL); 4027 4028 /* done -- release resources, show log */ 4029 new_opts(); 4030 log_dump(); 4031 return g.warned ? 2 : 0; 4032} 4033