archive_write_set_compression_gzip.c revision 229592
1228753Smm/*- 2228753Smm * Copyright (c) 2003-2007 Tim Kientzle 3228753Smm * All rights reserved. 4228753Smm * 5228753Smm * Redistribution and use in source and binary forms, with or without 6228753Smm * modification, are permitted provided that the following conditions 7228753Smm * are met: 8228753Smm * 1. Redistributions of source code must retain the above copyright 9228753Smm * notice, this list of conditions and the following disclaimer. 10228753Smm * 2. Redistributions in binary form must reproduce the above copyright 11228753Smm * notice, this list of conditions and the following disclaimer in the 12228753Smm * documentation and/or other materials provided with the distribution. 13228753Smm * 14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR 15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, 18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24228753Smm */ 25228753Smm 26228753Smm#include "archive_platform.h" 27228753Smm 28229592Smm__FBSDID("$FreeBSD: stable/9/contrib/libarchive/libarchive/archive_write_set_compression_gzip.c 229592 2012-01-05 12:06:54Z mm $"); 29228753Smm 30228753Smm#ifdef HAVE_ERRNO_H 31228753Smm#include <errno.h> 32228753Smm#endif 33228753Smm#ifdef HAVE_STDLIB_H 34228753Smm#include <stdlib.h> 35228753Smm#endif 36228753Smm#ifdef HAVE_STRING_H 37228753Smm#include <string.h> 38228753Smm#endif 39228753Smm#include <time.h> 40228753Smm#ifdef HAVE_ZLIB_H 41228753Smm#include <zlib.h> 42228753Smm#endif 43228753Smm 44228753Smm#include "archive.h" 45228753Smm#include "archive_private.h" 46228753Smm#include "archive_write_private.h" 47228753Smm 48228753Smm#ifndef HAVE_ZLIB_H 49228753Smmint 50228753Smmarchive_write_set_compression_gzip(struct archive *a) 51228753Smm{ 52228753Smm archive_set_error(a, ARCHIVE_ERRNO_MISC, 53228753Smm "gzip compression not supported on this platform"); 54228753Smm return (ARCHIVE_FATAL); 55228753Smm} 56228753Smm#else 57228753Smm/* Don't compile this if we don't have zlib. */ 58228753Smm 59228753Smmstruct private_data { 60228753Smm z_stream stream; 61228753Smm int64_t total_in; 62228753Smm unsigned char *compressed; 63228753Smm size_t compressed_buffer_size; 64228753Smm unsigned long crc; 65228753Smm}; 66228753Smm 67228753Smmstruct private_config { 68228753Smm int compression_level; 69228753Smm}; 70228753Smm 71228753Smm 72228753Smm/* 73228753Smm * Yuck. zlib.h is not const-correct, so I need this one bit 74228753Smm * of ugly hackery to convert a const * pointer to a non-const pointer. 75228753Smm */ 76228753Smm#define SET_NEXT_IN(st,src) \ 77228753Smm (st)->stream.next_in = (Bytef *)(uintptr_t)(const void *)(src) 78228753Smm 79228753Smmstatic int archive_compressor_gzip_finish(struct archive_write *); 80228753Smmstatic int archive_compressor_gzip_init(struct archive_write *); 81228753Smmstatic int archive_compressor_gzip_options(struct archive_write *, 82228753Smm const char *, const char *); 83228753Smmstatic int archive_compressor_gzip_write(struct archive_write *, 84228753Smm const void *, size_t); 85228753Smmstatic int drive_compressor(struct archive_write *, struct private_data *, 86228753Smm int finishing); 87228753Smm 88228753Smm 89228753Smm/* 90228753Smm * Allocate, initialize and return a archive object. 91228753Smm */ 92228753Smmint 93228753Smmarchive_write_set_compression_gzip(struct archive *_a) 94228753Smm{ 95228753Smm struct archive_write *a = (struct archive_write *)_a; 96228753Smm struct private_config *config; 97228753Smm __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 98228753Smm ARCHIVE_STATE_NEW, "archive_write_set_compression_gzip"); 99228753Smm config = malloc(sizeof(*config)); 100228753Smm if (config == NULL) { 101228753Smm archive_set_error(&a->archive, ENOMEM, "Out of memory"); 102228753Smm return (ARCHIVE_FATAL); 103228753Smm } 104228753Smm a->compressor.config = config; 105228753Smm a->compressor.finish = &archive_compressor_gzip_finish; 106228753Smm config->compression_level = Z_DEFAULT_COMPRESSION; 107228753Smm a->compressor.init = &archive_compressor_gzip_init; 108228753Smm a->compressor.options = &archive_compressor_gzip_options; 109228753Smm a->archive.compression_code = ARCHIVE_COMPRESSION_GZIP; 110228753Smm a->archive.compression_name = "gzip"; 111228753Smm return (ARCHIVE_OK); 112228753Smm} 113228753Smm 114228753Smm/* 115228753Smm * Setup callback. 116228753Smm */ 117228753Smmstatic int 118228753Smmarchive_compressor_gzip_init(struct archive_write *a) 119228753Smm{ 120228753Smm int ret; 121228753Smm struct private_data *state; 122228753Smm struct private_config *config; 123228753Smm time_t t; 124228753Smm 125228753Smm config = (struct private_config *)a->compressor.config; 126228753Smm 127228753Smm if (a->client_opener != NULL) { 128228753Smm ret = (a->client_opener)(&a->archive, a->client_data); 129228753Smm if (ret != ARCHIVE_OK) 130228753Smm return (ret); 131228753Smm } 132228753Smm 133228753Smm /* 134228753Smm * The next check is a temporary workaround until the gzip 135228753Smm * code can be overhauled some. The code should not require 136228753Smm * that compressed_buffer_size == bytes_per_block. Removing 137228753Smm * this assumption will allow us to compress larger chunks at 138228753Smm * a time, which should improve overall performance 139228753Smm * marginally. As a minor side-effect, such a cleanup would 140228753Smm * allow us to support truly arbitrary block sizes. 141228753Smm */ 142228753Smm if (a->bytes_per_block < 10) { 143228753Smm archive_set_error(&a->archive, EINVAL, 144228753Smm "GZip compressor requires a minimum 10 byte block size"); 145228753Smm return (ARCHIVE_FATAL); 146228753Smm } 147228753Smm 148228753Smm state = (struct private_data *)malloc(sizeof(*state)); 149228753Smm if (state == NULL) { 150228753Smm archive_set_error(&a->archive, ENOMEM, 151228753Smm "Can't allocate data for compression"); 152228753Smm return (ARCHIVE_FATAL); 153228753Smm } 154228753Smm memset(state, 0, sizeof(*state)); 155228753Smm 156228753Smm /* 157228753Smm * See comment above. We should set compressed_buffer_size to 158228753Smm * max(bytes_per_block, 65536), but the code can't handle that yet. 159228753Smm */ 160228753Smm state->compressed_buffer_size = a->bytes_per_block; 161228753Smm state->compressed = (unsigned char *)malloc(state->compressed_buffer_size); 162228753Smm state->crc = crc32(0L, NULL, 0); 163228753Smm 164228753Smm if (state->compressed == NULL) { 165228753Smm archive_set_error(&a->archive, ENOMEM, 166228753Smm "Can't allocate data for compression buffer"); 167228753Smm free(state); 168228753Smm return (ARCHIVE_FATAL); 169228753Smm } 170228753Smm 171228753Smm state->stream.next_out = state->compressed; 172228753Smm state->stream.avail_out = state->compressed_buffer_size; 173228753Smm 174228753Smm /* Prime output buffer with a gzip header. */ 175228753Smm t = time(NULL); 176228753Smm state->compressed[0] = 0x1f; /* GZip signature bytes */ 177228753Smm state->compressed[1] = 0x8b; 178228753Smm state->compressed[2] = 0x08; /* "Deflate" compression */ 179228753Smm state->compressed[3] = 0; /* No options */ 180228753Smm state->compressed[4] = (t)&0xff; /* Timestamp */ 181228753Smm state->compressed[5] = (t>>8)&0xff; 182228753Smm state->compressed[6] = (t>>16)&0xff; 183228753Smm state->compressed[7] = (t>>24)&0xff; 184228753Smm state->compressed[8] = 0; /* No deflate options */ 185228753Smm state->compressed[9] = 3; /* OS=Unix */ 186228753Smm state->stream.next_out += 10; 187228753Smm state->stream.avail_out -= 10; 188228753Smm 189228753Smm a->compressor.write = archive_compressor_gzip_write; 190228753Smm 191228753Smm /* Initialize compression library. */ 192228753Smm ret = deflateInit2(&(state->stream), 193228753Smm config->compression_level, 194228753Smm Z_DEFLATED, 195228753Smm -15 /* < 0 to suppress zlib header */, 196228753Smm 8, 197228753Smm Z_DEFAULT_STRATEGY); 198228753Smm 199228753Smm if (ret == Z_OK) { 200228753Smm a->compressor.data = state; 201228753Smm return (0); 202228753Smm } 203228753Smm 204228753Smm /* Library setup failed: clean up. */ 205228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Internal error " 206228753Smm "initializing compression library"); 207228753Smm free(state->compressed); 208228753Smm free(state); 209228753Smm 210228753Smm /* Override the error message if we know what really went wrong. */ 211228753Smm switch (ret) { 212228753Smm case Z_STREAM_ERROR: 213228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 214228753Smm "Internal error initializing " 215228753Smm "compression library: invalid setup parameter"); 216228753Smm break; 217228753Smm case Z_MEM_ERROR: 218228753Smm archive_set_error(&a->archive, ENOMEM, "Internal error initializing " 219228753Smm "compression library"); 220228753Smm break; 221228753Smm case Z_VERSION_ERROR: 222228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 223228753Smm "Internal error initializing " 224228753Smm "compression library: invalid library version"); 225228753Smm break; 226228753Smm } 227228753Smm 228228753Smm return (ARCHIVE_FATAL); 229228753Smm} 230228753Smm 231228753Smm/* 232228753Smm * Set write options. 233228753Smm */ 234228753Smmstatic int 235228753Smmarchive_compressor_gzip_options(struct archive_write *a, const char *key, 236228753Smm const char *value) 237228753Smm{ 238228753Smm struct private_config *config; 239228753Smm 240228753Smm config = (struct private_config *)a->compressor.config; 241228753Smm if (strcmp(key, "compression-level") == 0) { 242228753Smm if (value == NULL || !(value[0] >= '0' && value[0] <= '9') || 243228753Smm value[1] != '\0') 244228753Smm return (ARCHIVE_WARN); 245228753Smm config->compression_level = value[0] - '0'; 246228753Smm return (ARCHIVE_OK); 247228753Smm } 248228753Smm 249228753Smm return (ARCHIVE_WARN); 250228753Smm} 251228753Smm 252228753Smm/* 253228753Smm * Write data to the compressed stream. 254228753Smm */ 255228753Smmstatic int 256228753Smmarchive_compressor_gzip_write(struct archive_write *a, const void *buff, 257228753Smm size_t length) 258228753Smm{ 259228753Smm struct private_data *state; 260228753Smm int ret; 261228753Smm 262228753Smm state = (struct private_data *)a->compressor.data; 263228753Smm if (a->client_writer == NULL) { 264228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 265228753Smm "No write callback is registered? " 266228753Smm "This is probably an internal programming error."); 267228753Smm return (ARCHIVE_FATAL); 268228753Smm } 269228753Smm 270228753Smm /* Update statistics */ 271228753Smm state->crc = crc32(state->crc, (const Bytef *)buff, length); 272228753Smm state->total_in += length; 273228753Smm 274228753Smm /* Compress input data to output buffer */ 275228753Smm SET_NEXT_IN(state, buff); 276228753Smm state->stream.avail_in = length; 277228753Smm if ((ret = drive_compressor(a, state, 0)) != ARCHIVE_OK) 278228753Smm return (ret); 279228753Smm 280228753Smm a->archive.file_position += length; 281228753Smm return (ARCHIVE_OK); 282228753Smm} 283228753Smm 284228753Smm/* 285228753Smm * Finish the compression... 286228753Smm */ 287228753Smmstatic int 288228753Smmarchive_compressor_gzip_finish(struct archive_write *a) 289228753Smm{ 290228753Smm ssize_t block_length, target_block_length, bytes_written; 291228753Smm int ret; 292228753Smm struct private_data *state; 293228753Smm unsigned tocopy; 294228753Smm unsigned char trailer[8]; 295228753Smm 296228753Smm state = (struct private_data *)a->compressor.data; 297228753Smm ret = 0; 298228753Smm if (state != NULL) { 299228753Smm if (a->client_writer == NULL) { 300228753Smm archive_set_error(&a->archive, 301228753Smm ARCHIVE_ERRNO_PROGRAMMER, 302228753Smm "No write callback is registered? " 303228753Smm "This is probably an internal programming error."); 304228753Smm ret = ARCHIVE_FATAL; 305228753Smm goto cleanup; 306228753Smm } 307228753Smm 308228753Smm /* By default, always pad the uncompressed data. */ 309228753Smm if (a->pad_uncompressed) { 310228753Smm tocopy = a->bytes_per_block - 311228753Smm (state->total_in % a->bytes_per_block); 312228753Smm while (tocopy > 0 && tocopy < (unsigned)a->bytes_per_block) { 313228753Smm SET_NEXT_IN(state, a->nulls); 314228753Smm state->stream.avail_in = tocopy < a->null_length ? 315228753Smm tocopy : a->null_length; 316228753Smm state->crc = crc32(state->crc, a->nulls, 317228753Smm state->stream.avail_in); 318228753Smm state->total_in += state->stream.avail_in; 319228753Smm tocopy -= state->stream.avail_in; 320228753Smm ret = drive_compressor(a, state, 0); 321228753Smm if (ret != ARCHIVE_OK) 322228753Smm goto cleanup; 323228753Smm } 324228753Smm } 325228753Smm 326228753Smm /* Finish compression cycle */ 327228753Smm if (((ret = drive_compressor(a, state, 1))) != ARCHIVE_OK) 328228753Smm goto cleanup; 329228753Smm 330228753Smm /* Build trailer: 4-byte CRC and 4-byte length. */ 331228753Smm trailer[0] = (state->crc)&0xff; 332228753Smm trailer[1] = (state->crc >> 8)&0xff; 333228753Smm trailer[2] = (state->crc >> 16)&0xff; 334228753Smm trailer[3] = (state->crc >> 24)&0xff; 335228753Smm trailer[4] = (state->total_in)&0xff; 336228753Smm trailer[5] = (state->total_in >> 8)&0xff; 337228753Smm trailer[6] = (state->total_in >> 16)&0xff; 338228753Smm trailer[7] = (state->total_in >> 24)&0xff; 339228753Smm 340228753Smm /* Add trailer to current block. */ 341228753Smm tocopy = 8; 342228753Smm if (tocopy > state->stream.avail_out) 343228753Smm tocopy = state->stream.avail_out; 344228753Smm memcpy(state->stream.next_out, trailer, tocopy); 345228753Smm state->stream.next_out += tocopy; 346228753Smm state->stream.avail_out -= tocopy; 347228753Smm 348228753Smm /* If it overflowed, flush and start a new block. */ 349228753Smm if (tocopy < 8) { 350228753Smm bytes_written = (a->client_writer)(&a->archive, a->client_data, 351228753Smm state->compressed, state->compressed_buffer_size); 352228753Smm if (bytes_written <= 0) { 353228753Smm ret = ARCHIVE_FATAL; 354228753Smm goto cleanup; 355228753Smm } 356228753Smm a->archive.raw_position += bytes_written; 357228753Smm state->stream.next_out = state->compressed; 358228753Smm state->stream.avail_out = state->compressed_buffer_size; 359228753Smm memcpy(state->stream.next_out, trailer + tocopy, 8-tocopy); 360228753Smm state->stream.next_out += 8-tocopy; 361228753Smm state->stream.avail_out -= 8-tocopy; 362228753Smm } 363228753Smm 364228753Smm /* Optionally, pad the final compressed block. */ 365228753Smm block_length = state->stream.next_out - state->compressed; 366228753Smm 367228753Smm /* Tricky calculation to determine size of last block. */ 368228753Smm if (a->bytes_in_last_block <= 0) 369228753Smm /* Default or Zero: pad to full block */ 370228753Smm target_block_length = a->bytes_per_block; 371228753Smm else 372228753Smm /* Round length to next multiple of bytes_in_last_block. */ 373228753Smm target_block_length = a->bytes_in_last_block * 374228753Smm ( (block_length + a->bytes_in_last_block - 1) / 375228753Smm a->bytes_in_last_block); 376228753Smm if (target_block_length > a->bytes_per_block) 377228753Smm target_block_length = a->bytes_per_block; 378228753Smm if (block_length < target_block_length) { 379228753Smm memset(state->stream.next_out, 0, 380228753Smm target_block_length - block_length); 381228753Smm block_length = target_block_length; 382228753Smm } 383228753Smm 384228753Smm /* Write the last block */ 385228753Smm bytes_written = (a->client_writer)(&a->archive, a->client_data, 386228753Smm state->compressed, block_length); 387228753Smm if (bytes_written <= 0) { 388228753Smm ret = ARCHIVE_FATAL; 389228753Smm goto cleanup; 390228753Smm } 391228753Smm a->archive.raw_position += bytes_written; 392228753Smm 393228753Smm /* Cleanup: shut down compressor, release memory, etc. */ 394228753Smm cleanup: 395228753Smm switch (deflateEnd(&(state->stream))) { 396228753Smm case Z_OK: 397228753Smm break; 398228753Smm default: 399228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 400228753Smm "Failed to clean up compressor"); 401228753Smm ret = ARCHIVE_FATAL; 402228753Smm } 403228753Smm free(state->compressed); 404228753Smm free(state); 405228753Smm } 406228753Smm /* Clean up config area even if we never initialized. */ 407228753Smm free(a->compressor.config); 408228753Smm a->compressor.config = NULL; 409228753Smm return (ret); 410228753Smm} 411228753Smm 412228753Smm/* 413228753Smm * Utility function to push input data through compressor, 414228753Smm * writing full output blocks as necessary. 415228753Smm * 416228753Smm * Note that this handles both the regular write case (finishing == 417228753Smm * false) and the end-of-archive case (finishing == true). 418228753Smm */ 419228753Smmstatic int 420228753Smmdrive_compressor(struct archive_write *a, struct private_data *state, int finishing) 421228753Smm{ 422228753Smm ssize_t bytes_written; 423228753Smm int ret; 424228753Smm 425228753Smm for (;;) { 426228753Smm if (state->stream.avail_out == 0) { 427228753Smm bytes_written = (a->client_writer)(&a->archive, 428228753Smm a->client_data, state->compressed, 429228753Smm state->compressed_buffer_size); 430228753Smm if (bytes_written <= 0) { 431228753Smm /* TODO: Handle this write failure */ 432228753Smm return (ARCHIVE_FATAL); 433228753Smm } else if ((size_t)bytes_written < state->compressed_buffer_size) { 434228753Smm /* Short write: Move remaining to 435228753Smm * front of block and keep filling */ 436228753Smm memmove(state->compressed, 437228753Smm state->compressed + bytes_written, 438228753Smm state->compressed_buffer_size - bytes_written); 439228753Smm } 440228753Smm a->archive.raw_position += bytes_written; 441228753Smm state->stream.next_out 442228753Smm = state->compressed + 443228753Smm state->compressed_buffer_size - bytes_written; 444228753Smm state->stream.avail_out = bytes_written; 445228753Smm } 446228753Smm 447228753Smm /* If there's nothing to do, we're done. */ 448228753Smm if (!finishing && state->stream.avail_in == 0) 449228753Smm return (ARCHIVE_OK); 450228753Smm 451228753Smm ret = deflate(&(state->stream), 452228753Smm finishing ? Z_FINISH : Z_NO_FLUSH ); 453228753Smm 454228753Smm switch (ret) { 455228753Smm case Z_OK: 456228753Smm /* In non-finishing case, check if compressor 457228753Smm * consumed everything */ 458228753Smm if (!finishing && state->stream.avail_in == 0) 459228753Smm return (ARCHIVE_OK); 460228753Smm /* In finishing case, this return always means 461228753Smm * there's more work */ 462228753Smm break; 463228753Smm case Z_STREAM_END: 464228753Smm /* This return can only occur in finishing case. */ 465228753Smm return (ARCHIVE_OK); 466228753Smm default: 467228753Smm /* Any other return value indicates an error. */ 468228753Smm archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 469228753Smm "GZip compression failed:" 470228753Smm " deflate() call returned status %d", 471228753Smm ret); 472228753Smm return (ARCHIVE_FATAL); 473228753Smm } 474228753Smm } 475228753Smm} 476228753Smm 477228753Smm#endif /* HAVE_ZLIB_H */ 478