archive_write_set_compression_gzip.c revision 267654
/*-
 * Copyright (c) 2003-2007 Tim Kientzle
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24169689Skan */ 25169689Skan 26169689Skan#include "archive_platform.h" 27169689Skan 28169689Skan__FBSDID("$FreeBSD: releng/9.3/contrib/libarchive/libarchive/archive_write_set_compression_gzip.c 229592 2012-01-05 12:06:54Z mm $"); 29169689Skan 30169689Skan#ifdef HAVE_ERRNO_H 31169689Skan#include <errno.h> 32169689Skan#endif 33169689Skan#ifdef HAVE_STDLIB_H 34169689Skan#include <stdlib.h> 35169689Skan#endif 36169689Skan#ifdef HAVE_STRING_H 37169689Skan#include <string.h> 38169689Skan#endif 39169689Skan#include <time.h> 40169689Skan#ifdef HAVE_ZLIB_H 41169689Skan#include <zlib.h> 42169689Skan#endif 43169689Skan 44169689Skan#include "archive.h" 45169689Skan#include "archive_private.h" 46169689Skan#include "archive_write_private.h" 47169689Skan 48169689Skan#ifndef HAVE_ZLIB_H 49169689Skanint 50169689Skanarchive_write_set_compression_gzip(struct archive *a) 51169689Skan{ 52169689Skan archive_set_error(a, ARCHIVE_ERRNO_MISC, 53169689Skan "gzip compression not supported on this platform"); 54169689Skan return (ARCHIVE_FATAL); 55169689Skan} 56169689Skan#else 57169689Skan/* Don't compile this if we don't have zlib. */ 58169689Skan 59169689Skanstruct private_data { 60169689Skan z_stream stream; 61169689Skan int64_t total_in; 62169689Skan unsigned char *compressed; 63169689Skan size_t compressed_buffer_size; 64169689Skan unsigned long crc; 65169689Skan}; 66169689Skan 67169689Skanstruct private_config { 68169689Skan int compression_level; 69169689Skan}; 70169689Skan 71169689Skan 72169689Skan/* 73169689Skan * Yuck. zlib.h is not const-correct, so I need this one bit 74169689Skan * of ugly hackery to convert a const * pointer to a non-const pointer. 
75169689Skan */ 76169689Skan#define SET_NEXT_IN(st,src) \ 77169689Skan (st)->stream.next_in = (Bytef *)(uintptr_t)(const void *)(src) 78169689Skan 79169689Skanstatic int archive_compressor_gzip_finish(struct archive_write *); 80169689Skanstatic int archive_compressor_gzip_init(struct archive_write *); 81169689Skanstatic int archive_compressor_gzip_options(struct archive_write *, 82169689Skan const char *, const char *); 83169689Skanstatic int archive_compressor_gzip_write(struct archive_write *, 84169689Skan const void *, size_t); 85169689Skanstatic int drive_compressor(struct archive_write *, struct private_data *, 86169689Skan int finishing); 87169689Skan 88169689Skan 89169689Skan/* 90169689Skan * Allocate, initialize and return a archive object. 91169689Skan */ 92169689Skanint 93169689Skanarchive_write_set_compression_gzip(struct archive *_a) 94169689Skan{ 95169689Skan struct archive_write *a = (struct archive_write *)_a; 96169689Skan struct private_config *config; 97169689Skan __archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC, 98169689Skan ARCHIVE_STATE_NEW, "archive_write_set_compression_gzip"); 99169689Skan config = malloc(sizeof(*config)); 100169689Skan if (config == NULL) { 101169689Skan archive_set_error(&a->archive, ENOMEM, "Out of memory"); 102169689Skan return (ARCHIVE_FATAL); 103169689Skan } 104169689Skan a->compressor.config = config; 105169689Skan a->compressor.finish = &archive_compressor_gzip_finish; 106169689Skan config->compression_level = Z_DEFAULT_COMPRESSION; 107169689Skan a->compressor.init = &archive_compressor_gzip_init; 108169689Skan a->compressor.options = &archive_compressor_gzip_options; 109169689Skan a->archive.compression_code = ARCHIVE_COMPRESSION_GZIP; 110169689Skan a->archive.compression_name = "gzip"; 111169689Skan return (ARCHIVE_OK); 112169689Skan} 113169689Skan 114169689Skan/* 115169689Skan * Setup callback. 
116169689Skan */ 117169689Skanstatic int 118169689Skanarchive_compressor_gzip_init(struct archive_write *a) 119169689Skan{ 120169689Skan int ret; 121169689Skan struct private_data *state; 122169689Skan struct private_config *config; 123169689Skan time_t t; 124169689Skan 125169689Skan config = (struct private_config *)a->compressor.config; 126169689Skan 127169689Skan if (a->client_opener != NULL) { 128169689Skan ret = (a->client_opener)(&a->archive, a->client_data); 129169689Skan if (ret != ARCHIVE_OK) 130169689Skan return (ret); 131169689Skan } 132169689Skan 133169689Skan /* 134169689Skan * The next check is a temporary workaround until the gzip 135169689Skan * code can be overhauled some. The code should not require 136169689Skan * that compressed_buffer_size == bytes_per_block. Removing 137169689Skan * this assumption will allow us to compress larger chunks at 138169689Skan * a time, which should improve overall performance 139169689Skan * marginally. As a minor side-effect, such a cleanup would 140169689Skan * allow us to support truly arbitrary block sizes. 141169689Skan */ 142169689Skan if (a->bytes_per_block < 10) { 143169689Skan archive_set_error(&a->archive, EINVAL, 144169689Skan "GZip compressor requires a minimum 10 byte block size"); 145169689Skan return (ARCHIVE_FATAL); 146169689Skan } 147169689Skan 148169689Skan state = (struct private_data *)malloc(sizeof(*state)); 149169689Skan if (state == NULL) { 150169689Skan archive_set_error(&a->archive, ENOMEM, 151169689Skan "Can't allocate data for compression"); 152169689Skan return (ARCHIVE_FATAL); 153169689Skan } 154169689Skan memset(state, 0, sizeof(*state)); 155169689Skan 156169689Skan /* 157169689Skan * See comment above. We should set compressed_buffer_size to 158169689Skan * max(bytes_per_block, 65536), but the code can't handle that yet. 
159169689Skan */ 160169689Skan state->compressed_buffer_size = a->bytes_per_block; 161169689Skan state->compressed = (unsigned char *)malloc(state->compressed_buffer_size); 162169689Skan state->crc = crc32(0L, NULL, 0); 163169689Skan 164169689Skan if (state->compressed == NULL) { 165169689Skan archive_set_error(&a->archive, ENOMEM, 166169689Skan "Can't allocate data for compression buffer"); 167169689Skan free(state); 168169689Skan return (ARCHIVE_FATAL); 169169689Skan } 170169689Skan 171169689Skan state->stream.next_out = state->compressed; 172169689Skan state->stream.avail_out = state->compressed_buffer_size; 173169689Skan 174169689Skan /* Prime output buffer with a gzip header. */ 175169689Skan t = time(NULL); 176169689Skan state->compressed[0] = 0x1f; /* GZip signature bytes */ 177169689Skan state->compressed[1] = 0x8b; 178169689Skan state->compressed[2] = 0x08; /* "Deflate" compression */ 179169689Skan state->compressed[3] = 0; /* No options */ 180169689Skan state->compressed[4] = (t)&0xff; /* Timestamp */ 181169689Skan state->compressed[5] = (t>>8)&0xff; 182169689Skan state->compressed[6] = (t>>16)&0xff; 183169689Skan state->compressed[7] = (t>>24)&0xff; 184169689Skan state->compressed[8] = 0; /* No deflate options */ 185169689Skan state->compressed[9] = 3; /* OS=Unix */ 186169689Skan state->stream.next_out += 10; 187169689Skan state->stream.avail_out -= 10; 188169689Skan 189169689Skan a->compressor.write = archive_compressor_gzip_write; 190169689Skan 191169689Skan /* Initialize compression library. */ 192169689Skan ret = deflateInit2(&(state->stream), 193169689Skan config->compression_level, 194169689Skan Z_DEFLATED, 195169689Skan -15 /* < 0 to suppress zlib header */, 196169689Skan 8, 197169689Skan Z_DEFAULT_STRATEGY); 198169689Skan 199169689Skan if (ret == Z_OK) { 200169689Skan a->compressor.data = state; 201169689Skan return (0); 202169689Skan } 203169689Skan 204169689Skan /* Library setup failed: clean up. 
*/ 205169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Internal error " 206169689Skan "initializing compression library"); 207169689Skan free(state->compressed); 208169689Skan free(state); 209169689Skan 210169689Skan /* Override the error message if we know what really went wrong. */ 211169689Skan switch (ret) { 212169689Skan case Z_STREAM_ERROR: 213169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 214169689Skan "Internal error initializing " 215169689Skan "compression library: invalid setup parameter"); 216169689Skan break; 217169689Skan case Z_MEM_ERROR: 218169689Skan archive_set_error(&a->archive, ENOMEM, "Internal error initializing " 219169689Skan "compression library"); 220169689Skan break; 221169689Skan case Z_VERSION_ERROR: 222169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 223169689Skan "Internal error initializing " 224169689Skan "compression library: invalid library version"); 225169689Skan break; 226169689Skan } 227169689Skan 228169689Skan return (ARCHIVE_FATAL); 229169689Skan} 230169689Skan 231169689Skan/* 232169689Skan * Set write options. 233169689Skan */ 234169689Skanstatic int 235169689Skanarchive_compressor_gzip_options(struct archive_write *a, const char *key, 236169689Skan const char *value) 237169689Skan{ 238169689Skan struct private_config *config; 239169689Skan 240169689Skan config = (struct private_config *)a->compressor.config; 241169689Skan if (strcmp(key, "compression-level") == 0) { 242169689Skan if (value == NULL || !(value[0] >= '0' && value[0] <= '9') || 243169689Skan value[1] != '\0') 244169689Skan return (ARCHIVE_WARN); 245169689Skan config->compression_level = value[0] - '0'; 246169689Skan return (ARCHIVE_OK); 247169689Skan } 248169689Skan 249169689Skan return (ARCHIVE_WARN); 250169689Skan} 251169689Skan 252169689Skan/* 253169689Skan * Write data to the compressed stream. 
254169689Skan */ 255169689Skanstatic int 256169689Skanarchive_compressor_gzip_write(struct archive_write *a, const void *buff, 257169689Skan size_t length) 258169689Skan{ 259169689Skan struct private_data *state; 260169689Skan int ret; 261169689Skan 262169689Skan state = (struct private_data *)a->compressor.data; 263169689Skan if (a->client_writer == NULL) { 264169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER, 265169689Skan "No write callback is registered? " 266169689Skan "This is probably an internal programming error."); 267169689Skan return (ARCHIVE_FATAL); 268169689Skan } 269169689Skan 270169689Skan /* Update statistics */ 271169689Skan state->crc = crc32(state->crc, (const Bytef *)buff, length); 272169689Skan state->total_in += length; 273169689Skan 274169689Skan /* Compress input data to output buffer */ 275169689Skan SET_NEXT_IN(state, buff); 276169689Skan state->stream.avail_in = length; 277169689Skan if ((ret = drive_compressor(a, state, 0)) != ARCHIVE_OK) 278169689Skan return (ret); 279169689Skan 280169689Skan a->archive.file_position += length; 281169689Skan return (ARCHIVE_OK); 282169689Skan} 283169689Skan 284169689Skan/* 285169689Skan * Finish the compression... 286169689Skan */ 287169689Skanstatic int 288169689Skanarchive_compressor_gzip_finish(struct archive_write *a) 289169689Skan{ 290169689Skan ssize_t block_length, target_block_length, bytes_written; 291169689Skan int ret; 292169689Skan struct private_data *state; 293169689Skan unsigned tocopy; 294169689Skan unsigned char trailer[8]; 295169689Skan 296169689Skan state = (struct private_data *)a->compressor.data; 297169689Skan ret = 0; 298169689Skan if (state != NULL) { 299169689Skan if (a->client_writer == NULL) { 300169689Skan archive_set_error(&a->archive, 301169689Skan ARCHIVE_ERRNO_PROGRAMMER, 302169689Skan "No write callback is registered? 
" 303169689Skan "This is probably an internal programming error."); 304169689Skan ret = ARCHIVE_FATAL; 305169689Skan goto cleanup; 306169689Skan } 307169689Skan 308169689Skan /* By default, always pad the uncompressed data. */ 309169689Skan if (a->pad_uncompressed) { 310169689Skan tocopy = a->bytes_per_block - 311169689Skan (state->total_in % a->bytes_per_block); 312169689Skan while (tocopy > 0 && tocopy < (unsigned)a->bytes_per_block) { 313169689Skan SET_NEXT_IN(state, a->nulls); 314169689Skan state->stream.avail_in = tocopy < a->null_length ? 315169689Skan tocopy : a->null_length; 316169689Skan state->crc = crc32(state->crc, a->nulls, 317169689Skan state->stream.avail_in); 318169689Skan state->total_in += state->stream.avail_in; 319169689Skan tocopy -= state->stream.avail_in; 320169689Skan ret = drive_compressor(a, state, 0); 321169689Skan if (ret != ARCHIVE_OK) 322169689Skan goto cleanup; 323169689Skan } 324169689Skan } 325169689Skan 326169689Skan /* Finish compression cycle */ 327169689Skan if (((ret = drive_compressor(a, state, 1))) != ARCHIVE_OK) 328169689Skan goto cleanup; 329169689Skan 330169689Skan /* Build trailer: 4-byte CRC and 4-byte length. */ 331169689Skan trailer[0] = (state->crc)&0xff; 332169689Skan trailer[1] = (state->crc >> 8)&0xff; 333169689Skan trailer[2] = (state->crc >> 16)&0xff; 334169689Skan trailer[3] = (state->crc >> 24)&0xff; 335169689Skan trailer[4] = (state->total_in)&0xff; 336169689Skan trailer[5] = (state->total_in >> 8)&0xff; 337169689Skan trailer[6] = (state->total_in >> 16)&0xff; 338169689Skan trailer[7] = (state->total_in >> 24)&0xff; 339169689Skan 340169689Skan /* Add trailer to current block. 
*/ 341169689Skan tocopy = 8; 342169689Skan if (tocopy > state->stream.avail_out) 343169689Skan tocopy = state->stream.avail_out; 344169689Skan memcpy(state->stream.next_out, trailer, tocopy); 345169689Skan state->stream.next_out += tocopy; 346169689Skan state->stream.avail_out -= tocopy; 347169689Skan 348169689Skan /* If it overflowed, flush and start a new block. */ 349169689Skan if (tocopy < 8) { 350169689Skan bytes_written = (a->client_writer)(&a->archive, a->client_data, 351169689Skan state->compressed, state->compressed_buffer_size); 352169689Skan if (bytes_written <= 0) { 353169689Skan ret = ARCHIVE_FATAL; 354169689Skan goto cleanup; 355169689Skan } 356169689Skan a->archive.raw_position += bytes_written; 357169689Skan state->stream.next_out = state->compressed; 358169689Skan state->stream.avail_out = state->compressed_buffer_size; 359169689Skan memcpy(state->stream.next_out, trailer + tocopy, 8-tocopy); 360169689Skan state->stream.next_out += 8-tocopy; 361169689Skan state->stream.avail_out -= 8-tocopy; 362169689Skan } 363169689Skan 364169689Skan /* Optionally, pad the final compressed block. */ 365169689Skan block_length = state->stream.next_out - state->compressed; 366169689Skan 367169689Skan /* Tricky calculation to determine size of last block. */ 368169689Skan if (a->bytes_in_last_block <= 0) 369169689Skan /* Default or Zero: pad to full block */ 370169689Skan target_block_length = a->bytes_per_block; 371169689Skan else 372169689Skan /* Round length to next multiple of bytes_in_last_block. 
*/ 373169689Skan target_block_length = a->bytes_in_last_block * 374169689Skan ( (block_length + a->bytes_in_last_block - 1) / 375169689Skan a->bytes_in_last_block); 376169689Skan if (target_block_length > a->bytes_per_block) 377169689Skan target_block_length = a->bytes_per_block; 378169689Skan if (block_length < target_block_length) { 379169689Skan memset(state->stream.next_out, 0, 380169689Skan target_block_length - block_length); 381169689Skan block_length = target_block_length; 382169689Skan } 383169689Skan 384169689Skan /* Write the last block */ 385169689Skan bytes_written = (a->client_writer)(&a->archive, a->client_data, 386169689Skan state->compressed, block_length); 387169689Skan if (bytes_written <= 0) { 388169689Skan ret = ARCHIVE_FATAL; 389169689Skan goto cleanup; 390169689Skan } 391169689Skan a->archive.raw_position += bytes_written; 392169689Skan 393169689Skan /* Cleanup: shut down compressor, release memory, etc. */ 394169689Skan cleanup: 395169689Skan switch (deflateEnd(&(state->stream))) { 396169689Skan case Z_OK: 397169689Skan break; 398169689Skan default: 399169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 400169689Skan "Failed to clean up compressor"); 401169689Skan ret = ARCHIVE_FATAL; 402169689Skan } 403169689Skan free(state->compressed); 404169689Skan free(state); 405169689Skan } 406169689Skan /* Clean up config area even if we never initialized. */ 407169689Skan free(a->compressor.config); 408169689Skan a->compressor.config = NULL; 409169689Skan return (ret); 410169689Skan} 411169689Skan 412169689Skan/* 413169689Skan * Utility function to push input data through compressor, 414169689Skan * writing full output blocks as necessary. 415169689Skan * 416169689Skan * Note that this handles both the regular write case (finishing == 417169689Skan * false) and the end-of-archive case (finishing == true). 
418169689Skan */ 419169689Skanstatic int 420169689Skandrive_compressor(struct archive_write *a, struct private_data *state, int finishing) 421169689Skan{ 422169689Skan ssize_t bytes_written; 423169689Skan int ret; 424169689Skan 425169689Skan for (;;) { 426169689Skan if (state->stream.avail_out == 0) { 427169689Skan bytes_written = (a->client_writer)(&a->archive, 428169689Skan a->client_data, state->compressed, 429169689Skan state->compressed_buffer_size); 430169689Skan if (bytes_written <= 0) { 431169689Skan /* TODO: Handle this write failure */ 432169689Skan return (ARCHIVE_FATAL); 433169689Skan } else if ((size_t)bytes_written < state->compressed_buffer_size) { 434169689Skan /* Short write: Move remaining to 435169689Skan * front of block and keep filling */ 436169689Skan memmove(state->compressed, 437169689Skan state->compressed + bytes_written, 438169689Skan state->compressed_buffer_size - bytes_written); 439169689Skan } 440169689Skan a->archive.raw_position += bytes_written; 441169689Skan state->stream.next_out 442169689Skan = state->compressed + 443169689Skan state->compressed_buffer_size - bytes_written; 444169689Skan state->stream.avail_out = bytes_written; 445169689Skan } 446169689Skan 447169689Skan /* If there's nothing to do, we're done. */ 448169689Skan if (!finishing && state->stream.avail_in == 0) 449169689Skan return (ARCHIVE_OK); 450169689Skan 451169689Skan ret = deflate(&(state->stream), 452169689Skan finishing ? Z_FINISH : Z_NO_FLUSH ); 453169689Skan 454169689Skan switch (ret) { 455169689Skan case Z_OK: 456169689Skan /* In non-finishing case, check if compressor 457169689Skan * consumed everything */ 458169689Skan if (!finishing && state->stream.avail_in == 0) 459169689Skan return (ARCHIVE_OK); 460169689Skan /* In finishing case, this return always means 461169689Skan * there's more work */ 462169689Skan break; 463169689Skan case Z_STREAM_END: 464169689Skan /* This return can only occur in finishing case. 
*/ 465169689Skan return (ARCHIVE_OK); 466169689Skan default: 467169689Skan /* Any other return value indicates an error. */ 468169689Skan archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, 469169689Skan "GZip compression failed:" 470169689Skan " deflate() call returned status %d", 471169689Skan ret); 472169689Skan return (ARCHIVE_FATAL); 473169689Skan } 474169689Skan } 475169689Skan} 476169689Skan 477169689Skan#endif /* HAVE_ZLIB_H */ 478169689Skan