1228753Smm/*-
2228753Smm * Copyright (c) 2003-2007 Tim Kientzle
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm#include "archive_platform.h"
27228753Smm
28229592Smm__FBSDID("$FreeBSD$");
29228753Smm
30228753Smm#ifdef HAVE_ERRNO_H
31228753Smm#include <errno.h>
32228753Smm#endif
33228753Smm#ifdef HAVE_STDLIB_H
34228753Smm#include <stdlib.h>
35228753Smm#endif
36228753Smm#ifdef HAVE_STRING_H
37228753Smm#include <string.h>
38228753Smm#endif
39228753Smm#include <time.h>
40228753Smm#ifdef HAVE_ZLIB_H
41228753Smm#include <zlib.h>
42228753Smm#endif
43228753Smm
44228753Smm#include "archive.h"
45228753Smm#include "archive_private.h"
46228753Smm#include "archive_write_private.h"
47228753Smm
48228753Smm#ifndef HAVE_ZLIB_H
49228753Smmint
50228753Smmarchive_write_set_compression_gzip(struct archive *a)
51228753Smm{
52228753Smm	archive_set_error(a, ARCHIVE_ERRNO_MISC,
53228753Smm	    "gzip compression not supported on this platform");
54228753Smm	return (ARCHIVE_FATAL);
55228753Smm}
56228753Smm#else
57228753Smm/* Don't compile this if we don't have zlib. */
58228753Smm
59228753Smmstruct private_data {
60228753Smm	z_stream	 stream;
61228753Smm	int64_t		 total_in;
62228753Smm	unsigned char	*compressed;
63228753Smm	size_t		 compressed_buffer_size;
64228753Smm	unsigned long	 crc;
65228753Smm};
66228753Smm
/* Settings recorded before the compressor is initialized. */
struct private_config {
	int		 compression_level;	/* level passed to deflateInit2() */
};
70228753Smm
71228753Smm
/*
 * zlib.h is not const-correct: z_stream.next_in is a non-const pointer.
 * This macro converts a const pointer to a non-const one (by way of
 * uintptr_t) and stores it in the stream of the given state.
 */
#define	SET_NEXT_IN(st,src)					\
	((st)->stream.next_in = (Bytef *)(uintptr_t)(const void *)(src))
78228753Smm
/* Compressor callbacks and the shared deflate driver, defined below. */
static int	archive_compressor_gzip_init(struct archive_write *);
static int	archive_compressor_gzip_options(struct archive_write *,
		    const char *, const char *);
static int	archive_compressor_gzip_write(struct archive_write *,
		    const void *, size_t);
static int	archive_compressor_gzip_finish(struct archive_write *);
static int	drive_compressor(struct archive_write *, struct private_data *,
		    int finishing);
87228753Smm
88228753Smm
89228753Smm/*
90228753Smm * Allocate, initialize and return a archive object.
91228753Smm */
92228753Smmint
93228753Smmarchive_write_set_compression_gzip(struct archive *_a)
94228753Smm{
95228753Smm	struct archive_write *a = (struct archive_write *)_a;
96228753Smm	struct private_config *config;
97228753Smm	__archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
98228753Smm	    ARCHIVE_STATE_NEW, "archive_write_set_compression_gzip");
99228753Smm	config = malloc(sizeof(*config));
100228753Smm	if (config == NULL) {
101228753Smm		archive_set_error(&a->archive, ENOMEM, "Out of memory");
102228753Smm		return (ARCHIVE_FATAL);
103228753Smm	}
104228753Smm	a->compressor.config = config;
105228753Smm	a->compressor.finish = &archive_compressor_gzip_finish;
106228753Smm	config->compression_level = Z_DEFAULT_COMPRESSION;
107228753Smm	a->compressor.init = &archive_compressor_gzip_init;
108228753Smm	a->compressor.options = &archive_compressor_gzip_options;
109228753Smm	a->archive.compression_code = ARCHIVE_COMPRESSION_GZIP;
110228753Smm	a->archive.compression_name = "gzip";
111228753Smm	return (ARCHIVE_OK);
112228753Smm}
113228753Smm
114228753Smm/*
115228753Smm * Setup callback.
116228753Smm */
117228753Smmstatic int
118228753Smmarchive_compressor_gzip_init(struct archive_write *a)
119228753Smm{
120228753Smm	int ret;
121228753Smm	struct private_data *state;
122228753Smm	struct private_config *config;
123228753Smm	time_t t;
124228753Smm
125228753Smm	config = (struct private_config *)a->compressor.config;
126228753Smm
127228753Smm	if (a->client_opener != NULL) {
128228753Smm		ret = (a->client_opener)(&a->archive, a->client_data);
129228753Smm		if (ret != ARCHIVE_OK)
130228753Smm			return (ret);
131228753Smm	}
132228753Smm
133228753Smm	/*
134228753Smm	 * The next check is a temporary workaround until the gzip
135228753Smm	 * code can be overhauled some.  The code should not require
136228753Smm	 * that compressed_buffer_size == bytes_per_block.  Removing
137228753Smm	 * this assumption will allow us to compress larger chunks at
138228753Smm	 * a time, which should improve overall performance
139228753Smm	 * marginally.  As a minor side-effect, such a cleanup would
140228753Smm	 * allow us to support truly arbitrary block sizes.
141228753Smm	 */
142228753Smm	if (a->bytes_per_block < 10) {
143228753Smm		archive_set_error(&a->archive, EINVAL,
144228753Smm		    "GZip compressor requires a minimum 10 byte block size");
145228753Smm		return (ARCHIVE_FATAL);
146228753Smm	}
147228753Smm
148228753Smm	state = (struct private_data *)malloc(sizeof(*state));
149228753Smm	if (state == NULL) {
150228753Smm		archive_set_error(&a->archive, ENOMEM,
151228753Smm		    "Can't allocate data for compression");
152228753Smm		return (ARCHIVE_FATAL);
153228753Smm	}
154228753Smm	memset(state, 0, sizeof(*state));
155228753Smm
156228753Smm	/*
157228753Smm	 * See comment above.  We should set compressed_buffer_size to
158228753Smm	 * max(bytes_per_block, 65536), but the code can't handle that yet.
159228753Smm	 */
160228753Smm	state->compressed_buffer_size = a->bytes_per_block;
161228753Smm	state->compressed = (unsigned char *)malloc(state->compressed_buffer_size);
162228753Smm	state->crc = crc32(0L, NULL, 0);
163228753Smm
164228753Smm	if (state->compressed == NULL) {
165228753Smm		archive_set_error(&a->archive, ENOMEM,
166228753Smm		    "Can't allocate data for compression buffer");
167228753Smm		free(state);
168228753Smm		return (ARCHIVE_FATAL);
169228753Smm	}
170228753Smm
171228753Smm	state->stream.next_out = state->compressed;
172228753Smm	state->stream.avail_out = state->compressed_buffer_size;
173228753Smm
174228753Smm	/* Prime output buffer with a gzip header. */
175228753Smm	t = time(NULL);
176228753Smm	state->compressed[0] = 0x1f; /* GZip signature bytes */
177228753Smm	state->compressed[1] = 0x8b;
178228753Smm	state->compressed[2] = 0x08; /* "Deflate" compression */
179228753Smm	state->compressed[3] = 0; /* No options */
180228753Smm	state->compressed[4] = (t)&0xff;  /* Timestamp */
181228753Smm	state->compressed[5] = (t>>8)&0xff;
182228753Smm	state->compressed[6] = (t>>16)&0xff;
183228753Smm	state->compressed[7] = (t>>24)&0xff;
184228753Smm	state->compressed[8] = 0; /* No deflate options */
185228753Smm	state->compressed[9] = 3; /* OS=Unix */
186228753Smm	state->stream.next_out += 10;
187228753Smm	state->stream.avail_out -= 10;
188228753Smm
189228753Smm	a->compressor.write = archive_compressor_gzip_write;
190228753Smm
191228753Smm	/* Initialize compression library. */
192228753Smm	ret = deflateInit2(&(state->stream),
193228753Smm	    config->compression_level,
194228753Smm	    Z_DEFLATED,
195228753Smm	    -15 /* < 0 to suppress zlib header */,
196228753Smm	    8,
197228753Smm	    Z_DEFAULT_STRATEGY);
198228753Smm
199228753Smm	if (ret == Z_OK) {
200228753Smm		a->compressor.data = state;
201228753Smm		return (0);
202228753Smm	}
203228753Smm
204228753Smm	/* Library setup failed: clean up. */
205228753Smm	archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC, "Internal error "
206228753Smm	    "initializing compression library");
207228753Smm	free(state->compressed);
208228753Smm	free(state);
209228753Smm
210228753Smm	/* Override the error message if we know what really went wrong. */
211228753Smm	switch (ret) {
212228753Smm	case Z_STREAM_ERROR:
213228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
214228753Smm		    "Internal error initializing "
215228753Smm		    "compression library: invalid setup parameter");
216228753Smm		break;
217228753Smm	case Z_MEM_ERROR:
218228753Smm		archive_set_error(&a->archive, ENOMEM, "Internal error initializing "
219228753Smm		    "compression library");
220228753Smm		break;
221228753Smm	case Z_VERSION_ERROR:
222228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
223228753Smm		    "Internal error initializing "
224228753Smm		    "compression library: invalid library version");
225228753Smm		break;
226228753Smm	}
227228753Smm
228228753Smm	return (ARCHIVE_FATAL);
229228753Smm}
230228753Smm
231228753Smm/*
232228753Smm * Set write options.
233228753Smm */
234228753Smmstatic int
235228753Smmarchive_compressor_gzip_options(struct archive_write *a, const char *key,
236228753Smm    const char *value)
237228753Smm{
238228753Smm	struct private_config *config;
239228753Smm
240228753Smm	config = (struct private_config *)a->compressor.config;
241228753Smm	if (strcmp(key, "compression-level") == 0) {
242228753Smm		if (value == NULL || !(value[0] >= '0' && value[0] <= '9') ||
243228753Smm		    value[1] != '\0')
244228753Smm			return (ARCHIVE_WARN);
245228753Smm		config->compression_level = value[0] - '0';
246228753Smm		return (ARCHIVE_OK);
247228753Smm	}
248228753Smm
249228753Smm	return (ARCHIVE_WARN);
250228753Smm}
251228753Smm
252228753Smm/*
253228753Smm * Write data to the compressed stream.
254228753Smm */
255228753Smmstatic int
256228753Smmarchive_compressor_gzip_write(struct archive_write *a, const void *buff,
257228753Smm    size_t length)
258228753Smm{
259228753Smm	struct private_data *state;
260228753Smm	int ret;
261228753Smm
262228753Smm	state = (struct private_data *)a->compressor.data;
263228753Smm	if (a->client_writer == NULL) {
264228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
265228753Smm		    "No write callback is registered?  "
266228753Smm		    "This is probably an internal programming error.");
267228753Smm		return (ARCHIVE_FATAL);
268228753Smm	}
269228753Smm
270228753Smm	/* Update statistics */
271228753Smm	state->crc = crc32(state->crc, (const Bytef *)buff, length);
272228753Smm	state->total_in += length;
273228753Smm
274228753Smm	/* Compress input data to output buffer */
275228753Smm	SET_NEXT_IN(state, buff);
276228753Smm	state->stream.avail_in = length;
277228753Smm	if ((ret = drive_compressor(a, state, 0)) != ARCHIVE_OK)
278228753Smm		return (ret);
279228753Smm
280228753Smm	a->archive.file_position += length;
281228753Smm	return (ARCHIVE_OK);
282228753Smm}
283228753Smm
284228753Smm/*
285228753Smm * Finish the compression...
286228753Smm */
287228753Smmstatic int
288228753Smmarchive_compressor_gzip_finish(struct archive_write *a)
289228753Smm{
290228753Smm	ssize_t block_length, target_block_length, bytes_written;
291228753Smm	int ret;
292228753Smm	struct private_data *state;
293228753Smm	unsigned tocopy;
294228753Smm	unsigned char trailer[8];
295228753Smm
296228753Smm	state = (struct private_data *)a->compressor.data;
297228753Smm	ret = 0;
298228753Smm	if (state != NULL) {
299228753Smm		if (a->client_writer == NULL) {
300228753Smm			archive_set_error(&a->archive,
301228753Smm			    ARCHIVE_ERRNO_PROGRAMMER,
302228753Smm			    "No write callback is registered?  "
303228753Smm			    "This is probably an internal programming error.");
304228753Smm			ret = ARCHIVE_FATAL;
305228753Smm			goto cleanup;
306228753Smm		}
307228753Smm
308228753Smm		/* By default, always pad the uncompressed data. */
309228753Smm		if (a->pad_uncompressed) {
310228753Smm			tocopy = a->bytes_per_block -
311228753Smm			    (state->total_in % a->bytes_per_block);
312228753Smm			while (tocopy > 0 && tocopy < (unsigned)a->bytes_per_block) {
313228753Smm				SET_NEXT_IN(state, a->nulls);
314228753Smm				state->stream.avail_in = tocopy < a->null_length ?
315228753Smm				    tocopy : a->null_length;
316228753Smm				state->crc = crc32(state->crc, a->nulls,
317228753Smm				    state->stream.avail_in);
318228753Smm				state->total_in += state->stream.avail_in;
319228753Smm				tocopy -= state->stream.avail_in;
320228753Smm				ret = drive_compressor(a, state, 0);
321228753Smm				if (ret != ARCHIVE_OK)
322228753Smm					goto cleanup;
323228753Smm			}
324228753Smm		}
325228753Smm
326228753Smm		/* Finish compression cycle */
327228753Smm		if (((ret = drive_compressor(a, state, 1))) != ARCHIVE_OK)
328228753Smm			goto cleanup;
329228753Smm
330228753Smm		/* Build trailer: 4-byte CRC and 4-byte length. */
331228753Smm		trailer[0] = (state->crc)&0xff;
332228753Smm		trailer[1] = (state->crc >> 8)&0xff;
333228753Smm		trailer[2] = (state->crc >> 16)&0xff;
334228753Smm		trailer[3] = (state->crc >> 24)&0xff;
335228753Smm		trailer[4] = (state->total_in)&0xff;
336228753Smm		trailer[5] = (state->total_in >> 8)&0xff;
337228753Smm		trailer[6] = (state->total_in >> 16)&0xff;
338228753Smm		trailer[7] = (state->total_in >> 24)&0xff;
339228753Smm
340228753Smm		/* Add trailer to current block. */
341228753Smm		tocopy = 8;
342228753Smm		if (tocopy > state->stream.avail_out)
343228753Smm			tocopy = state->stream.avail_out;
344228753Smm		memcpy(state->stream.next_out, trailer, tocopy);
345228753Smm		state->stream.next_out += tocopy;
346228753Smm		state->stream.avail_out -= tocopy;
347228753Smm
348228753Smm		/* If it overflowed, flush and start a new block. */
349228753Smm		if (tocopy < 8) {
350228753Smm			bytes_written = (a->client_writer)(&a->archive, a->client_data,
351228753Smm			    state->compressed, state->compressed_buffer_size);
352228753Smm			if (bytes_written <= 0) {
353228753Smm				ret = ARCHIVE_FATAL;
354228753Smm				goto cleanup;
355228753Smm			}
356228753Smm			a->archive.raw_position += bytes_written;
357228753Smm			state->stream.next_out = state->compressed;
358228753Smm			state->stream.avail_out = state->compressed_buffer_size;
359228753Smm			memcpy(state->stream.next_out, trailer + tocopy, 8-tocopy);
360228753Smm			state->stream.next_out += 8-tocopy;
361228753Smm			state->stream.avail_out -= 8-tocopy;
362228753Smm		}
363228753Smm
364228753Smm		/* Optionally, pad the final compressed block. */
365228753Smm		block_length = state->stream.next_out - state->compressed;
366228753Smm
367228753Smm		/* Tricky calculation to determine size of last block. */
368228753Smm		if (a->bytes_in_last_block <= 0)
369228753Smm			/* Default or Zero: pad to full block */
370228753Smm			target_block_length = a->bytes_per_block;
371228753Smm		else
372228753Smm			/* Round length to next multiple of bytes_in_last_block. */
373228753Smm			target_block_length = a->bytes_in_last_block *
374228753Smm			    ( (block_length + a->bytes_in_last_block - 1) /
375228753Smm				a->bytes_in_last_block);
376228753Smm		if (target_block_length > a->bytes_per_block)
377228753Smm			target_block_length = a->bytes_per_block;
378228753Smm		if (block_length < target_block_length) {
379228753Smm			memset(state->stream.next_out, 0,
380228753Smm			    target_block_length - block_length);
381228753Smm			block_length = target_block_length;
382228753Smm		}
383228753Smm
384228753Smm		/* Write the last block */
385228753Smm		bytes_written = (a->client_writer)(&a->archive, a->client_data,
386228753Smm		    state->compressed, block_length);
387228753Smm		if (bytes_written <= 0) {
388228753Smm			ret = ARCHIVE_FATAL;
389228753Smm			goto cleanup;
390228753Smm		}
391228753Smm		a->archive.raw_position += bytes_written;
392228753Smm
393228753Smm		/* Cleanup: shut down compressor, release memory, etc. */
394228753Smm	cleanup:
395228753Smm		switch (deflateEnd(&(state->stream))) {
396228753Smm		case Z_OK:
397228753Smm			break;
398228753Smm		default:
399228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
400228753Smm			    "Failed to clean up compressor");
401228753Smm			ret = ARCHIVE_FATAL;
402228753Smm		}
403228753Smm		free(state->compressed);
404228753Smm		free(state);
405228753Smm	}
406228753Smm	/* Clean up config area even if we never initialized. */
407228753Smm	free(a->compressor.config);
408228753Smm	a->compressor.config = NULL;
409228753Smm	return (ret);
410228753Smm}
411228753Smm
412228753Smm/*
413228753Smm * Utility function to push input data through compressor,
414228753Smm * writing full output blocks as necessary.
415228753Smm *
416228753Smm * Note that this handles both the regular write case (finishing ==
417228753Smm * false) and the end-of-archive case (finishing == true).
418228753Smm */
419228753Smmstatic int
420228753Smmdrive_compressor(struct archive_write *a, struct private_data *state, int finishing)
421228753Smm{
422228753Smm	ssize_t bytes_written;
423228753Smm	int ret;
424228753Smm
425228753Smm	for (;;) {
426228753Smm		if (state->stream.avail_out == 0) {
427228753Smm			bytes_written = (a->client_writer)(&a->archive,
428228753Smm			    a->client_data, state->compressed,
429228753Smm			    state->compressed_buffer_size);
430228753Smm			if (bytes_written <= 0) {
431228753Smm				/* TODO: Handle this write failure */
432228753Smm				return (ARCHIVE_FATAL);
433228753Smm			} else if ((size_t)bytes_written < state->compressed_buffer_size) {
434228753Smm				/* Short write: Move remaining to
435228753Smm				 * front of block and keep filling */
436228753Smm				memmove(state->compressed,
437228753Smm				    state->compressed + bytes_written,
438228753Smm				    state->compressed_buffer_size - bytes_written);
439228753Smm			}
440228753Smm			a->archive.raw_position += bytes_written;
441228753Smm			state->stream.next_out
442228753Smm			    = state->compressed +
443228753Smm			    state->compressed_buffer_size - bytes_written;
444228753Smm			state->stream.avail_out = bytes_written;
445228753Smm		}
446228753Smm
447228753Smm		/* If there's nothing to do, we're done. */
448228753Smm		if (!finishing && state->stream.avail_in == 0)
449228753Smm			return (ARCHIVE_OK);
450228753Smm
451228753Smm		ret = deflate(&(state->stream),
452228753Smm		    finishing ? Z_FINISH : Z_NO_FLUSH );
453228753Smm
454228753Smm		switch (ret) {
455228753Smm		case Z_OK:
456228753Smm			/* In non-finishing case, check if compressor
457228753Smm			 * consumed everything */
458228753Smm			if (!finishing && state->stream.avail_in == 0)
459228753Smm				return (ARCHIVE_OK);
460228753Smm			/* In finishing case, this return always means
461228753Smm			 * there's more work */
462228753Smm			break;
463228753Smm		case Z_STREAM_END:
464228753Smm			/* This return can only occur in finishing case. */
465228753Smm			return (ARCHIVE_OK);
466228753Smm		default:
467228753Smm			/* Any other return value indicates an error. */
468228753Smm			archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
469228753Smm			    "GZip compression failed:"
470228753Smm			    " deflate() call returned status %d",
471228753Smm			    ret);
472228753Smm			return (ARCHIVE_FATAL);
473228753Smm		}
474228753Smm	}
475228753Smm}
476228753Smm
477228753Smm#endif /* HAVE_ZLIB_H */
478