1228753Smm/*-
2228753Smm * Copyright (c) 2008 Joerg Sonnenberger
3228753Smm * All rights reserved.
4228753Smm *
5228753Smm * Redistribution and use in source and binary forms, with or without
6228753Smm * modification, are permitted provided that the following conditions
7228753Smm * are met:
8228753Smm * 1. Redistributions of source code must retain the above copyright
9228753Smm *    notice, this list of conditions and the following disclaimer.
10228753Smm * 2. Redistributions in binary form must reproduce the above copyright
11228753Smm *    notice, this list of conditions and the following disclaimer in the
12228753Smm *    documentation and/or other materials provided with the distribution.
13228753Smm *
14228753Smm * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15228753Smm * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16228753Smm * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17228753Smm * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18228753Smm * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19228753Smm * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20228753Smm * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21228753Smm * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22228753Smm * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23228753Smm * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24228753Smm */
25228753Smm
26228753Smm/*-
27228753Smm * Copyright (c) 1985, 1986, 1992, 1993
28228753Smm *	The Regents of the University of California.  All rights reserved.
29228753Smm *
30228753Smm * This code is derived from software contributed to Berkeley by
31228753Smm * Diomidis Spinellis and James A. Woods, derived from original
32228753Smm * work by Spencer Thomas and Joseph Orost.
33228753Smm *
34228753Smm * Redistribution and use in source and binary forms, with or without
35228753Smm * modification, are permitted provided that the following conditions
36228753Smm * are met:
37228753Smm * 1. Redistributions of source code must retain the above copyright
38228753Smm *    notice, this list of conditions and the following disclaimer.
39228753Smm * 2. Redistributions in binary form must reproduce the above copyright
40228753Smm *    notice, this list of conditions and the following disclaimer in the
41228753Smm *    documentation and/or other materials provided with the distribution.
42228753Smm * 3. Neither the name of the University nor the names of its contributors
43228753Smm *    may be used to endorse or promote products derived from this software
44228753Smm *    without specific prior written permission.
45228753Smm *
46228753Smm * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
47228753Smm * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
48228753Smm * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
49228753Smm * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
50228753Smm * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
51228753Smm * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
52228753Smm * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
53228753Smm * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
54228753Smm * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
55228753Smm * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
56228753Smm * SUCH DAMAGE.
57228753Smm */
58228753Smm
59228753Smm#include "archive_platform.h"
60228753Smm
61229592Smm__FBSDID("$FreeBSD$");
62228753Smm
63228753Smm#ifdef HAVE_ERRNO_H
64228753Smm#include <errno.h>
65228753Smm#endif
66228753Smm#ifdef HAVE_STDLIB_H
67228753Smm#include <stdlib.h>
68228753Smm#endif
69228753Smm#ifdef HAVE_STRING_H
70228753Smm#include <string.h>
71228753Smm#endif
72228753Smm
73228753Smm#include "archive.h"
74228753Smm#include "archive_private.h"
75228753Smm#include "archive_write_private.h"
76228753Smm
/*
 * Hash table geometry and tuning knobs.
 */
#define	HSIZE		69001	/* Hash table slots (95% occupancy). */
#define	HSHIFT		8	/* 8 - trunc(log2(HSIZE / 65536)) */
#define	CHECK_GAP	10000	/* Input bytes between ratio checks. */

/* Largest value that fits in a code of the given width in bits. */
#define	MAXCODE(bits)	((1 << (bits)) - 1)

/*
 * Reserved codes.  Do not change these lightly: they must stay outside
 * the contiguous general code space.
 */
#define	CLEAR	256		/* Table clear output code. */
#define	FIRST	257		/* First free entry. */
89228753Smm
90228753Smmstruct private_data {
91228753Smm	off_t in_count, out_count, checkpoint;
92228753Smm
93228753Smm	int code_len;			/* Number of bits/code. */
94228753Smm	int cur_maxcode;		/* Maximum code, given n_bits. */
95228753Smm	int max_maxcode;		/* Should NEVER generate this code. */
96228753Smm	int hashtab [HSIZE];
97228753Smm	unsigned short codetab [HSIZE];
98228753Smm	int first_free;		/* First unused entry. */
99228753Smm	int compress_ratio;
100228753Smm
101228753Smm	int cur_code, cur_fcode;
102228753Smm
103228753Smm	int bit_offset;
104228753Smm	unsigned char bit_buf;
105228753Smm
106228753Smm	unsigned char	*compressed;
107228753Smm	size_t		 compressed_buffer_size;
108228753Smm	size_t		 compressed_offset;
109228753Smm};
110228753Smm
/* Compressor callbacks installed on the archive_write object. */
static int	archive_compressor_compress_init(struct archive_write *);
static int	archive_compressor_compress_write(struct archive_write *,
		    const void *, size_t);
static int	archive_compressor_compress_finish(struct archive_write *);
115228753Smm
116228753Smm/*
117228753Smm * Allocate, initialize and return a archive object.
118228753Smm */
119228753Smmint
120228753Smmarchive_write_set_compression_compress(struct archive *_a)
121228753Smm{
122228753Smm	struct archive_write *a = (struct archive_write *)_a;
123228753Smm	__archive_check_magic(&a->archive, ARCHIVE_WRITE_MAGIC,
124228753Smm	    ARCHIVE_STATE_NEW, "archive_write_set_compression_compress");
125228753Smm	a->compressor.init = &archive_compressor_compress_init;
126228753Smm	a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
127228753Smm	a->archive.compression_name = "compress";
128228753Smm	return (ARCHIVE_OK);
129228753Smm}
130228753Smm
131228753Smm/*
132228753Smm * Setup callback.
133228753Smm */
134228753Smmstatic int
135228753Smmarchive_compressor_compress_init(struct archive_write *a)
136228753Smm{
137228753Smm	int ret;
138228753Smm	struct private_data *state;
139228753Smm
140228753Smm	a->archive.compression_code = ARCHIVE_COMPRESSION_COMPRESS;
141228753Smm	a->archive.compression_name = "compress";
142228753Smm
143228753Smm	if (a->bytes_per_block < 4) {
144228753Smm		archive_set_error(&a->archive, EINVAL,
145228753Smm		    "Can't write Compress header as single block");
146228753Smm		return (ARCHIVE_FATAL);
147228753Smm	}
148228753Smm
149228753Smm	if (a->client_opener != NULL) {
150228753Smm		ret = (a->client_opener)(&a->archive, a->client_data);
151228753Smm		if (ret != ARCHIVE_OK)
152228753Smm			return (ret);
153228753Smm	}
154228753Smm
155228753Smm	state = (struct private_data *)malloc(sizeof(*state));
156228753Smm	if (state == NULL) {
157228753Smm		archive_set_error(&a->archive, ENOMEM,
158228753Smm		    "Can't allocate data for compression");
159228753Smm		return (ARCHIVE_FATAL);
160228753Smm	}
161228753Smm	memset(state, 0, sizeof(*state));
162228753Smm
163228753Smm	state->compressed_buffer_size = a->bytes_per_block;
164228753Smm	state->compressed = malloc(state->compressed_buffer_size);
165228753Smm
166228753Smm	if (state->compressed == NULL) {
167228753Smm		archive_set_error(&a->archive, ENOMEM,
168228753Smm		    "Can't allocate data for compression buffer");
169228753Smm		free(state);
170228753Smm		return (ARCHIVE_FATAL);
171228753Smm	}
172228753Smm
173228753Smm	a->compressor.write = archive_compressor_compress_write;
174228753Smm	a->compressor.finish = archive_compressor_compress_finish;
175228753Smm
176228753Smm	state->max_maxcode = 0x10000;	/* Should NEVER generate this code. */
177228753Smm	state->in_count = 0;		/* Length of input. */
178228753Smm	state->bit_buf = 0;
179228753Smm	state->bit_offset = 0;
180228753Smm	state->out_count = 3;		/* Includes 3-byte header mojo. */
181228753Smm	state->compress_ratio = 0;
182228753Smm	state->checkpoint = CHECK_GAP;
183228753Smm	state->code_len = 9;
184228753Smm	state->cur_maxcode = MAXCODE(state->code_len);
185228753Smm	state->first_free = FIRST;
186228753Smm
187228753Smm	memset(state->hashtab, 0xff, sizeof(state->hashtab));
188228753Smm
189228753Smm	/* Prime output buffer with a gzip header. */
190228753Smm	state->compressed[0] = 0x1f; /* Compress */
191228753Smm	state->compressed[1] = 0x9d;
192228753Smm	state->compressed[2] = 0x90; /* Block mode, 16bit max */
193228753Smm	state->compressed_offset = 3;
194228753Smm
195228753Smm	a->compressor.data = state;
196228753Smm	return (0);
197228753Smm}
198228753Smm
/*-
 * Output the given code.
 * Inputs:
 * 	ocode:	A code_len-bit integer (code_len runs from 9 to 16).
 *		End of input is not signalled through this routine;
 *		output_flush() writes any final partial byte.
 * Outputs:
 * 	Appends the code to the compressed output buffer.
 * Assumptions:
 *	Chars are 8 bits long.
 * Algorithm:
 * 	Codes are packed least-significant bit first.  Completed bytes are
 * passed to output_byte() at once; leftover bits wait in state->bit_buf
 * for the next code.  Codes are written in groups of 8, so a full group
 * occupies exactly code_len bytes.  When the code width changes or the
 * table is cleared, the remainder of the current group is padded out
 * first.
 */
213228753Smm
/*
 * rmask[n] keeps the low n bits of a byte (n = 0..8).  The table is
 * read-only, so it is declared const.
 */
static const unsigned char rmask[9] =
	{0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
216228753Smm
217228753Smmstatic int
218228753Smmoutput_byte(struct archive_write *a, unsigned char c)
219228753Smm{
220228753Smm	struct private_data *state = a->compressor.data;
221228753Smm	ssize_t bytes_written;
222228753Smm
223228753Smm	state->compressed[state->compressed_offset++] = c;
224228753Smm	++state->out_count;
225228753Smm
226228753Smm	if (state->compressed_buffer_size == state->compressed_offset) {
227228753Smm		bytes_written = (a->client_writer)(&a->archive,
228228753Smm		    a->client_data,
229228753Smm		    state->compressed, state->compressed_buffer_size);
230228753Smm		if (bytes_written <= 0)
231228753Smm			return ARCHIVE_FATAL;
232228753Smm		a->archive.raw_position += bytes_written;
233228753Smm		state->compressed_offset = 0;
234228753Smm	}
235228753Smm
236228753Smm	return ARCHIVE_OK;
237228753Smm}
238228753Smm
239228753Smmstatic int
240228753Smmoutput_code(struct archive_write *a, int ocode)
241228753Smm{
242228753Smm	struct private_data *state = a->compressor.data;
243228753Smm	int bits, ret, clear_flg, bit_offset;
244228753Smm
245228753Smm	clear_flg = ocode == CLEAR;
246228753Smm
247228753Smm	/*
248228753Smm	 * Since ocode is always >= 8 bits, only need to mask the first
249228753Smm	 * hunk on the left.
250228753Smm	 */
251228753Smm	bit_offset = state->bit_offset % 8;
252228753Smm	state->bit_buf |= (ocode << bit_offset) & 0xff;
253228753Smm	output_byte(a, state->bit_buf);
254228753Smm
255228753Smm	bits = state->code_len - (8 - bit_offset);
256228753Smm	ocode >>= 8 - bit_offset;
257228753Smm	/* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */
258228753Smm	if (bits >= 8) {
259228753Smm		output_byte(a, ocode & 0xff);
260228753Smm		ocode >>= 8;
261228753Smm		bits -= 8;
262228753Smm	}
263228753Smm	/* Last bits. */
264228753Smm	state->bit_offset += state->code_len;
265228753Smm	state->bit_buf = ocode & rmask[bits];
266228753Smm	if (state->bit_offset == state->code_len * 8)
267228753Smm		state->bit_offset = 0;
268228753Smm
269228753Smm	/*
270228753Smm	 * If the next entry is going to be too big for the ocode size,
271228753Smm	 * then increase it, if possible.
272228753Smm	 */
273228753Smm	if (clear_flg || state->first_free > state->cur_maxcode) {
274228753Smm	       /*
275228753Smm		* Write the whole buffer, because the input side won't
276228753Smm		* discover the size increase until after it has read it.
277228753Smm		*/
278228753Smm		if (state->bit_offset > 0) {
279228753Smm			while (state->bit_offset < state->code_len * 8) {
280228753Smm				ret = output_byte(a, state->bit_buf);
281228753Smm				if (ret != ARCHIVE_OK)
282228753Smm					return ret;
283228753Smm				state->bit_offset += 8;
284228753Smm				state->bit_buf = 0;
285228753Smm			}
286228753Smm		}
287228753Smm		state->bit_buf = 0;
288228753Smm		state->bit_offset = 0;
289228753Smm
290228753Smm		if (clear_flg) {
291228753Smm			state->code_len = 9;
292228753Smm			state->cur_maxcode = MAXCODE(state->code_len);
293228753Smm		} else {
294228753Smm			state->code_len++;
295228753Smm			if (state->code_len == 16)
296228753Smm				state->cur_maxcode = state->max_maxcode;
297228753Smm			else
298228753Smm				state->cur_maxcode = MAXCODE(state->code_len);
299228753Smm		}
300228753Smm	}
301228753Smm
302228753Smm	return (ARCHIVE_OK);
303228753Smm}
304228753Smm
305228753Smmstatic int
306228753Smmoutput_flush(struct archive_write *a)
307228753Smm{
308228753Smm	struct private_data *state = a->compressor.data;
309228753Smm	int ret;
310228753Smm
311228753Smm	/* At EOF, write the rest of the buffer. */
312228753Smm	if (state->bit_offset % 8) {
313228753Smm		state->code_len = (state->bit_offset % 8 + 7) / 8;
314228753Smm		ret = output_byte(a, state->bit_buf);
315228753Smm		if (ret != ARCHIVE_OK)
316228753Smm			return ret;
317228753Smm	}
318228753Smm
319228753Smm	return (ARCHIVE_OK);
320228753Smm}
321228753Smm
322228753Smm/*
323228753Smm * Write data to the compressed stream.
324228753Smm */
325228753Smmstatic int
326228753Smmarchive_compressor_compress_write(struct archive_write *a, const void *buff,
327228753Smm    size_t length)
328228753Smm{
329228753Smm	struct private_data *state;
330228753Smm	int i;
331228753Smm	int ratio;
332228753Smm	int c, disp, ret;
333228753Smm	const unsigned char *bp;
334228753Smm
335228753Smm	state = (struct private_data *)a->compressor.data;
336228753Smm	if (a->client_writer == NULL) {
337228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
338228753Smm		    "No write callback is registered?  "
339228753Smm		    "This is probably an internal programming error.");
340228753Smm		return (ARCHIVE_FATAL);
341228753Smm	}
342228753Smm
343228753Smm	if (length == 0)
344228753Smm		return ARCHIVE_OK;
345228753Smm
346228753Smm	bp = buff;
347228753Smm
348228753Smm	if (state->in_count == 0) {
349228753Smm		state->cur_code = *bp++;
350228753Smm		++state->in_count;
351228753Smm		--length;
352228753Smm	}
353228753Smm
354228753Smm	while (length--) {
355228753Smm		c = *bp++;
356228753Smm		state->in_count++;
357228753Smm		state->cur_fcode = (c << 16) + state->cur_code;
358228753Smm		i = ((c << HSHIFT) ^ state->cur_code);	/* Xor hashing. */
359228753Smm
360228753Smm		if (state->hashtab[i] == state->cur_fcode) {
361228753Smm			state->cur_code = state->codetab[i];
362228753Smm			continue;
363228753Smm		}
364228753Smm		if (state->hashtab[i] < 0)	/* Empty slot. */
365228753Smm			goto nomatch;
366228753Smm		/* Secondary hash (after G. Knott). */
367228753Smm		if (i == 0)
368228753Smm			disp = 1;
369228753Smm		else
370228753Smm			disp = HSIZE - i;
371228753Smm probe:
372228753Smm		if ((i -= disp) < 0)
373228753Smm			i += HSIZE;
374228753Smm
375228753Smm		if (state->hashtab[i] == state->cur_fcode) {
376228753Smm			state->cur_code = state->codetab[i];
377228753Smm			continue;
378228753Smm		}
379228753Smm		if (state->hashtab[i] >= 0)
380228753Smm			goto probe;
381228753Smm nomatch:
382228753Smm		ret = output_code(a, state->cur_code);
383228753Smm		if (ret != ARCHIVE_OK)
384228753Smm			return ret;
385228753Smm		state->cur_code = c;
386228753Smm		if (state->first_free < state->max_maxcode) {
387228753Smm			state->codetab[i] = state->first_free++;	/* code -> hashtable */
388228753Smm			state->hashtab[i] = state->cur_fcode;
389228753Smm			continue;
390228753Smm		}
391228753Smm		if (state->in_count < state->checkpoint)
392228753Smm			continue;
393228753Smm
394228753Smm		state->checkpoint = state->in_count + CHECK_GAP;
395228753Smm
396228753Smm		if (state->in_count <= 0x007fffff)
397228753Smm			ratio = state->in_count * 256 / state->out_count;
398228753Smm		else if ((ratio = state->out_count / 256) == 0)
399228753Smm			ratio = 0x7fffffff;
400228753Smm		else
401228753Smm			ratio = state->in_count / ratio;
402228753Smm
403228753Smm		if (ratio > state->compress_ratio)
404228753Smm			state->compress_ratio = ratio;
405228753Smm		else {
406228753Smm			state->compress_ratio = 0;
407228753Smm			memset(state->hashtab, 0xff, sizeof(state->hashtab));
408228753Smm			state->first_free = FIRST;
409228753Smm			ret = output_code(a, CLEAR);
410228753Smm			if (ret != ARCHIVE_OK)
411228753Smm				return ret;
412228753Smm		}
413228753Smm	}
414228753Smm
415228753Smm	return (ARCHIVE_OK);
416228753Smm}
417228753Smm
418228753Smm
419228753Smm/*
420228753Smm * Finish the compression...
421228753Smm */
422228753Smmstatic int
423228753Smmarchive_compressor_compress_finish(struct archive_write *a)
424228753Smm{
425228753Smm	ssize_t block_length, target_block_length, bytes_written;
426228753Smm	int ret;
427228753Smm	struct private_data *state;
428228753Smm	size_t tocopy;
429228753Smm
430228753Smm	state = (struct private_data *)a->compressor.data;
431228753Smm	if (a->client_writer == NULL) {
432228753Smm		archive_set_error(&a->archive, ARCHIVE_ERRNO_PROGRAMMER,
433228753Smm		    "No write callback is registered?  "
434228753Smm		    "This is probably an internal programming error.");
435228753Smm		ret = ARCHIVE_FATAL;
436228753Smm		goto cleanup;
437228753Smm	}
438228753Smm
439228753Smm	/* By default, always pad the uncompressed data. */
440228753Smm	if (a->pad_uncompressed) {
441228753Smm		while (state->in_count % a->bytes_per_block != 0) {
442228753Smm			tocopy = a->bytes_per_block -
443228753Smm			    (state->in_count % a->bytes_per_block);
444228753Smm			if (tocopy > a->null_length)
445228753Smm				tocopy = a->null_length;
446228753Smm			ret = archive_compressor_compress_write(a, a->nulls,
447228753Smm			    tocopy);
448228753Smm			if (ret != ARCHIVE_OK)
449228753Smm				goto cleanup;
450228753Smm		}
451228753Smm	}
452228753Smm
453228753Smm	ret = output_code(a, state->cur_code);
454228753Smm	if (ret != ARCHIVE_OK)
455228753Smm		goto cleanup;
456228753Smm	ret = output_flush(a);
457228753Smm	if (ret != ARCHIVE_OK)
458228753Smm		goto cleanup;
459228753Smm
460228753Smm	/* Optionally, pad the final compressed block. */
461228753Smm	block_length = state->compressed_offset;
462228753Smm
463228753Smm	/* Tricky calculation to determine size of last block. */
464228753Smm	if (a->bytes_in_last_block <= 0)
465228753Smm		/* Default or Zero: pad to full block */
466228753Smm		target_block_length = a->bytes_per_block;
467228753Smm	else
468228753Smm		/* Round length to next multiple of bytes_in_last_block. */
469228753Smm		target_block_length = a->bytes_in_last_block *
470228753Smm		    ( (block_length + a->bytes_in_last_block - 1) /
471228753Smm			a->bytes_in_last_block);
472228753Smm	if (target_block_length > a->bytes_per_block)
473228753Smm		target_block_length = a->bytes_per_block;
474228753Smm	if (block_length < target_block_length) {
475228753Smm		memset(state->compressed + state->compressed_offset, 0,
476228753Smm		    target_block_length - block_length);
477228753Smm		block_length = target_block_length;
478228753Smm	}
479228753Smm
480228753Smm	/* Write the last block */
481228753Smm	bytes_written = (a->client_writer)(&a->archive, a->client_data,
482228753Smm	    state->compressed, block_length);
483228753Smm	if (bytes_written <= 0)
484228753Smm		ret = ARCHIVE_FATAL;
485228753Smm	else
486228753Smm		a->archive.raw_position += bytes_written;
487228753Smm
488228753Smmcleanup:
489228753Smm	free(state->compressed);
490228753Smm	free(state);
491228753Smm	return (ret);
492228753Smm}
493