bzipfs.c revision 200919
1/*
2 * Copyright (c) 1998 Michael Smith.
3 * Copyright (c) 2000 Maxim Sobolev
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: head/lib/libstand/bzipfs.c 200919 2009-12-23 21:11:03Z jhb $");
30
31#ifndef REGRESSION
32#include "stand.h"
33#else
34#include <stdlib.h>
35#include <sys/errno.h>
36#include <sys/fcntl.h>
37#include <sys/types.h>
38#include <sys/unistd.h>
39
40struct open_file {
41    int                 f_flags;        /* see F_* below */
42    void                *f_fsdata;      /* file system specific data */
43};
44#define F_READ          0x0001  /* file opened for reading */
45#define EOFFSET (ELAST+8)       /* relative seek not supported */
46static inline u_int min(u_int a, u_int b) { return(a < b ? a : b); }
47#define panic(x, y) abort()
48#endif
49
50#include <sys/stat.h>
51#include <string.h>
52#include <bzlib.h>
53
54#define BZ_BUFSIZE 2048	/* XXX larger? */
55
56struct bz_file
57{
58    int			bzf_rawfd;
59    bz_stream		bzf_bzstream;
60    char		bzf_buf[BZ_BUFSIZE];
61    int			bzf_endseen;
62};
63
/* Forward declarations of the functions defined further down in this file. */
static int	bzf_fill(struct bz_file *bzf);
static int	bzf_open(const char *path, struct open_file *f);
static int	bzf_close(struct open_file *f);
static int	bzf_read(struct open_file *f, void *buf, size_t size,
		    size_t *resid);
static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
static int	bzf_stat(struct open_file *f, struct stat *sb);
70
71#ifndef REGRESSION
72struct fs_ops bzipfs_fsops = {
73    "bzip",
74    bzf_open,
75    bzf_close,
76    bzf_read,
77    null_write,
78    bzf_seek,
79    bzf_stat,
80    null_readdir
81};
82#endif
83
84#if 0
85void *
86calloc(int items, size_t size)
87{
88    return(malloc(items * size));
89}
90#endif
91
92static int
93bzf_fill(struct bz_file *bzf)
94{
95    int		result;
96    int		req;
97
98    req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
99    result = 0;
100
101    /* If we need more */
102    if (req > 0) {
103	/* move old data to bottom of buffer */
104	if (req < BZ_BUFSIZE)
105	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
106
107	/* read to fill buffer and update availibility data */
108	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
109	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
110	if (result >= 0)
111	    bzf->bzf_bzstream.avail_in += result;
112    }
113    return(result);
114}
115
116/*
117 * Adapted from get_byte/check_header in libz
118 *
119 * Returns 0 if the header is OK, nonzero if not.
120 */
121static int
122get_byte(struct bz_file *bzf)
123{
124    if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
125	return(-1);
126    bzf->bzf_bzstream.avail_in--;
127    return(*(bzf->bzf_bzstream.next_in)++);
128}
129
130static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
131
132static int
133check_header(struct bz_file *bzf)
134{
135    unsigned int len;
136    int		 c;
137
138    /* Check the bzip2 magic header */
139    for (len = 0; len < 3; len++) {
140	c = get_byte(bzf);
141	if (c != bz_magic[len]) {
142	    return(1);
143	}
144    }
145    /* Check that the block size is valid */
146    c = get_byte(bzf);
147    if (c < '1' || c > '9')
148	return(1);
149
150    /* Put back bytes that we've took from the input stream */
151    bzf->bzf_bzstream.next_in -= 4;
152    bzf->bzf_bzstream.avail_in += 4;
153
154    return(0);
155}
156
157static int
158bzf_open(const char *fname, struct open_file *f)
159{
160    static char		*bzfname;
161    int			rawfd;
162    struct bz_file	*bzf;
163    char		*cp;
164    int			error;
165    struct stat		sb;
166
167    /* Have to be in "just read it" mode */
168    if (f->f_flags != F_READ)
169	return(EPERM);
170
171    /* If the name already ends in .gz or .bz2, ignore it */
172    if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
173	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
174	return(ENOENT);
175
176    /* Construct new name */
177    bzfname = malloc(strlen(fname) + 5);
178    if (bzfname == NULL)
179	return(ENOMEM);
180    sprintf(bzfname, "%s.bz2", fname);
181
182    /* Try to open the compressed datafile */
183    rawfd = open(bzfname, O_RDONLY);
184    free(bzfname);
185    if (rawfd == -1)
186	return(ENOENT);
187
188    if (fstat(rawfd, &sb) < 0) {
189	printf("bzf_open: stat failed\n");
190	close(rawfd);
191	return(ENOENT);
192    }
193    if (!S_ISREG(sb.st_mode)) {
194	printf("bzf_open: not a file\n");
195	close(rawfd);
196	return(EISDIR);			/* best guess */
197    }
198
199    /* Allocate a bz_file structure, populate it */
200    bzf = malloc(sizeof(struct bz_file));
201    if (bzf == NULL)
202	return(ENOMEM);
203    bzero(bzf, sizeof(struct bz_file));
204    bzf->bzf_rawfd = rawfd;
205
206    /* Verify that the file is bzipped */
207    if (check_header(bzf)) {
208	close(bzf->bzf_rawfd);
209	free(bzf);
210	return(EFTYPE);
211    }
212
213    /* Initialise the inflation engine */
214    if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
215	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
216	close(bzf->bzf_rawfd);
217	free(bzf);
218	return(EIO);
219    }
220
221    /* Looks OK, we'll take it */
222    f->f_fsdata = bzf;
223    return(0);
224}
225
226static int
227bzf_close(struct open_file *f)
228{
229    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
230
231    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
232    close(bzf->bzf_rawfd);
233    free(bzf);
234    return(0);
235}
236
237static int
238bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
239{
240    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
241    int			error;
242
243    bzf->bzf_bzstream.next_out = buf;			/* where and how much */
244    bzf->bzf_bzstream.avail_out = size;
245
246    while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
247	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
248	    printf("bzf_read: fill error\n");
249	    return(EIO);
250	}
251	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
252	    printf("bzf_read: unexpected EOF\n");
253	    if (bzf->bzf_bzstream.avail_out == size)
254		return(EIO);
255	    break;
256	}
257
258	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
259	if (error == BZ_STREAM_END) {			/* EOF, all done */
260	    bzf->bzf_endseen = 1;
261	    break;
262	}
263	if (error != BZ_OK) {				/* argh, decompression error */
264	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
265	    return(EIO);
266	}
267    }
268    if (resid != NULL)
269	*resid = bzf->bzf_bzstream.avail_out;
270    return(0);
271}
272
273static int
274bzf_rewind(struct open_file *f)
275{
276    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
277    struct bz_file	*bzf_tmp;
278
279    /*
280     * Since bzip2 does not have an equivalent inflateReset function a crude
281     * one needs to be provided.  The functions all called in such a way that
282     * at any time an error occurs a role back can be done (effectively making
283     * this rewind 'atomic', either the reset occurs successfully or not at all,
284     * with no 'undefined' state happening).
285     */
286
287    /* Allocate a bz_file structure, populate it */
288    bzf_tmp = malloc(sizeof(struct bz_file));
289    if (bzf_tmp == NULL)
290	return(-1);
291    bzero(bzf_tmp, sizeof(struct bz_file));
292    bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
293
294    /* Initialise the inflation engine */
295    if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
296	free(bzf_tmp);
297	return(-1);
298    }
299
300    /* Seek back to the beginning of the file */
301    if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
302	BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
303	free(bzf_tmp);
304	return(-1);
305    }
306
307    /* Free old bz_file data */
308    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
309    free(bzf);
310
311    /* Use the new bz_file data */
312    f->f_fsdata = bzf_tmp;
313
314    return(0);
315}
316
317static off_t
318bzf_seek(struct open_file *f, off_t offset, int where)
319{
320    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
321    off_t		target;
322    char		discard[16];
323
324    switch (where) {
325    case SEEK_SET:
326	target = offset;
327	break;
328    case SEEK_CUR:
329	target = offset + bzf->bzf_bzstream.total_out_lo32;
330	break;
331    case SEEK_END:
332	target = -1;
333    default:
334	errno = EINVAL;
335	return(-1);
336    }
337
338    /* Can we get there from here? */
339    if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
340	errno = EOFFSET;
341	return -1;
342    }
343
344    /* if bzf_rewind was called then bzf has changed */
345    bzf = (struct bz_file *)f->f_fsdata;
346
347    /* skip forwards if required */
348    while (target > bzf->bzf_bzstream.total_out_lo32) {
349	errno = bzf_read(f, discard, min(sizeof(discard),
350	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
351	if (errno)
352	    return(-1);
353    }
354    /* This is where we are (be honest if we overshot) */
355    return(bzf->bzf_bzstream.total_out_lo32);
356}
357
358static int
359bzf_stat(struct open_file *f, struct stat *sb)
360{
361    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
362    int			result;
363
364    /* stat as normal, but indicate that size is unknown */
365    if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
366	sb->st_size = -1;
367    return(result);
368}
369
/*
 * Fatal-error hook: hands the numeric error code to panic().
 * NOTE(review): presumably the callback libbz2 requires from its user when
 * built without stdio support - confirm against the bzip2 build in use.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occured\n", errorcode);
}
375
376#ifdef REGRESSION
377/* Small test case, open and decompress test.bz2 */
378int main()
379{
380    struct open_file f;
381    char buf[1024];
382    size_t resid;
383    int err;
384
385    memset(&f, '\0', sizeof(f));
386    f.f_flags = F_READ;
387    err = bzf_open("test", &f);
388    if (err != 0)
389	exit(1);
390    do {
391	err = bzf_read(&f, buf, sizeof(buf), &resid);
392    } while (err == 0 && resid != sizeof(buf));
393
394    if (err != 0)
395	exit(2);
396    exit(0);
397}
398#endif
399