1/*
2 * Copyright (c) 1998 Michael Smith.
3 * Copyright (c) 2000 Maxim Sobolev
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in the
13 *    documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD: stable/11/stand/libsa/bzipfs.c 346476 2019-04-21 03:36:05Z kevans $");
30
31#ifndef REGRESSION
32#include "stand.h"
33#else
34#include <stdlib.h>
35#include <sys/errno.h>
36#include <sys/fcntl.h>
37#include <sys/types.h>
38#include <sys/unistd.h>
39
/*
 * Minimal stand-in for the loader's struct open_file, carrying only the
 * two members this file touches.
 */
struct open_file {
    /* open-mode flags, see F_* below */
    int		f_flags;
    /* file system specific data */
    void	*f_fsdata;
};
44#define F_READ          0x0001  /* file opened for reading */
45#define EOFFSET (ELAST+8)       /* relative seek not supported */
/* Return the smaller of two unsigned values. */
static inline u_int
min(u_int a, u_int b)
{
    if (b < a)
	return(b);
    return(a);
}
47#define panic(x, y) abort()
48#endif
49
50#include <sys/stat.h>
51#include <string.h>
52#include <bzlib.h>
53
54#define BZ_BUFSIZE 2048	/* XXX larger? */
55
56struct bz_file
57{
58    int			bzf_rawfd;
59    bz_stream		bzf_bzstream;
60    char		bzf_buf[BZ_BUFSIZE];
61    int			bzf_endseen;
62};
63
/* Forward declarations: buffer refill helper and the file-operation handlers. */
static int	bzf_fill(struct bz_file *bzf);
static int	bzf_open(const char *path, struct open_file *f);
static int	bzf_close(struct open_file *f);
static int	bzf_read(struct open_file *f, void *buf, size_t size,
		    size_t *resid);
static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
static int	bzf_stat(struct open_file *f, struct stat *sb);
70
71#ifndef REGRESSION
72struct fs_ops bzipfs_fsops = {
73    "bzip",
74    bzf_open,
75    bzf_close,
76    bzf_read,
77    null_write,
78    bzf_seek,
79    bzf_stat,
80    null_readdir
81};
82#endif
83
84static int
85bzf_fill(struct bz_file *bzf)
86{
87    int		result;
88    int		req;
89
90    req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
91    result = 0;
92
93    /* If we need more */
94    if (req > 0) {
95	/* move old data to bottom of buffer */
96	if (req < BZ_BUFSIZE)
97	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
98
99	/* read to fill buffer and update availibility data */
100	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
101	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
102	if (result >= 0)
103	    bzf->bzf_bzstream.avail_in += result;
104    }
105    return(result);
106}
107
108/*
109 * Adapted from get_byte/check_header in libz
110 *
111 * Returns 0 if the header is OK, nonzero if not.
112 */
113static int
114get_byte(struct bz_file *bzf)
115{
116    if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
117	return(-1);
118    bzf->bzf_bzstream.avail_in--;
119    return(*(bzf->bzf_bzstream.next_in)++);
120}
121
122static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
123
124static int
125check_header(struct bz_file *bzf)
126{
127    unsigned int len;
128    int		 c;
129
130    /* Check the bzip2 magic header */
131    for (len = 0; len < 3; len++) {
132	c = get_byte(bzf);
133	if (c != bz_magic[len]) {
134	    return(1);
135	}
136    }
137    /* Check that the block size is valid */
138    c = get_byte(bzf);
139    if (c < '1' || c > '9')
140	return(1);
141
142    /* Put back bytes that we've took from the input stream */
143    bzf->bzf_bzstream.next_in -= 4;
144    bzf->bzf_bzstream.avail_in += 4;
145
146    return(0);
147}
148
149static int
150bzf_open(const char *fname, struct open_file *f)
151{
152    static char		*bzfname;
153    int			rawfd;
154    struct bz_file	*bzf;
155    char		*cp;
156    int			error;
157    struct stat		sb;
158
159    /* Have to be in "just read it" mode */
160    if (f->f_flags != F_READ)
161	return(EPERM);
162
163    /* If the name already ends in .gz or .bz2, ignore it */
164    if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
165	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
166	return(ENOENT);
167
168    /* Construct new name */
169    bzfname = malloc(strlen(fname) + 5);
170    if (bzfname == NULL)
171	return(ENOMEM);
172    sprintf(bzfname, "%s.bz2", fname);
173
174    /* Try to open the compressed datafile */
175    rawfd = open(bzfname, O_RDONLY);
176    free(bzfname);
177    if (rawfd == -1)
178	return(ENOENT);
179
180    if (fstat(rawfd, &sb) < 0) {
181	printf("bzf_open: stat failed\n");
182	close(rawfd);
183	return(ENOENT);
184    }
185    if (!S_ISREG(sb.st_mode)) {
186	printf("bzf_open: not a file\n");
187	close(rawfd);
188	return(EISDIR);			/* best guess */
189    }
190
191    /* Allocate a bz_file structure, populate it */
192    bzf = malloc(sizeof(struct bz_file));
193    if (bzf == NULL)
194	return(ENOMEM);
195    bzero(bzf, sizeof(struct bz_file));
196    bzf->bzf_rawfd = rawfd;
197
198    /* Verify that the file is bzipped */
199    if (check_header(bzf)) {
200	close(bzf->bzf_rawfd);
201	free(bzf);
202	return(EFTYPE);
203    }
204
205    /* Initialise the inflation engine */
206    if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
207	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
208	close(bzf->bzf_rawfd);
209	free(bzf);
210	return(EIO);
211    }
212
213    /* Looks OK, we'll take it */
214    f->f_fsdata = bzf;
215    return(0);
216}
217
218static int
219bzf_close(struct open_file *f)
220{
221    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
222
223    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
224    close(bzf->bzf_rawfd);
225    free(bzf);
226    return(0);
227}
228
229static int
230bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
231{
232    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
233    int			error;
234
235    bzf->bzf_bzstream.next_out = buf;			/* where and how much */
236    bzf->bzf_bzstream.avail_out = size;
237
238    while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
239	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
240	    printf("bzf_read: fill error\n");
241	    return(EIO);
242	}
243	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
244	    printf("bzf_read: unexpected EOF\n");
245	    if (bzf->bzf_bzstream.avail_out == size)
246		return(EIO);
247	    break;
248	}
249
250	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
251	if (error == BZ_STREAM_END) {			/* EOF, all done */
252	    bzf->bzf_endseen = 1;
253	    break;
254	}
255	if (error != BZ_OK) {				/* argh, decompression error */
256	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
257	    return(EIO);
258	}
259    }
260    if (resid != NULL)
261	*resid = bzf->bzf_bzstream.avail_out;
262    return(0);
263}
264
265static int
266bzf_rewind(struct open_file *f)
267{
268    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
269    struct bz_file	*bzf_tmp;
270
271    /*
272     * Since bzip2 does not have an equivalent inflateReset function a crude
273     * one needs to be provided.  The functions all called in such a way that
274     * at any time an error occurs a roll back can be done (effectively making
275     * this rewind 'atomic', either the reset occurs successfully or not at all,
276     * with no 'undefined' state happening).
277     */
278
279    /* Allocate a bz_file structure, populate it */
280    bzf_tmp = malloc(sizeof(struct bz_file));
281    if (bzf_tmp == NULL)
282	return(-1);
283    bzero(bzf_tmp, sizeof(struct bz_file));
284    bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
285
286    /* Initialise the inflation engine */
287    if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
288	free(bzf_tmp);
289	return(-1);
290    }
291
292    /* Seek back to the beginning of the file */
293    if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
294	BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
295	free(bzf_tmp);
296	return(-1);
297    }
298
299    /* Free old bz_file data */
300    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
301    free(bzf);
302
303    /* Use the new bz_file data */
304    f->f_fsdata = bzf_tmp;
305
306    return(0);
307}
308
309static off_t
310bzf_seek(struct open_file *f, off_t offset, int where)
311{
312    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
313    off_t		target;
314    char		discard[16];
315
316    switch (where) {
317    case SEEK_SET:
318	target = offset;
319	break;
320    case SEEK_CUR:
321	target = offset + bzf->bzf_bzstream.total_out_lo32;
322	break;
323    default:
324	errno = EINVAL;
325	return(-1);
326    }
327
328    /* Can we get there from here? */
329    if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
330	errno = EOFFSET;
331	return -1;
332    }
333
334    /* if bzf_rewind was called then bzf has changed */
335    bzf = (struct bz_file *)f->f_fsdata;
336
337    /* skip forwards if required */
338    while (target > bzf->bzf_bzstream.total_out_lo32) {
339	errno = bzf_read(f, discard, min(sizeof(discard),
340	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
341	if (errno)
342	    return(-1);
343    }
344    /* This is where we are (be honest if we overshot) */
345    return(bzf->bzf_bzstream.total_out_lo32);
346}
347
348static int
349bzf_stat(struct open_file *f, struct stat *sb)
350{
351    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
352    int			result;
353
354    /* stat as normal, but indicate that size is unknown */
355    if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
356	sb->st_size = -1;
357    return(result);
358}
359
/*
 * Fatal-error hook with external linkage.  Per the bzip2 manual, a libbz2
 * built with BZ_NO_STDIO calls bz_internal_error() when an internal
 * consistency check fails; presumably that is how this build is
 * configured (confirm against the libbz2 build flags).
 * Reports the error code through panic().
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occured", errorcode);
}
365
366#ifdef REGRESSION
367/* Small test case, open and decompress test.bz2 */
368int main()
369{
370    struct open_file f;
371    char buf[1024];
372    size_t resid;
373    int err;
374
375    memset(&f, '\0', sizeof(f));
376    f.f_flags = F_READ;
377    err = bzf_open("test", &f);
378    if (err != 0)
379	exit(1);
380    do {
381	err = bzf_read(&f, buf, sizeof(buf), &resid);
382    } while (err == 0 && resid != sizeof(buf));
383
384    if (err != 0)
385	exit(2);
386    exit(0);
387}
388#endif
389