183610Ssobomax/*
283610Ssobomax * Copyright (c) 1998 Michael Smith.
383610Ssobomax * Copyright (c) 2000 Maxim Sobolev
483610Ssobomax * All rights reserved.
583610Ssobomax *
683610Ssobomax * Redistribution and use in source and binary forms, with or without
783610Ssobomax * modification, are permitted provided that the following conditions
883610Ssobomax * are met:
983610Ssobomax * 1. Redistributions of source code must retain the above copyright
1083610Ssobomax *    notice, this list of conditions and the following disclaimer.
1183610Ssobomax * 2. Redistributions in binary form must reproduce the above copyright
1283610Ssobomax *    notice, this list of conditions and the following disclaimer in the
1383610Ssobomax *    documentation and/or other materials provided with the distribution.
1483610Ssobomax *
1583610Ssobomax * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
1683610Ssobomax * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
1783610Ssobomax * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
1883610Ssobomax * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
1983610Ssobomax * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
2083610Ssobomax * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
2183610Ssobomax * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
2283610Ssobomax * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
2383610Ssobomax * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
2483610Ssobomax * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
2583610Ssobomax * SUCH DAMAGE.
2683610Ssobomax */
2783610Ssobomax
2884221Sdillon#include <sys/cdefs.h>
2984221Sdillon__FBSDID("$FreeBSD: stable/11/stand/libsa/bzipfs.c 346476 2019-04-21 03:36:05Z kevans $");
3084221Sdillon
31174741Ssobomax#ifndef REGRESSION
3283610Ssobomax#include "stand.h"
33174741Ssobomax#else
34200919Sjhb#include <stdlib.h>
35174741Ssobomax#include <sys/errno.h>
36174741Ssobomax#include <sys/fcntl.h>
37174741Ssobomax#include <sys/types.h>
38174741Ssobomax#include <sys/unistd.h>
3983610Ssobomax
/*
 * Stand-ins used only by the REGRESSION build, which does not include
 * "stand.h": just enough of the open-file descriptor, flag and errno
 * definitions for this file to compile on its own.
 */
struct open_file {
    int		f_flags;	/* F_* open-mode bits, see below */
    void	*f_fsdata;	/* private state owned by the file system */
};

#define F_READ		0x0001		/* file opened for reading */
#define EOFFSET		(ELAST+8)	/* relative seek not supported */

/* Return the smaller of two unsigned values. */
static inline unsigned int
min(unsigned int a, unsigned int b)
{
    if (b < a)
	return(b);
    return(a);
}

/* The test build has nowhere to report a panic, so it simply aborts. */
#define panic(x, y)	abort()
48174741Ssobomax#endif
49174741Ssobomax
5083610Ssobomax#include <sys/stat.h>
5183610Ssobomax#include <string.h>
52146324Sobrien#include <bzlib.h>
5383610Ssobomax
5483610Ssobomax#define BZ_BUFSIZE 2048	/* XXX larger? */
5583610Ssobomax
5683610Ssobomaxstruct bz_file
5783610Ssobomax{
5883610Ssobomax    int			bzf_rawfd;
5983610Ssobomax    bz_stream		bzf_bzstream;
6083610Ssobomax    char		bzf_buf[BZ_BUFSIZE];
61174741Ssobomax    int			bzf_endseen;
6283610Ssobomax};
6383610Ssobomax
/*
 * Forward declarations of the file-local routines; parameter names match
 * the definitions further down.
 */
static int	bzf_fill(struct bz_file *bzf);
static int	bzf_open(const char *fname, struct open_file *f);
static int	bzf_close(struct open_file *f);
static int	bzf_read(struct open_file *f, void *buf, size_t size,
		    size_t *resid);
static off_t	bzf_seek(struct open_file *f, off_t offset, int where);
static int	bzf_stat(struct open_file *f, struct stat *sb);
7083610Ssobomax
71174741Ssobomax#ifndef REGRESSION
7283610Ssobomaxstruct fs_ops bzipfs_fsops = {
7383610Ssobomax    "bzip",
7483610Ssobomax    bzf_open,
7583610Ssobomax    bzf_close,
7683610Ssobomax    bzf_read,
7783610Ssobomax    null_write,
7883610Ssobomax    bzf_seek,
7983610Ssobomax    bzf_stat,
8083610Ssobomax    null_readdir
8183610Ssobomax};
82174741Ssobomax#endif
8383610Ssobomax
8483610Ssobomaxstatic int
8583610Ssobomaxbzf_fill(struct bz_file *bzf)
8683610Ssobomax{
8783610Ssobomax    int		result;
8883610Ssobomax    int		req;
8983610Ssobomax
9083610Ssobomax    req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in;
9183610Ssobomax    result = 0;
9283610Ssobomax
9383610Ssobomax    /* If we need more */
9483610Ssobomax    if (req > 0) {
9583610Ssobomax	/* move old data to bottom of buffer */
9683610Ssobomax	if (req < BZ_BUFSIZE)
9783610Ssobomax	    bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req);
9883610Ssobomax
9983610Ssobomax	/* read to fill buffer and update availibility data */
10083610Ssobomax	result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req);
10183610Ssobomax	bzf->bzf_bzstream.next_in = bzf->bzf_buf;
10283610Ssobomax	if (result >= 0)
10383610Ssobomax	    bzf->bzf_bzstream.avail_in += result;
10483610Ssobomax    }
10583610Ssobomax    return(result);
10683610Ssobomax}
10783610Ssobomax
10883610Ssobomax/*
10983610Ssobomax * Adapted from get_byte/check_header in libz
11083610Ssobomax *
11183610Ssobomax * Returns 0 if the header is OK, nonzero if not.
11283610Ssobomax */
11383610Ssobomaxstatic int
11483610Ssobomaxget_byte(struct bz_file *bzf)
11583610Ssobomax{
11683610Ssobomax    if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1))
11783610Ssobomax	return(-1);
11883610Ssobomax    bzf->bzf_bzstream.avail_in--;
11983610Ssobomax    return(*(bzf->bzf_bzstream.next_in)++);
12083610Ssobomax}
12183610Ssobomax
12283610Ssobomaxstatic int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */
12383610Ssobomax
12483610Ssobomaxstatic int
12583610Ssobomaxcheck_header(struct bz_file *bzf)
12683610Ssobomax{
12783610Ssobomax    unsigned int len;
12883610Ssobomax    int		 c;
12983610Ssobomax
13083610Ssobomax    /* Check the bzip2 magic header */
13183610Ssobomax    for (len = 0; len < 3; len++) {
13283610Ssobomax	c = get_byte(bzf);
13383610Ssobomax	if (c != bz_magic[len]) {
13483610Ssobomax	    return(1);
13583610Ssobomax	}
13683610Ssobomax    }
13783610Ssobomax    /* Check that the block size is valid */
13883610Ssobomax    c = get_byte(bzf);
13983610Ssobomax    if (c < '1' || c > '9')
14083610Ssobomax	return(1);
14183610Ssobomax
14283610Ssobomax    /* Put back bytes that we've took from the input stream */
14383610Ssobomax    bzf->bzf_bzstream.next_in -= 4;
14483610Ssobomax    bzf->bzf_bzstream.avail_in += 4;
14583610Ssobomax
14683610Ssobomax    return(0);
14783610Ssobomax}
14883610Ssobomax
14983610Ssobomaxstatic int
15083610Ssobomaxbzf_open(const char *fname, struct open_file *f)
15183610Ssobomax{
15283610Ssobomax    static char		*bzfname;
15383610Ssobomax    int			rawfd;
15483610Ssobomax    struct bz_file	*bzf;
15583610Ssobomax    char		*cp;
15683610Ssobomax    int			error;
15783610Ssobomax    struct stat		sb;
15883610Ssobomax
15983610Ssobomax    /* Have to be in "just read it" mode */
16083610Ssobomax    if (f->f_flags != F_READ)
16183610Ssobomax	return(EPERM);
16283610Ssobomax
16383610Ssobomax    /* If the name already ends in .gz or .bz2, ignore it */
16483610Ssobomax    if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz")
16592494Ssobomax	    || !strcmp(cp, ".bz2") || !strcmp(cp, ".split")))
16683610Ssobomax	return(ENOENT);
16783610Ssobomax
16883610Ssobomax    /* Construct new name */
16983610Ssobomax    bzfname = malloc(strlen(fname) + 5);
170200919Sjhb    if (bzfname == NULL)
171200919Sjhb	return(ENOMEM);
17283610Ssobomax    sprintf(bzfname, "%s.bz2", fname);
17383610Ssobomax
17483610Ssobomax    /* Try to open the compressed datafile */
17583610Ssobomax    rawfd = open(bzfname, O_RDONLY);
17683610Ssobomax    free(bzfname);
17783610Ssobomax    if (rawfd == -1)
17883610Ssobomax	return(ENOENT);
17983610Ssobomax
18083610Ssobomax    if (fstat(rawfd, &sb) < 0) {
18183610Ssobomax	printf("bzf_open: stat failed\n");
18283610Ssobomax	close(rawfd);
18383610Ssobomax	return(ENOENT);
18483610Ssobomax    }
18583610Ssobomax    if (!S_ISREG(sb.st_mode)) {
18683610Ssobomax	printf("bzf_open: not a file\n");
18783610Ssobomax	close(rawfd);
18883610Ssobomax	return(EISDIR);			/* best guess */
18983610Ssobomax    }
19083610Ssobomax
19183610Ssobomax    /* Allocate a bz_file structure, populate it */
19283610Ssobomax    bzf = malloc(sizeof(struct bz_file));
193200919Sjhb    if (bzf == NULL)
194200919Sjhb	return(ENOMEM);
19583610Ssobomax    bzero(bzf, sizeof(struct bz_file));
19683610Ssobomax    bzf->bzf_rawfd = rawfd;
19783610Ssobomax
198200919Sjhb    /* Verify that the file is bzipped */
19983610Ssobomax    if (check_header(bzf)) {
20083610Ssobomax	close(bzf->bzf_rawfd);
20183610Ssobomax	free(bzf);
20283610Ssobomax	return(EFTYPE);
20383610Ssobomax    }
20483610Ssobomax
20583610Ssobomax    /* Initialise the inflation engine */
20683610Ssobomax    if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) {
20783610Ssobomax	printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error);
20883610Ssobomax	close(bzf->bzf_rawfd);
20983610Ssobomax	free(bzf);
21083610Ssobomax	return(EIO);
21183610Ssobomax    }
21283610Ssobomax
21383610Ssobomax    /* Looks OK, we'll take it */
21483610Ssobomax    f->f_fsdata = bzf;
21583610Ssobomax    return(0);
21683610Ssobomax}
21783610Ssobomax
21883610Ssobomaxstatic int
21983610Ssobomaxbzf_close(struct open_file *f)
22083610Ssobomax{
22183610Ssobomax    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
22283610Ssobomax
22383610Ssobomax    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
22483610Ssobomax    close(bzf->bzf_rawfd);
22583610Ssobomax    free(bzf);
22683610Ssobomax    return(0);
22783610Ssobomax}
22883610Ssobomax
22983610Ssobomaxstatic int
23083610Ssobomaxbzf_read(struct open_file *f, void *buf, size_t size, size_t *resid)
23183610Ssobomax{
23283610Ssobomax    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
23383610Ssobomax    int			error;
23483610Ssobomax
23583610Ssobomax    bzf->bzf_bzstream.next_out = buf;			/* where and how much */
23683610Ssobomax    bzf->bzf_bzstream.avail_out = size;
23783610Ssobomax
238174741Ssobomax    while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) {
23983610Ssobomax	if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) {
24083610Ssobomax	    printf("bzf_read: fill error\n");
241124811Sjhb	    return(EIO);
24283610Ssobomax	}
24383610Ssobomax	if (bzf->bzf_bzstream.avail_in == 0) {		/* oops, unexpected EOF */
24483610Ssobomax	    printf("bzf_read: unexpected EOF\n");
245124811Sjhb	    if (bzf->bzf_bzstream.avail_out == size)
246200919Sjhb		return(EIO);
24783610Ssobomax	    break;
24883610Ssobomax	}
24983610Ssobomax
25083610Ssobomax	error = BZ2_bzDecompress(&bzf->bzf_bzstream);	/* decompression pass */
25183610Ssobomax	if (error == BZ_STREAM_END) {			/* EOF, all done */
252174741Ssobomax	    bzf->bzf_endseen = 1;
25383610Ssobomax	    break;
25483610Ssobomax	}
25583610Ssobomax	if (error != BZ_OK) {				/* argh, decompression error */
25683610Ssobomax	    printf("bzf_read: BZ2_bzDecompress returned %d\n", error);
257124811Sjhb	    return(EIO);
25883610Ssobomax	}
25983610Ssobomax    }
26083610Ssobomax    if (resid != NULL)
26183610Ssobomax	*resid = bzf->bzf_bzstream.avail_out;
26283610Ssobomax    return(0);
26383610Ssobomax}
26483610Ssobomax
265200919Sjhbstatic int
266200919Sjhbbzf_rewind(struct open_file *f)
267200919Sjhb{
268200919Sjhb    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
269200919Sjhb    struct bz_file	*bzf_tmp;
270200919Sjhb
271200919Sjhb    /*
272200919Sjhb     * Since bzip2 does not have an equivalent inflateReset function a crude
273200919Sjhb     * one needs to be provided.  The functions all called in such a way that
274202585Savg     * at any time an error occurs a roll back can be done (effectively making
275200919Sjhb     * this rewind 'atomic', either the reset occurs successfully or not at all,
276200919Sjhb     * with no 'undefined' state happening).
277200919Sjhb     */
278200919Sjhb
279200919Sjhb    /* Allocate a bz_file structure, populate it */
280200919Sjhb    bzf_tmp = malloc(sizeof(struct bz_file));
281200919Sjhb    if (bzf_tmp == NULL)
282200919Sjhb	return(-1);
283200919Sjhb    bzero(bzf_tmp, sizeof(struct bz_file));
284200919Sjhb    bzf_tmp->bzf_rawfd = bzf->bzf_rawfd;
285200919Sjhb
286200919Sjhb    /* Initialise the inflation engine */
287200919Sjhb    if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) {
288200919Sjhb	free(bzf_tmp);
289200919Sjhb	return(-1);
290200919Sjhb    }
291200919Sjhb
292200919Sjhb    /* Seek back to the beginning of the file */
293200919Sjhb    if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) {
294200919Sjhb	BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream));
295200919Sjhb	free(bzf_tmp);
296200919Sjhb	return(-1);
297200919Sjhb    }
298200919Sjhb
299200919Sjhb    /* Free old bz_file data */
300200919Sjhb    BZ2_bzDecompressEnd(&(bzf->bzf_bzstream));
301200919Sjhb    free(bzf);
302200919Sjhb
303200919Sjhb    /* Use the new bz_file data */
304200919Sjhb    f->f_fsdata = bzf_tmp;
305200919Sjhb
306200919Sjhb    return(0);
307200919Sjhb}
308200919Sjhb
30983610Ssobomaxstatic off_t
31083610Ssobomaxbzf_seek(struct open_file *f, off_t offset, int where)
31183610Ssobomax{
31283610Ssobomax    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
31383610Ssobomax    off_t		target;
31483610Ssobomax    char		discard[16];
31583610Ssobomax
31683610Ssobomax    switch (where) {
31783610Ssobomax    case SEEK_SET:
31883610Ssobomax	target = offset;
31983610Ssobomax	break;
32083610Ssobomax    case SEEK_CUR:
32183610Ssobomax	target = offset + bzf->bzf_bzstream.total_out_lo32;
32283610Ssobomax	break;
32383610Ssobomax    default:
324124811Sjhb	errno = EINVAL;
325200919Sjhb	return(-1);
32683610Ssobomax    }
32783610Ssobomax
32883610Ssobomax    /* Can we get there from here? */
329200919Sjhb    if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) {
33083610Ssobomax	errno = EOFFSET;
33183610Ssobomax	return -1;
332200919Sjhb    }
33383610Ssobomax
334200919Sjhb    /* if bzf_rewind was called then bzf has changed */
335200919Sjhb    bzf = (struct bz_file *)f->f_fsdata;
336200919Sjhb
33783610Ssobomax    /* skip forwards if required */
33883610Ssobomax    while (target > bzf->bzf_bzstream.total_out_lo32) {
339124811Sjhb	errno = bzf_read(f, discard, min(sizeof(discard),
340124811Sjhb	    target - bzf->bzf_bzstream.total_out_lo32), NULL);
341124811Sjhb	if (errno)
34283610Ssobomax	    return(-1);
34383610Ssobomax    }
34483610Ssobomax    /* This is where we are (be honest if we overshot) */
345200919Sjhb    return(bzf->bzf_bzstream.total_out_lo32);
34683610Ssobomax}
34783610Ssobomax
34883610Ssobomaxstatic int
34983610Ssobomaxbzf_stat(struct open_file *f, struct stat *sb)
35083610Ssobomax{
35183610Ssobomax    struct bz_file	*bzf = (struct bz_file *)f->f_fsdata;
35283610Ssobomax    int			result;
35383610Ssobomax
35483610Ssobomax    /* stat as normal, but indicate that size is unknown */
35583610Ssobomax    if ((result = fstat(bzf->bzf_rawfd, sb)) == 0)
35683610Ssobomax	sb->st_size = -1;
35783610Ssobomax    return(result);
35883610Ssobomax}
35983610Ssobomax
/*
 * Fatal-error hook with external linkage: panics, reporting the error code
 * it was given.
 * NOTE(review): presumably this is the callback libbz2 expects the
 * application to supply when built without stdio -- confirm against the
 * bzip2 build options used here.
 */
void
bz_internal_error(int errorcode)
{

    panic("bzipfs: critical error %d in bzip2 library occured", errorcode);
}
365174741Ssobomax
366174741Ssobomax#ifdef REGRESSION
367174741Ssobomax/* Small test case, open and decompress test.bz2 */
368174741Ssobomaxint main()
369174741Ssobomax{
370174741Ssobomax    struct open_file f;
371174741Ssobomax    char buf[1024];
372174741Ssobomax    size_t resid;
373174741Ssobomax    int err;
374174741Ssobomax
375174741Ssobomax    memset(&f, '\0', sizeof(f));
376174741Ssobomax    f.f_flags = F_READ;
377174741Ssobomax    err = bzf_open("test", &f);
378174741Ssobomax    if (err != 0)
379174741Ssobomax	exit(1);
380174741Ssobomax    do {
381174741Ssobomax	err = bzf_read(&f, buf, sizeof(buf), &resid);
382174741Ssobomax    } while (err == 0 && resid != sizeof(buf));
383174741Ssobomax
384174741Ssobomax    if (err != 0)
385174741Ssobomax	exit(2);
386174741Ssobomax    exit(0);
387174741Ssobomax}
388174741Ssobomax#endif
389