bzipfs.c revision 200919
1/* 2 * Copyright (c) 1998 Michael Smith. 3 * Copyright (c) 2000 Maxim Sobolev 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28#include <sys/cdefs.h> 29__FBSDID("$FreeBSD: head/lib/libstand/bzipfs.c 200919 2009-12-23 21:11:03Z jhb $"); 30 31#ifndef REGRESSION 32#include "stand.h" 33#else 34#include <stdlib.h> 35#include <sys/errno.h> 36#include <sys/fcntl.h> 37#include <sys/types.h> 38#include <sys/unistd.h> 39 40struct open_file { 41 int f_flags; /* see F_* below */ 42 void *f_fsdata; /* file system specific data */ 43}; 44#define F_READ 0x0001 /* file opened for reading */ 45#define EOFFSET (ELAST+8) /* relative seek not supported */ 46static inline u_int min(u_int a, u_int b) { return(a < b ? a : b); } 47#define panic(x, y) abort() 48#endif 49 50#include <sys/stat.h> 51#include <string.h> 52#include <bzlib.h> 53 54#define BZ_BUFSIZE 2048 /* XXX larger? */ 55 56struct bz_file 57{ 58 int bzf_rawfd; 59 bz_stream bzf_bzstream; 60 char bzf_buf[BZ_BUFSIZE]; 61 int bzf_endseen; 62}; 63 64static int bzf_fill(struct bz_file *z); 65static int bzf_open(const char *path, struct open_file *f); 66static int bzf_close(struct open_file *f); 67static int bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid); 68static off_t bzf_seek(struct open_file *f, off_t offset, int where); 69static int bzf_stat(struct open_file *f, struct stat *sb); 70 71#ifndef REGRESSION 72struct fs_ops bzipfs_fsops = { 73 "bzip", 74 bzf_open, 75 bzf_close, 76 bzf_read, 77 null_write, 78 bzf_seek, 79 bzf_stat, 80 null_readdir 81}; 82#endif 83 84#if 0 85void * 86calloc(int items, size_t size) 87{ 88 return(malloc(items * size)); 89} 90#endif 91 92static int 93bzf_fill(struct bz_file *bzf) 94{ 95 int result; 96 int req; 97 98 req = BZ_BUFSIZE - bzf->bzf_bzstream.avail_in; 99 result = 0; 100 101 /* If we need more */ 102 if (req > 0) { 103 /* move old data to bottom of buffer */ 104 if (req < BZ_BUFSIZE) 105 bcopy(bzf->bzf_buf + req, bzf->bzf_buf, BZ_BUFSIZE - req); 106 107 /* read to fill buffer and update availibility data */ 108 result = read(bzf->bzf_rawfd, bzf->bzf_buf + bzf->bzf_bzstream.avail_in, req); 109 bzf->bzf_bzstream.next_in = bzf->bzf_buf; 110 if (result >= 0) 111 bzf->bzf_bzstream.avail_in += result; 112 } 113 return(result); 114} 115 116/* 117 * Adapted from get_byte/check_header in libz 118 * 119 * Returns 0 if the header is OK, nonzero if not. 120 */ 121static int 122get_byte(struct bz_file *bzf) 123{ 124 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) 125 return(-1); 126 bzf->bzf_bzstream.avail_in--; 127 return(*(bzf->bzf_bzstream.next_in)++); 128} 129 130static int bz_magic[3] = {'B', 'Z', 'h'}; /* bzip2 magic header */ 131 132static int 133check_header(struct bz_file *bzf) 134{ 135 unsigned int len; 136 int c; 137 138 /* Check the bzip2 magic header */ 139 for (len = 0; len < 3; len++) { 140 c = get_byte(bzf); 141 if (c != bz_magic[len]) { 142 return(1); 143 } 144 } 145 /* Check that the block size is valid */ 146 c = get_byte(bzf); 147 if (c < '1' || c > '9') 148 return(1); 149 150 /* Put back bytes that we've took from the input stream */ 151 bzf->bzf_bzstream.next_in -= 4; 152 bzf->bzf_bzstream.avail_in += 4; 153 154 return(0); 155} 156 157static int 158bzf_open(const char *fname, struct open_file *f) 159{ 160 static char *bzfname; 161 int rawfd; 162 struct bz_file *bzf; 163 char *cp; 164 int error; 165 struct stat sb; 166 167 /* Have to be in "just read it" mode */ 168 if (f->f_flags != F_READ) 169 return(EPERM); 170 171 /* If the name already ends in .gz or .bz2, ignore it */ 172 if ((cp = strrchr(fname, '.')) && (!strcmp(cp, ".gz") 173 || !strcmp(cp, ".bz2") || !strcmp(cp, ".split"))) 174 return(ENOENT); 175 176 /* Construct new name */ 177 bzfname = malloc(strlen(fname) + 5); 178 if (bzfname == NULL) 179 return(ENOMEM); 180 sprintf(bzfname, "%s.bz2", fname); 181 182 /* Try to open the compressed datafile */ 183 rawfd = open(bzfname, O_RDONLY); 184 free(bzfname); 185 if (rawfd == -1) 186 return(ENOENT); 187 188 if (fstat(rawfd, &sb) < 0) { 189 printf("bzf_open: stat failed\n"); 190 close(rawfd); 191 return(ENOENT); 192 } 193 if (!S_ISREG(sb.st_mode)) { 194 printf("bzf_open: not a file\n"); 195 close(rawfd); 196 return(EISDIR); /* best guess */ 197 } 198 199 /* Allocate a bz_file structure, populate it */ 200 bzf = malloc(sizeof(struct bz_file)); 201 if (bzf == NULL) 202 return(ENOMEM); 203 bzero(bzf, sizeof(struct bz_file)); 204 bzf->bzf_rawfd = rawfd; 205 206 /* Verify that the file is bzipped */ 207 if (check_header(bzf)) { 208 close(bzf->bzf_rawfd); 209 free(bzf); 210 return(EFTYPE); 211 } 212 213 /* Initialise the inflation engine */ 214 if ((error = BZ2_bzDecompressInit(&(bzf->bzf_bzstream), 0, 1)) != BZ_OK) { 215 printf("bzf_open: BZ2_bzDecompressInit returned %d\n", error); 216 close(bzf->bzf_rawfd); 217 free(bzf); 218 return(EIO); 219 } 220 221 /* Looks OK, we'll take it */ 222 f->f_fsdata = bzf; 223 return(0); 224} 225 226static int 227bzf_close(struct open_file *f) 228{ 229 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 230 231 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 232 close(bzf->bzf_rawfd); 233 free(bzf); 234 return(0); 235} 236 237static int 238bzf_read(struct open_file *f, void *buf, size_t size, size_t *resid) 239{ 240 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 241 int error; 242 243 bzf->bzf_bzstream.next_out = buf; /* where and how much */ 244 bzf->bzf_bzstream.avail_out = size; 245 246 while (bzf->bzf_bzstream.avail_out && bzf->bzf_endseen == 0) { 247 if ((bzf->bzf_bzstream.avail_in == 0) && (bzf_fill(bzf) == -1)) { 248 printf("bzf_read: fill error\n"); 249 return(EIO); 250 } 251 if (bzf->bzf_bzstream.avail_in == 0) { /* oops, unexpected EOF */ 252 printf("bzf_read: unexpected EOF\n"); 253 if (bzf->bzf_bzstream.avail_out == size) 254 return(EIO); 255 break; 256 } 257 258 error = BZ2_bzDecompress(&bzf->bzf_bzstream); /* decompression pass */ 259 if (error == BZ_STREAM_END) { /* EOF, all done */ 260 bzf->bzf_endseen = 1; 261 break; 262 } 263 if (error != BZ_OK) { /* argh, decompression error */ 264 printf("bzf_read: BZ2_bzDecompress returned %d\n", error); 265 return(EIO); 266 } 267 } 268 if (resid != NULL) 269 *resid = bzf->bzf_bzstream.avail_out; 270 return(0); 271} 272 273static int 274bzf_rewind(struct open_file *f) 275{ 276 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 277 struct bz_file *bzf_tmp; 278 279 /* 280 * Since bzip2 does not have an equivalent inflateReset function a crude 281 * one needs to be provided. The functions all called in such a way that 282 * at any time an error occurs a role back can be done (effectively making 283 * this rewind 'atomic', either the reset occurs successfully or not at all, 284 * with no 'undefined' state happening). 285 */ 286 287 /* Allocate a bz_file structure, populate it */ 288 bzf_tmp = malloc(sizeof(struct bz_file)); 289 if (bzf_tmp == NULL) 290 return(-1); 291 bzero(bzf_tmp, sizeof(struct bz_file)); 292 bzf_tmp->bzf_rawfd = bzf->bzf_rawfd; 293 294 /* Initialise the inflation engine */ 295 if (BZ2_bzDecompressInit(&(bzf_tmp->bzf_bzstream), 0, 1) != BZ_OK) { 296 free(bzf_tmp); 297 return(-1); 298 } 299 300 /* Seek back to the beginning of the file */ 301 if (lseek(bzf->bzf_rawfd, 0, SEEK_SET) == -1) { 302 BZ2_bzDecompressEnd(&(bzf_tmp->bzf_bzstream)); 303 free(bzf_tmp); 304 return(-1); 305 } 306 307 /* Free old bz_file data */ 308 BZ2_bzDecompressEnd(&(bzf->bzf_bzstream)); 309 free(bzf); 310 311 /* Use the new bz_file data */ 312 f->f_fsdata = bzf_tmp; 313 314 return(0); 315} 316 317static off_t 318bzf_seek(struct open_file *f, off_t offset, int where) 319{ 320 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 321 off_t target; 322 char discard[16]; 323 324 switch (where) { 325 case SEEK_SET: 326 target = offset; 327 break; 328 case SEEK_CUR: 329 target = offset + bzf->bzf_bzstream.total_out_lo32; 330 break; 331 case SEEK_END: 332 target = -1; 333 default: 334 errno = EINVAL; 335 return(-1); 336 } 337 338 /* Can we get there from here? */ 339 if (target < bzf->bzf_bzstream.total_out_lo32 && bzf_rewind(f) != 0) { 340 errno = EOFFSET; 341 return -1; 342 } 343 344 /* if bzf_rewind was called then bzf has changed */ 345 bzf = (struct bz_file *)f->f_fsdata; 346 347 /* skip forwards if required */ 348 while (target > bzf->bzf_bzstream.total_out_lo32) { 349 errno = bzf_read(f, discard, min(sizeof(discard), 350 target - bzf->bzf_bzstream.total_out_lo32), NULL); 351 if (errno) 352 return(-1); 353 } 354 /* This is where we are (be honest if we overshot) */ 355 return(bzf->bzf_bzstream.total_out_lo32); 356} 357 358static int 359bzf_stat(struct open_file *f, struct stat *sb) 360{ 361 struct bz_file *bzf = (struct bz_file *)f->f_fsdata; 362 int result; 363 364 /* stat as normal, but indicate that size is unknown */ 365 if ((result = fstat(bzf->bzf_rawfd, sb)) == 0) 366 sb->st_size = -1; 367 return(result); 368} 369 370void 371bz_internal_error(int errorcode) 372{ 373 panic("bzipfs: critical error %d in bzip2 library occured\n", errorcode); 374} 375 376#ifdef REGRESSION 377/* Small test case, open and decompress test.bz2 */ 378int main() 379{ 380 struct open_file f; 381 char buf[1024]; 382 size_t resid; 383 int err; 384 385 memset(&f, '\0', sizeof(f)); 386 f.f_flags = F_READ; 387 err = bzf_open("test", &f); 388 if (err != 0) 389 exit(1); 390 do { 391 err = bzf_read(&f, buf, sizeof(buf), &resid); 392 } while (err == 0 && resid != sizeof(buf)); 393 394 if (err != 0) 395 exit(2); 396 exit(0); 397} 398#endif 399