vnd.c revision 1.266
1/* $NetBSD: vnd.c,v 1.266 2018/10/05 09:51:55 hannken Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 66 * 67 * @(#)vn.c 8.9 (Berkeley) 5/14/95 68 */ 69 70/* 71 * Vnode disk driver. 72 * 73 * Block/character interface to a vnode. Allows one to treat a file 74 * as a disk (e.g. build a filesystem in it, mount it, etc.). 75 * 76 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 77 * this uses them to avoid distorting the local buffer cache. If those 78 * block-level operations are not available, this falls back to the regular 79 * read and write calls. Using these may distort the cache in some cases 80 * but better have the driver working than preventing it to work on file 81 * systems where the block-level operations are not implemented for 82 * whatever reason. 83 * 84 * NOTE 2: There is a security issue involved with this driver. 85 * Once mounted all access to the contents of the "mapped" file via 86 * the special file is controlled by the permissions on the special 87 * file, the protection of the mapped file is ignored (effectively, 88 * by using root credentials in all transactions). 89 * 90 * NOTE 3: Doesn't interact with leases, should it? 91 */ 92 93#include <sys/cdefs.h> 94__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.266 2018/10/05 09:51:55 hannken Exp $"); 95 96#if defined(_KERNEL_OPT) 97#include "opt_vnd.h" 98#include "opt_compat_netbsd.h" 99#endif 100 101#include <sys/param.h> 102#include <sys/systm.h> 103#include <sys/namei.h> 104#include <sys/proc.h> 105#include <sys/kthread.h> 106#include <sys/errno.h> 107#include <sys/buf.h> 108#include <sys/bufq.h> 109#include <sys/malloc.h> 110#include <sys/ioctl.h> 111#include <sys/disklabel.h> 112#include <sys/device.h> 113#include <sys/disk.h> 114#include <sys/stat.h> 115#include <sys/mount.h> 116#include <sys/vnode.h> 117#include <sys/fstrans.h> 118#include <sys/file.h> 119#include <sys/uio.h> 120#include <sys/conf.h> 121#include <sys/kauth.h> 122#include <sys/module.h> 123 124#include <net/zlib.h> 125 126#include <miscfs/genfs/genfs.h> 127#include <miscfs/specfs/specdev.h> 128 129#include <dev/dkvar.h> 130#include <dev/vndvar.h> 131 132#include "ioconf.h" 133 134#if defined(VNDDEBUG) && !defined(DEBUG) 135#define DEBUG 136#endif 137 138#ifdef DEBUG 139int dovndcluster = 1; 140#define VDB_FOLLOW 0x01 141#define VDB_INIT 0x02 142#define VDB_IO 0x04 143#define VDB_LABEL 0x08 144int vnddebug = 0; 145#endif 146 147#define vndunit(x) DISKUNIT(x) 148 149struct vndxfer { 150 struct buf vx_buf; 151 struct vnd_softc *vx_vnd; 152}; 153#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 154 155#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 156#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 157 158#define VNDLABELDEV(dev) \ 159 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 160 161#define VND_MAXPENDING(vnd) ((vnd)->sc_maxactive * 4) 162 163 164static void vndclear(struct vnd_softc *, int); 165static int vnddoclear(struct vnd_softc *, int, int, bool); 166static int vndsetcred(struct vnd_softc *, kauth_cred_t); 167static void vndthrottle(struct vnd_softc *, struct vnode *); 168static void vndiodone(struct buf *); 169#if 0 170static void vndshutdown(void); 171#endif 172 173static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 174static void vndgetdisklabel(dev_t, struct vnd_softc *); 175 176static int vndlock(struct vnd_softc *); 177static void vndunlock(struct vnd_softc *); 178#ifdef VND_COMPRESSION 179static void compstrategy(struct buf *, off_t); 180static void *vnd_alloc(void *, u_int, u_int); 181static void vnd_free(void *, void *); 182#endif /* VND_COMPRESSION */ 183 184static void vndthread(void *); 185static bool vnode_has_op(const struct vnode *, int); 186static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 187 struct buf *); 188static void handle_with_strategy(struct vnd_softc *, const struct buf *, 189 struct buf *); 190static void vnd_set_geometry(struct vnd_softc *); 191 192static dev_type_open(vndopen); 193static dev_type_close(vndclose); 194static dev_type_read(vndread); 195static dev_type_write(vndwrite); 196static dev_type_ioctl(vndioctl); 197static dev_type_strategy(vndstrategy); 198static dev_type_dump(vnddump); 199static dev_type_size(vndsize); 200 201const struct bdevsw vnd_bdevsw = { 202 .d_open = vndopen, 203 .d_close = vndclose, 204 .d_strategy = vndstrategy, 205 .d_ioctl = vndioctl, 206 .d_dump = vnddump, 207 .d_psize = vndsize, 208 .d_discard = nodiscard, 209 .d_flag = D_DISK 210}; 211 212const struct cdevsw vnd_cdevsw = { 213 .d_open = vndopen, 214 .d_close = vndclose, 215 .d_read = vndread, 216 .d_write = vndwrite, 217 .d_ioctl = vndioctl, 218 .d_stop = nostop, 219 .d_tty = notty, 220 .d_poll = nopoll, 221 .d_mmap = nommap, 222 .d_kqfilter = nokqfilter, 223 .d_discard = nodiscard, 224 .d_flag = D_DISK 225}; 226 227static int vnd_match(device_t, cfdata_t, void *); 228static void vnd_attach(device_t, device_t, void *); 229static int vnd_detach(device_t, int); 230 231CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc), 232 vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); 233 234static struct vnd_softc *vnd_spawn(int); 235int vnd_destroy(device_t); 236 237static struct dkdriver vnddkdriver = { 238 .d_strategy = vndstrategy, 239 .d_minphys = minphys 240}; 241 242void 243vndattach(int num) 244{ 245 int error; 246 247 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 248 if (error) 249 aprint_error("%s: unable to register cfattach, error = %d\n", 250 vnd_cd.cd_name, error); 251} 252 253static int 254vnd_match(device_t self, cfdata_t cfdata, void *aux) 255{ 256 257 return 1; 258} 259 260static void 261vnd_attach(device_t parent, device_t self, void *aux) 262{ 263 struct vnd_softc *sc = device_private(self); 264 265 sc->sc_dev = self; 266 sc->sc_comp_offsets = NULL; 267 sc->sc_comp_buff = NULL; 268 sc->sc_comp_decombuf = NULL; 269 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 270 disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); 271 if (!pmf_device_register(self, NULL, NULL)) 272 aprint_error_dev(self, "couldn't establish power handler\n"); 273} 274 275static int 276vnd_detach(device_t self, int flags) 277{ 278 int error; 279 struct vnd_softc *sc = device_private(self); 280 281 if (sc->sc_flags & VNF_INITED) { 282 error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0); 283 if (error != 0) 284 return error; 285 } 286 287 pmf_device_deregister(self); 288 bufq_free(sc->sc_tab); 289 disk_destroy(&sc->sc_dkdev); 290 291 return 0; 292} 293 294static struct vnd_softc * 295vnd_spawn(int unit) 296{ 297 cfdata_t cf; 298 299 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 300 cf->cf_name = vnd_cd.cd_name; 301 cf->cf_atname = vnd_cd.cd_name; 302 cf->cf_unit = unit; 303 cf->cf_fstate = FSTATE_STAR; 304 305 return device_private(config_attach_pseudo(cf)); 306} 307 308int 309vnd_destroy(device_t dev) 310{ 311 int error; 312 cfdata_t cf; 313 314 cf = device_cfdata(dev); 315 error = config_detach(dev, DETACH_QUIET); 316 if (error) 317 return error; 318 free(cf, M_DEVBUF); 319 return 0; 320} 321 322static int 323vndopen(dev_t dev, int flags, int mode, struct lwp *l) 324{ 325 int unit = vndunit(dev); 326 struct vnd_softc *sc; 327 int error = 0, part, pmask; 328 struct disklabel *lp; 329 330#ifdef DEBUG 331 if (vnddebug & VDB_FOLLOW) 332 printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 333#endif 334 sc = device_lookup_private(&vnd_cd, unit); 335 if (sc == NULL) { 336 sc = vnd_spawn(unit); 337 if (sc == NULL) 338 return ENOMEM; 339 340 /* compatibility, keep disklabel after close */ 341 sc->sc_flags = VNF_KLABEL; 342 } 343 344 if ((error = vndlock(sc)) != 0) 345 return error; 346 347 mutex_enter(&sc->sc_dkdev.dk_openlock); 348 349 if ((sc->sc_flags & VNF_CLEARING) != 0) { 350 error = ENXIO; 351 goto done; 352 } 353 354 lp = sc->sc_dkdev.dk_label; 355 356 part = DISKPART(dev); 357 pmask = (1 << part); 358 359 if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 360 error = EBUSY; 361 goto done; 362 } 363 364 if (sc->sc_flags & VNF_INITED) { 365 if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) { 366 /* 367 * If any non-raw partition is open, but the disk 368 * has been invalidated, disallow further opens. 369 */ 370 if ((sc->sc_flags & VNF_VLABEL) == 0) { 371 error = EIO; 372 goto done; 373 } 374 } else { 375 /* 376 * Load the partition info if not already loaded. 377 */ 378 if ((sc->sc_flags & VNF_VLABEL) == 0) { 379 sc->sc_flags |= VNF_VLABEL; 380 vndgetdisklabel(dev, sc); 381 } 382 } 383 } 384 385 /* Check that the partitions exists. */ 386 if (part != RAW_PART) { 387 if (((sc->sc_flags & VNF_INITED) == 0) || 388 ((part >= lp->d_npartitions) || 389 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 390 error = ENXIO; 391 goto done; 392 } 393 } 394 395 /* Prevent our unit from being unconfigured while open. */ 396 switch (mode) { 397 case S_IFCHR: 398 sc->sc_dkdev.dk_copenmask |= pmask; 399 break; 400 401 case S_IFBLK: 402 sc->sc_dkdev.dk_bopenmask |= pmask; 403 break; 404 } 405 sc->sc_dkdev.dk_openmask = 406 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 407 408 done: 409 mutex_exit(&sc->sc_dkdev.dk_openlock); 410 vndunlock(sc); 411 return error; 412} 413 414static int 415vndclose(dev_t dev, int flags, int mode, struct lwp *l) 416{ 417 int unit = vndunit(dev); 418 struct vnd_softc *sc; 419 int error = 0, part; 420 421#ifdef DEBUG 422 if (vnddebug & VDB_FOLLOW) 423 printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 424#endif 425 sc = device_lookup_private(&vnd_cd, unit); 426 if (sc == NULL) 427 return ENXIO; 428 429 if ((error = vndlock(sc)) != 0) 430 return error; 431 432 mutex_enter(&sc->sc_dkdev.dk_openlock); 433 434 part = DISKPART(dev); 435 436 /* ...that much closer to allowing unconfiguration... */ 437 switch (mode) { 438 case S_IFCHR: 439 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 440 break; 441 442 case S_IFBLK: 443 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 444 break; 445 } 446 sc->sc_dkdev.dk_openmask = 447 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 448 449 /* are we last opener ? */ 450 if (sc->sc_dkdev.dk_openmask == 0) { 451 if ((sc->sc_flags & VNF_KLABEL) == 0) 452 sc->sc_flags &= ~VNF_VLABEL; 453 } 454 455 mutex_exit(&sc->sc_dkdev.dk_openlock); 456 457 vndunlock(sc); 458 459 if ((sc->sc_flags & VNF_INITED) == 0) { 460 if ((error = vnd_destroy(sc->sc_dev)) != 0) { 461 aprint_error_dev(sc->sc_dev, 462 "unable to detach instance\n"); 463 return error; 464 } 465 } 466 467 return 0; 468} 469 470/* 471 * Queue the request, and wakeup the kernel thread to handle it. 472 */ 473static void 474vndstrategy(struct buf *bp) 475{ 476 int unit = vndunit(bp->b_dev); 477 struct vnd_softc *vnd = 478 device_lookup_private(&vnd_cd, unit); 479 struct disklabel *lp; 480 daddr_t blkno; 481 int s = splbio(); 482 483 if (vnd == NULL) { 484 bp->b_error = ENXIO; 485 goto done; 486 } 487 lp = vnd->sc_dkdev.dk_label; 488 489 if ((vnd->sc_flags & VNF_INITED) == 0) { 490 bp->b_error = ENXIO; 491 goto done; 492 } 493 494 /* 495 * The transfer must be a whole number of blocks. 496 */ 497 if ((bp->b_bcount % lp->d_secsize) != 0) { 498 bp->b_error = EINVAL; 499 goto done; 500 } 501 502 /* 503 * check if we're read-only. 504 */ 505 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 506 bp->b_error = EACCES; 507 goto done; 508 } 509 510 /* If it's a nil transfer, wake up the top half now. */ 511 if (bp->b_bcount == 0) { 512 goto done; 513 } 514 515 /* 516 * Do bounds checking and adjust transfer. If there's an error, 517 * the bounds check will flag that for us. 518 */ 519 if (DISKPART(bp->b_dev) == RAW_PART) { 520 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 521 vnd->sc_size) <= 0) 522 goto done; 523 } else { 524 if (bounds_check_with_label(&vnd->sc_dkdev, 525 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 526 goto done; 527 } 528 529 /* 530 * Put the block number in terms of the logical blocksize 531 * of the "device". 532 */ 533 534 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 535 536 /* 537 * Translate the partition-relative block number to an absolute. 538 */ 539 if (DISKPART(bp->b_dev) != RAW_PART) { 540 struct partition *pp; 541 542 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 543 DISKPART(bp->b_dev)]; 544 blkno += pp->p_offset; 545 } 546 bp->b_rawblkno = blkno; 547 548#ifdef DEBUG 549 if (vnddebug & VDB_FOLLOW) 550 printf("vndstrategy(%p): unit %d\n", bp, unit); 551#endif 552 if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { 553 KASSERT(vnd->sc_pending >= 0 && 554 vnd->sc_pending <= VND_MAXPENDING(vnd)); 555 while (vnd->sc_pending == VND_MAXPENDING(vnd)) 556 tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0); 557 vnd->sc_pending++; 558 } 559 bufq_put(vnd->sc_tab, bp); 560 wakeup(&vnd->sc_tab); 561 splx(s); 562 return; 563 564done: 565 bp->b_resid = bp->b_bcount; 566 biodone(bp); 567 splx(s); 568} 569 570static bool 571vnode_has_strategy(struct vnd_softc *vnd) 572{ 573 return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && 574 vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); 575} 576 577/* Verify that I/O requests cannot be smaller than the 578 * smallest I/O size supported by the backend. 579 */ 580static bool 581vnode_has_large_blocks(struct vnd_softc *vnd) 582{ 583 u_int32_t vnd_secsize, iosize; 584 585 iosize = vnd->sc_iosize; 586 vnd_secsize = vnd->sc_geom.vng_secsize; 587 588 return vnd_secsize % iosize != 0; 589} 590 591/* XXX this function needs a reliable check to detect 592 * sparse files. Otherwise, bmap/strategy may be used 593 * and fail on non-allocated blocks. VOP_READ/VOP_WRITE 594 * works on sparse files. 595 */ 596#if notyet 597static bool 598vnode_strategy_probe(struct vnd_softc *vnd) 599{ 600 int error; 601 daddr_t nbn; 602 603 if (!vnode_has_strategy(vnd)) 604 return false; 605 606 if (vnode_has_large_blocks(vnd)) 607 return false; 608 609 /* Convert the first logical block number to its 610 * physical block number. 611 */ 612 error = 0; 613 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 614 error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL); 615 VOP_UNLOCK(vnd->sc_vp); 616 617 /* Test if that worked. */ 618 if (error == 0 && (long)nbn == -1) 619 return false; 620 621 return true; 622} 623#endif 624 625static void 626vndthread(void *arg) 627{ 628 struct vnd_softc *vnd = arg; 629 int s; 630 631 /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to 632 * directly access the backing vnode. If we can, use these two 633 * operations to avoid messing with the local buffer cache. 634 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 635 * which are guaranteed to work with any file system. */ 636 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 637 ! vnode_has_strategy(vnd)) 638 vnd->sc_flags |= VNF_USE_VN_RDWR; 639 640 /* VOP_STRATEGY can only be used if the backing vnode allows 641 * to access blocks as small as defined by the vnd geometry. 642 */ 643 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 644 vnode_has_large_blocks(vnd)) 645 vnd->sc_flags |= VNF_USE_VN_RDWR; 646 647#ifdef DEBUG 648 if (vnddebug & VDB_INIT) 649 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 650 (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ? 651 "using bmap/strategy operations" : 652 "using read/write operations"); 653#endif 654 655 s = splbio(); 656 vnd->sc_flags |= VNF_KTHREAD; 657 wakeup(&vnd->sc_kthread); 658 659 /* 660 * Dequeue requests and serve them depending on the available 661 * vnode operations. 662 */ 663 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 664 struct vndxfer *vnx; 665 struct buf *obp; 666 struct buf *bp; 667 668 obp = bufq_get(vnd->sc_tab); 669 if (obp == NULL) { 670 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 671 continue; 672 }; 673 if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { 674 KASSERT(vnd->sc_pending > 0 && 675 vnd->sc_pending <= VND_MAXPENDING(vnd)); 676 if (vnd->sc_pending-- == VND_MAXPENDING(vnd)) 677 wakeup(&vnd->sc_pending); 678 } 679 splx(s); 680#ifdef DEBUG 681 if (vnddebug & VDB_FOLLOW) 682 printf("vndthread(%p)\n", obp); 683#endif 684 685 if (vnd->sc_vp->v_mount == NULL) { 686 obp->b_error = ENXIO; 687 goto done; 688 } 689#ifdef VND_COMPRESSION 690 /* handle a compressed read */ 691 if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 692 off_t bn; 693 694 /* Convert to a byte offset within the file. */ 695 bn = obp->b_rawblkno * 696 vnd->sc_dkdev.dk_label->d_secsize; 697 698 compstrategy(obp, bn); 699 goto done; 700 } 701#endif /* VND_COMPRESSION */ 702 703 /* 704 * Allocate a header for this transfer and link it to the 705 * buffer 706 */ 707 s = splbio(); 708 vnx = VND_GETXFER(vnd); 709 splx(s); 710 vnx->vx_vnd = vnd; 711 712 s = splbio(); 713 while (vnd->sc_active >= vnd->sc_maxactive) { 714 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 715 } 716 vnd->sc_active++; 717 splx(s); 718 719 /* Instrumentation. */ 720 disk_busy(&vnd->sc_dkdev); 721 722 bp = &vnx->vx_buf; 723 buf_init(bp); 724 bp->b_flags = (obp->b_flags & B_READ); 725 bp->b_oflags = obp->b_oflags; 726 bp->b_cflags = obp->b_cflags; 727 bp->b_iodone = vndiodone; 728 bp->b_private = obp; 729 bp->b_vp = vnd->sc_vp; 730 bp->b_objlock = bp->b_vp->v_interlock; 731 bp->b_data = obp->b_data; 732 bp->b_bcount = obp->b_bcount; 733 BIO_COPYPRIO(bp, obp); 734 735 /* Handle the request using the appropriate operations. */ 736 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0) 737 handle_with_strategy(vnd, obp, bp); 738 else 739 handle_with_rdwr(vnd, obp, bp); 740 741 s = splbio(); 742 continue; 743 744done: 745 biodone(obp); 746 s = splbio(); 747 } 748 749 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 750 wakeup(&vnd->sc_kthread); 751 splx(s); 752 kthread_exit(0); 753} 754 755/* 756 * Checks if the given vnode supports the requested operation. 757 * The operation is specified the offset returned by VOFFSET. 758 * 759 * XXX The test below used to determine this is quite fragile 760 * because it relies on the file system to use genfs to specify 761 * unimplemented operations. There might be another way to do 762 * it more cleanly. 763 */ 764static bool 765vnode_has_op(const struct vnode *vp, int opoffset) 766{ 767 int (*defaultp)(void *); 768 int (*opp)(void *); 769 770 defaultp = vp->v_op[VOFFSET(vop_default)]; 771 opp = vp->v_op[opoffset]; 772 773 return opp != defaultp && opp != genfs_eopnotsupp && 774 opp != genfs_badop && opp != genfs_nullop; 775} 776 777/* 778 * Handles the read/write request given in 'bp' using the vnode's VOP_READ 779 * and VOP_WRITE operations. 780 * 781 * 'obp' is a pointer to the original request fed to the vnd device. 782 */ 783static void 784handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 785{ 786 bool doread; 787 off_t offset; 788 size_t len, resid; 789 struct vnode *vp; 790 791 doread = bp->b_flags & B_READ; 792 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 793 len = bp->b_bcount; 794 vp = vnd->sc_vp; 795 796#if defined(DEBUG) 797 if (vnddebug & VDB_IO) 798 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 799 ", secsize %d, offset %" PRIu64 800 ", bcount %d\n", 801 vp, doread ? "read" : "write", obp->b_rawblkno, 802 vnd->sc_dkdev.dk_label->d_secsize, offset, 803 bp->b_bcount); 804#endif 805 806 /* Make sure the request succeeds while suspending this fs. */ 807 fstrans_start_lazy(vp->v_mount); 808 809 /* Issue the read or write operation. */ 810 bp->b_error = 811 vn_rdwr(doread ? UIO_READ : UIO_WRITE, 812 vp, bp->b_data, len, offset, UIO_SYSSPACE, 813 IO_ADV_ENCODE(POSIX_FADV_NOREUSE), vnd->sc_cred, &resid, NULL); 814 bp->b_resid = resid; 815 816 mutex_enter(vp->v_interlock); 817 (void) VOP_PUTPAGES(vp, 0, 0, 818 PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); 819 820 fstrans_done(vp->v_mount); 821 822 /* We need to increase the number of outputs on the vnode if 823 * there was any write to it. */ 824 if (!doread) { 825 mutex_enter(vp->v_interlock); 826 vp->v_numoutput++; 827 mutex_exit(vp->v_interlock); 828 } 829 830 biodone(bp); 831} 832 833/* 834 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 835 * and VOP_STRATEGY operations. 836 * 837 * 'obp' is a pointer to the original request fed to the vnd device. 838 */ 839static void 840handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 841 struct buf *bp) 842{ 843 int bsize, error, flags, skipped; 844 size_t resid, sz; 845 off_t bn, offset; 846 struct vnode *vp; 847 struct buf *nbp = NULL; 848 849 flags = obp->b_flags; 850 851 852 /* convert to a byte offset within the file. */ 853 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 854 855 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 856 skipped = 0; 857 858 /* 859 * Break the request into bsize pieces and feed them 860 * sequentially using VOP_BMAP/VOP_STRATEGY. 861 * We do it this way to keep from flooding NFS servers if we 862 * are connected to an NFS file. This places the burden on 863 * the client rather than the server. 864 */ 865 error = 0; 866 bp->b_resid = bp->b_bcount; 867 for (offset = 0, resid = bp->b_resid; /* true */; 868 resid -= sz, offset += sz) { 869 daddr_t nbn; 870 int off, nra; 871 872 nra = 0; 873 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 874 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 875 VOP_UNLOCK(vnd->sc_vp); 876 877 if (error == 0 && (long)nbn == -1) 878 error = EIO; 879 880 /* 881 * If there was an error or a hole in the file...punt. 882 * Note that we may have to wait for any operations 883 * that we have already fired off before releasing 884 * the buffer. 885 * 886 * XXX we could deal with holes here but it would be 887 * a hassle (in the write case). 888 */ 889 if (error) { 890 skipped += resid; 891 break; 892 } 893 894#ifdef DEBUG 895 if (!dovndcluster) 896 nra = 0; 897#endif 898 899 off = bn % bsize; 900 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 901#ifdef DEBUG 902 if (vnddebug & VDB_IO) 903 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 904 " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, 905 nbn, sz); 906#endif 907 908 nbp = getiobuf(vp, true); 909 nestiobuf_setup(bp, nbp, offset, sz); 910 nbp->b_blkno = nbn + btodb(off); 911 912#if 0 /* XXX #ifdef DEBUG */ 913 if (vnddebug & VDB_IO) 914 printf("vndstart(%ld): bp %p vp %p blkno " 915 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 916 (long) (vnd-vnd_softc), &nbp->vb_buf, 917 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 918 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 919 nbp->vb_buf.b_bcount); 920#endif 921 if (resid == sz) { 922 break; 923 } 924 VOP_STRATEGY(vp, nbp); 925 bn += sz; 926 } 927 if (!(flags & B_READ)) { 928 struct vnode *w_vp; 929 /* 930 * this is the last nested buf, account for 931 * the parent buf write too. 932 * This has to be done last, so that 933 * fsync won't wait for this write which 934 * has no chance to complete before all nested bufs 935 * have been queued. But it has to be done 936 * before the last VOP_STRATEGY() 937 * or the call to nestiobuf_done(). 938 */ 939 w_vp = bp->b_vp; 940 mutex_enter(w_vp->v_interlock); 941 w_vp->v_numoutput++; 942 mutex_exit(w_vp->v_interlock); 943 } 944 KASSERT(skipped != 0 || nbp != NULL); 945 if (skipped) 946 nestiobuf_done(bp, skipped, error); 947 else 948 VOP_STRATEGY(vp, nbp); 949} 950 951static void 952vndiodone(struct buf *bp) 953{ 954 struct vndxfer *vnx = VND_BUFTOXFER(bp); 955 struct vnd_softc *vnd = vnx->vx_vnd; 956 struct buf *obp = bp->b_private; 957 int s = splbio(); 958 959 KASSERT(&vnx->vx_buf == bp); 960 KASSERT(vnd->sc_active > 0); 961#ifdef DEBUG 962 if (vnddebug & VDB_IO) { 963 printf("vndiodone1: bp %p iodone: error %d\n", 964 bp, bp->b_error); 965 } 966#endif 967 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 968 (bp->b_flags & B_READ)); 969 vnd->sc_active--; 970 if (vnd->sc_active == 0) { 971 wakeup(&vnd->sc_tab); 972 } 973 splx(s); 974 obp->b_error = bp->b_error; 975 obp->b_resid = bp->b_resid; 976 buf_destroy(bp); 977 VND_PUTXFER(vnd, vnx); 978 biodone(obp); 979} 980 981/* ARGSUSED */ 982static int 983vndread(dev_t dev, struct uio *uio, int flags) 984{ 985 int unit = vndunit(dev); 986 struct vnd_softc *sc; 987 988#ifdef DEBUG 989 if (vnddebug & VDB_FOLLOW) 990 printf("vndread(0x%"PRIx64", %p)\n", dev, uio); 991#endif 992 993 sc = device_lookup_private(&vnd_cd, unit); 994 if (sc == NULL) 995 return ENXIO; 996 997 if ((sc->sc_flags & VNF_INITED) == 0) 998 return ENXIO; 999 1000 return physio(vndstrategy, NULL, dev, B_READ, minphys, uio); 1001} 1002 1003/* ARGSUSED */ 1004static int 1005vndwrite(dev_t dev, struct uio *uio, int flags) 1006{ 1007 int unit = vndunit(dev); 1008 struct vnd_softc *sc; 1009 1010#ifdef DEBUG 1011 if (vnddebug & VDB_FOLLOW) 1012 printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio); 1013#endif 1014 1015 sc = device_lookup_private(&vnd_cd, unit); 1016 if (sc == NULL) 1017 return ENXIO; 1018 1019 if ((sc->sc_flags & VNF_INITED) == 0) 1020 return ENXIO; 1021 1022 return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio); 1023} 1024 1025static int 1026vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 1027{ 1028 int error; 1029 struct vnd_softc *vnd; 1030 1031 if (*un == -1) 1032 *un = unit; 1033 if (*un < 0) 1034 return EINVAL; 1035 1036 vnd = device_lookup_private(&vnd_cd, *un); 1037 if (vnd == NULL) 1038 return -1; 1039 1040 if ((vnd->sc_flags & VNF_INITED) == 0) 1041 return -1; 1042 1043 vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY); 1044 error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred); 1045 VOP_UNLOCK(vnd->sc_vp); 1046 return error; 1047} 1048 1049static int 1050vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force) 1051{ 1052 int error; 1053 1054 if ((error = vndlock(vnd)) != 0) 1055 return error; 1056 1057 /* 1058 * Don't unconfigure if any other partitions are open 1059 * or if both the character and block flavors of this 1060 * partition are open. 1061 */ 1062 if (DK_BUSY(vnd, pmask) && !force) { 1063 vndunlock(vnd); 1064 return EBUSY; 1065 } 1066 1067 /* Delete all of our wedges */ 1068 dkwedge_delall(&vnd->sc_dkdev); 1069 1070 /* 1071 * XXX vndclear() might call vndclose() implicitly; 1072 * release lock to avoid recursion 1073 * 1074 * Set VNF_CLEARING to prevent vndopen() from 1075 * sneaking in after we vndunlock(). 1076 */ 1077 vnd->sc_flags |= VNF_CLEARING; 1078 vndunlock(vnd); 1079 vndclear(vnd, minor); 1080#ifdef DEBUG 1081 if (vnddebug & VDB_INIT) 1082 printf("%s: CLRed\n", __func__); 1083#endif 1084 1085 /* Destroy the xfer and buffer pools. */ 1086 pool_destroy(&vnd->sc_vxpool); 1087 1088 /* Detach the disk. */ 1089 disk_detach(&vnd->sc_dkdev); 1090 1091 return 0; 1092} 1093 1094static int 1095vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va) 1096{ 1097 int error; 1098 1099 KASSERT(l); 1100 1101 /* the first member is always int vnd_unit in all the versions */ 1102 if (*(int *)data >= vnd_cd.cd_ndevs) 1103 return ENXIO; 1104 1105 switch (error = vnd_cget(l, unit, (int *)data, va)) { 1106 case -1: 1107 /* unused is not an error */ 1108 memset(va, 0, sizeof(*va)); 1109 /*FALLTHROUGH*/ 1110 case 0: 1111 return 0; 1112 default: 1113 return error; 1114 } 1115} 1116 1117/* ARGSUSED */ 1118static int 1119vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1120{ 1121 bool force; 1122 int unit = vndunit(dev); 1123 struct vnd_softc *vnd; 1124 struct vnd_ioctl *vio; 1125 struct vattr vattr; 1126 struct pathbuf *pb; 1127 struct nameidata nd; 1128 int error, part, pmask; 1129 uint64_t geomsize; 1130 int fflags; 1131#ifdef __HAVE_OLD_DISKLABEL 1132 struct disklabel newlabel; 1133#endif 1134 1135#ifdef DEBUG 1136 if (vnddebug & VDB_FOLLOW) 1137 printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n", 1138 dev, cmd, data, flag, l->l_proc, unit); 1139#endif 1140 /* Do the get's first; they don't need initialization or verification */ 1141 switch (cmd) { 1142#ifdef COMPAT_30 1143 case VNDIOCGET30: { 1144 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1145 return error; 1146 1147 struct vnd_user30 *vnu = data; 1148 vnu->vnu_dev = vattr.va_fsid; 1149 vnu->vnu_ino = vattr.va_fileid; 1150 return 0; 1151 } 1152#endif 1153#ifdef COMPAT_50 1154 case VNDIOCGET50: { 1155 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1156 return error; 1157 1158 struct vnd_user50 *vnu = data; 1159 vnu->vnu_dev = vattr.va_fsid; 1160 vnu->vnu_ino = vattr.va_fileid; 1161 return 0; 1162 } 1163#endif 1164 1165 case VNDIOCGET: { 1166 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1167 return error; 1168 1169 struct vnd_user *vnu = data; 1170 vnu->vnu_dev = vattr.va_fsid; 1171 vnu->vnu_ino = vattr.va_fileid; 1172 return 0; 1173 } 1174 default: 1175 break; 1176 } 1177 1178 vnd = device_lookup_private(&vnd_cd, unit); 1179 if (vnd == NULL) 1180 return ENXIO; 1181 vio = (struct vnd_ioctl *)data; 1182 1183 /* Must be open for writes for these commands... */ 1184 switch (cmd) { 1185 case VNDIOCSET: 1186 case VNDIOCCLR: 1187#ifdef COMPAT_50 1188 case VNDIOCSET50: 1189 case VNDIOCCLR50: 1190#endif 1191 case DIOCSDINFO: 1192 case DIOCWDINFO: 1193#ifdef __HAVE_OLD_DISKLABEL 1194 case ODIOCSDINFO: 1195 case ODIOCWDINFO: 1196#endif 1197 case DIOCKLABEL: 1198 case DIOCWLABEL: 1199 if ((flag & FWRITE) == 0) 1200 return EBADF; 1201 } 1202 1203 /* Must be initialized for these... */ 1204 switch (cmd) { 1205 case VNDIOCCLR: 1206#ifdef VNDIOCCLR50 1207 case VNDIOCCLR50: 1208#endif 1209 case DIOCGDINFO: 1210 case DIOCSDINFO: 1211 case DIOCWDINFO: 1212 case DIOCGPARTINFO: 1213 case DIOCKLABEL: 1214 case DIOCWLABEL: 1215 case DIOCGDEFLABEL: 1216 case DIOCCACHESYNC: 1217#ifdef __HAVE_OLD_DISKLABEL 1218 case ODIOCGDINFO: 1219 case ODIOCSDINFO: 1220 case ODIOCWDINFO: 1221 case ODIOCGDEFLABEL: 1222#endif 1223 if ((vnd->sc_flags & VNF_INITED) == 0) 1224 return ENXIO; 1225 } 1226 1227 error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l); 1228 if (error != EPASSTHROUGH) 1229 return error; 1230 1231 1232 switch (cmd) { 1233#ifdef VNDIOCSET50 1234 case VNDIOCSET50: 1235#endif 1236 case VNDIOCSET: 1237 if (vnd->sc_flags & VNF_INITED) 1238 return EBUSY; 1239 1240 if ((error = vndlock(vnd)) != 0) 1241 return error; 1242 1243 fflags = FREAD; 1244 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 1245 fflags |= FWRITE; 1246 error = pathbuf_copyin(vio->vnd_file, &pb); 1247 if (error) { 1248 goto unlock_and_exit; 1249 } 1250 NDINIT(&nd, LOOKUP, FOLLOW, pb); 1251 if ((error = vn_open(&nd, fflags, 0)) != 0) { 1252 pathbuf_destroy(pb); 1253 goto unlock_and_exit; 1254 } 1255 KASSERT(l); 1256 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred); 1257 if (!error && nd.ni_vp->v_type != VREG) 1258 error = EOPNOTSUPP; 1259 if (!error && vattr.va_bytes < vattr.va_size) 1260 /* File is definitely sparse, use vn_rdwr() */ 1261 vnd->sc_flags |= VNF_USE_VN_RDWR; 1262 if (error) { 1263 VOP_UNLOCK(nd.ni_vp); 1264 goto close_and_exit; 1265 } 1266 1267 /* If using a compressed file, initialize its info */ 1268 /* (or abort with an error if kernel has no compression) */ 1269 if (vio->vnd_flags & VNF_COMP) { 1270#ifdef VND_COMPRESSION 1271 struct vnd_comp_header *ch; 1272 int i; 1273 uint32_t comp_size; 1274 uint32_t comp_maxsize; 1275 1276 /* allocate space for compresed file header */ 1277 ch = malloc(sizeof(struct vnd_comp_header), 1278 M_TEMP, M_WAITOK); 1279 1280 /* read compressed file header */ 1281 error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch, 1282 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1283 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1284 if (error) { 1285 free(ch, M_TEMP); 1286 VOP_UNLOCK(nd.ni_vp); 1287 goto close_and_exit; 1288 } 1289 1290 if (be32toh(ch->block_size) == 0 || 1291 be32toh(ch->num_blocks) > UINT32_MAX - 1) { 1292 free(ch, M_TEMP); 1293 VOP_UNLOCK(nd.ni_vp); 1294 goto close_and_exit; 1295 } 1296 1297 /* save some header info */ 1298 vnd->sc_comp_blksz = be32toh(ch->block_size); 1299 /* note last offset is the file byte size */ 1300 vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1; 1301 free(ch, M_TEMP); 1302 if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) { 1303 VOP_UNLOCK(nd.ni_vp); 1304 error = EINVAL; 1305 goto close_and_exit; 1306 } 1307 KASSERT(0 < vnd->sc_comp_blksz); 1308 KASSERT(0 < vnd->sc_comp_numoffs); 1309 /* 1310 * @#^@!$& gcc -Wtype-limits refuses to let me 1311 * write SIZE_MAX/sizeof(uint64_t) < numoffs, 1312 * because the range of the type on amd64 makes 1313 * the comparisons always false. 1314 */ 1315#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT) 1316 if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) { 1317 VOP_UNLOCK(nd.ni_vp); 1318 error = EINVAL; 1319 goto close_and_exit; 1320 } 1321#endif 1322 if ((vattr.va_size < sizeof(struct vnd_comp_header)) || 1323 (vattr.va_size - sizeof(struct vnd_comp_header) < 1324 sizeof(uint64_t)*vnd->sc_comp_numoffs) || 1325 (UQUAD_MAX/vnd->sc_comp_blksz < 1326 vnd->sc_comp_numoffs - 1)) { 1327 VOP_UNLOCK(nd.ni_vp); 1328 error = EINVAL; 1329 goto close_and_exit; 1330 } 1331 1332 /* set decompressed file size */ 1333 KASSERT(vnd->sc_comp_numoffs - 1 <= 1334 UQUAD_MAX/vnd->sc_comp_blksz); 1335 vattr.va_size = 1336 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1337 (u_quad_t)vnd->sc_comp_blksz; 1338 1339 /* allocate space for all the compressed offsets */ 1340 __CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t)); 1341 vnd->sc_comp_offsets = 1342 malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs, 1343 M_DEVBUF, M_WAITOK); 1344 1345 /* read in the offsets */ 1346 error = vn_rdwr(UIO_READ, nd.ni_vp, 1347 (void *)vnd->sc_comp_offsets, 1348 sizeof(uint64_t) * vnd->sc_comp_numoffs, 1349 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1350 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1351 if (error) { 1352 VOP_UNLOCK(nd.ni_vp); 1353 goto close_and_exit; 1354 } 1355 /* 1356 * find largest block size (used for allocation limit). 1357 * Also convert offset to native byte order. 1358 */ 1359 comp_maxsize = 0; 1360 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1361 vnd->sc_comp_offsets[i] = 1362 be64toh(vnd->sc_comp_offsets[i]); 1363 comp_size = 1364 be64toh(vnd->sc_comp_offsets[i + 1]) 1365 - vnd->sc_comp_offsets[i]; 1366 if (comp_size > comp_maxsize) 1367 comp_maxsize = comp_size; 1368 } 1369 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1370 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs 1371 - 1]); 1372 1373 /* create compressed data buffer */ 1374 vnd->sc_comp_buff = malloc(comp_maxsize, 1375 M_DEVBUF, M_WAITOK); 1376 1377 /* create decompressed buffer */ 1378 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1379 M_DEVBUF, M_WAITOK); 1380 vnd->sc_comp_buffblk = -1; 1381 1382 /* Initialize decompress stream */ 1383 memset(&vnd->sc_comp_stream, 0, sizeof(z_stream)); 1384 vnd->sc_comp_stream.zalloc = vnd_alloc; 1385 vnd->sc_comp_stream.zfree = vnd_free; 1386 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1387 if (error) { 1388 if (vnd->sc_comp_stream.msg) 1389 printf("vnd%d: compressed file, %s\n", 1390 unit, vnd->sc_comp_stream.msg); 1391 VOP_UNLOCK(nd.ni_vp); 1392 error = EINVAL; 1393 goto close_and_exit; 1394 } 1395 1396 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1397#else /* !VND_COMPRESSION */ 1398 VOP_UNLOCK(nd.ni_vp); 1399 error = EOPNOTSUPP; 1400 goto close_and_exit; 1401#endif /* VND_COMPRESSION */ 1402 } 1403 1404 VOP_UNLOCK(nd.ni_vp); 1405 vnd->sc_vp = nd.ni_vp; 1406 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1407 1408 /* get smallest I/O size for underlying device, fall back to 1409 * fundamental I/O size of underlying filesystem 1410 */ 1411 error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE, &vnd->sc_iosize, FKIOCTL, l); 1412 if (error) 1413 vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize; 1414 1415 /* 1416 * Use pseudo-geometry specified. If none was provided, 1417 * use "standard" Adaptec fictitious geometry. 1418 */ 1419 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1420 1421 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1422 sizeof(vio->vnd_geom)); 1423 1424 /* 1425 * Sanity-check the sector size. 1426 */ 1427 if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) || 1428 vnd->sc_geom.vng_ncylinders == 0 || 1429 vnd->sc_geom.vng_ntracks == 0 || 1430 vnd->sc_geom.vng_nsectors == 0) { 1431 error = EINVAL; 1432 goto close_and_exit; 1433 } 1434 1435 /* 1436 * Compute the size (in DEV_BSIZE blocks) specified 1437 * by the geometry. 1438 */ 1439 geomsize = (int64_t)vnd->sc_geom.vng_nsectors * 1440 vnd->sc_geom.vng_ntracks * 1441 vnd->sc_geom.vng_ncylinders * 1442 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1443 1444 /* 1445 * Sanity-check the size against the specified 1446 * geometry. 1447 */ 1448 if (vnd->sc_size < geomsize) { 1449 error = EINVAL; 1450 goto close_and_exit; 1451 } 1452 } else if (vnd->sc_size >= (32 * 64)) { 1453 /* 1454 * Size must be at least 2048 DEV_BSIZE blocks 1455 * (1M) in order to use this geometry. 1456 */ 1457 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1458 vnd->sc_geom.vng_nsectors = 32; 1459 vnd->sc_geom.vng_ntracks = 64; 1460 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1461 } else { 1462 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1463 vnd->sc_geom.vng_nsectors = 1; 1464 vnd->sc_geom.vng_ntracks = 1; 1465 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1466 } 1467 1468 vnd_set_geometry(vnd); 1469 1470 if (vio->vnd_flags & VNDIOF_READONLY) { 1471 vnd->sc_flags |= VNF_READONLY; 1472 } 1473 1474 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1475 goto close_and_exit; 1476 1477 vndthrottle(vnd, vnd->sc_vp); 1478 vio->vnd_osize = dbtob(vnd->sc_size); 1479#ifdef VNDIOCSET50 1480 if (cmd != VNDIOCSET50) 1481#endif 1482 vio->vnd_size = dbtob(vnd->sc_size); 1483 vnd->sc_flags |= VNF_INITED; 1484 1485 /* create the kernel thread, wait for it to be up */ 1486 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, 1487 &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev)); 1488 if (error) 1489 goto close_and_exit; 1490 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1491 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1492 } 1493#ifdef DEBUG 1494 if (vnddebug & VDB_INIT) 1495 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1496 vnd->sc_vp, (unsigned long) vnd->sc_size, 1497 vnd->sc_geom.vng_secsize, 1498 vnd->sc_geom.vng_nsectors, 1499 vnd->sc_geom.vng_ntracks, 1500 vnd->sc_geom.vng_ncylinders); 1501#endif 1502 1503 /* Attach the disk. */ 1504 disk_attach(&vnd->sc_dkdev); 1505 1506 /* Initialize the xfer and buffer pools. */ 1507 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1508 0, 0, "vndxpl", NULL, IPL_BIO); 1509 1510 vndunlock(vnd); 1511 1512 pathbuf_destroy(pb); 1513 1514 /* Discover wedges on this disk */ 1515 dkwedge_discover(&vnd->sc_dkdev); 1516 1517 break; 1518 1519close_and_exit: 1520 (void) vn_close(nd.ni_vp, fflags, l->l_cred); 1521 pathbuf_destroy(pb); 1522unlock_and_exit: 1523#ifdef VND_COMPRESSION 1524 /* free any allocated memory (for compressed file) */ 1525 if (vnd->sc_comp_offsets) { 1526 free(vnd->sc_comp_offsets, M_DEVBUF); 1527 vnd->sc_comp_offsets = NULL; 1528 } 1529 if (vnd->sc_comp_buff) { 1530 free(vnd->sc_comp_buff, M_DEVBUF); 1531 vnd->sc_comp_buff = NULL; 1532 } 1533 if (vnd->sc_comp_decombuf) { 1534 free(vnd->sc_comp_decombuf, M_DEVBUF); 1535 vnd->sc_comp_decombuf = NULL; 1536 } 1537#endif /* VND_COMPRESSION */ 1538 vndunlock(vnd); 1539 return error; 1540 1541#ifdef VNDIOCCLR50 1542 case VNDIOCCLR50: 1543#endif 1544 case VNDIOCCLR: 1545 part = DISKPART(dev); 1546 pmask = (1 << part); 1547 force = (vio->vnd_flags & VNDIOF_FORCE) != 0; 1548 1549 if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0) 1550 return error; 1551 1552 break; 1553 1554 1555 case DIOCWDINFO: 1556 case DIOCSDINFO: 1557#ifdef __HAVE_OLD_DISKLABEL 1558 case ODIOCWDINFO: 1559 case ODIOCSDINFO: 1560#endif 1561 { 1562 struct disklabel *lp; 1563 1564 if ((error = vndlock(vnd)) != 0) 1565 return error; 1566 1567 vnd->sc_flags |= VNF_LABELLING; 1568 1569#ifdef __HAVE_OLD_DISKLABEL 1570 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1571 memset(&newlabel, 0, sizeof newlabel); 1572 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1573 lp = &newlabel; 1574 } else 1575#endif 1576 lp = (struct disklabel *)data; 1577 1578 error = setdisklabel(vnd->sc_dkdev.dk_label, 1579 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1580 if (error == 0) { 1581 if (cmd == DIOCWDINFO 1582#ifdef __HAVE_OLD_DISKLABEL 1583 || cmd == ODIOCWDINFO 1584#endif 1585 ) 1586 error = writedisklabel(VNDLABELDEV(dev), 1587 vndstrategy, vnd->sc_dkdev.dk_label, 1588 vnd->sc_dkdev.dk_cpulabel); 1589 } 1590 1591 vnd->sc_flags &= ~VNF_LABELLING; 1592 1593 vndunlock(vnd); 1594 1595 if (error) 1596 return error; 1597 break; 1598 } 1599 1600 case DIOCKLABEL: 1601 if (*(int *)data != 0) 1602 vnd->sc_flags |= VNF_KLABEL; 1603 else 1604 vnd->sc_flags &= ~VNF_KLABEL; 1605 break; 1606 1607 case DIOCWLABEL: 1608 if (*(int *)data != 0) 1609 vnd->sc_flags |= VNF_WLABEL; 1610 else 1611 vnd->sc_flags &= ~VNF_WLABEL; 1612 break; 1613 1614 case DIOCGDEFLABEL: 1615 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1616 break; 1617 1618#ifdef __HAVE_OLD_DISKLABEL 1619 case ODIOCGDEFLABEL: 1620 vndgetdefaultlabel(vnd, &newlabel); 1621 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1622 return ENOTTY; 1623 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1624 break; 1625#endif 1626 1627 case DIOCCACHESYNC: 1628 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1629 error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred, 1630 FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0); 1631 VOP_UNLOCK(vnd->sc_vp); 1632 return error; 1633 1634 default: 1635 return ENOTTY; 1636 } 1637 1638 return 0; 1639} 1640 1641/* 1642 * Duplicate the current processes' credentials. Since we are called only 1643 * as the result of a SET ioctl and only root can do that, any future access 1644 * to this "disk" is essentially as root. Note that credentials may change 1645 * if some other uid can write directly to the mapped file (NFS). 1646 */ 1647static int 1648vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1649{ 1650 struct uio auio; 1651 struct iovec aiov; 1652 char *tmpbuf; 1653 int error; 1654 1655 vnd->sc_cred = kauth_cred_dup(cred); 1656 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1657 1658 /* XXX: Horrible kludge to establish credentials for NFS */ 1659 aiov.iov_base = tmpbuf; 1660 aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size)); 1661 auio.uio_iov = &aiov; 1662 auio.uio_iovcnt = 1; 1663 auio.uio_offset = 0; 1664 auio.uio_rw = UIO_READ; 1665 auio.uio_resid = aiov.iov_len; 1666 UIO_SETUP_SYSSPACE(&auio); 1667 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1668 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1669 if (error == 0) { 1670 /* 1671 * Because vnd does all IO directly through the vnode 1672 * we need to flush (at least) the buffer from the above 1673 * VOP_READ from the buffer cache to prevent cache 1674 * incoherencies. Also, be careful to write dirty 1675 * buffers back to stable storage. 1676 */ 1677 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1678 curlwp, 0, 0); 1679 } 1680 VOP_UNLOCK(vnd->sc_vp); 1681 1682 free(tmpbuf, M_TEMP); 1683 return error; 1684} 1685 1686/* 1687 * Set maxactive based on FS type 1688 */ 1689static void 1690vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1691{ 1692 1693 if (vp->v_tag == VT_NFS) 1694 vnd->sc_maxactive = 2; 1695 else 1696 vnd->sc_maxactive = 8; 1697 1698 if (vnd->sc_maxactive < 1) 1699 vnd->sc_maxactive = 1; 1700} 1701 1702#if 0 1703static void 1704vndshutdown(void) 1705{ 1706 struct vnd_softc *vnd; 1707 1708 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1709 if (vnd->sc_flags & VNF_INITED) 1710 vndclear(vnd); 1711} 1712#endif 1713 1714static void 1715vndclear(struct vnd_softc *vnd, int myminor) 1716{ 1717 struct vnode *vp = vnd->sc_vp; 1718 int fflags = FREAD; 1719 int bmaj, cmaj, i, mn; 1720 int s; 1721 1722#ifdef DEBUG 1723 if (vnddebug & VDB_FOLLOW) 1724 printf("vndclear(%p): vp %p\n", vnd, vp); 1725#endif 1726 /* locate the major number */ 1727 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1728 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1729 1730 /* Nuke the vnodes for any open instances */ 1731 for (i = 0; i < MAXPARTITIONS; i++) { 1732 mn = DISKMINOR(device_unit(vnd->sc_dev), i); 1733 vdevgone(bmaj, mn, mn, VBLK); 1734 if (mn != myminor) /* XXX avoid to kill own vnode */ 1735 vdevgone(cmaj, mn, mn, VCHR); 1736 } 1737 1738 if ((vnd->sc_flags & VNF_READONLY) == 0) 1739 fflags |= FWRITE; 1740 1741 s = splbio(); 1742 bufq_drain(vnd->sc_tab); 1743 splx(s); 1744 1745 vnd->sc_flags |= VNF_VUNCONF; 1746 wakeup(&vnd->sc_tab); 1747 while (vnd->sc_flags & VNF_KTHREAD) 1748 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1749 1750#ifdef VND_COMPRESSION 1751 /* free the compressed file buffers */ 1752 if (vnd->sc_flags & VNF_COMP) { 1753 if (vnd->sc_comp_offsets) { 1754 free(vnd->sc_comp_offsets, M_DEVBUF); 1755 vnd->sc_comp_offsets = NULL; 1756 } 1757 if (vnd->sc_comp_buff) { 1758 free(vnd->sc_comp_buff, M_DEVBUF); 1759 vnd->sc_comp_buff = NULL; 1760 } 1761 if (vnd->sc_comp_decombuf) { 1762 free(vnd->sc_comp_decombuf, M_DEVBUF); 1763 vnd->sc_comp_decombuf = NULL; 1764 } 1765 } 1766#endif /* VND_COMPRESSION */ 1767 vnd->sc_flags &= 1768 ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL 1769 | VNF_VUNCONF | VNF_COMP | VNF_CLEARING); 1770 if (vp == NULL) 1771 panic("vndclear: null vp"); 1772 (void) vn_close(vp, fflags, vnd->sc_cred); 1773 kauth_cred_free(vnd->sc_cred); 1774 vnd->sc_vp = NULL; 1775 vnd->sc_cred = NULL; 1776 vnd->sc_size = 0; 1777} 1778 1779static int 1780vndsize(dev_t dev) 1781{ 1782 struct vnd_softc *sc; 1783 struct disklabel *lp; 1784 int part, unit, omask; 1785 int size; 1786 1787 unit = vndunit(dev); 1788 sc = device_lookup_private(&vnd_cd, unit); 1789 if (sc == NULL) 1790 return -1; 1791 1792 if ((sc->sc_flags & VNF_INITED) == 0) 1793 return -1; 1794 1795 part = DISKPART(dev); 1796 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1797 lp = sc->sc_dkdev.dk_label; 1798 1799 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1800 return -1; 1801 1802 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1803 size = -1; 1804 else 1805 size = lp->d_partitions[part].p_size * 1806 (lp->d_secsize / DEV_BSIZE); 1807 1808 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1809 return -1; 1810 1811 return size; 1812} 1813 1814static int 1815vnddump(dev_t dev, daddr_t blkno, void *va, 1816 size_t size) 1817{ 1818 1819 /* Not implemented. */ 1820 return ENXIO; 1821} 1822 1823static void 1824vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1825{ 1826 struct vndgeom *vng = &sc->sc_geom; 1827 struct partition *pp; 1828 unsigned spb; 1829 1830 memset(lp, 0, sizeof(*lp)); 1831 1832 spb = vng->vng_secsize / DEV_BSIZE; 1833 if (sc->sc_size / spb > UINT32_MAX) 1834 lp->d_secperunit = UINT32_MAX; 1835 else 1836 lp->d_secperunit = sc->sc_size / spb; 1837 lp->d_secsize = vng->vng_secsize; 1838 lp->d_nsectors = vng->vng_nsectors; 1839 lp->d_ntracks = vng->vng_ntracks; 1840 lp->d_ncylinders = vng->vng_ncylinders; 1841 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1842 1843 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1844 lp->d_type = DKTYPE_VND; 1845 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1846 lp->d_rpm = 3600; 1847 lp->d_interleave = 1; 1848 lp->d_flags = 0; 1849 1850 pp = &lp->d_partitions[RAW_PART]; 1851 pp->p_offset = 0; 1852 pp->p_size = lp->d_secperunit; 1853 pp->p_fstype = FS_UNUSED; 1854 lp->d_npartitions = RAW_PART + 1; 1855 1856 lp->d_magic = DISKMAGIC; 1857 lp->d_magic2 = DISKMAGIC; 1858 lp->d_checksum = dkcksum(lp); 1859} 1860 1861/* 1862 * Read the disklabel from a vnd. If one is not present, create a fake one. 1863 */ 1864static void 1865vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1866{ 1867 const char *errstring; 1868 struct disklabel *lp = sc->sc_dkdev.dk_label; 1869 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1870 int i; 1871 1872 memset(clp, 0, sizeof(*clp)); 1873 1874 vndgetdefaultlabel(sc, lp); 1875 1876 /* 1877 * Call the generic disklabel extraction routine. 1878 */ 1879 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1880 if (errstring) { 1881 /* 1882 * Lack of disklabel is common, but we print the warning 1883 * anyway, since it might contain other useful information. 1884 */ 1885 aprint_normal_dev(sc->sc_dev, "%s\n", errstring); 1886 1887 /* 1888 * For historical reasons, if there's no disklabel 1889 * present, all partitions must be FS_BSDFFS and 1890 * occupy the entire disk. 1891 */ 1892 for (i = 0; i < MAXPARTITIONS; i++) { 1893 /* 1894 * Don't wipe out port specific hack (such as 1895 * dos partition hack of i386 port). 1896 */ 1897 if (lp->d_partitions[i].p_size != 0) 1898 continue; 1899 1900 lp->d_partitions[i].p_size = lp->d_secperunit; 1901 lp->d_partitions[i].p_offset = 0; 1902 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1903 } 1904 1905 strncpy(lp->d_packname, "default label", 1906 sizeof(lp->d_packname)); 1907 1908 lp->d_npartitions = MAXPARTITIONS; 1909 lp->d_checksum = dkcksum(lp); 1910 } 1911} 1912 1913/* 1914 * Wait interruptibly for an exclusive lock. 1915 * 1916 * XXX 1917 * Several drivers do this; it should be abstracted and made MP-safe. 1918 */ 1919static int 1920vndlock(struct vnd_softc *sc) 1921{ 1922 int error; 1923 1924 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1925 sc->sc_flags |= VNF_WANTED; 1926 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1927 return error; 1928 } 1929 sc->sc_flags |= VNF_LOCKED; 1930 return 0; 1931} 1932 1933/* 1934 * Unlock and wake up any waiters. 1935 */ 1936static void 1937vndunlock(struct vnd_softc *sc) 1938{ 1939 1940 sc->sc_flags &= ~VNF_LOCKED; 1941 if ((sc->sc_flags & VNF_WANTED) != 0) { 1942 sc->sc_flags &= ~VNF_WANTED; 1943 wakeup(sc); 1944 } 1945} 1946 1947#ifdef VND_COMPRESSION 1948/* compressed file read */ 1949static void 1950compstrategy(struct buf *bp, off_t bn) 1951{ 1952 int error; 1953 int unit = vndunit(bp->b_dev); 1954 struct vnd_softc *vnd = 1955 device_lookup_private(&vnd_cd, unit); 1956 u_int32_t comp_block; 1957 struct uio auio; 1958 char *addr; 1959 int s; 1960 1961 /* set up constants for data move */ 1962 auio.uio_rw = UIO_READ; 1963 UIO_SETUP_SYSSPACE(&auio); 1964 1965 /* read, and transfer the data */ 1966 addr = bp->b_data; 1967 bp->b_resid = bp->b_bcount; 1968 s = splbio(); 1969 while (bp->b_resid > 0) { 1970 unsigned length; 1971 size_t length_in_buffer; 1972 u_int32_t offset_in_buffer; 1973 struct iovec aiov; 1974 1975 /* calculate the compressed block number */ 1976 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1977 1978 /* check for good block number */ 1979 if (comp_block >= vnd->sc_comp_numoffs) { 1980 bp->b_error = EINVAL; 1981 splx(s); 1982 return; 1983 } 1984 1985 /* read in the compressed block, if not in buffer */ 1986 if (comp_block != vnd->sc_comp_buffblk) { 1987 length = vnd->sc_comp_offsets[comp_block + 1] - 1988 vnd->sc_comp_offsets[comp_block]; 1989 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1990 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1991 length, vnd->sc_comp_offsets[comp_block], 1992 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred, 1993 NULL, NULL); 1994 if (error) { 1995 bp->b_error = error; 1996 VOP_UNLOCK(vnd->sc_vp); 1997 splx(s); 1998 return; 1999 } 2000 /* uncompress the buffer */ 2001 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 2002 vnd->sc_comp_stream.avail_in = length; 2003 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 2004 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 2005 inflateReset(&vnd->sc_comp_stream); 2006 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 2007 if (error != Z_STREAM_END) { 2008 if (vnd->sc_comp_stream.msg) 2009 aprint_normal_dev(vnd->sc_dev, 2010 "compressed file, %s\n", 2011 vnd->sc_comp_stream.msg); 2012 bp->b_error = EBADMSG; 2013 VOP_UNLOCK(vnd->sc_vp); 2014 splx(s); 2015 return; 2016 } 2017 vnd->sc_comp_buffblk = comp_block; 2018 VOP_UNLOCK(vnd->sc_vp); 2019 } 2020 2021 /* transfer the usable uncompressed data */ 2022 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 2023 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 2024 if (length_in_buffer > bp->b_resid) 2025 length_in_buffer = bp->b_resid; 2026 auio.uio_iov = &aiov; 2027 auio.uio_iovcnt = 1; 2028 aiov.iov_base = addr; 2029 aiov.iov_len = length_in_buffer; 2030 auio.uio_resid = aiov.iov_len; 2031 auio.uio_offset = 0; 2032 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 2033 length_in_buffer, &auio); 2034 if (error) { 2035 bp->b_error = error; 2036 splx(s); 2037 return; 2038 } 2039 2040 bn += length_in_buffer; 2041 addr += length_in_buffer; 2042 bp->b_resid -= length_in_buffer; 2043 } 2044 splx(s); 2045} 2046 2047/* compression memory allocation routines */ 2048static void * 2049vnd_alloc(void *aux, u_int items, u_int siz) 2050{ 2051 return malloc(items * siz, M_TEMP, M_NOWAIT); 2052} 2053 2054static void 2055vnd_free(void *aux, void *ptr) 2056{ 2057 free(ptr, M_TEMP); 2058} 2059#endif /* VND_COMPRESSION */ 2060 2061static void 2062vnd_set_geometry(struct vnd_softc *vnd) 2063{ 2064 struct disk_geom *dg = &vnd->sc_dkdev.dk_geom; 2065 2066 memset(dg, 0, sizeof(*dg)); 2067 2068 dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors * 2069 vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders; 2070 dg->dg_secsize = vnd->sc_geom.vng_secsize; 2071 dg->dg_nsectors = vnd->sc_geom.vng_nsectors; 2072 dg->dg_ntracks = vnd->sc_geom.vng_ntracks; 2073 dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders; 2074 2075#ifdef DEBUG 2076 if (vnddebug & VDB_LABEL) { 2077 printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit); 2078 printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders); 2079 } 2080#endif 2081 disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL); 2082} 2083 2084#ifdef VND_COMPRESSION 2085#define VND_DEPENDS "zlib" 2086#else 2087#define VND_DEPENDS NULL 2088#endif 2089 2090MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS); 2091 2092#ifdef _MODULE 2093int vnd_bmajor = -1, vnd_cmajor = -1; 2094 2095CFDRIVER_DECL(vnd, DV_DISK, NULL); 2096#endif 2097 2098static int 2099vnd_modcmd(modcmd_t cmd, void *arg) 2100{ 2101 int error = 0; 2102 2103 switch (cmd) { 2104 case MODULE_CMD_INIT: 2105#ifdef _MODULE 2106 error = config_cfdriver_attach(&vnd_cd); 2107 if (error) 2108 break; 2109 2110 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2111 if (error) { 2112 config_cfdriver_detach(&vnd_cd); 2113#ifdef DIAGNOSTIC 2114 aprint_error("%s: unable to register cfattach for \n" 2115 "%s, error %d", __func__, vnd_cd.cd_name, error); 2116#endif 2117 break; 2118 } 2119 2120 /* 2121 * Attach the {b,c}devsw's 2122 */ 2123 error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2124 &vnd_cdevsw, &vnd_cmajor); 2125 /* 2126 * If devsw_attach fails, remove from autoconf database 2127 */ 2128 if (error) { 2129 config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); 2130 config_cfdriver_detach(&vnd_cd); 2131#ifdef DIAGNOSTIC 2132 aprint_error("%s: unable to attach %s devsw, " 2133 "error %d", __func__, vnd_cd.cd_name, error); 2134#endif 2135 break; 2136 } 2137#endif 2138 break; 2139 2140 case MODULE_CMD_FINI: 2141#ifdef _MODULE 2142 /* 2143 * Remove {b,c}devsw's 2144 */ 2145 devsw_detach(&vnd_bdevsw, &vnd_cdevsw); 2146 2147 /* 2148 * Now remove device from autoconf database 2149 */ 2150 error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); 2151 if (error) { 2152 (void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2153 &vnd_cdevsw, &vnd_cmajor); 2154#ifdef DIAGNOSTIC 2155 aprint_error("%s: failed to detach %s cfattach, " 2156 "error %d\n", __func__, vnd_cd.cd_name, error); 2157#endif 2158 break; 2159 } 2160 error = config_cfdriver_detach(&vnd_cd); 2161 if (error) { 2162 (void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2163 (void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2164 &vnd_cdevsw, &vnd_cmajor); 2165#ifdef DIAGNOSTIC 2166 aprint_error("%s: failed to detach %s cfdriver, " 2167 "error %d\n", __func__, vnd_cd.cd_name, error); 2168 break; 2169#endif 2170 } 2171#endif 2172 break; 2173 2174 case MODULE_CMD_STAT: 2175 return ENOTTY; 2176 2177 default: 2178 return ENOTTY; 2179 } 2180 2181 return error; 2182} 2183