/* vnd.c revision 1.265 */
1/* $NetBSD: vnd.c,v 1.265 2018/09/20 07:18:38 mlelstv Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998, 2008 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 
40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 66 * 67 * @(#)vn.c 8.9 (Berkeley) 5/14/95 68 */ 69 70/* 71 * Vnode disk driver. 72 * 73 * Block/character interface to a vnode. Allows one to treat a file 74 * as a disk (e.g. build a filesystem in it, mount it, etc.). 75 * 76 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 77 * this uses them to avoid distorting the local buffer cache. 
If those 78 * block-level operations are not available, this falls back to the regular 79 * read and write calls. Using these may distort the cache in some cases 80 * but better have the driver working than preventing it to work on file 81 * systems where the block-level operations are not implemented for 82 * whatever reason. 83 * 84 * NOTE 2: There is a security issue involved with this driver. 85 * Once mounted all access to the contents of the "mapped" file via 86 * the special file is controlled by the permissions on the special 87 * file, the protection of the mapped file is ignored (effectively, 88 * by using root credentials in all transactions). 89 * 90 * NOTE 3: Doesn't interact with leases, should it? 91 */ 92 93#include <sys/cdefs.h> 94__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.265 2018/09/20 07:18:38 mlelstv Exp $"); 95 96#if defined(_KERNEL_OPT) 97#include "opt_vnd.h" 98#include "opt_compat_netbsd.h" 99#endif 100 101#include <sys/param.h> 102#include <sys/systm.h> 103#include <sys/namei.h> 104#include <sys/proc.h> 105#include <sys/kthread.h> 106#include <sys/errno.h> 107#include <sys/buf.h> 108#include <sys/bufq.h> 109#include <sys/malloc.h> 110#include <sys/ioctl.h> 111#include <sys/disklabel.h> 112#include <sys/device.h> 113#include <sys/disk.h> 114#include <sys/stat.h> 115#include <sys/mount.h> 116#include <sys/vnode.h> 117#include <sys/file.h> 118#include <sys/uio.h> 119#include <sys/conf.h> 120#include <sys/kauth.h> 121#include <sys/module.h> 122 123#include <net/zlib.h> 124 125#include <miscfs/genfs/genfs.h> 126#include <miscfs/specfs/specdev.h> 127 128#include <dev/dkvar.h> 129#include <dev/vndvar.h> 130 131#include "ioconf.h" 132 133#if defined(VNDDEBUG) && !defined(DEBUG) 134#define DEBUG 135#endif 136 137#ifdef DEBUG 138int dovndcluster = 1; 139#define VDB_FOLLOW 0x01 140#define VDB_INIT 0x02 141#define VDB_IO 0x04 142#define VDB_LABEL 0x08 143int vnddebug = 0; 144#endif 145 146#define vndunit(x) DISKUNIT(x) 147 148struct vndxfer { 149 struct buf 
vx_buf; 150 struct vnd_softc *vx_vnd; 151}; 152#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 153 154#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 155#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 156 157#define VNDLABELDEV(dev) \ 158 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 159 160#define VND_MAXPENDING(vnd) ((vnd)->sc_maxactive * 4) 161 162 163static void vndclear(struct vnd_softc *, int); 164static int vnddoclear(struct vnd_softc *, int, int, bool); 165static int vndsetcred(struct vnd_softc *, kauth_cred_t); 166static void vndthrottle(struct vnd_softc *, struct vnode *); 167static void vndiodone(struct buf *); 168#if 0 169static void vndshutdown(void); 170#endif 171 172static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 173static void vndgetdisklabel(dev_t, struct vnd_softc *); 174 175static int vndlock(struct vnd_softc *); 176static void vndunlock(struct vnd_softc *); 177#ifdef VND_COMPRESSION 178static void compstrategy(struct buf *, off_t); 179static void *vnd_alloc(void *, u_int, u_int); 180static void vnd_free(void *, void *); 181#endif /* VND_COMPRESSION */ 182 183static void vndthread(void *); 184static bool vnode_has_op(const struct vnode *, int); 185static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 186 struct buf *); 187static void handle_with_strategy(struct vnd_softc *, const struct buf *, 188 struct buf *); 189static void vnd_set_geometry(struct vnd_softc *); 190 191static dev_type_open(vndopen); 192static dev_type_close(vndclose); 193static dev_type_read(vndread); 194static dev_type_write(vndwrite); 195static dev_type_ioctl(vndioctl); 196static dev_type_strategy(vndstrategy); 197static dev_type_dump(vnddump); 198static dev_type_size(vndsize); 199 200const struct bdevsw vnd_bdevsw = { 201 .d_open = vndopen, 202 .d_close = vndclose, 203 .d_strategy = vndstrategy, 204 .d_ioctl = vndioctl, 205 .d_dump = vnddump, 206 .d_psize = vndsize, 207 .d_discard = 
nodiscard, 208 .d_flag = D_DISK 209}; 210 211const struct cdevsw vnd_cdevsw = { 212 .d_open = vndopen, 213 .d_close = vndclose, 214 .d_read = vndread, 215 .d_write = vndwrite, 216 .d_ioctl = vndioctl, 217 .d_stop = nostop, 218 .d_tty = notty, 219 .d_poll = nopoll, 220 .d_mmap = nommap, 221 .d_kqfilter = nokqfilter, 222 .d_discard = nodiscard, 223 .d_flag = D_DISK 224}; 225 226static int vnd_match(device_t, cfdata_t, void *); 227static void vnd_attach(device_t, device_t, void *); 228static int vnd_detach(device_t, int); 229 230CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc), 231 vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); 232 233static struct vnd_softc *vnd_spawn(int); 234int vnd_destroy(device_t); 235 236static struct dkdriver vnddkdriver = { 237 .d_strategy = vndstrategy, 238 .d_minphys = minphys 239}; 240 241void 242vndattach(int num) 243{ 244 int error; 245 246 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 247 if (error) 248 aprint_error("%s: unable to register cfattach, error = %d\n", 249 vnd_cd.cd_name, error); 250} 251 252static int 253vnd_match(device_t self, cfdata_t cfdata, void *aux) 254{ 255 256 return 1; 257} 258 259static void 260vnd_attach(device_t parent, device_t self, void *aux) 261{ 262 struct vnd_softc *sc = device_private(self); 263 264 sc->sc_dev = self; 265 sc->sc_comp_offsets = NULL; 266 sc->sc_comp_buff = NULL; 267 sc->sc_comp_decombuf = NULL; 268 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 269 disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); 270 if (!pmf_device_register(self, NULL, NULL)) 271 aprint_error_dev(self, "couldn't establish power handler\n"); 272} 273 274static int 275vnd_detach(device_t self, int flags) 276{ 277 int error; 278 struct vnd_softc *sc = device_private(self); 279 280 if (sc->sc_flags & VNF_INITED) { 281 error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0); 282 if (error != 0) 283 return error; 284 } 285 286 pmf_device_deregister(self); 287 
	bufq_free(sc->sc_tab);
	disk_destroy(&sc->sc_dkdev);

	return 0;
}

/*
 * Create a vnd pseudo-device instance for the given unit on demand.
 * The cfdata allocated here is released again in vnd_destroy().
 * Returns the new softc, or NULL if config_attach_pseudo() failed.
 */
static struct vnd_softc *
vnd_spawn(int unit)
{
	cfdata_t cf;

	cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK);
	cf->cf_name = vnd_cd.cd_name;
	cf->cf_atname = vnd_cd.cd_name;
	cf->cf_unit = unit;
	cf->cf_fstate = FSTATE_STAR;

	return device_private(config_attach_pseudo(cf));
}

/*
 * Detach and destroy a unit previously created by vnd_spawn(),
 * freeing the cfdata that vnd_spawn() allocated.
 */
int
vnd_destroy(device_t dev)
{
	int error;
	cfdata_t cf;

	cf = device_cfdata(dev);
	error = config_detach(dev, DETACH_QUIET);
	if (error)
		return error;
	free(cf, M_DEVBUF);
	return 0;
}

/*
 * Open the vnd unit.  A not-yet-existing unit is auto-created via
 * vnd_spawn() on first open (cloning behaviour).
 */
static int
vndopen(dev_t dev, int flags, int mode, struct lwp *l)
{
	int unit = vndunit(dev);
	struct vnd_softc *sc;
	int error = 0, part, pmask;
	struct disklabel *lp;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l);
#endif
	sc = device_lookup_private(&vnd_cd, unit);
	if (sc == NULL) {
		sc = vnd_spawn(unit);
		if (sc == NULL)
			return ENOMEM;

		/* compatibility, keep disklabel after close */
		sc->sc_flags = VNF_KLABEL;
	}

	if ((error = vndlock(sc)) != 0)
		return error;

	mutex_enter(&sc->sc_dkdev.dk_openlock);

	/* A concurrent vnddoclear() is tearing the unit down; refuse. */
	if ((sc->sc_flags & VNF_CLEARING) != 0) {
		error = ENXIO;
		goto done;
	}

	lp = sc->sc_dkdev.dk_label;

	part = DISKPART(dev);
	pmask = (1 << part);

	/* With wedges configured, only the raw partition may be opened. */
	if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) {
		error = EBUSY;
		goto done;
	}

	if (sc->sc_flags & VNF_INITED) {
		if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) {
			/*
			 * If any non-raw partition is open, but the disk
			 * has been invalidated, disallow further opens.
			 */
			if ((sc->sc_flags & VNF_VLABEL) == 0) {
				error = EIO;
				goto done;
			}
		} else {
			/*
			 * Load the partition info if not already loaded.
376 */ 377 if ((sc->sc_flags & VNF_VLABEL) == 0) { 378 sc->sc_flags |= VNF_VLABEL; 379 vndgetdisklabel(dev, sc); 380 } 381 } 382 } 383 384 /* Check that the partitions exists. */ 385 if (part != RAW_PART) { 386 if (((sc->sc_flags & VNF_INITED) == 0) || 387 ((part >= lp->d_npartitions) || 388 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 389 error = ENXIO; 390 goto done; 391 } 392 } 393 394 /* Prevent our unit from being unconfigured while open. */ 395 switch (mode) { 396 case S_IFCHR: 397 sc->sc_dkdev.dk_copenmask |= pmask; 398 break; 399 400 case S_IFBLK: 401 sc->sc_dkdev.dk_bopenmask |= pmask; 402 break; 403 } 404 sc->sc_dkdev.dk_openmask = 405 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 406 407 done: 408 mutex_exit(&sc->sc_dkdev.dk_openlock); 409 vndunlock(sc); 410 return error; 411} 412 413static int 414vndclose(dev_t dev, int flags, int mode, struct lwp *l) 415{ 416 int unit = vndunit(dev); 417 struct vnd_softc *sc; 418 int error = 0, part; 419 420#ifdef DEBUG 421 if (vnddebug & VDB_FOLLOW) 422 printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 423#endif 424 sc = device_lookup_private(&vnd_cd, unit); 425 if (sc == NULL) 426 return ENXIO; 427 428 if ((error = vndlock(sc)) != 0) 429 return error; 430 431 mutex_enter(&sc->sc_dkdev.dk_openlock); 432 433 part = DISKPART(dev); 434 435 /* ...that much closer to allowing unconfiguration... */ 436 switch (mode) { 437 case S_IFCHR: 438 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 439 break; 440 441 case S_IFBLK: 442 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 443 break; 444 } 445 sc->sc_dkdev.dk_openmask = 446 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 447 448 /* are we last opener ? 
*/ 449 if (sc->sc_dkdev.dk_openmask == 0) { 450 if ((sc->sc_flags & VNF_KLABEL) == 0) 451 sc->sc_flags &= ~VNF_VLABEL; 452 } 453 454 mutex_exit(&sc->sc_dkdev.dk_openlock); 455 456 vndunlock(sc); 457 458 if ((sc->sc_flags & VNF_INITED) == 0) { 459 if ((error = vnd_destroy(sc->sc_dev)) != 0) { 460 aprint_error_dev(sc->sc_dev, 461 "unable to detach instance\n"); 462 return error; 463 } 464 } 465 466 return 0; 467} 468 469/* 470 * Queue the request, and wakeup the kernel thread to handle it. 471 */ 472static void 473vndstrategy(struct buf *bp) 474{ 475 int unit = vndunit(bp->b_dev); 476 struct vnd_softc *vnd = 477 device_lookup_private(&vnd_cd, unit); 478 struct disklabel *lp; 479 daddr_t blkno; 480 int s = splbio(); 481 482 if (vnd == NULL) { 483 bp->b_error = ENXIO; 484 goto done; 485 } 486 lp = vnd->sc_dkdev.dk_label; 487 488 if ((vnd->sc_flags & VNF_INITED) == 0) { 489 bp->b_error = ENXIO; 490 goto done; 491 } 492 493 /* 494 * The transfer must be a whole number of blocks. 495 */ 496 if ((bp->b_bcount % lp->d_secsize) != 0) { 497 bp->b_error = EINVAL; 498 goto done; 499 } 500 501 /* 502 * check if we're read-only. 503 */ 504 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 505 bp->b_error = EACCES; 506 goto done; 507 } 508 509 /* If it's a nil transfer, wake up the top half now. */ 510 if (bp->b_bcount == 0) { 511 goto done; 512 } 513 514 /* 515 * Do bounds checking and adjust transfer. If there's an error, 516 * the bounds check will flag that for us. 517 */ 518 if (DISKPART(bp->b_dev) == RAW_PART) { 519 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 520 vnd->sc_size) <= 0) 521 goto done; 522 } else { 523 if (bounds_check_with_label(&vnd->sc_dkdev, 524 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 525 goto done; 526 } 527 528 /* 529 * Put the block number in terms of the logical blocksize 530 * of the "device". 
	 */

	blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE);

	/*
	 * Translate the partition-relative block number to an absolute.
	 */
	if (DISKPART(bp->b_dev) != RAW_PART) {
		struct partition *pp;

		pp = &vnd->sc_dkdev.dk_label->d_partitions[
		    DISKPART(bp->b_dev)];
		blkno += pp->p_offset;
	}
	bp->b_rawblkno = blkno;

#ifdef DEBUG
	if (vnddebug & VDB_FOLLOW)
		printf("vndstrategy(%p): unit %d\n", bp, unit);
#endif
	if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
		/*
		 * Throttle producers in rdwr mode: block here until the
		 * worker thread has drained below VND_MAXPENDING.
		 */
		KASSERT(vnd->sc_pending >= 0 &&
		    vnd->sc_pending <= VND_MAXPENDING(vnd));
		while (vnd->sc_pending == VND_MAXPENDING(vnd))
			tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0);
		vnd->sc_pending++;
	}
	/* Hand the request to vndthread() and wake it. */
	bufq_put(vnd->sc_tab, bp);
	wakeup(&vnd->sc_tab);
	splx(s);
	return;

done:
	bp->b_resid = bp->b_bcount;
	biodone(bp);
	splx(s);
}

/*
 * True if the backing vnode implements both VOP_BMAP and VOP_STRATEGY,
 * i.e. the block-level fast path can be used instead of vn_rdwr().
 */
static bool
vnode_has_strategy(struct vnd_softc *vnd)
{
	return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) &&
	    vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy));
}

/* Verify that I/O requests cannot be smaller than the
 * smallest I/O size supported by the backend.
 */
static bool
vnode_has_large_blocks(struct vnd_softc *vnd)
{
	u_int32_t vnd_secsize, iosize;

	iosize = vnd->sc_iosize;
	vnd_secsize = vnd->sc_geom.vng_secsize;

	/* Nonzero remainder means the vnd sector size is not a
	 * multiple of the backend I/O size, so bmap/strategy is unusable. */
	return vnd_secsize % iosize != 0;
}

/* XXX this function needs a reliable check to detect
 * sparse files. Otherwise, bmap/strategy may be used
 * and fail on non-allocated blocks. VOP_READ/VOP_WRITE
 * works on sparse files.
 */
#if notyet
static bool
vnode_strategy_probe(struct vnd_softc *vnd)
{
	int error;
	daddr_t nbn;

	if (!vnode_has_strategy(vnd))
		return false;

	if (vnode_has_large_blocks(vnd))
		return false;

	/* Convert the first logical block number to its
	 * physical block number.
610 */ 611 error = 0; 612 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 613 error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL); 614 VOP_UNLOCK(vnd->sc_vp); 615 616 /* Test if that worked. */ 617 if (error == 0 && (long)nbn == -1) 618 return false; 619 620 return true; 621} 622#endif 623 624static void 625vndthread(void *arg) 626{ 627 struct vnd_softc *vnd = arg; 628 int s; 629 630 /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to 631 * directly access the backing vnode. If we can, use these two 632 * operations to avoid messing with the local buffer cache. 633 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 634 * which are guaranteed to work with any file system. */ 635 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 636 ! vnode_has_strategy(vnd)) 637 vnd->sc_flags |= VNF_USE_VN_RDWR; 638 639 /* VOP_STRATEGY can only be used if the backing vnode allows 640 * to access blocks as small as defined by the vnd geometry. 641 */ 642 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 643 vnode_has_large_blocks(vnd)) 644 vnd->sc_flags |= VNF_USE_VN_RDWR; 645 646#ifdef DEBUG 647 if (vnddebug & VDB_INIT) 648 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 649 (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ? 650 "using bmap/strategy operations" : 651 "using read/write operations"); 652#endif 653 654 s = splbio(); 655 vnd->sc_flags |= VNF_KTHREAD; 656 wakeup(&vnd->sc_kthread); 657 658 /* 659 * Dequeue requests and serve them depending on the available 660 * vnode operations. 
	 */
	while ((vnd->sc_flags & VNF_VUNCONF) == 0) {
		struct vndxfer *vnx;
		struct buf *obp;	/* original request from vndstrategy() */
		struct buf *bp;		/* our nested buf aimed at the vnode */

		obp = bufq_get(vnd->sc_tab);
		if (obp == NULL) {
			/* Queue empty: sleep until vndstrategy() wakes us. */
			tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0);
			continue;
		};	/* XXX(review): stray ';' — harmless empty statement */
		if ((vnd->sc_flags & VNF_USE_VN_RDWR)) {
			/* Release one producer slot; wake writers blocked
			 * at the VND_MAXPENDING limit in vndstrategy(). */
			KASSERT(vnd->sc_pending > 0 &&
			    vnd->sc_pending <= VND_MAXPENDING(vnd));
			if (vnd->sc_pending-- == VND_MAXPENDING(vnd))
				wakeup(&vnd->sc_pending);
		}
		splx(s);
#ifdef DEBUG
		if (vnddebug & VDB_FOLLOW)
			printf("vndthread(%p)\n", obp);
#endif

		/* Backing file system went away underneath us. */
		if (vnd->sc_vp->v_mount == NULL) {
			obp->b_error = ENXIO;
			goto done;
		}
#ifdef VND_COMPRESSION
		/* handle a compressed read */
		if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) {
			off_t bn;

			/* Convert to a byte offset within the file. */
			bn = obp->b_rawblkno *
			    vnd->sc_dkdev.dk_label->d_secsize;

			compstrategy(obp, bn);
			goto done;
		}
#endif /* VND_COMPRESSION */

		/*
		 * Allocate a header for this transfer and link it to the
		 * buffer
		 */
		s = splbio();
		vnx = VND_GETXFER(vnd);
		splx(s);
		vnx->vx_vnd = vnd;

		/* Bound the number of transfers in flight; vndiodone()
		 * wakes us when sc_active drops. */
		s = splbio();
		while (vnd->sc_active >= vnd->sc_maxactive) {
			tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0);
		}
		vnd->sc_active++;
		splx(s);

		/* Instrumentation. */
		disk_busy(&vnd->sc_dkdev);

		/* Clone the relevant fields of the original request into
		 * the embedded buf; completion goes through vndiodone(). */
		bp = &vnx->vx_buf;
		buf_init(bp);
		bp->b_flags = (obp->b_flags & B_READ);
		bp->b_oflags = obp->b_oflags;
		bp->b_cflags = obp->b_cflags;
		bp->b_iodone = vndiodone;
		bp->b_private = obp;
		bp->b_vp = vnd->sc_vp;
		bp->b_objlock = bp->b_vp->v_interlock;
		bp->b_data = obp->b_data;
		bp->b_bcount = obp->b_bcount;
		BIO_COPYPRIO(bp, obp);

		/* Handle the request using the appropriate operations. 
*/ 735 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0) 736 handle_with_strategy(vnd, obp, bp); 737 else 738 handle_with_rdwr(vnd, obp, bp); 739 740 s = splbio(); 741 continue; 742 743done: 744 biodone(obp); 745 s = splbio(); 746 } 747 748 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 749 wakeup(&vnd->sc_kthread); 750 splx(s); 751 kthread_exit(0); 752} 753 754/* 755 * Checks if the given vnode supports the requested operation. 756 * The operation is specified the offset returned by VOFFSET. 757 * 758 * XXX The test below used to determine this is quite fragile 759 * because it relies on the file system to use genfs to specify 760 * unimplemented operations. There might be another way to do 761 * it more cleanly. 762 */ 763static bool 764vnode_has_op(const struct vnode *vp, int opoffset) 765{ 766 int (*defaultp)(void *); 767 int (*opp)(void *); 768 769 defaultp = vp->v_op[VOFFSET(vop_default)]; 770 opp = vp->v_op[opoffset]; 771 772 return opp != defaultp && opp != genfs_eopnotsupp && 773 opp != genfs_badop && opp != genfs_nullop; 774} 775 776/* 777 * Handles the read/write request given in 'bp' using the vnode's VOP_READ 778 * and VOP_WRITE operations. 779 * 780 * 'obp' is a pointer to the original request fed to the vnd device. 781 */ 782static void 783handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 784{ 785 bool doread; 786 off_t offset; 787 size_t len, resid; 788 struct vnode *vp; 789 790 doread = bp->b_flags & B_READ; 791 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 792 len = bp->b_bcount; 793 vp = vnd->sc_vp; 794 795#if defined(DEBUG) 796 if (vnddebug & VDB_IO) 797 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 798 ", secsize %d, offset %" PRIu64 799 ", bcount %d\n", 800 vp, doread ? "read" : "write", obp->b_rawblkno, 801 vnd->sc_dkdev.dk_label->d_secsize, offset, 802 bp->b_bcount); 803#endif 804 805 /* Issue the read or write operation. */ 806 bp->b_error = 807 vn_rdwr(doread ? 
UIO_READ : UIO_WRITE, 808 vp, bp->b_data, len, offset, UIO_SYSSPACE, 809 IO_ADV_ENCODE(POSIX_FADV_NOREUSE), vnd->sc_cred, &resid, NULL); 810 bp->b_resid = resid; 811 812 mutex_enter(vp->v_interlock); 813 (void) VOP_PUTPAGES(vp, 0, 0, 814 PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE | PGO_SYNCIO); 815 816 /* We need to increase the number of outputs on the vnode if 817 * there was any write to it. */ 818 if (!doread) { 819 mutex_enter(vp->v_interlock); 820 vp->v_numoutput++; 821 mutex_exit(vp->v_interlock); 822 } 823 824 biodone(bp); 825} 826 827/* 828 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 829 * and VOP_STRATEGY operations. 830 * 831 * 'obp' is a pointer to the original request fed to the vnd device. 832 */ 833static void 834handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 835 struct buf *bp) 836{ 837 int bsize, error, flags, skipped; 838 size_t resid, sz; 839 off_t bn, offset; 840 struct vnode *vp; 841 struct buf *nbp = NULL; 842 843 flags = obp->b_flags; 844 845 846 /* convert to a byte offset within the file. */ 847 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 848 849 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 850 skipped = 0; 851 852 /* 853 * Break the request into bsize pieces and feed them 854 * sequentially using VOP_BMAP/VOP_STRATEGY. 855 * We do it this way to keep from flooding NFS servers if we 856 * are connected to an NFS file. This places the burden on 857 * the client rather than the server. 858 */ 859 error = 0; 860 bp->b_resid = bp->b_bcount; 861 for (offset = 0, resid = bp->b_resid; /* true */; 862 resid -= sz, offset += sz) { 863 daddr_t nbn; 864 int off, nra; 865 866 nra = 0; 867 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 868 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 869 VOP_UNLOCK(vnd->sc_vp); 870 871 if (error == 0 && (long)nbn == -1) 872 error = EIO; 873 874 /* 875 * If there was an error or a hole in the file...punt. 
876 * Note that we may have to wait for any operations 877 * that we have already fired off before releasing 878 * the buffer. 879 * 880 * XXX we could deal with holes here but it would be 881 * a hassle (in the write case). 882 */ 883 if (error) { 884 skipped += resid; 885 break; 886 } 887 888#ifdef DEBUG 889 if (!dovndcluster) 890 nra = 0; 891#endif 892 893 off = bn % bsize; 894 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 895#ifdef DEBUG 896 if (vnddebug & VDB_IO) 897 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 898 " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, 899 nbn, sz); 900#endif 901 902 nbp = getiobuf(vp, true); 903 nestiobuf_setup(bp, nbp, offset, sz); 904 nbp->b_blkno = nbn + btodb(off); 905 906#if 0 /* XXX #ifdef DEBUG */ 907 if (vnddebug & VDB_IO) 908 printf("vndstart(%ld): bp %p vp %p blkno " 909 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 910 (long) (vnd-vnd_softc), &nbp->vb_buf, 911 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 912 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 913 nbp->vb_buf.b_bcount); 914#endif 915 if (resid == sz) { 916 break; 917 } 918 VOP_STRATEGY(vp, nbp); 919 bn += sz; 920 } 921 if (!(flags & B_READ)) { 922 struct vnode *w_vp; 923 /* 924 * this is the last nested buf, account for 925 * the parent buf write too. 926 * This has to be done last, so that 927 * fsync won't wait for this write which 928 * has no chance to complete before all nested bufs 929 * have been queued. But it has to be done 930 * before the last VOP_STRATEGY() 931 * or the call to nestiobuf_done(). 
932 */ 933 w_vp = bp->b_vp; 934 mutex_enter(w_vp->v_interlock); 935 w_vp->v_numoutput++; 936 mutex_exit(w_vp->v_interlock); 937 } 938 KASSERT(skipped != 0 || nbp != NULL); 939 if (skipped) 940 nestiobuf_done(bp, skipped, error); 941 else 942 VOP_STRATEGY(vp, nbp); 943} 944 945static void 946vndiodone(struct buf *bp) 947{ 948 struct vndxfer *vnx = VND_BUFTOXFER(bp); 949 struct vnd_softc *vnd = vnx->vx_vnd; 950 struct buf *obp = bp->b_private; 951 int s = splbio(); 952 953 KASSERT(&vnx->vx_buf == bp); 954 KASSERT(vnd->sc_active > 0); 955#ifdef DEBUG 956 if (vnddebug & VDB_IO) { 957 printf("vndiodone1: bp %p iodone: error %d\n", 958 bp, bp->b_error); 959 } 960#endif 961 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 962 (bp->b_flags & B_READ)); 963 vnd->sc_active--; 964 if (vnd->sc_active == 0) { 965 wakeup(&vnd->sc_tab); 966 } 967 splx(s); 968 obp->b_error = bp->b_error; 969 obp->b_resid = bp->b_resid; 970 buf_destroy(bp); 971 VND_PUTXFER(vnd, vnx); 972 biodone(obp); 973} 974 975/* ARGSUSED */ 976static int 977vndread(dev_t dev, struct uio *uio, int flags) 978{ 979 int unit = vndunit(dev); 980 struct vnd_softc *sc; 981 982#ifdef DEBUG 983 if (vnddebug & VDB_FOLLOW) 984 printf("vndread(0x%"PRIx64", %p)\n", dev, uio); 985#endif 986 987 sc = device_lookup_private(&vnd_cd, unit); 988 if (sc == NULL) 989 return ENXIO; 990 991 if ((sc->sc_flags & VNF_INITED) == 0) 992 return ENXIO; 993 994 return physio(vndstrategy, NULL, dev, B_READ, minphys, uio); 995} 996 997/* ARGSUSED */ 998static int 999vndwrite(dev_t dev, struct uio *uio, int flags) 1000{ 1001 int unit = vndunit(dev); 1002 struct vnd_softc *sc; 1003 1004#ifdef DEBUG 1005 if (vnddebug & VDB_FOLLOW) 1006 printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio); 1007#endif 1008 1009 sc = device_lookup_private(&vnd_cd, unit); 1010 if (sc == NULL) 1011 return ENXIO; 1012 1013 if ((sc->sc_flags & VNF_INITED) == 0) 1014 return ENXIO; 1015 1016 return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio); 1017} 1018 
/*
 * Look up the backing vnode's attributes for unit *un (or, if *un is
 * -1, for 'unit') on behalf of the VNDIOCGET family of ioctls.
 * Returns EINVAL for a negative unit, -1 (not an errno; the caller
 * vndioctl_get() maps it to "unconfigured") when the unit does not
 * exist or is not initialized, else the VOP_GETATTR() result.
 */
static int
vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va)
{
	int error;
	struct vnd_softc *vnd;

	if (*un == -1)
		*un = unit;
	if (*un < 0)
		return EINVAL;

	vnd = device_lookup_private(&vnd_cd, *un);
	if (vnd == NULL)
		return -1;

	if ((vnd->sc_flags & VNF_INITED) == 0)
		return -1;

	vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY);
	error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred);
	VOP_UNLOCK(vnd->sc_vp);
	return error;
}

/*
 * Unconfigure the unit: tear down the mapping to the backing vnode
 * and release the resources set up at VNDIOCSET time.  'force'
 * overrides the open-partition busy check (DETACH_FORCE path).
 */
static int
vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force)
{
	int error;

	if ((error = vndlock(vnd)) != 0)
		return error;

	/*
	 * Don't unconfigure if any other partitions are open
	 * or if both the character and block flavors of this
	 * partition are open.
	 */
	if (DK_BUSY(vnd, pmask) && !force) {
		vndunlock(vnd);
		return EBUSY;
	}

	/* Delete all of our wedges */
	dkwedge_delall(&vnd->sc_dkdev);

	/*
	 * XXX vndclear() might call vndclose() implicitly;
	 * release lock to avoid recursion
	 *
	 * Set VNF_CLEARING to prevent vndopen() from
	 * sneaking in after we vndunlock().
	 */
	vnd->sc_flags |= VNF_CLEARING;
	vndunlock(vnd);
	vndclear(vnd, minor);
#ifdef DEBUG
	if (vnddebug & VDB_INIT)
		printf("%s: CLRed\n", __func__);
#endif

	/* Destroy the xfer and buffer pools. */
	pool_destroy(&vnd->sc_vxpool);

	/* Detach the disk. 
*/ 1083 disk_detach(&vnd->sc_dkdev); 1084 1085 return 0; 1086} 1087 1088static int 1089vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va) 1090{ 1091 int error; 1092 1093 KASSERT(l); 1094 1095 /* the first member is always int vnd_unit in all the versions */ 1096 if (*(int *)data >= vnd_cd.cd_ndevs) 1097 return ENXIO; 1098 1099 switch (error = vnd_cget(l, unit, (int *)data, va)) { 1100 case -1: 1101 /* unused is not an error */ 1102 memset(va, 0, sizeof(*va)); 1103 /*FALLTHROUGH*/ 1104 case 0: 1105 return 0; 1106 default: 1107 return error; 1108 } 1109} 1110 1111/* ARGSUSED */ 1112static int 1113vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1114{ 1115 bool force; 1116 int unit = vndunit(dev); 1117 struct vnd_softc *vnd; 1118 struct vnd_ioctl *vio; 1119 struct vattr vattr; 1120 struct pathbuf *pb; 1121 struct nameidata nd; 1122 int error, part, pmask; 1123 uint64_t geomsize; 1124 int fflags; 1125#ifdef __HAVE_OLD_DISKLABEL 1126 struct disklabel newlabel; 1127#endif 1128 1129#ifdef DEBUG 1130 if (vnddebug & VDB_FOLLOW) 1131 printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n", 1132 dev, cmd, data, flag, l->l_proc, unit); 1133#endif 1134 /* Do the get's first; they don't need initialization or verification */ 1135 switch (cmd) { 1136#ifdef COMPAT_30 1137 case VNDIOCGET30: { 1138 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1139 return error; 1140 1141 struct vnd_user30 *vnu = data; 1142 vnu->vnu_dev = vattr.va_fsid; 1143 vnu->vnu_ino = vattr.va_fileid; 1144 return 0; 1145 } 1146#endif 1147#ifdef COMPAT_50 1148 case VNDIOCGET50: { 1149 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1150 return error; 1151 1152 struct vnd_user50 *vnu = data; 1153 vnu->vnu_dev = vattr.va_fsid; 1154 vnu->vnu_ino = vattr.va_fileid; 1155 return 0; 1156 } 1157#endif 1158 1159 case VNDIOCGET: { 1160 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1161 return error; 1162 1163 struct vnd_user *vnu = data; 1164 
vnu->vnu_dev = vattr.va_fsid; 1165 vnu->vnu_ino = vattr.va_fileid; 1166 return 0; 1167 } 1168 default: 1169 break; 1170 } 1171 1172 vnd = device_lookup_private(&vnd_cd, unit); 1173 if (vnd == NULL) 1174 return ENXIO; 1175 vio = (struct vnd_ioctl *)data; 1176 1177 /* Must be open for writes for these commands... */ 1178 switch (cmd) { 1179 case VNDIOCSET: 1180 case VNDIOCCLR: 1181#ifdef COMPAT_50 1182 case VNDIOCSET50: 1183 case VNDIOCCLR50: 1184#endif 1185 case DIOCSDINFO: 1186 case DIOCWDINFO: 1187#ifdef __HAVE_OLD_DISKLABEL 1188 case ODIOCSDINFO: 1189 case ODIOCWDINFO: 1190#endif 1191 case DIOCKLABEL: 1192 case DIOCWLABEL: 1193 if ((flag & FWRITE) == 0) 1194 return EBADF; 1195 } 1196 1197 /* Must be initialized for these... */ 1198 switch (cmd) { 1199 case VNDIOCCLR: 1200#ifdef VNDIOCCLR50 1201 case VNDIOCCLR50: 1202#endif 1203 case DIOCGDINFO: 1204 case DIOCSDINFO: 1205 case DIOCWDINFO: 1206 case DIOCGPARTINFO: 1207 case DIOCKLABEL: 1208 case DIOCWLABEL: 1209 case DIOCGDEFLABEL: 1210 case DIOCCACHESYNC: 1211#ifdef __HAVE_OLD_DISKLABEL 1212 case ODIOCGDINFO: 1213 case ODIOCSDINFO: 1214 case ODIOCWDINFO: 1215 case ODIOCGDEFLABEL: 1216#endif 1217 if ((vnd->sc_flags & VNF_INITED) == 0) 1218 return ENXIO; 1219 } 1220 1221 error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l); 1222 if (error != EPASSTHROUGH) 1223 return error; 1224 1225 1226 switch (cmd) { 1227#ifdef VNDIOCSET50 1228 case VNDIOCSET50: 1229#endif 1230 case VNDIOCSET: 1231 if (vnd->sc_flags & VNF_INITED) 1232 return EBUSY; 1233 1234 if ((error = vndlock(vnd)) != 0) 1235 return error; 1236 1237 fflags = FREAD; 1238 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 1239 fflags |= FWRITE; 1240 error = pathbuf_copyin(vio->vnd_file, &pb); 1241 if (error) { 1242 goto unlock_and_exit; 1243 } 1244 NDINIT(&nd, LOOKUP, FOLLOW, pb); 1245 if ((error = vn_open(&nd, fflags, 0)) != 0) { 1246 pathbuf_destroy(pb); 1247 goto unlock_and_exit; 1248 } 1249 KASSERT(l); 1250 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred); 
1251 if (!error && nd.ni_vp->v_type != VREG) 1252 error = EOPNOTSUPP; 1253 if (!error && vattr.va_bytes < vattr.va_size) 1254 /* File is definitely sparse, use vn_rdwr() */ 1255 vnd->sc_flags |= VNF_USE_VN_RDWR; 1256 if (error) { 1257 VOP_UNLOCK(nd.ni_vp); 1258 goto close_and_exit; 1259 } 1260 1261 /* If using a compressed file, initialize its info */ 1262 /* (or abort with an error if kernel has no compression) */ 1263 if (vio->vnd_flags & VNF_COMP) { 1264#ifdef VND_COMPRESSION 1265 struct vnd_comp_header *ch; 1266 int i; 1267 uint32_t comp_size; 1268 uint32_t comp_maxsize; 1269 1270 /* allocate space for compresed file header */ 1271 ch = malloc(sizeof(struct vnd_comp_header), 1272 M_TEMP, M_WAITOK); 1273 1274 /* read compressed file header */ 1275 error = vn_rdwr(UIO_READ, nd.ni_vp, (void *)ch, 1276 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1277 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1278 if (error) { 1279 free(ch, M_TEMP); 1280 VOP_UNLOCK(nd.ni_vp); 1281 goto close_and_exit; 1282 } 1283 1284 if (be32toh(ch->block_size) == 0 || 1285 be32toh(ch->num_blocks) > UINT32_MAX - 1) { 1286 free(ch, M_TEMP); 1287 VOP_UNLOCK(nd.ni_vp); 1288 goto close_and_exit; 1289 } 1290 1291 /* save some header info */ 1292 vnd->sc_comp_blksz = be32toh(ch->block_size); 1293 /* note last offset is the file byte size */ 1294 vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1; 1295 free(ch, M_TEMP); 1296 if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) { 1297 VOP_UNLOCK(nd.ni_vp); 1298 error = EINVAL; 1299 goto close_and_exit; 1300 } 1301 KASSERT(0 < vnd->sc_comp_blksz); 1302 KASSERT(0 < vnd->sc_comp_numoffs); 1303 /* 1304 * @#^@!$& gcc -Wtype-limits refuses to let me 1305 * write SIZE_MAX/sizeof(uint64_t) < numoffs, 1306 * because the range of the type on amd64 makes 1307 * the comparisons always false. 
1308 */ 1309#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT) 1310 if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) { 1311 VOP_UNLOCK(nd.ni_vp); 1312 error = EINVAL; 1313 goto close_and_exit; 1314 } 1315#endif 1316 if ((vattr.va_size < sizeof(struct vnd_comp_header)) || 1317 (vattr.va_size - sizeof(struct vnd_comp_header) < 1318 sizeof(uint64_t)*vnd->sc_comp_numoffs) || 1319 (UQUAD_MAX/vnd->sc_comp_blksz < 1320 vnd->sc_comp_numoffs - 1)) { 1321 VOP_UNLOCK(nd.ni_vp); 1322 error = EINVAL; 1323 goto close_and_exit; 1324 } 1325 1326 /* set decompressed file size */ 1327 KASSERT(vnd->sc_comp_numoffs - 1 <= 1328 UQUAD_MAX/vnd->sc_comp_blksz); 1329 vattr.va_size = 1330 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1331 (u_quad_t)vnd->sc_comp_blksz; 1332 1333 /* allocate space for all the compressed offsets */ 1334 __CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t)); 1335 vnd->sc_comp_offsets = 1336 malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs, 1337 M_DEVBUF, M_WAITOK); 1338 1339 /* read in the offsets */ 1340 error = vn_rdwr(UIO_READ, nd.ni_vp, 1341 (void *)vnd->sc_comp_offsets, 1342 sizeof(uint64_t) * vnd->sc_comp_numoffs, 1343 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1344 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1345 if (error) { 1346 VOP_UNLOCK(nd.ni_vp); 1347 goto close_and_exit; 1348 } 1349 /* 1350 * find largest block size (used for allocation limit). 1351 * Also convert offset to native byte order. 
1352 */ 1353 comp_maxsize = 0; 1354 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1355 vnd->sc_comp_offsets[i] = 1356 be64toh(vnd->sc_comp_offsets[i]); 1357 comp_size = 1358 be64toh(vnd->sc_comp_offsets[i + 1]) 1359 - vnd->sc_comp_offsets[i]; 1360 if (comp_size > comp_maxsize) 1361 comp_maxsize = comp_size; 1362 } 1363 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1364 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs 1365 - 1]); 1366 1367 /* create compressed data buffer */ 1368 vnd->sc_comp_buff = malloc(comp_maxsize, 1369 M_DEVBUF, M_WAITOK); 1370 1371 /* create decompressed buffer */ 1372 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1373 M_DEVBUF, M_WAITOK); 1374 vnd->sc_comp_buffblk = -1; 1375 1376 /* Initialize decompress stream */ 1377 memset(&vnd->sc_comp_stream, 0, sizeof(z_stream)); 1378 vnd->sc_comp_stream.zalloc = vnd_alloc; 1379 vnd->sc_comp_stream.zfree = vnd_free; 1380 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1381 if (error) { 1382 if (vnd->sc_comp_stream.msg) 1383 printf("vnd%d: compressed file, %s\n", 1384 unit, vnd->sc_comp_stream.msg); 1385 VOP_UNLOCK(nd.ni_vp); 1386 error = EINVAL; 1387 goto close_and_exit; 1388 } 1389 1390 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1391#else /* !VND_COMPRESSION */ 1392 VOP_UNLOCK(nd.ni_vp); 1393 error = EOPNOTSUPP; 1394 goto close_and_exit; 1395#endif /* VND_COMPRESSION */ 1396 } 1397 1398 VOP_UNLOCK(nd.ni_vp); 1399 vnd->sc_vp = nd.ni_vp; 1400 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1401 1402 /* get smallest I/O size for underlying device, fall back to 1403 * fundamental I/O size of underlying filesystem 1404 */ 1405 error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE, &vnd->sc_iosize, FKIOCTL, l); 1406 if (error) 1407 vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize; 1408 1409 /* 1410 * Use pseudo-geometry specified. If none was provided, 1411 * use "standard" Adaptec fictitious geometry. 
1412 */ 1413 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1414 1415 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1416 sizeof(vio->vnd_geom)); 1417 1418 /* 1419 * Sanity-check the sector size. 1420 */ 1421 if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) || 1422 vnd->sc_geom.vng_ncylinders == 0 || 1423 vnd->sc_geom.vng_ntracks == 0 || 1424 vnd->sc_geom.vng_nsectors == 0) { 1425 error = EINVAL; 1426 goto close_and_exit; 1427 } 1428 1429 /* 1430 * Compute the size (in DEV_BSIZE blocks) specified 1431 * by the geometry. 1432 */ 1433 geomsize = (int64_t)vnd->sc_geom.vng_nsectors * 1434 vnd->sc_geom.vng_ntracks * 1435 vnd->sc_geom.vng_ncylinders * 1436 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1437 1438 /* 1439 * Sanity-check the size against the specified 1440 * geometry. 1441 */ 1442 if (vnd->sc_size < geomsize) { 1443 error = EINVAL; 1444 goto close_and_exit; 1445 } 1446 } else if (vnd->sc_size >= (32 * 64)) { 1447 /* 1448 * Size must be at least 2048 DEV_BSIZE blocks 1449 * (1M) in order to use this geometry. 
1450 */ 1451 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1452 vnd->sc_geom.vng_nsectors = 32; 1453 vnd->sc_geom.vng_ntracks = 64; 1454 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1455 } else { 1456 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1457 vnd->sc_geom.vng_nsectors = 1; 1458 vnd->sc_geom.vng_ntracks = 1; 1459 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1460 } 1461 1462 vnd_set_geometry(vnd); 1463 1464 if (vio->vnd_flags & VNDIOF_READONLY) { 1465 vnd->sc_flags |= VNF_READONLY; 1466 } 1467 1468 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1469 goto close_and_exit; 1470 1471 vndthrottle(vnd, vnd->sc_vp); 1472 vio->vnd_osize = dbtob(vnd->sc_size); 1473#ifdef VNDIOCSET50 1474 if (cmd != VNDIOCSET50) 1475#endif 1476 vio->vnd_size = dbtob(vnd->sc_size); 1477 vnd->sc_flags |= VNF_INITED; 1478 1479 /* create the kernel thread, wait for it to be up */ 1480 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, 1481 &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev)); 1482 if (error) 1483 goto close_and_exit; 1484 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1485 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1486 } 1487#ifdef DEBUG 1488 if (vnddebug & VDB_INIT) 1489 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1490 vnd->sc_vp, (unsigned long) vnd->sc_size, 1491 vnd->sc_geom.vng_secsize, 1492 vnd->sc_geom.vng_nsectors, 1493 vnd->sc_geom.vng_ntracks, 1494 vnd->sc_geom.vng_ncylinders); 1495#endif 1496 1497 /* Attach the disk. */ 1498 disk_attach(&vnd->sc_dkdev); 1499 1500 /* Initialize the xfer and buffer pools. 
*/ 1501 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1502 0, 0, "vndxpl", NULL, IPL_BIO); 1503 1504 vndunlock(vnd); 1505 1506 pathbuf_destroy(pb); 1507 1508 /* Discover wedges on this disk */ 1509 dkwedge_discover(&vnd->sc_dkdev); 1510 1511 break; 1512 1513close_and_exit: 1514 (void) vn_close(nd.ni_vp, fflags, l->l_cred); 1515 pathbuf_destroy(pb); 1516unlock_and_exit: 1517#ifdef VND_COMPRESSION 1518 /* free any allocated memory (for compressed file) */ 1519 if (vnd->sc_comp_offsets) { 1520 free(vnd->sc_comp_offsets, M_DEVBUF); 1521 vnd->sc_comp_offsets = NULL; 1522 } 1523 if (vnd->sc_comp_buff) { 1524 free(vnd->sc_comp_buff, M_DEVBUF); 1525 vnd->sc_comp_buff = NULL; 1526 } 1527 if (vnd->sc_comp_decombuf) { 1528 free(vnd->sc_comp_decombuf, M_DEVBUF); 1529 vnd->sc_comp_decombuf = NULL; 1530 } 1531#endif /* VND_COMPRESSION */ 1532 vndunlock(vnd); 1533 return error; 1534 1535#ifdef VNDIOCCLR50 1536 case VNDIOCCLR50: 1537#endif 1538 case VNDIOCCLR: 1539 part = DISKPART(dev); 1540 pmask = (1 << part); 1541 force = (vio->vnd_flags & VNDIOF_FORCE) != 0; 1542 1543 if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0) 1544 return error; 1545 1546 break; 1547 1548 1549 case DIOCWDINFO: 1550 case DIOCSDINFO: 1551#ifdef __HAVE_OLD_DISKLABEL 1552 case ODIOCWDINFO: 1553 case ODIOCSDINFO: 1554#endif 1555 { 1556 struct disklabel *lp; 1557 1558 if ((error = vndlock(vnd)) != 0) 1559 return error; 1560 1561 vnd->sc_flags |= VNF_LABELLING; 1562 1563#ifdef __HAVE_OLD_DISKLABEL 1564 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1565 memset(&newlabel, 0, sizeof newlabel); 1566 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1567 lp = &newlabel; 1568 } else 1569#endif 1570 lp = (struct disklabel *)data; 1571 1572 error = setdisklabel(vnd->sc_dkdev.dk_label, 1573 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1574 if (error == 0) { 1575 if (cmd == DIOCWDINFO 1576#ifdef __HAVE_OLD_DISKLABEL 1577 || cmd == ODIOCWDINFO 1578#endif 1579 ) 1580 error = 
writedisklabel(VNDLABELDEV(dev), 1581 vndstrategy, vnd->sc_dkdev.dk_label, 1582 vnd->sc_dkdev.dk_cpulabel); 1583 } 1584 1585 vnd->sc_flags &= ~VNF_LABELLING; 1586 1587 vndunlock(vnd); 1588 1589 if (error) 1590 return error; 1591 break; 1592 } 1593 1594 case DIOCKLABEL: 1595 if (*(int *)data != 0) 1596 vnd->sc_flags |= VNF_KLABEL; 1597 else 1598 vnd->sc_flags &= ~VNF_KLABEL; 1599 break; 1600 1601 case DIOCWLABEL: 1602 if (*(int *)data != 0) 1603 vnd->sc_flags |= VNF_WLABEL; 1604 else 1605 vnd->sc_flags &= ~VNF_WLABEL; 1606 break; 1607 1608 case DIOCGDEFLABEL: 1609 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1610 break; 1611 1612#ifdef __HAVE_OLD_DISKLABEL 1613 case ODIOCGDEFLABEL: 1614 vndgetdefaultlabel(vnd, &newlabel); 1615 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1616 return ENOTTY; 1617 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1618 break; 1619#endif 1620 1621 case DIOCCACHESYNC: 1622 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1623 error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred, 1624 FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0); 1625 VOP_UNLOCK(vnd->sc_vp); 1626 return error; 1627 1628 default: 1629 return ENOTTY; 1630 } 1631 1632 return 0; 1633} 1634 1635/* 1636 * Duplicate the current processes' credentials. Since we are called only 1637 * as the result of a SET ioctl and only root can do that, any future access 1638 * to this "disk" is essentially as root. Note that credentials may change 1639 * if some other uid can write directly to the mapped file (NFS). 
1640 */ 1641static int 1642vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1643{ 1644 struct uio auio; 1645 struct iovec aiov; 1646 char *tmpbuf; 1647 int error; 1648 1649 vnd->sc_cred = kauth_cred_dup(cred); 1650 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1651 1652 /* XXX: Horrible kludge to establish credentials for NFS */ 1653 aiov.iov_base = tmpbuf; 1654 aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size)); 1655 auio.uio_iov = &aiov; 1656 auio.uio_iovcnt = 1; 1657 auio.uio_offset = 0; 1658 auio.uio_rw = UIO_READ; 1659 auio.uio_resid = aiov.iov_len; 1660 UIO_SETUP_SYSSPACE(&auio); 1661 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1662 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1663 if (error == 0) { 1664 /* 1665 * Because vnd does all IO directly through the vnode 1666 * we need to flush (at least) the buffer from the above 1667 * VOP_READ from the buffer cache to prevent cache 1668 * incoherencies. Also, be careful to write dirty 1669 * buffers back to stable storage. 
1670 */ 1671 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1672 curlwp, 0, 0); 1673 } 1674 VOP_UNLOCK(vnd->sc_vp); 1675 1676 free(tmpbuf, M_TEMP); 1677 return error; 1678} 1679 1680/* 1681 * Set maxactive based on FS type 1682 */ 1683static void 1684vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1685{ 1686 1687 if (vp->v_tag == VT_NFS) 1688 vnd->sc_maxactive = 2; 1689 else 1690 vnd->sc_maxactive = 8; 1691 1692 if (vnd->sc_maxactive < 1) 1693 vnd->sc_maxactive = 1; 1694} 1695 1696#if 0 1697static void 1698vndshutdown(void) 1699{ 1700 struct vnd_softc *vnd; 1701 1702 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1703 if (vnd->sc_flags & VNF_INITED) 1704 vndclear(vnd); 1705} 1706#endif 1707 1708static void 1709vndclear(struct vnd_softc *vnd, int myminor) 1710{ 1711 struct vnode *vp = vnd->sc_vp; 1712 int fflags = FREAD; 1713 int bmaj, cmaj, i, mn; 1714 int s; 1715 1716#ifdef DEBUG 1717 if (vnddebug & VDB_FOLLOW) 1718 printf("vndclear(%p): vp %p\n", vnd, vp); 1719#endif 1720 /* locate the major number */ 1721 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1722 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1723 1724 /* Nuke the vnodes for any open instances */ 1725 for (i = 0; i < MAXPARTITIONS; i++) { 1726 mn = DISKMINOR(device_unit(vnd->sc_dev), i); 1727 vdevgone(bmaj, mn, mn, VBLK); 1728 if (mn != myminor) /* XXX avoid to kill own vnode */ 1729 vdevgone(cmaj, mn, mn, VCHR); 1730 } 1731 1732 if ((vnd->sc_flags & VNF_READONLY) == 0) 1733 fflags |= FWRITE; 1734 1735 s = splbio(); 1736 bufq_drain(vnd->sc_tab); 1737 splx(s); 1738 1739 vnd->sc_flags |= VNF_VUNCONF; 1740 wakeup(&vnd->sc_tab); 1741 while (vnd->sc_flags & VNF_KTHREAD) 1742 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1743 1744#ifdef VND_COMPRESSION 1745 /* free the compressed file buffers */ 1746 if (vnd->sc_flags & VNF_COMP) { 1747 if (vnd->sc_comp_offsets) { 1748 free(vnd->sc_comp_offsets, M_DEVBUF); 1749 vnd->sc_comp_offsets = NULL; 1750 } 1751 if (vnd->sc_comp_buff) { 1752 
free(vnd->sc_comp_buff, M_DEVBUF); 1753 vnd->sc_comp_buff = NULL; 1754 } 1755 if (vnd->sc_comp_decombuf) { 1756 free(vnd->sc_comp_decombuf, M_DEVBUF); 1757 vnd->sc_comp_decombuf = NULL; 1758 } 1759 } 1760#endif /* VND_COMPRESSION */ 1761 vnd->sc_flags &= 1762 ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL 1763 | VNF_VUNCONF | VNF_COMP | VNF_CLEARING); 1764 if (vp == NULL) 1765 panic("vndclear: null vp"); 1766 (void) vn_close(vp, fflags, vnd->sc_cred); 1767 kauth_cred_free(vnd->sc_cred); 1768 vnd->sc_vp = NULL; 1769 vnd->sc_cred = NULL; 1770 vnd->sc_size = 0; 1771} 1772 1773static int 1774vndsize(dev_t dev) 1775{ 1776 struct vnd_softc *sc; 1777 struct disklabel *lp; 1778 int part, unit, omask; 1779 int size; 1780 1781 unit = vndunit(dev); 1782 sc = device_lookup_private(&vnd_cd, unit); 1783 if (sc == NULL) 1784 return -1; 1785 1786 if ((sc->sc_flags & VNF_INITED) == 0) 1787 return -1; 1788 1789 part = DISKPART(dev); 1790 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1791 lp = sc->sc_dkdev.dk_label; 1792 1793 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1794 return -1; 1795 1796 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1797 size = -1; 1798 else 1799 size = lp->d_partitions[part].p_size * 1800 (lp->d_secsize / DEV_BSIZE); 1801 1802 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1803 return -1; 1804 1805 return size; 1806} 1807 1808static int 1809vnddump(dev_t dev, daddr_t blkno, void *va, 1810 size_t size) 1811{ 1812 1813 /* Not implemented. 
*/ 1814 return ENXIO; 1815} 1816 1817static void 1818vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1819{ 1820 struct vndgeom *vng = &sc->sc_geom; 1821 struct partition *pp; 1822 unsigned spb; 1823 1824 memset(lp, 0, sizeof(*lp)); 1825 1826 spb = vng->vng_secsize / DEV_BSIZE; 1827 if (sc->sc_size / spb > UINT32_MAX) 1828 lp->d_secperunit = UINT32_MAX; 1829 else 1830 lp->d_secperunit = sc->sc_size / spb; 1831 lp->d_secsize = vng->vng_secsize; 1832 lp->d_nsectors = vng->vng_nsectors; 1833 lp->d_ntracks = vng->vng_ntracks; 1834 lp->d_ncylinders = vng->vng_ncylinders; 1835 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1836 1837 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1838 lp->d_type = DKTYPE_VND; 1839 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1840 lp->d_rpm = 3600; 1841 lp->d_interleave = 1; 1842 lp->d_flags = 0; 1843 1844 pp = &lp->d_partitions[RAW_PART]; 1845 pp->p_offset = 0; 1846 pp->p_size = lp->d_secperunit; 1847 pp->p_fstype = FS_UNUSED; 1848 lp->d_npartitions = RAW_PART + 1; 1849 1850 lp->d_magic = DISKMAGIC; 1851 lp->d_magic2 = DISKMAGIC; 1852 lp->d_checksum = dkcksum(lp); 1853} 1854 1855/* 1856 * Read the disklabel from a vnd. If one is not present, create a fake one. 1857 */ 1858static void 1859vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1860{ 1861 const char *errstring; 1862 struct disklabel *lp = sc->sc_dkdev.dk_label; 1863 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1864 int i; 1865 1866 memset(clp, 0, sizeof(*clp)); 1867 1868 vndgetdefaultlabel(sc, lp); 1869 1870 /* 1871 * Call the generic disklabel extraction routine. 1872 */ 1873 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1874 if (errstring) { 1875 /* 1876 * Lack of disklabel is common, but we print the warning 1877 * anyway, since it might contain other useful information. 
1878 */ 1879 aprint_normal_dev(sc->sc_dev, "%s\n", errstring); 1880 1881 /* 1882 * For historical reasons, if there's no disklabel 1883 * present, all partitions must be FS_BSDFFS and 1884 * occupy the entire disk. 1885 */ 1886 for (i = 0; i < MAXPARTITIONS; i++) { 1887 /* 1888 * Don't wipe out port specific hack (such as 1889 * dos partition hack of i386 port). 1890 */ 1891 if (lp->d_partitions[i].p_size != 0) 1892 continue; 1893 1894 lp->d_partitions[i].p_size = lp->d_secperunit; 1895 lp->d_partitions[i].p_offset = 0; 1896 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1897 } 1898 1899 strncpy(lp->d_packname, "default label", 1900 sizeof(lp->d_packname)); 1901 1902 lp->d_npartitions = MAXPARTITIONS; 1903 lp->d_checksum = dkcksum(lp); 1904 } 1905} 1906 1907/* 1908 * Wait interruptibly for an exclusive lock. 1909 * 1910 * XXX 1911 * Several drivers do this; it should be abstracted and made MP-safe. 1912 */ 1913static int 1914vndlock(struct vnd_softc *sc) 1915{ 1916 int error; 1917 1918 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1919 sc->sc_flags |= VNF_WANTED; 1920 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1921 return error; 1922 } 1923 sc->sc_flags |= VNF_LOCKED; 1924 return 0; 1925} 1926 1927/* 1928 * Unlock and wake up any waiters. 
1929 */ 1930static void 1931vndunlock(struct vnd_softc *sc) 1932{ 1933 1934 sc->sc_flags &= ~VNF_LOCKED; 1935 if ((sc->sc_flags & VNF_WANTED) != 0) { 1936 sc->sc_flags &= ~VNF_WANTED; 1937 wakeup(sc); 1938 } 1939} 1940 1941#ifdef VND_COMPRESSION 1942/* compressed file read */ 1943static void 1944compstrategy(struct buf *bp, off_t bn) 1945{ 1946 int error; 1947 int unit = vndunit(bp->b_dev); 1948 struct vnd_softc *vnd = 1949 device_lookup_private(&vnd_cd, unit); 1950 u_int32_t comp_block; 1951 struct uio auio; 1952 char *addr; 1953 int s; 1954 1955 /* set up constants for data move */ 1956 auio.uio_rw = UIO_READ; 1957 UIO_SETUP_SYSSPACE(&auio); 1958 1959 /* read, and transfer the data */ 1960 addr = bp->b_data; 1961 bp->b_resid = bp->b_bcount; 1962 s = splbio(); 1963 while (bp->b_resid > 0) { 1964 unsigned length; 1965 size_t length_in_buffer; 1966 u_int32_t offset_in_buffer; 1967 struct iovec aiov; 1968 1969 /* calculate the compressed block number */ 1970 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1971 1972 /* check for good block number */ 1973 if (comp_block >= vnd->sc_comp_numoffs) { 1974 bp->b_error = EINVAL; 1975 splx(s); 1976 return; 1977 } 1978 1979 /* read in the compressed block, if not in buffer */ 1980 if (comp_block != vnd->sc_comp_buffblk) { 1981 length = vnd->sc_comp_offsets[comp_block + 1] - 1982 vnd->sc_comp_offsets[comp_block]; 1983 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1984 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1985 length, vnd->sc_comp_offsets[comp_block], 1986 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred, 1987 NULL, NULL); 1988 if (error) { 1989 bp->b_error = error; 1990 VOP_UNLOCK(vnd->sc_vp); 1991 splx(s); 1992 return; 1993 } 1994 /* uncompress the buffer */ 1995 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1996 vnd->sc_comp_stream.avail_in = length; 1997 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1998 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1999 inflateReset(&vnd->sc_comp_stream); 
2000 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 2001 if (error != Z_STREAM_END) { 2002 if (vnd->sc_comp_stream.msg) 2003 aprint_normal_dev(vnd->sc_dev, 2004 "compressed file, %s\n", 2005 vnd->sc_comp_stream.msg); 2006 bp->b_error = EBADMSG; 2007 VOP_UNLOCK(vnd->sc_vp); 2008 splx(s); 2009 return; 2010 } 2011 vnd->sc_comp_buffblk = comp_block; 2012 VOP_UNLOCK(vnd->sc_vp); 2013 } 2014 2015 /* transfer the usable uncompressed data */ 2016 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 2017 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 2018 if (length_in_buffer > bp->b_resid) 2019 length_in_buffer = bp->b_resid; 2020 auio.uio_iov = &aiov; 2021 auio.uio_iovcnt = 1; 2022 aiov.iov_base = addr; 2023 aiov.iov_len = length_in_buffer; 2024 auio.uio_resid = aiov.iov_len; 2025 auio.uio_offset = 0; 2026 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 2027 length_in_buffer, &auio); 2028 if (error) { 2029 bp->b_error = error; 2030 splx(s); 2031 return; 2032 } 2033 2034 bn += length_in_buffer; 2035 addr += length_in_buffer; 2036 bp->b_resid -= length_in_buffer; 2037 } 2038 splx(s); 2039} 2040 2041/* compression memory allocation routines */ 2042static void * 2043vnd_alloc(void *aux, u_int items, u_int siz) 2044{ 2045 return malloc(items * siz, M_TEMP, M_NOWAIT); 2046} 2047 2048static void 2049vnd_free(void *aux, void *ptr) 2050{ 2051 free(ptr, M_TEMP); 2052} 2053#endif /* VND_COMPRESSION */ 2054 2055static void 2056vnd_set_geometry(struct vnd_softc *vnd) 2057{ 2058 struct disk_geom *dg = &vnd->sc_dkdev.dk_geom; 2059 2060 memset(dg, 0, sizeof(*dg)); 2061 2062 dg->dg_secperunit = (int64_t)vnd->sc_geom.vng_nsectors * 2063 vnd->sc_geom.vng_ntracks * vnd->sc_geom.vng_ncylinders; 2064 dg->dg_secsize = vnd->sc_geom.vng_secsize; 2065 dg->dg_nsectors = vnd->sc_geom.vng_nsectors; 2066 dg->dg_ntracks = vnd->sc_geom.vng_ntracks; 2067 dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders; 2068 2069#ifdef DEBUG 2070 if (vnddebug & VDB_LABEL) { 2071 
printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit); 2072 printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders); 2073 } 2074#endif 2075 disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL); 2076} 2077 2078#ifdef VND_COMPRESSION 2079#define VND_DEPENDS "zlib" 2080#else 2081#define VND_DEPENDS NULL 2082#endif 2083 2084MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS); 2085 2086#ifdef _MODULE 2087int vnd_bmajor = -1, vnd_cmajor = -1; 2088 2089CFDRIVER_DECL(vnd, DV_DISK, NULL); 2090#endif 2091 2092static int 2093vnd_modcmd(modcmd_t cmd, void *arg) 2094{ 2095 int error = 0; 2096 2097 switch (cmd) { 2098 case MODULE_CMD_INIT: 2099#ifdef _MODULE 2100 error = config_cfdriver_attach(&vnd_cd); 2101 if (error) 2102 break; 2103 2104 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2105 if (error) { 2106 config_cfdriver_detach(&vnd_cd); 2107#ifdef DIAGNOSTIC 2108 aprint_error("%s: unable to register cfattach for \n" 2109 "%s, error %d", __func__, vnd_cd.cd_name, error); 2110#endif 2111 break; 2112 } 2113 2114 /* 2115 * Attach the {b,c}devsw's 2116 */ 2117 error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2118 &vnd_cdevsw, &vnd_cmajor); 2119 /* 2120 * If devsw_attach fails, remove from autoconf database 2121 */ 2122 if (error) { 2123 config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); 2124 config_cfdriver_detach(&vnd_cd); 2125#ifdef DIAGNOSTIC 2126 aprint_error("%s: unable to attach %s devsw, " 2127 "error %d", __func__, vnd_cd.cd_name, error); 2128#endif 2129 break; 2130 } 2131#endif 2132 break; 2133 2134 case MODULE_CMD_FINI: 2135#ifdef _MODULE 2136 /* 2137 * Remove {b,c}devsw's 2138 */ 2139 devsw_detach(&vnd_bdevsw, &vnd_cdevsw); 2140 2141 /* 2142 * Now remove device from autoconf database 2143 */ 2144 error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); 2145 if (error) { 2146 (void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2147 &vnd_cdevsw, &vnd_cmajor); 2148#ifdef DIAGNOSTIC 2149 aprint_error("%s: failed to detach %s cfattach, " 2150 "error %d\n", 
__func__, vnd_cd.cd_name, error); 2151#endif 2152 break; 2153 } 2154 error = config_cfdriver_detach(&vnd_cd); 2155 if (error) { 2156 (void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2157 (void)devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2158 &vnd_cdevsw, &vnd_cmajor); 2159#ifdef DIAGNOSTIC 2160 aprint_error("%s: failed to detach %s cfdriver, " 2161 "error %d\n", __func__, vnd_cd.cd_name, error); 2162 break; 2163#endif 2164 } 2165#endif 2166 break; 2167 2168 case MODULE_CMD_STAT: 2169 return ENOTTY; 2170 2171 default: 2172 return ENOTTY; 2173 } 2174 2175 return error; 2176} 2177