1/* $NetBSD: vnd.c,v 1.289 2023/05/19 15:42:43 mlelstv Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998, 2008, 2020 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 * Copyright (c) 1988 University of Utah. 34 * Copyright (c) 1990, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * the Systems Programming Group of the University of Utah Computer 39 * Science Department. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 66 * 67 * @(#)vn.c 8.9 (Berkeley) 5/14/95 68 */ 69 70/* 71 * Vnode disk driver. 72 * 73 * Block/character interface to a vnode. Allows one to treat a file 74 * as a disk (e.g. build a filesystem in it, mount it, etc.). 75 * 76 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 77 * this uses them to avoid distorting the local buffer cache. If those 78 * block-level operations are not available, this falls back to the regular 79 * read and write calls. Using these may distort the cache in some cases 80 * but better have the driver working than preventing it to work on file 81 * systems where the block-level operations are not implemented for 82 * whatever reason. 83 * 84 * NOTE 2: There is a security issue involved with this driver. 85 * Once mounted all access to the contents of the "mapped" file via 86 * the special file is controlled by the permissions on the special 87 * file, the protection of the mapped file is ignored (effectively, 88 * by using root credentials in all transactions). 89 * 90 * NOTE 3: Doesn't interact with leases, should it? 91 */ 92 93#include <sys/cdefs.h> 94__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.289 2023/05/19 15:42:43 mlelstv Exp $"); 95 96#if defined(_KERNEL_OPT) 97#include "opt_vnd.h" 98#include "opt_compat_netbsd.h" 99#endif 100 101#include <sys/param.h> 102#include <sys/systm.h> 103#include <sys/namei.h> 104#include <sys/proc.h> 105#include <sys/kthread.h> 106#include <sys/errno.h> 107#include <sys/buf.h> 108#include <sys/bufq.h> 109#include <sys/malloc.h> 110#include <sys/ioctl.h> 111#include <sys/disklabel.h> 112#include <sys/device.h> 113#include <sys/disk.h> 114#include <sys/stat.h> 115#include <sys/mount.h> 116#include <sys/vnode.h> 117#include <sys/fstrans.h> 118#include <sys/file.h> 119#include <sys/uio.h> 120#include <sys/conf.h> 121#include <sys/kauth.h> 122#include <sys/module.h> 123#include <sys/compat_stub.h> 124#include <sys/atomic.h> 125 126#include <uvm/uvm.h> 127 128#include <net/zlib.h> 129 130#include <miscfs/genfs/genfs.h> 131#include <miscfs/specfs/specdev.h> 132 133#include <dev/dkvar.h> 134#include <dev/vndvar.h> 135 136#include "ioconf.h" 137 138#if defined(VNDDEBUG) && !defined(DEBUG) 139#define DEBUG 140#endif 141 142#ifdef DEBUG 143int dovndcluster = 1; 144#define VDB_FOLLOW 0x01 145#define VDB_INIT 0x02 146#define VDB_IO 0x04 147#define VDB_LABEL 0x08 148int vnddebug = 0; 149#endif 150 151#define vndunit(x) DISKUNIT(x) 152 153struct vndxfer { 154 struct buf vx_buf; 155 struct vnd_softc *vx_vnd; 156}; 157#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 158 159#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 160#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 161 162#define VNDLABELDEV(dev) \ 163 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 164 165#define VND_MAXPENDING(vnd) ((vnd)->sc_maxactive * 4) 166#define VND_MAXPAGES(vnd) (1024 * 1024 / PAGE_SIZE) 167 168 169static void vndclear(struct vnd_softc *, int); 170static int vnddoclear(struct vnd_softc *, int, int, bool); 171static int vndsetcred(struct vnd_softc *, kauth_cred_t); 172static void vndthrottle(struct vnd_softc *, struct vnode *); 173static void vndiodone(struct buf *); 174#if 0 175static void vndshutdown(void); 176#endif 177 178static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 179static void vndgetdisklabel(dev_t, struct vnd_softc *); 180 181static int vndlock(struct vnd_softc *); 182static void vndunlock(struct vnd_softc *); 183#ifdef VND_COMPRESSION 184static void compstrategy(struct buf *, off_t); 185static void *vnd_alloc(void *, u_int, u_int); 186static void vnd_free(void *, void *); 187#endif /* VND_COMPRESSION */ 188 189static void vndthread(void *); 190static bool vnode_has_op(const struct vnode *, int); 191static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 192 struct buf *); 193static void handle_with_strategy(struct vnd_softc *, const struct buf *, 194 struct buf *); 195static void vnd_set_geometry(struct vnd_softc *); 196 197static dev_type_open(vndopen); 198static dev_type_close(vndclose); 199static dev_type_read(vndread); 200static dev_type_write(vndwrite); 201static dev_type_ioctl(vndioctl); 202static dev_type_strategy(vndstrategy); 203static dev_type_dump(vnddump); 204static dev_type_size(vndsize); 205 206const struct bdevsw vnd_bdevsw = { 207 .d_open = vndopen, 208 .d_close = vndclose, 209 .d_strategy = vndstrategy, 210 .d_ioctl = vndioctl, 211 .d_dump = vnddump, 212 .d_psize = vndsize, 213 .d_discard = nodiscard, 214 .d_flag = D_DISK 215}; 216 217const struct cdevsw vnd_cdevsw = { 218 .d_open = vndopen, 219 .d_close = vndclose, 220 .d_read = vndread, 221 .d_write = vndwrite, 222 .d_ioctl = vndioctl, 223 .d_stop = nostop, 224 .d_tty = notty, 225 .d_poll = nopoll, 226 .d_mmap = nommap, 227 .d_kqfilter = nokqfilter, 228 .d_discard = nodiscard, 229 .d_flag = D_DISK 230}; 231 232static int vnd_match(device_t, cfdata_t, void *); 233static void vnd_attach(device_t, device_t, void *); 234static int vnd_detach(device_t, int); 235 236CFATTACH_DECL3_NEW(vnd, sizeof(struct vnd_softc), 237 vnd_match, vnd_attach, vnd_detach, NULL, NULL, NULL, DVF_DETACH_SHUTDOWN); 238 239static struct vnd_softc *vnd_spawn(int); 240static int vnd_destroy(device_t); 241 242static const struct dkdriver vnddkdriver = { 243 .d_strategy = vndstrategy, 244 .d_minphys = minphys 245}; 246 247void 248vndattach(int num) 249{ 250 int error; 251 252 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 253 if (error) 254 aprint_error("%s: unable to register cfattach, error = %d\n", 255 vnd_cd.cd_name, error); 256} 257 258static int 259vnd_match(device_t self, cfdata_t cfdata, void *aux) 260{ 261 262 return 1; 263} 264 265static void 266vnd_attach(device_t parent, device_t self, void *aux) 267{ 268 struct vnd_softc *sc = device_private(self); 269 270 sc->sc_dev = self; 271 sc->sc_comp_offsets = NULL; 272 sc->sc_comp_buff = NULL; 273 sc->sc_comp_decombuf = NULL; 274 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 275 disk_init(&sc->sc_dkdev, device_xname(self), &vnddkdriver); 276 if (!pmf_device_register(self, NULL, NULL)) 277 aprint_error_dev(self, "couldn't establish power handler\n"); 278} 279 280static int 281vnd_detach(device_t self, int flags) 282{ 283 int error; 284 struct vnd_softc *sc = device_private(self); 285 286 if (sc->sc_flags & VNF_INITED) { 287 error = vnddoclear(sc, 0, -1, (flags & DETACH_FORCE) != 0); 288 if (error != 0) 289 return error; 290 } 291 292 pmf_device_deregister(self); 293 bufq_free(sc->sc_tab); 294 disk_destroy(&sc->sc_dkdev); 295 296 return 0; 297} 298 299static struct vnd_softc * 300vnd_spawn(int unit) 301{ 302 cfdata_t cf; 303 304 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 305 cf->cf_name = vnd_cd.cd_name; 306 cf->cf_atname = vnd_cd.cd_name; 307 cf->cf_unit = unit; 308 cf->cf_fstate = FSTATE_STAR; 309 310 return device_private(config_attach_pseudo(cf)); 311} 312 313static int 314vnd_destroy(device_t dev) 315{ 316 int error; 317 cfdata_t cf; 318 319 cf = device_cfdata(dev); 320 error = config_detach(dev, DETACH_QUIET); 321 if (error) 322 return error; 323 free(cf, M_DEVBUF); 324 return 0; 325} 326 327static int 328vndopen(dev_t dev, int flags, int mode, struct lwp *l) 329{ 330 int unit = vndunit(dev); 331 struct vnd_softc *sc; 332 int error = 0, part, pmask; 333 struct disklabel *lp; 334 335#ifdef DEBUG 336 if (vnddebug & VDB_FOLLOW) 337 printf("vndopen(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 338#endif 339 sc = device_lookup_private(&vnd_cd, unit); 340 if (sc == NULL) { 341 sc = vnd_spawn(unit); 342 if (sc == NULL) 343 return ENOMEM; 344 345 /* compatibility, keep disklabel after close */ 346 sc->sc_flags = VNF_KLABEL; 347 } 348 349 if ((error = vndlock(sc)) != 0) 350 return error; 351 352 mutex_enter(&sc->sc_dkdev.dk_openlock); 353 354 if ((sc->sc_flags & VNF_CLEARING) != 0) { 355 error = ENXIO; 356 goto done; 357 } 358 359 lp = sc->sc_dkdev.dk_label; 360 361 part = DISKPART(dev); 362 pmask = (1 << part); 363 364 if (sc->sc_dkdev.dk_nwedges != 0 && part != RAW_PART) { 365 error = EBUSY; 366 goto done; 367 } 368 369 if (sc->sc_flags & VNF_INITED) { 370 if ((sc->sc_dkdev.dk_openmask & ~(1<<RAW_PART)) != 0) { 371 /* 372 * If any non-raw partition is open, but the disk 373 * has been invalidated, disallow further opens. 374 */ 375 if ((sc->sc_flags & VNF_VLABEL) == 0) { 376 error = EIO; 377 goto done; 378 } 379 } else { 380 /* 381 * Load the partition info if not already loaded. 382 */ 383 if ((sc->sc_flags & VNF_VLABEL) == 0) { 384 sc->sc_flags |= VNF_VLABEL; 385 vndgetdisklabel(dev, sc); 386 } 387 } 388 } 389 390 /* Check that the partitions exists. */ 391 if (part != RAW_PART) { 392 if (((sc->sc_flags & VNF_INITED) == 0) || 393 ((part >= lp->d_npartitions) || 394 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 395 error = ENXIO; 396 goto done; 397 } 398 } 399 400 /* Prevent our unit from being unconfigured while open. */ 401 switch (mode) { 402 case S_IFCHR: 403 sc->sc_dkdev.dk_copenmask |= pmask; 404 break; 405 406 case S_IFBLK: 407 sc->sc_dkdev.dk_bopenmask |= pmask; 408 break; 409 } 410 sc->sc_dkdev.dk_openmask = 411 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 412 413 done: 414 mutex_exit(&sc->sc_dkdev.dk_openlock); 415 vndunlock(sc); 416 return error; 417} 418 419static int 420vndclose(dev_t dev, int flags, int mode, struct lwp *l) 421{ 422 int unit = vndunit(dev); 423 struct vnd_softc *sc; 424 int error = 0, part; 425 426#ifdef DEBUG 427 if (vnddebug & VDB_FOLLOW) 428 printf("vndclose(0x%"PRIx64", 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 429#endif 430 sc = device_lookup_private(&vnd_cd, unit); 431 if (sc == NULL) 432 return ENXIO; 433 434 if ((error = vndlock(sc)) != 0) 435 return error; 436 437 mutex_enter(&sc->sc_dkdev.dk_openlock); 438 439 part = DISKPART(dev); 440 441 /* ...that much closer to allowing unconfiguration... */ 442 switch (mode) { 443 case S_IFCHR: 444 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 445 break; 446 447 case S_IFBLK: 448 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 449 break; 450 } 451 sc->sc_dkdev.dk_openmask = 452 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 453 454 /* are we last opener ? */ 455 if (sc->sc_dkdev.dk_openmask == 0) { 456 if ((sc->sc_flags & VNF_KLABEL) == 0) 457 sc->sc_flags &= ~VNF_VLABEL; 458 } 459 460 mutex_exit(&sc->sc_dkdev.dk_openlock); 461 462 vndunlock(sc); 463 464 if ((sc->sc_flags & VNF_INITED) == 0) { 465 if ((error = vnd_destroy(sc->sc_dev)) != 0) { 466 aprint_error_dev(sc->sc_dev, 467 "unable to detach instance\n"); 468 return error; 469 } 470 } 471 472 return 0; 473} 474 475/* 476 * Queue the request, and wakeup the kernel thread to handle it. 477 */ 478static void 479vndstrategy(struct buf *bp) 480{ 481 int unit = vndunit(bp->b_dev); 482 struct vnd_softc *vnd = 483 device_lookup_private(&vnd_cd, unit); 484 struct disklabel *lp; 485 daddr_t blkno; 486 int s = splbio(); 487 488 if (vnd == NULL) { 489 bp->b_error = ENXIO; 490 goto done; 491 } 492 lp = vnd->sc_dkdev.dk_label; 493 494 if ((vnd->sc_flags & VNF_INITED) == 0) { 495 bp->b_error = ENXIO; 496 goto done; 497 } 498 499 /* 500 * The transfer must be a whole number of blocks. 501 */ 502 if ((bp->b_bcount % lp->d_secsize) != 0) { 503 bp->b_error = EINVAL; 504 goto done; 505 } 506 507 /* 508 * check if we're read-only. 509 */ 510 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 511 bp->b_error = EACCES; 512 goto done; 513 } 514 515 /* If it's a nil transfer, wake up the top half now. */ 516 if (bp->b_bcount == 0) { 517 goto done; 518 } 519 520 /* 521 * Do bounds checking and adjust transfer. If there's an error, 522 * the bounds check will flag that for us. 523 */ 524 if (DISKPART(bp->b_dev) == RAW_PART) { 525 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 526 vnd->sc_size) <= 0) 527 goto done; 528 } else { 529 if (bounds_check_with_label(&vnd->sc_dkdev, 530 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 531 goto done; 532 } 533 534 /* 535 * Put the block number in terms of the logical blocksize 536 * of the "device". 537 */ 538 539 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 540 541 /* 542 * Translate the partition-relative block number to an absolute. 543 */ 544 if (DISKPART(bp->b_dev) != RAW_PART) { 545 struct partition *pp; 546 547 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 548 DISKPART(bp->b_dev)]; 549 blkno += pp->p_offset; 550 } 551 bp->b_rawblkno = blkno; 552 553#ifdef DEBUG 554 if (vnddebug & VDB_FOLLOW) 555 printf("vndstrategy(%p): unit %d\n", bp, unit); 556#endif 557 if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { 558 /* 559 * Limit the number of pending requests to not exhaust 560 * resources needed for I/O but always allow the worker 561 * thread to add requests, as a wedge on vnd queues 562 * requests with biodone() -> dkstart() -> vndstrategy(). 563 */ 564 if (curlwp != vnd->sc_kthread && curlwp != uvm.pagedaemon_lwp) { 565 while (vnd->sc_pending >= VND_MAXPENDING(vnd)) 566 tsleep(&vnd->sc_pending, PRIBIO, "vndpc", 0); 567 } 568 vnd->sc_pending++; 569 KASSERT(vnd->sc_pending > 0); 570 } 571 bufq_put(vnd->sc_tab, bp); 572 wakeup(&vnd->sc_tab); 573 splx(s); 574 return; 575 576done: 577 bp->b_resid = bp->b_bcount; 578 biodone(bp); 579 splx(s); 580} 581 582static bool 583vnode_has_strategy(struct vnd_softc *vnd) 584{ 585 return vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && 586 vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); 587} 588 589/* Verify that I/O requests cannot be smaller than the 590 * smallest I/O size supported by the backend. 591 */ 592static bool 593vnode_has_large_blocks(struct vnd_softc *vnd) 594{ 595 u_int32_t vnd_secsize, iosize; 596 597 iosize = vnd->sc_iosize; 598 vnd_secsize = vnd->sc_geom.vng_secsize; 599 600 return vnd_secsize % iosize != 0; 601} 602 603/* XXX this function needs a reliable check to detect 604 * sparse files. Otherwise, bmap/strategy may be used 605 * and fail on non-allocated blocks. VOP_READ/VOP_WRITE 606 * works on sparse files. 607 */ 608#if notyet 609static bool 610vnode_strategy_probe(struct vnd_softc *vnd) 611{ 612 int error; 613 daddr_t nbn; 614 615 if (!vnode_has_strategy(vnd)) 616 return false; 617 618 if (vnode_has_large_blocks(vnd)) 619 return false; 620 621 /* Convert the first logical block number to its 622 * physical block number. 623 */ 624 error = 0; 625 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 626 error = VOP_BMAP(vnd->sc_vp, 0, NULL, &nbn, NULL); 627 VOP_UNLOCK(vnd->sc_vp); 628 629 /* Test if that worked. */ 630 if (error == 0 && (long)nbn == -1) 631 return false; 632 633 return true; 634} 635#endif 636 637static void 638vndthread(void *arg) 639{ 640 struct vnd_softc *vnd = arg; 641 int s; 642 643 /* Determine whether we can *use* VOP_BMAP and VOP_STRATEGY to 644 * directly access the backing vnode. If we can, use these two 645 * operations to avoid messing with the local buffer cache. 646 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 647 * which are guaranteed to work with any file system. */ 648 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 649 ! vnode_has_strategy(vnd)) 650 vnd->sc_flags |= VNF_USE_VN_RDWR; 651 652 /* VOP_STRATEGY can only be used if the backing vnode allows 653 * to access blocks as small as defined by the vnd geometry. 654 */ 655 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0 && 656 vnode_has_large_blocks(vnd)) 657 vnd->sc_flags |= VNF_USE_VN_RDWR; 658 659#ifdef DEBUG 660 if (vnddebug & VDB_INIT) 661 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 662 (vnd->sc_flags & VNF_USE_VN_RDWR) == 0 ? 663 "using bmap/strategy operations" : 664 "using read/write operations"); 665#endif 666 667 s = splbio(); 668 vnd->sc_flags |= VNF_KTHREAD; 669 wakeup(&vnd->sc_kthread); 670 671 /* 672 * Dequeue requests and serve them depending on the available 673 * vnode operations. 674 */ 675 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 676 struct vndxfer *vnx; 677 struct buf *obp; 678 struct buf *bp; 679 680 obp = bufq_get(vnd->sc_tab); 681 if (obp == NULL) { 682 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 683 continue; 684 }; 685 if ((vnd->sc_flags & VNF_USE_VN_RDWR)) { 686 KASSERT(vnd->sc_pending > 0); 687 if (vnd->sc_pending-- == VND_MAXPENDING(vnd)) 688 wakeup(&vnd->sc_pending); 689 } 690 splx(s); 691#ifdef DEBUG 692 if (vnddebug & VDB_FOLLOW) 693 printf("vndthread(%p)\n", obp); 694#endif 695 696 if (vnd->sc_vp->v_mount == NULL) { 697 obp->b_error = ENXIO; 698 goto done; 699 } 700#ifdef VND_COMPRESSION 701 /* handle a compressed read */ 702 if ((obp->b_flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 703 off_t bn; 704 705 /* Convert to a byte offset within the file. */ 706 bn = obp->b_rawblkno * 707 vnd->sc_dkdev.dk_label->d_secsize; 708 709 compstrategy(obp, bn); 710 goto done; 711 } 712#endif /* VND_COMPRESSION */ 713 714 /* 715 * Allocate a header for this transfer and link it to the 716 * buffer 717 */ 718 s = splbio(); 719 vnx = VND_GETXFER(vnd); 720 splx(s); 721 vnx->vx_vnd = vnd; 722 723 s = splbio(); 724 while (vnd->sc_active >= vnd->sc_maxactive) { 725 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 726 } 727 vnd->sc_active++; 728 splx(s); 729 730 /* Instrumentation. */ 731 disk_busy(&vnd->sc_dkdev); 732 733 bp = &vnx->vx_buf; 734 buf_init(bp); 735 bp->b_flags = (obp->b_flags & (B_READ | B_PHYS | B_RAW)); 736 bp->b_oflags = obp->b_oflags; 737 bp->b_cflags = obp->b_cflags; 738 bp->b_iodone = vndiodone; 739 bp->b_private = obp; 740 bp->b_vp = vnd->sc_vp; 741 bp->b_objlock = bp->b_vp->v_interlock; 742 bp->b_data = obp->b_data; 743 bp->b_bcount = obp->b_bcount; 744 BIO_COPYPRIO(bp, obp); 745 746 /* Make sure the request succeeds while suspending this fs. */ 747 fstrans_start_lazy(vnd->sc_vp->v_mount); 748 749 /* Handle the request using the appropriate operations. */ 750 if ((vnd->sc_flags & VNF_USE_VN_RDWR) == 0) 751 handle_with_strategy(vnd, obp, bp); 752 else 753 handle_with_rdwr(vnd, obp, bp); 754 755 fstrans_done(vnd->sc_vp->v_mount); 756 757 s = splbio(); 758 continue; 759 760done: 761 biodone(obp); 762 s = splbio(); 763 } 764 765 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 766 wakeup(&vnd->sc_kthread); 767 splx(s); 768 kthread_exit(0); 769} 770 771/* 772 * Checks if the given vnode supports the requested operation. 773 * The operation is specified the offset returned by VOFFSET. 774 * 775 * XXX The test below used to determine this is quite fragile 776 * because it relies on the file system to use genfs to specify 777 * unimplemented operations. There might be another way to do 778 * it more cleanly. 779 */ 780static bool 781vnode_has_op(const struct vnode *vp, int opoffset) 782{ 783 int (*defaultp)(void *); 784 int (*opp)(void *); 785 786 defaultp = vp->v_op[VOFFSET(vop_default)]; 787 opp = vp->v_op[opoffset]; 788 789 return opp != defaultp && opp != genfs_eopnotsupp && 790 opp != genfs_badop && opp != genfs_nullop; 791} 792 793/* 794 * Handles the read/write request given in 'bp' using the vnode's VOP_READ 795 * and VOP_WRITE operations. 796 * 797 * 'obp' is a pointer to the original request fed to the vnd device. 798 */ 799static void 800handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 801{ 802 bool doread; 803 off_t offset; 804 size_t len, resid; 805 struct vnode *vp; 806 int npages; 807 808 doread = bp->b_flags & B_READ; 809 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 810 len = bp->b_bcount; 811 vp = vnd->sc_vp; 812 813#if defined(DEBUG) 814 if (vnddebug & VDB_IO) 815 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 816 ", secsize %d, offset %" PRIu64 817 ", bcount %d\n", 818 vp, doread ? "read" : "write", obp->b_rawblkno, 819 vnd->sc_dkdev.dk_label->d_secsize, offset, 820 bp->b_bcount); 821#endif 822 823 /* Issue the read or write operation. */ 824 bp->b_error = 825 vn_rdwr(doread ? UIO_READ : UIO_WRITE, 826 vp, bp->b_data, len, offset, UIO_SYSSPACE, 827 IO_ADV_ENCODE(POSIX_FADV_NOREUSE) | IO_DIRECT, 828 vnd->sc_cred, &resid, NULL); 829 bp->b_resid = resid; 830 831 /* 832 * Avoid caching too many pages, the vnd user 833 * is usually a filesystem and caches itself. 834 * We need some amount of caching to not hinder 835 * read-ahead and write-behind operations. 836 */ 837 npages = atomic_load_relaxed(&vp->v_uobj.uo_npages); 838 if (npages > VND_MAXPAGES(vnd)) { 839 rw_enter(vp->v_uobj.vmobjlock, RW_WRITER); 840 (void) VOP_PUTPAGES(vp, 0, 0, 841 PGO_ALLPAGES | PGO_CLEANIT | PGO_FREE); 842 } 843 844 /* We need to increase the number of outputs on the vnode if 845 * there was any write to it. */ 846 if (!doread) { 847 mutex_enter(vp->v_interlock); 848 vp->v_numoutput++; 849 mutex_exit(vp->v_interlock); 850 } 851 852 biodone(bp); 853} 854 855/* 856 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 857 * and VOP_STRATEGY operations. 858 * 859 * 'obp' is a pointer to the original request fed to the vnd device. 860 */ 861static void 862handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 863 struct buf *bp) 864{ 865 int bsize, error, flags, skipped; 866 size_t resid, sz; 867 off_t bn, offset; 868 struct vnode *vp; 869 struct buf *nbp = NULL; 870 871 flags = obp->b_flags; 872 873 874 /* convert to a byte offset within the file. */ 875 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 876 877 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 878 skipped = 0; 879 880 /* 881 * Break the request into bsize pieces and feed them 882 * sequentially using VOP_BMAP/VOP_STRATEGY. 883 * We do it this way to keep from flooding NFS servers if we 884 * are connected to an NFS file. This places the burden on 885 * the client rather than the server. 886 */ 887 error = 0; 888 bp->b_resid = bp->b_bcount; 889 for (offset = 0, resid = bp->b_resid; /* true */; 890 resid -= sz, offset += sz) { 891 daddr_t nbn; 892 int off, nra; 893 894 nra = 0; 895 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 896 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 897 VOP_UNLOCK(vnd->sc_vp); 898 899 if (error == 0 && (long)nbn == -1) 900 error = EIO; 901 902 /* 903 * If there was an error or a hole in the file...punt. 904 * Note that we may have to wait for any operations 905 * that we have already fired off before releasing 906 * the buffer. 907 * 908 * XXX we could deal with holes here but it would be 909 * a hassle (in the write case). 910 */ 911 if (error) { 912 skipped += resid; 913 break; 914 } 915 916#ifdef DEBUG 917 if (!dovndcluster) 918 nra = 0; 919#endif 920 921 off = bn % bsize; 922 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 923#ifdef DEBUG 924 if (vnddebug & VDB_IO) 925 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 926 " sz 0x%zx\n", vnd->sc_vp, vp, (long long)bn, 927 nbn, sz); 928#endif 929 930 nbp = getiobuf(vp, true); 931 nestiobuf_setup(bp, nbp, offset, sz); 932 nbp->b_blkno = nbn + btodb(off); 933 934#if 0 /* XXX #ifdef DEBUG */ 935 if (vnddebug & VDB_IO) 936 printf("vndstart(%ld): bp %p vp %p blkno " 937 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 938 (long) (vnd-vnd_softc), &nbp->vb_buf, 939 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 940 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 941 nbp->vb_buf.b_bcount); 942#endif 943 if (resid == sz) { 944 break; 945 } 946 VOP_STRATEGY(vp, nbp); 947 bn += sz; 948 } 949 if (!(flags & B_READ)) { 950 struct vnode *w_vp; 951 /* 952 * this is the last nested buf, account for 953 * the parent buf write too. 954 * This has to be done last, so that 955 * fsync won't wait for this write which 956 * has no chance to complete before all nested bufs 957 * have been queued. But it has to be done 958 * before the last VOP_STRATEGY() 959 * or the call to nestiobuf_done(). 960 */ 961 w_vp = bp->b_vp; 962 mutex_enter(w_vp->v_interlock); 963 w_vp->v_numoutput++; 964 mutex_exit(w_vp->v_interlock); 965 } 966 KASSERT(skipped != 0 || nbp != NULL); 967 if (skipped) 968 nestiobuf_done(bp, skipped, error); 969 else 970 VOP_STRATEGY(vp, nbp); 971} 972 973static void 974vndiodone(struct buf *bp) 975{ 976 struct vndxfer *vnx = VND_BUFTOXFER(bp); 977 struct vnd_softc *vnd = vnx->vx_vnd; 978 struct buf *obp = bp->b_private; 979 int s = splbio(); 980 981 KERNEL_LOCK(1, NULL); /* XXXSMP */ 982 KASSERT(&vnx->vx_buf == bp); 983 KASSERT(vnd->sc_active > 0); 984#ifdef DEBUG 985 if (vnddebug & VDB_IO) { 986 printf("vndiodone1: bp %p iodone: error %d\n", 987 bp, bp->b_error); 988 } 989#endif 990 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 991 (bp->b_flags & B_READ)); 992 vnd->sc_active--; 993 if (vnd->sc_active == 0) { 994 wakeup(&vnd->sc_tab); 995 } 996 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 997 splx(s); 998 obp->b_error = bp->b_error; 999 obp->b_resid = bp->b_resid; 1000 buf_destroy(bp); 1001 VND_PUTXFER(vnd, vnx); 1002 biodone(obp); 1003} 1004 1005/* ARGSUSED */ 1006static int 1007vndread(dev_t dev, struct uio *uio, int flags) 1008{ 1009 int unit = vndunit(dev); 1010 struct vnd_softc *sc; 1011 1012#ifdef DEBUG 1013 if (vnddebug & VDB_FOLLOW) 1014 printf("vndread(0x%"PRIx64", %p)\n", dev, uio); 1015#endif 1016 1017 sc = device_lookup_private(&vnd_cd, unit); 1018 if (sc == NULL) 1019 return ENXIO; 1020 1021 if ((sc->sc_flags & VNF_INITED) == 0) 1022 return ENXIO; 1023 1024 return physio(vndstrategy, NULL, dev, B_READ, minphys, uio); 1025} 1026 1027/* ARGSUSED */ 1028static int 1029vndwrite(dev_t dev, struct uio *uio, int flags) 1030{ 1031 int unit = vndunit(dev); 1032 struct vnd_softc *sc; 1033 1034#ifdef DEBUG 1035 if (vnddebug & VDB_FOLLOW) 1036 printf("vndwrite(0x%"PRIx64", %p)\n", dev, uio); 1037#endif 1038 1039 sc = device_lookup_private(&vnd_cd, unit); 1040 if (sc == NULL) 1041 return ENXIO; 1042 1043 if ((sc->sc_flags & VNF_INITED) == 0) 1044 return ENXIO; 1045 1046 return physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio); 1047} 1048 1049static int 1050vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 1051{ 1052 int error; 1053 struct vnd_softc *vnd; 1054 1055 if (*un == -1) 1056 *un = unit; 1057 if (*un < 0) 1058 return EINVAL; 1059 1060 vnd = device_lookup_private(&vnd_cd, *un); 1061 if (vnd == NULL) 1062 return -1; 1063 1064 if ((vnd->sc_flags & VNF_INITED) == 0) 1065 return -1; 1066 1067 vn_lock(vnd->sc_vp, LK_SHARED | LK_RETRY); 1068 error = VOP_GETATTR(vnd->sc_vp, va, l->l_cred); 1069 VOP_UNLOCK(vnd->sc_vp); 1070 return error; 1071} 1072 1073static int 1074vnddoclear(struct vnd_softc *vnd, int pmask, int minor, bool force) 1075{ 1076 int error; 1077 1078 if ((error = vndlock(vnd)) != 0) 1079 return error; 1080 1081 /* 1082 * Don't unconfigure if any other partitions are open 1083 * or if both the character and block flavors of this 1084 * partition are open. 1085 */ 1086 if (DK_BUSY(vnd, pmask) && !force) { 1087 vndunlock(vnd); 1088 return EBUSY; 1089 } 1090 1091 /* Delete all of our wedges */ 1092 dkwedge_delall(&vnd->sc_dkdev); 1093 1094 /* 1095 * XXX vndclear() might call vndclose() implicitly; 1096 * release lock to avoid recursion 1097 * 1098 * Set VNF_CLEARING to prevent vndopen() from 1099 * sneaking in after we vndunlock(). 1100 */ 1101 vnd->sc_flags |= VNF_CLEARING; 1102 vndunlock(vnd); 1103 vndclear(vnd, minor); 1104#ifdef DEBUG 1105 if (vnddebug & VDB_INIT) 1106 printf("%s: CLRed\n", __func__); 1107#endif 1108 1109 /* Destroy the xfer and buffer pools. */ 1110 pool_destroy(&vnd->sc_vxpool); 1111 1112 /* Detach the disk. */ 1113 disk_detach(&vnd->sc_dkdev); 1114 1115 return 0; 1116} 1117 1118static int 1119vndioctl_get(struct lwp *l, void *data, int unit, struct vattr *va) 1120{ 1121 int error; 1122 1123 KASSERT(l); 1124 1125 /* the first member is always int vnd_unit in all the versions */ 1126 if (*(int *)data >= vnd_cd.cd_ndevs) 1127 return ENXIO; 1128 1129 switch (error = vnd_cget(l, unit, (int *)data, va)) { 1130 case -1: 1131 /* unused is not an error */ 1132 memset(va, 0, sizeof(*va)); 1133 /*FALLTHROUGH*/ 1134 case 0: 1135 return 0; 1136 default: 1137 return error; 1138 } 1139} 1140 1141/* ARGSUSED */ 1142static int 1143vndioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1144{ 1145 bool force; 1146 int unit = vndunit(dev); 1147 struct vnd_softc *vnd; 1148 struct vnd_ioctl *vio; 1149 struct vattr vattr; 1150 struct pathbuf *pb; 1151 struct vnode *vp; 1152 int error, part, pmask; 1153 uint64_t geomsize; 1154 int fflags; 1155#ifdef __HAVE_OLD_DISKLABEL 1156 struct disklabel newlabel; 1157#endif 1158 1159#ifdef DEBUG 1160 if (vnddebug & VDB_FOLLOW) 1161 printf("vndioctl(0x%"PRIx64", 0x%lx, %p, 0x%x, %p): unit %d\n", 1162 dev, cmd, data, flag, l->l_proc, unit); 1163#endif 1164 /* Do the get's first; they don't need initialization or verification */ 1165 switch (cmd) { 1166 case VNDIOCGET: 1167 if ((error = vndioctl_get(l, data, unit, &vattr)) != 0) 1168 return error; 1169 1170 struct vnd_user *vnu = data; 1171 vnu->vnu_dev = vattr.va_fsid; 1172 vnu->vnu_ino = vattr.va_fileid; 1173 return 0; 1174 1175 default: 1176 /* First check for COMPAT_50 hook */ 1177 MODULE_HOOK_CALL(compat_vndioctl_50_hook, 1178 (cmd, l, data, unit, &vattr, vndioctl_get), 1179 enosys(), error); 1180 1181 /* 1182 * If not present, then COMPAT_30 hook also not 1183 * present, so just continue with checks for the 1184 * "write" commands 1185 */ 1186 if (error == ENOSYS) { 1187 error = 0; 1188 break; 1189 } 1190 1191 /* If not already handled, try the COMPAT_30 hook */ 1192 if (error == EPASSTHROUGH) 1193 MODULE_HOOK_CALL(compat_vndioctl_30_hook, 1194 (cmd, l, data, unit, &vattr, vndioctl_get), 1195 enosys(), error); 1196 1197 /* If no COMPAT_30 module, or not handled, check writes */ 1198 if (error == ENOSYS || error == EPASSTHROUGH) { 1199 error = 0; 1200 break; 1201 } 1202 return error; 1203 } 1204 1205 vnd = device_lookup_private(&vnd_cd, unit); 1206 if (vnd == NULL) 1207 return ENXIO; 1208 vio = (struct vnd_ioctl *)data; 1209 1210 /* Must be open for writes for these commands... */ 1211 switch (cmd) { 1212 case VNDIOCSET50: 1213 case VNDIOCCLR50: 1214 if (!compat_vndioctl_50_hook.hooked) 1215 return EINVAL; 1216 /* FALLTHROUGH */ 1217 case VNDIOCSET: 1218 case VNDIOCCLR: 1219 case DIOCSDINFO: 1220 case DIOCWDINFO: 1221#ifdef __HAVE_OLD_DISKLABEL 1222 case ODIOCSDINFO: 1223 case ODIOCWDINFO: 1224#endif 1225 case DIOCKLABEL: 1226 case DIOCWLABEL: 1227 case DIOCCACHESYNC: 1228 if ((flag & FWRITE) == 0) 1229 return EBADF; 1230 } 1231 1232 switch (cmd) { 1233 case VNDIOCSET50: 1234 case VNDIOCSET: 1235 /* Must not be initialized */ 1236 if (vnd->sc_flags & VNF_INITED) 1237 return EBUSY; 1238 break; 1239 default: 1240 /* Must be initialized */ 1241 if ((vnd->sc_flags & VNF_INITED) == 0) 1242 return ENXIO; 1243 break; 1244 } 1245 1246 error = disk_ioctl(&vnd->sc_dkdev, dev, cmd, data, flag, l); 1247 if (error != EPASSTHROUGH) 1248 return error; 1249 1250 switch (cmd) { 1251 case VNDIOCSET50: 1252 case VNDIOCSET: 1253 if ((error = vndlock(vnd)) != 0) 1254 return error; 1255 1256 fflags = FREAD; 1257 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 1258 fflags |= FWRITE; 1259 if ((vio->vnd_flags & VNDIOF_FILEIO) != 0) 1260 vnd->sc_flags |= VNF_USE_VN_RDWR; 1261 error = pathbuf_copyin(vio->vnd_file, &pb); 1262 if (error) { 1263 goto unlock_and_exit; 1264 } 1265 error = vn_open(NULL, pb, 0, fflags, 0, &vp, NULL, NULL); 1266 if (error != 0) { 1267 pathbuf_destroy(pb); 1268 goto unlock_and_exit; 1269 } 1270 KASSERT(l); 1271 error = VOP_GETATTR(vp, &vattr, l->l_cred); 1272 if (!error && vp->v_type != VREG) 1273 error = EOPNOTSUPP; 1274 if (!error && vattr.va_bytes < vattr.va_size) 1275 /* File is definitely sparse, use vn_rdwr() */ 1276 vnd->sc_flags |= VNF_USE_VN_RDWR; 1277 if (error) { 1278 VOP_UNLOCK(vp); 1279 goto close_and_exit; 1280 } 1281 1282 /* If using a compressed file, initialize its info */ 1283 /* (or abort with an error if kernel has no compression) */ 1284 if (vio->vnd_flags & VNDIOF_COMP) { 1285#ifdef VND_COMPRESSION 1286 struct vnd_comp_header *ch; 1287 int i; 1288 uint32_t comp_size; 1289 uint32_t comp_maxsize; 1290 1291 /* allocate space for compressed file header */ 1292 ch = malloc(sizeof(struct vnd_comp_header), 1293 M_TEMP, M_WAITOK); 1294 1295 /* read compressed file header */ 1296 error = vn_rdwr(UIO_READ, vp, (void *)ch, 1297 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1298 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1299 if (error) { 1300 free(ch, M_TEMP); 1301 VOP_UNLOCK(vp); 1302 goto close_and_exit; 1303 } 1304 1305 if (be32toh(ch->block_size) == 0 || 1306 be32toh(ch->num_blocks) > UINT32_MAX - 1) { 1307 free(ch, M_TEMP); 1308 VOP_UNLOCK(vp); 1309 goto close_and_exit; 1310 } 1311 1312 /* save some header info */ 1313 vnd->sc_comp_blksz = be32toh(ch->block_size); 1314 /* note last offset is the file byte size */ 1315 vnd->sc_comp_numoffs = be32toh(ch->num_blocks) + 1; 1316 free(ch, M_TEMP); 1317 if (!DK_DEV_BSIZE_OK(vnd->sc_comp_blksz)) { 1318 VOP_UNLOCK(vp); 1319 error = EINVAL; 1320 goto close_and_exit; 1321 } 1322 KASSERT(0 < vnd->sc_comp_blksz); 1323 KASSERT(0 < vnd->sc_comp_numoffs); 1324 /* 1325 * @#^@!$& gcc -Wtype-limits refuses to let me 1326 * write SIZE_MAX/sizeof(uint64_t) < numoffs, 1327 * because the range of the type on amd64 makes 1328 * the comparisons always false. 1329 */ 1330#if SIZE_MAX <= UINT32_MAX*(64/CHAR_BIT) 1331 if (SIZE_MAX/sizeof(uint64_t) < vnd->sc_comp_numoffs) { 1332 VOP_UNLOCK(vp); 1333 error = EINVAL; 1334 goto close_and_exit; 1335 } 1336#endif 1337 if ((vattr.va_size < sizeof(struct vnd_comp_header)) || 1338 (vattr.va_size - sizeof(struct vnd_comp_header) < 1339 sizeof(uint64_t)*vnd->sc_comp_numoffs) || 1340 (UQUAD_MAX/vnd->sc_comp_blksz < 1341 vnd->sc_comp_numoffs - 1)) { 1342 VOP_UNLOCK(vp); 1343 error = EINVAL; 1344 goto close_and_exit; 1345 } 1346 1347 /* set decompressed file size */ 1348 KASSERT(vnd->sc_comp_numoffs - 1 <= 1349 UQUAD_MAX/vnd->sc_comp_blksz); 1350 vattr.va_size = 1351 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1352 (u_quad_t)vnd->sc_comp_blksz; 1353 1354 /* allocate space for all the compressed offsets */ 1355 __CTASSERT(UINT32_MAX <= UQUAD_MAX/sizeof(uint64_t)); 1356 vnd->sc_comp_offsets = 1357 malloc(sizeof(uint64_t) * vnd->sc_comp_numoffs, 1358 M_DEVBUF, M_WAITOK); 1359 1360 /* read in the offsets */ 1361 error = vn_rdwr(UIO_READ, vp, 1362 (void *)vnd->sc_comp_offsets, 1363 sizeof(uint64_t) * vnd->sc_comp_numoffs, 1364 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1365 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1366 if (error) { 1367 VOP_UNLOCK(vp); 1368 goto close_and_exit; 1369 } 1370 /* 1371 * find largest block size (used for allocation limit). 1372 * Also convert offset to native byte order. 1373 */ 1374 comp_maxsize = 0; 1375 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1376 vnd->sc_comp_offsets[i] = 1377 be64toh(vnd->sc_comp_offsets[i]); 1378 comp_size = 1379 be64toh(vnd->sc_comp_offsets[i + 1]) 1380 - vnd->sc_comp_offsets[i]; 1381 if (comp_size > comp_maxsize) 1382 comp_maxsize = comp_size; 1383 } 1384 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1385 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs 1386 - 1]); 1387 1388 /* create compressed data buffer */ 1389 vnd->sc_comp_buff = malloc(comp_maxsize, 1390 M_DEVBUF, M_WAITOK); 1391 1392 /* create decompressed buffer */ 1393 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1394 M_DEVBUF, M_WAITOK); 1395 vnd->sc_comp_buffblk = -1; 1396 1397 /* Initialize decompress stream */ 1398 memset(&vnd->sc_comp_stream, 0, sizeof(z_stream)); 1399 vnd->sc_comp_stream.zalloc = vnd_alloc; 1400 vnd->sc_comp_stream.zfree = vnd_free; 1401 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1402 if (error) { 1403 if (vnd->sc_comp_stream.msg) 1404 printf("vnd%d: compressed file, %s\n", 1405 unit, vnd->sc_comp_stream.msg); 1406 VOP_UNLOCK(vp); 1407 error = EINVAL; 1408 goto close_and_exit; 1409 } 1410 1411 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1412#else /* !VND_COMPRESSION */ 1413 VOP_UNLOCK(vp); 1414 error = EOPNOTSUPP; 1415 goto close_and_exit; 1416#endif /* VND_COMPRESSION */ 1417 } 1418 1419 VOP_UNLOCK(vp); 1420 vnd->sc_vp = vp; 1421 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1422 1423 /* get smallest I/O size for underlying device, fall back to 1424 * fundamental I/O size of underlying filesystem 1425 */ 1426 error = bdev_ioctl(vattr.va_fsid, DIOCGSECTORSIZE, &vnd->sc_iosize, FKIOCTL, l); 1427 if (error) 1428 vnd->sc_iosize = vnd->sc_vp->v_mount->mnt_stat.f_frsize; 1429 1430 /* Default I/O size to DEV_BSIZE */ 1431 if (vnd->sc_iosize == 0) 1432 vnd->sc_iosize = DEV_BSIZE; 1433 1434 /* 1435 * Use pseudo-geometry specified. If none was provided, 1436 * use "standard" Adaptec fictitious geometry. 1437 */ 1438 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1439 1440 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1441 sizeof(vio->vnd_geom)); 1442 1443 /* 1444 * Sanity-check the sector size. 1445 */ 1446 if (!DK_DEV_BSIZE_OK(vnd->sc_geom.vng_secsize) || 1447 vnd->sc_geom.vng_ntracks == 0 || 1448 vnd->sc_geom.vng_nsectors == 0) { 1449 error = EINVAL; 1450 goto close_and_exit; 1451 } 1452 1453 /* 1454 * Compute missing cylinder count from size 1455 */ 1456 if (vnd->sc_geom.vng_ncylinders == 0) 1457 vnd->sc_geom.vng_ncylinders = vnd->sc_size / ( 1458 (vnd->sc_geom.vng_secsize / DEV_BSIZE) * 1459 vnd->sc_geom.vng_ntracks * 1460 vnd->sc_geom.vng_nsectors); 1461 1462 /* 1463 * Compute the size (in DEV_BSIZE blocks) specified 1464 * by the geometry. 1465 */ 1466 geomsize = (int64_t)vnd->sc_geom.vng_nsectors * 1467 vnd->sc_geom.vng_ntracks * 1468 vnd->sc_geom.vng_ncylinders * 1469 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1470 1471 /* 1472 * Sanity-check the size against the specified 1473 * geometry. 1474 */ 1475 if (vnd->sc_size < geomsize) { 1476 error = EINVAL; 1477 goto close_and_exit; 1478 } 1479 } else if (vnd->sc_size >= (32 * 64)) { 1480 /* 1481 * Size must be at least 2048 DEV_BSIZE blocks 1482 * (1M) in order to use this geometry. 1483 */ 1484 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1485 vnd->sc_geom.vng_nsectors = 32; 1486 vnd->sc_geom.vng_ntracks = 64; 1487 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1488 } else { 1489 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1490 vnd->sc_geom.vng_nsectors = 1; 1491 vnd->sc_geom.vng_ntracks = 1; 1492 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1493 } 1494 1495 vnd_set_geometry(vnd); 1496 1497 if (vio->vnd_flags & VNDIOF_READONLY) { 1498 vnd->sc_flags |= VNF_READONLY; 1499 } 1500 1501 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1502 goto close_and_exit; 1503 1504 vndthrottle(vnd, vnd->sc_vp); 1505 vio->vnd_osize = dbtob(vnd->sc_size); 1506 if (cmd != VNDIOCSET50) 1507 vio->vnd_size = dbtob(vnd->sc_size); 1508 vnd->sc_flags |= VNF_INITED; 1509 1510 /* create the kernel thread, wait for it to be up */ 1511 error = kthread_create(PRI_NONE, 0, NULL, vndthread, vnd, 1512 &vnd->sc_kthread, "%s", device_xname(vnd->sc_dev)); 1513 if (error) 1514 goto close_and_exit; 1515 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1516 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1517 } 1518#ifdef DEBUG 1519 if (vnddebug & VDB_INIT) 1520 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1521 vnd->sc_vp, (unsigned long) vnd->sc_size, 1522 vnd->sc_geom.vng_secsize, 1523 vnd->sc_geom.vng_nsectors, 1524 vnd->sc_geom.vng_ntracks, 1525 vnd->sc_geom.vng_ncylinders); 1526#endif 1527 1528 /* Attach the disk. */ 1529 disk_attach(&vnd->sc_dkdev); 1530 1531 /* Initialize the xfer and buffer pools. */ 1532 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1533 0, 0, "vndxpl", NULL, IPL_BIO); 1534 1535 vndunlock(vnd); 1536 1537 pathbuf_destroy(pb); 1538 1539 /* Discover wedges on this disk */ 1540 dkwedge_discover(&vnd->sc_dkdev); 1541 1542 break; 1543 1544close_and_exit: 1545 (void) vn_close(vp, fflags, l->l_cred); 1546 pathbuf_destroy(pb); 1547unlock_and_exit: 1548#ifdef VND_COMPRESSION 1549 /* free any allocated memory (for compressed file) */ 1550 if (vnd->sc_comp_offsets) { 1551 free(vnd->sc_comp_offsets, M_DEVBUF); 1552 vnd->sc_comp_offsets = NULL; 1553 } 1554 if (vnd->sc_comp_buff) { 1555 free(vnd->sc_comp_buff, M_DEVBUF); 1556 vnd->sc_comp_buff = NULL; 1557 } 1558 if (vnd->sc_comp_decombuf) { 1559 free(vnd->sc_comp_decombuf, M_DEVBUF); 1560 vnd->sc_comp_decombuf = NULL; 1561 } 1562#endif /* VND_COMPRESSION */ 1563 vndunlock(vnd); 1564 return error; 1565 1566 case VNDIOCCLR50: 1567 case VNDIOCCLR: 1568 part = DISKPART(dev); 1569 pmask = (1 << part); 1570 force = (vio->vnd_flags & VNDIOF_FORCE) != 0; 1571 1572 if ((error = vnddoclear(vnd, pmask, minor(dev), force)) != 0) 1573 return error; 1574 1575 break; 1576 1577 1578 case DIOCWDINFO: 1579 case DIOCSDINFO: 1580#ifdef __HAVE_OLD_DISKLABEL 1581 case ODIOCWDINFO: 1582 case ODIOCSDINFO: 1583#endif 1584 { 1585 struct disklabel *lp; 1586 1587 if ((error = vndlock(vnd)) != 0) 1588 return error; 1589 1590 vnd->sc_flags |= VNF_LABELLING; 1591 1592#ifdef __HAVE_OLD_DISKLABEL 1593 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1594 memset(&newlabel, 0, sizeof newlabel); 1595 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1596 lp = &newlabel; 1597 } else 1598#endif 1599 lp = (struct disklabel *)data; 1600 1601 error = setdisklabel(vnd->sc_dkdev.dk_label, 1602 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1603 if (error == 0) { 1604 if (cmd == DIOCWDINFO 1605#ifdef __HAVE_OLD_DISKLABEL 1606 || cmd == ODIOCWDINFO 1607#endif 1608 ) 1609 error = writedisklabel(VNDLABELDEV(dev), 1610 vndstrategy, vnd->sc_dkdev.dk_label, 1611 vnd->sc_dkdev.dk_cpulabel); 1612 } 1613 1614 vnd->sc_flags &= ~VNF_LABELLING; 1615 1616 vndunlock(vnd); 1617 1618 if (error) 1619 return error; 1620 break; 1621 } 1622 1623 case DIOCKLABEL: 1624 if (*(int *)data != 0) 1625 vnd->sc_flags |= VNF_KLABEL; 1626 else 1627 vnd->sc_flags &= ~VNF_KLABEL; 1628 break; 1629 1630 case DIOCWLABEL: 1631 if (*(int *)data != 0) 1632 vnd->sc_flags |= VNF_WLABEL; 1633 else 1634 vnd->sc_flags &= ~VNF_WLABEL; 1635 break; 1636 1637 case DIOCGDEFLABEL: 1638 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1639 break; 1640 1641#ifdef __HAVE_OLD_DISKLABEL 1642 case ODIOCGDEFLABEL: 1643 vndgetdefaultlabel(vnd, &newlabel); 1644 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1645 return ENOTTY; 1646 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1647 break; 1648#endif 1649 1650 case DIOCGSTRATEGY: 1651 { 1652 struct disk_strategy *dks = (void *)data; 1653 1654 /* No lock needed, never changed */ 1655 strlcpy(dks->dks_name, 1656 bufq_getstrategyname(vnd->sc_tab), 1657 sizeof(dks->dks_name)); 1658 dks->dks_paramlen = 0; 1659 break; 1660 } 1661 case DIOCGCACHE: 1662 { 1663 int *bits = (int *)data; 1664 *bits |= DKCACHE_READ | DKCACHE_WRITE; 1665 break; 1666 } 1667 case DIOCCACHESYNC: 1668 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1669 error = VOP_FSYNC(vnd->sc_vp, vnd->sc_cred, 1670 FSYNC_WAIT | FSYNC_DATAONLY | FSYNC_CACHE, 0, 0); 1671 VOP_UNLOCK(vnd->sc_vp); 1672 return error; 1673 1674 default: 1675 return ENOTTY; 1676 } 1677 1678 return 0; 1679} 1680 1681/* 1682 * Duplicate the current processes' credentials. Since we are called only 1683 * as the result of a SET ioctl and only root can do that, any future access 1684 * to this "disk" is essentially as root. Note that credentials may change 1685 * if some other uid can write directly to the mapped file (NFS). 1686 */ 1687static int 1688vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1689{ 1690 struct uio auio; 1691 struct iovec aiov; 1692 char *tmpbuf; 1693 int error; 1694 1695 vnd->sc_cred = kauth_cred_dup(cred); 1696 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1697 1698 /* XXX: Horrible kludge to establish credentials for NFS */ 1699 aiov.iov_base = tmpbuf; 1700 aiov.iov_len = uimin(DEV_BSIZE, dbtob(vnd->sc_size)); 1701 auio.uio_iov = &aiov; 1702 auio.uio_iovcnt = 1; 1703 auio.uio_offset = 0; 1704 auio.uio_rw = UIO_READ; 1705 auio.uio_resid = aiov.iov_len; 1706 UIO_SETUP_SYSSPACE(&auio); 1707 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1708 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1709 if (error == 0) { 1710 /* 1711 * Because vnd does all IO directly through the vnode 1712 * we need to flush (at least) the buffer from the above 1713 * VOP_READ from the buffer cache to prevent cache 1714 * incoherencies. Also, be careful to write dirty 1715 * buffers back to stable storage. 1716 */ 1717 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1718 curlwp, 0, 0); 1719 } 1720 VOP_UNLOCK(vnd->sc_vp); 1721 1722 free(tmpbuf, M_TEMP); 1723 return error; 1724} 1725 1726/* 1727 * Set maxactive based on FS type 1728 */ 1729static void 1730vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1731{ 1732 1733 if (vp->v_tag == VT_NFS) 1734 vnd->sc_maxactive = 2; 1735 else 1736 vnd->sc_maxactive = 8; 1737 1738 if (vnd->sc_maxactive < 1) 1739 vnd->sc_maxactive = 1; 1740} 1741 1742#if 0 1743static void 1744vndshutdown(void) 1745{ 1746 struct vnd_softc *vnd; 1747 1748 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1749 if (vnd->sc_flags & VNF_INITED) 1750 vndclear(vnd); 1751} 1752#endif 1753 1754static void 1755vndclear(struct vnd_softc *vnd, int myminor) 1756{ 1757 struct vnode *vp = vnd->sc_vp; 1758 int fflags = FREAD; 1759 int bmaj, cmaj, i, mn; 1760 int s; 1761 1762#ifdef DEBUG 1763 if (vnddebug & VDB_FOLLOW) 1764 printf("vndclear(%p): vp %p\n", vnd, vp); 1765#endif 1766 /* locate the major number */ 1767 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1768 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1769 1770 /* Nuke the vnodes for any open instances */ 1771 for (i = 0; i < MAXPARTITIONS; i++) { 1772 mn = DISKMINOR(device_unit(vnd->sc_dev), i); 1773 if (mn != myminor) { /* XXX avoid to kill own vnode */ 1774 vdevgone(bmaj, mn, mn, VBLK); 1775 vdevgone(cmaj, mn, mn, VCHR); 1776 } 1777 } 1778 1779 if ((vnd->sc_flags & VNF_READONLY) == 0) 1780 fflags |= FWRITE; 1781 1782 s = splbio(); 1783 bufq_drain(vnd->sc_tab); 1784 splx(s); 1785 1786 vnd->sc_flags |= VNF_VUNCONF; 1787 wakeup(&vnd->sc_tab); 1788 while (vnd->sc_flags & VNF_KTHREAD) 1789 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1790 1791#ifdef VND_COMPRESSION 1792 /* free the compressed file buffers */ 1793 if (vnd->sc_flags & VNF_COMP) { 1794 if (vnd->sc_comp_offsets) { 1795 free(vnd->sc_comp_offsets, M_DEVBUF); 1796 vnd->sc_comp_offsets = NULL; 1797 } 1798 if (vnd->sc_comp_buff) { 1799 free(vnd->sc_comp_buff, M_DEVBUF); 1800 vnd->sc_comp_buff = NULL; 1801 } 1802 if (vnd->sc_comp_decombuf) { 1803 free(vnd->sc_comp_decombuf, M_DEVBUF); 1804 vnd->sc_comp_decombuf = NULL; 1805 } 1806 } 1807#endif /* VND_COMPRESSION */ 1808 vnd->sc_flags &= 1809 ~(VNF_INITED | VNF_READONLY | VNF_KLABEL | VNF_VLABEL 1810 | VNF_VUNCONF | VNF_COMP | VNF_CLEARING); 1811 if (vp == NULL) 1812 panic("vndclear: null vp"); 1813 (void) vn_close(vp, fflags, vnd->sc_cred); 1814 kauth_cred_free(vnd->sc_cred); 1815 vnd->sc_vp = NULL; 1816 vnd->sc_cred = NULL; 1817 vnd->sc_size = 0; 1818} 1819 1820static int 1821vndsize(dev_t dev) 1822{ 1823 struct vnd_softc *sc; 1824 struct disklabel *lp; 1825 int part, unit, omask; 1826 int size; 1827 1828 unit = vndunit(dev); 1829 sc = device_lookup_private(&vnd_cd, unit); 1830 if (sc == NULL) 1831 return -1; 1832 1833 if ((sc->sc_flags & VNF_INITED) == 0) 1834 return -1; 1835 1836 part = DISKPART(dev); 1837 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1838 lp = sc->sc_dkdev.dk_label; 1839 1840 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1841 return -1; 1842 1843 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1844 size = -1; 1845 else 1846 size = lp->d_partitions[part].p_size * 1847 (lp->d_secsize / DEV_BSIZE); 1848 1849 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1850 return -1; 1851 1852 return size; 1853} 1854 1855static int 1856vnddump(dev_t dev, daddr_t blkno, void *va, 1857 size_t size) 1858{ 1859 1860 /* Not implemented. */ 1861 return ENXIO; 1862} 1863 1864static void 1865vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1866{ 1867 struct vndgeom *vng = &sc->sc_geom; 1868 struct partition *pp; 1869 unsigned spb; 1870 1871 memset(lp, 0, sizeof(*lp)); 1872 1873 spb = vng->vng_secsize / DEV_BSIZE; 1874 if (sc->sc_size / spb > UINT32_MAX) 1875 lp->d_secperunit = UINT32_MAX; 1876 else 1877 lp->d_secperunit = sc->sc_size / spb; 1878 lp->d_secsize = vng->vng_secsize; 1879 lp->d_nsectors = vng->vng_nsectors; 1880 lp->d_ntracks = vng->vng_ntracks; 1881 lp->d_ncylinders = vng->vng_ncylinders; 1882 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1883 1884 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1885 lp->d_type = DKTYPE_VND; 1886 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1887 lp->d_rpm = 3600; 1888 lp->d_interleave = 1; 1889 lp->d_flags = 0; 1890 1891 pp = &lp->d_partitions[RAW_PART]; 1892 pp->p_offset = 0; 1893 pp->p_size = lp->d_secperunit; 1894 pp->p_fstype = FS_UNUSED; 1895 lp->d_npartitions = RAW_PART + 1; 1896 1897 lp->d_magic = DISKMAGIC; 1898 lp->d_magic2 = DISKMAGIC; 1899 lp->d_checksum = dkcksum(lp); 1900} 1901 1902/* 1903 * Read the disklabel from a vnd. If one is not present, create a fake one. 1904 */ 1905static void 1906vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1907{ 1908 const char *errstring; 1909 struct disklabel *lp = sc->sc_dkdev.dk_label; 1910 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1911 int i; 1912 1913 memset(clp, 0, sizeof(*clp)); 1914 1915 vndgetdefaultlabel(sc, lp); 1916 1917 /* 1918 * Call the generic disklabel extraction routine. 1919 */ 1920 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1921 if (errstring) { 1922 /* 1923 * Lack of disklabel is common, but we print the warning 1924 * anyway, since it might contain other useful information. 1925 */ 1926 aprint_normal_dev(sc->sc_dev, "%s\n", errstring); 1927 1928 /* 1929 * For historical reasons, if there's no disklabel 1930 * present, all partitions must be FS_BSDFFS and 1931 * occupy the entire disk. 1932 */ 1933 for (i = 0; i < MAXPARTITIONS; i++) { 1934 /* 1935 * Don't wipe out port specific hack (such as 1936 * dos partition hack of i386 port). 1937 */ 1938 if (lp->d_partitions[i].p_size != 0) 1939 continue; 1940 1941 lp->d_partitions[i].p_size = lp->d_secperunit; 1942 lp->d_partitions[i].p_offset = 0; 1943 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1944 } 1945 1946 strncpy(lp->d_packname, "default label", 1947 sizeof(lp->d_packname)); 1948 1949 lp->d_npartitions = MAXPARTITIONS; 1950 lp->d_checksum = dkcksum(lp); 1951 } 1952} 1953 1954/* 1955 * Wait interruptibly for an exclusive lock. 1956 * 1957 * XXX 1958 * Several drivers do this; it should be abstracted and made MP-safe. 1959 */ 1960static int 1961vndlock(struct vnd_softc *sc) 1962{ 1963 int error; 1964 1965 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1966 sc->sc_flags |= VNF_WANTED; 1967 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1968 return error; 1969 } 1970 sc->sc_flags |= VNF_LOCKED; 1971 return 0; 1972} 1973 1974/* 1975 * Unlock and wake up any waiters. 1976 */ 1977static void 1978vndunlock(struct vnd_softc *sc) 1979{ 1980 1981 sc->sc_flags &= ~VNF_LOCKED; 1982 if ((sc->sc_flags & VNF_WANTED) != 0) { 1983 sc->sc_flags &= ~VNF_WANTED; 1984 wakeup(sc); 1985 } 1986} 1987 1988#ifdef VND_COMPRESSION 1989/* compressed file read */ 1990static void 1991compstrategy(struct buf *bp, off_t bn) 1992{ 1993 int error; 1994 int unit = vndunit(bp->b_dev); 1995 struct vnd_softc *vnd = 1996 device_lookup_private(&vnd_cd, unit); 1997 u_int32_t comp_block; 1998 struct uio auio; 1999 char *addr; 2000 int s; 2001 2002 /* set up constants for data move */ 2003 auio.uio_rw = UIO_READ; 2004 UIO_SETUP_SYSSPACE(&auio); 2005 2006 /* read, and transfer the data */ 2007 addr = bp->b_data; 2008 bp->b_resid = bp->b_bcount; 2009 s = splbio(); 2010 while (bp->b_resid > 0) { 2011 unsigned length; 2012 size_t length_in_buffer; 2013 u_int32_t offset_in_buffer; 2014 struct iovec aiov; 2015 2016 /* calculate the compressed block number */ 2017 comp_block = bn / (off_t)vnd->sc_comp_blksz; 2018 2019 /* check for good block number */ 2020 if (comp_block >= vnd->sc_comp_numoffs) { 2021 bp->b_error = EINVAL; 2022 splx(s); 2023 return; 2024 } 2025 2026 /* read in the compressed block, if not in buffer */ 2027 if (comp_block != vnd->sc_comp_buffblk) { 2028 length = vnd->sc_comp_offsets[comp_block + 1] - 2029 vnd->sc_comp_offsets[comp_block]; 2030 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 2031 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 2032 length, vnd->sc_comp_offsets[comp_block], 2033 UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, vnd->sc_cred, 2034 NULL, NULL); 2035 if (error) { 2036 bp->b_error = error; 2037 VOP_UNLOCK(vnd->sc_vp); 2038 splx(s); 2039 return; 2040 } 2041 /* uncompress the buffer */ 2042 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 2043 vnd->sc_comp_stream.avail_in = length; 2044 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 2045 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 2046 inflateReset(&vnd->sc_comp_stream); 2047 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 2048 if (error != Z_STREAM_END) { 2049 if (vnd->sc_comp_stream.msg) 2050 aprint_normal_dev(vnd->sc_dev, 2051 "compressed file, %s\n", 2052 vnd->sc_comp_stream.msg); 2053 bp->b_error = EBADMSG; 2054 VOP_UNLOCK(vnd->sc_vp); 2055 splx(s); 2056 return; 2057 } 2058 vnd->sc_comp_buffblk = comp_block; 2059 VOP_UNLOCK(vnd->sc_vp); 2060 } 2061 2062 /* transfer the usable uncompressed data */ 2063 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 2064 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 2065 if (length_in_buffer > bp->b_resid) 2066 length_in_buffer = bp->b_resid; 2067 auio.uio_iov = &aiov; 2068 auio.uio_iovcnt = 1; 2069 aiov.iov_base = addr; 2070 aiov.iov_len = length_in_buffer; 2071 auio.uio_resid = aiov.iov_len; 2072 auio.uio_offset = 0; 2073 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 2074 length_in_buffer, &auio); 2075 if (error) { 2076 bp->b_error = error; 2077 splx(s); 2078 return; 2079 } 2080 2081 bn += length_in_buffer; 2082 addr += length_in_buffer; 2083 bp->b_resid -= length_in_buffer; 2084 } 2085 splx(s); 2086} 2087 2088/* compression memory allocation routines */ 2089static void * 2090vnd_alloc(void *aux, u_int items, u_int siz) 2091{ 2092 return malloc(items * siz, M_TEMP, M_NOWAIT); 2093} 2094 2095static void 2096vnd_free(void *aux, void *ptr) 2097{ 2098 free(ptr, M_TEMP); 2099} 2100#endif /* VND_COMPRESSION */ 2101 2102static void 2103vnd_set_geometry(struct vnd_softc *vnd) 2104{ 2105 struct disk_geom *dg = &vnd->sc_dkdev.dk_geom; 2106 unsigned spb; 2107 2108 memset(dg, 0, sizeof(*dg)); 2109 2110 spb = vnd->sc_geom.vng_secsize / DEV_BSIZE; 2111 dg->dg_secperunit = vnd->sc_size / spb; 2112 dg->dg_secsize = vnd->sc_geom.vng_secsize; 2113 dg->dg_nsectors = vnd->sc_geom.vng_nsectors; 2114 dg->dg_ntracks = vnd->sc_geom.vng_ntracks; 2115 dg->dg_ncylinders = vnd->sc_geom.vng_ncylinders; 2116 2117#ifdef DEBUG 2118 if (vnddebug & VDB_LABEL) { 2119 printf("dg->dg_secperunit: %" PRId64 "\n", dg->dg_secperunit); 2120 printf("dg->dg_ncylinders: %u\n", dg->dg_ncylinders); 2121 } 2122#endif 2123 disk_set_info(vnd->sc_dev, &vnd->sc_dkdev, NULL); 2124} 2125 2126#ifdef VND_COMPRESSION 2127#define VND_DEPENDS "zlib" 2128#else 2129#define VND_DEPENDS NULL 2130#endif 2131 2132MODULE(MODULE_CLASS_DRIVER, vnd, VND_DEPENDS); 2133 2134#ifdef _MODULE 2135int vnd_bmajor = -1, vnd_cmajor = -1; 2136 2137CFDRIVER_DECL(vnd, DV_DISK, NULL); 2138#endif 2139 2140static int 2141vnd_modcmd(modcmd_t cmd, void *arg) 2142{ 2143 int error = 0; 2144 2145 switch (cmd) { 2146 case MODULE_CMD_INIT: 2147#ifdef _MODULE 2148 /* 2149 * Attach the {b,c}devsw's 2150 */ 2151 error = devsw_attach("vnd", &vnd_bdevsw, &vnd_bmajor, 2152 &vnd_cdevsw, &vnd_cmajor); 2153 if (error) { 2154#ifdef DIAGNOSTIC 2155 aprint_error("%s: unable to attach %s devsw, " 2156 "error %d", __func__, vnd_cd.cd_name, error); 2157#endif 2158 break; 2159 } 2160 2161 error = config_cfdriver_attach(&vnd_cd); 2162 if (error) { 2163 devsw_detach(&vnd_bdevsw, &vnd_cdevsw); 2164 break; 2165 } 2166 2167 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2168 if (error) { 2169 config_cfdriver_detach(&vnd_cd); 2170 devsw_detach(&vnd_bdevsw, &vnd_cdevsw); 2171#ifdef DIAGNOSTIC 2172 aprint_error("%s: unable to register cfattach for \n" 2173 "%s, error %d", __func__, vnd_cd.cd_name, error); 2174#endif 2175 break; 2176 } 2177#endif 2178 break; 2179 2180 case MODULE_CMD_FINI: 2181#ifdef _MODULE 2182 /* 2183 * Remove device from autoconf database 2184 */ 2185 error = config_cfattach_detach(vnd_cd.cd_name, &vnd_ca); 2186 if (error) { 2187#ifdef DIAGNOSTIC 2188 aprint_error("%s: failed to detach %s cfattach, " 2189 "error %d\n", __func__, vnd_cd.cd_name, error); 2190#endif 2191 break; 2192 } 2193 error = config_cfdriver_detach(&vnd_cd); 2194 if (error) { 2195 (void)config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 2196#ifdef DIAGNOSTIC 2197 aprint_error("%s: failed to detach %s cfdriver, " 2198 "error %d\n", __func__, vnd_cd.cd_name, error); 2199 break; 2200#endif 2201 } 2202 /* 2203 * Remove {b,c}devsw's 2204 */ 2205 devsw_detach(&vnd_bdevsw, &vnd_cdevsw); 2206 2207#endif 2208 break; 2209 2210 case MODULE_CMD_STAT: 2211 return ENOTTY; 2212 2213 default: 2214 return ENOTTY; 2215 } 2216 2217 return error; 2218} 2219