vnd.c revision 1.158
1/* $NetBSD: vnd.c,v 1.158 2006/11/10 14:31:14 martin Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76/* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116/* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: If the vnode supports the VOP_BMAP and VOP_STRATEGY operations, 123 * this uses them to avoid distorting the local buffer cache. If those 124 * block-level operations are not available, this falls back to the regular 125 * read and write calls. Using these may distort the cache in some cases 126 * but better have the driver working than preventing it to work on file 127 * systems where the block-level operations are not implemented for 128 * whatever reason. 129 * 130 * NOTE 2: There is a security issue involved with this driver. 131 * Once mounted all access to the contents of the "mapped" file via 132 * the special file is controlled by the permissions on the special 133 * file, the protection of the mapped file is ignored (effectively, 134 * by using root credentials in all transactions). 135 * 136 * NOTE 3: Doesn't interact with leases, should it? 137 */ 138 139#include <sys/cdefs.h> 140__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.158 2006/11/10 14:31:14 martin Exp $"); 141 142#if defined(_KERNEL_OPT) 143#include "fs_nfs.h" 144#include "opt_vnd.h" 145#endif 146 147#include <sys/param.h> 148#include <sys/systm.h> 149#include <sys/namei.h> 150#include <sys/proc.h> 151#include <sys/kthread.h> 152#include <sys/errno.h> 153#include <sys/buf.h> 154#include <sys/bufq.h> 155#include <sys/malloc.h> 156#include <sys/ioctl.h> 157#include <sys/disklabel.h> 158#include <sys/device.h> 159#include <sys/disk.h> 160#include <sys/stat.h> 161#include <sys/mount.h> 162#include <sys/vnode.h> 163#include <sys/file.h> 164#include <sys/uio.h> 165#include <sys/conf.h> 166#include <sys/kauth.h> 167 168#include <net/zlib.h> 169 170#include <miscfs/genfs/genfs.h> 171#include <miscfs/specfs/specdev.h> 172 173#include <dev/vndvar.h> 174 175#if defined(VNDDEBUG) && !defined(DEBUG) 176#define DEBUG 177#endif 178 179#ifdef DEBUG 180int dovndcluster = 1; 181#define VDB_FOLLOW 0x01 182#define VDB_INIT 0x02 183#define VDB_IO 0x04 184#define VDB_LABEL 0x08 185int vnddebug = 0x00; 186#endif 187 188#define vndunit(x) DISKUNIT(x) 189 190struct vndxfer { 191 struct buf vx_buf; 192 struct vnd_softc *vx_vnd; 193}; 194#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 195 196#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 197#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 198 199#define VNDLABELDEV(dev) \ 200 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 201 202/* called by main() at boot time (XXX: and the LKM driver) */ 203void vndattach(int); 204 205static void vndclear(struct vnd_softc *, int); 206static int vndsetcred(struct vnd_softc *, kauth_cred_t); 207static void vndthrottle(struct vnd_softc *, struct vnode *); 208static void vndiodone(struct buf *); 209#if 0 210static void vndshutdown(void); 211#endif 212 213static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 214static void vndgetdisklabel(dev_t, struct vnd_softc *); 215 216static int vndlock(struct vnd_softc *); 217static void vndunlock(struct vnd_softc *); 218#ifdef VND_COMPRESSION 219static void compstrategy(struct buf *, off_t); 220static void *vnd_alloc(void *, u_int, u_int); 221static void vnd_free(void *, void *); 222#endif /* VND_COMPRESSION */ 223 224static void vndthread(void *); 225static boolean_t vnode_has_op(const struct vnode *, int); 226static void handle_with_rdwr(struct vnd_softc *, const struct buf *, 227 struct buf *); 228static void handle_with_strategy(struct vnd_softc *, const struct buf *, 229 struct buf *); 230 231static dev_type_open(vndopen); 232static dev_type_close(vndclose); 233static dev_type_read(vndread); 234static dev_type_write(vndwrite); 235static dev_type_ioctl(vndioctl); 236static dev_type_strategy(vndstrategy); 237static dev_type_dump(vnddump); 238static dev_type_size(vndsize); 239 240const struct bdevsw vnd_bdevsw = { 241 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 242}; 243 244const struct cdevsw vnd_cdevsw = { 245 vndopen, vndclose, vndread, vndwrite, vndioctl, 246 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 247}; 248 249static int vnd_match(struct device *, struct cfdata *, void *); 250static void vnd_attach(struct device *, struct device *, void *); 251static int vnd_detach(struct device *, int); 252 253CFATTACH_DECL(vnd, sizeof(struct vnd_softc), 254 vnd_match, vnd_attach, vnd_detach, NULL); 255extern struct cfdriver vnd_cd; 256 257static struct vnd_softc *vnd_spawn(int); 258int vnd_destroy(struct device *); 259 260void 261vndattach(int num __unused) 262{ 263 int error; 264 265 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 266 if (error) 267 aprint_error("%s: unable to register cfattach\n", 268 vnd_cd.cd_name); 269} 270 271static int 272vnd_match(struct device *self __unused, struct cfdata *cfdata __unused, 273 void *aux __unused) 274{ 275 return 1; 276} 277 278static void 279vnd_attach(struct device *parent __unused, struct device *self, 280 void *aux __unused) 281{ 282 struct vnd_softc *sc = (struct vnd_softc *)self; 283 284 sc->sc_comp_offsets = NULL; 285 sc->sc_comp_buff = NULL; 286 sc->sc_comp_decombuf = NULL; 287 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 288 pseudo_disk_init(&sc->sc_dkdev); 289} 290 291static int 292vnd_detach(struct device *self, int flags __unused) 293{ 294 struct vnd_softc *sc = (struct vnd_softc *)self; 295 if (sc->sc_flags & VNF_INITED) 296 return EBUSY; 297 298 bufq_free(sc->sc_tab); 299 300 return 0; 301} 302 303static struct vnd_softc * 304vnd_spawn(int unit) 305{ 306 struct cfdata *cf; 307 308 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 309 cf->cf_name = vnd_cd.cd_name; 310 cf->cf_atname = vnd_cd.cd_name; 311 cf->cf_unit = unit; 312 cf->cf_fstate = FSTATE_STAR; 313 314 return (struct vnd_softc *)config_attach_pseudo(cf); 315} 316 317int 318vnd_destroy(struct device *dev) 319{ 320 int error; 321 struct cfdata *cf; 322 323 cf = device_cfdata(dev); 324 error = config_detach(dev, DETACH_QUIET); 325 if (error) 326 return error; 327 free(cf, M_DEVBUF); 328 return 0; 329} 330 331static int 332vndopen(dev_t dev, int flags __unused, int mode, struct lwp *l __unused) 333{ 334 int unit = vndunit(dev); 335 struct vnd_softc *sc; 336 int error = 0, part, pmask; 337 struct disklabel *lp; 338 339#ifdef DEBUG 340 if (vnddebug & VDB_FOLLOW) 341 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 342#endif 343 sc = device_lookup(&vnd_cd, unit); 344 if (sc == NULL) { 345 sc = vnd_spawn(unit); 346 if (sc == NULL) 347 return ENOMEM; 348 } 349 350 if ((error = vndlock(sc)) != 0) 351 return (error); 352 353 lp = sc->sc_dkdev.dk_label; 354 355 part = DISKPART(dev); 356 pmask = (1 << part); 357 358 /* 359 * If we're initialized, check to see if there are any other 360 * open partitions. If not, then it's safe to update the 361 * in-core disklabel. Only read the disklabel if it is 362 * not already valid. 363 */ 364 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 365 sc->sc_dkdev.dk_openmask == 0) 366 vndgetdisklabel(dev, sc); 367 368 /* Check that the partitions exists. */ 369 if (part != RAW_PART) { 370 if (((sc->sc_flags & VNF_INITED) == 0) || 371 ((part >= lp->d_npartitions) || 372 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 373 error = ENXIO; 374 goto done; 375 } 376 } 377 378 /* Prevent our unit from being unconfigured while open. */ 379 switch (mode) { 380 case S_IFCHR: 381 sc->sc_dkdev.dk_copenmask |= pmask; 382 break; 383 384 case S_IFBLK: 385 sc->sc_dkdev.dk_bopenmask |= pmask; 386 break; 387 } 388 sc->sc_dkdev.dk_openmask = 389 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 390 391 done: 392 vndunlock(sc); 393 return (error); 394} 395 396static int 397vndclose(dev_t dev, int flags __unused, int mode, struct lwp *l __unused) 398{ 399 int unit = vndunit(dev); 400 struct vnd_softc *sc; 401 int error = 0, part; 402 403#ifdef DEBUG 404 if (vnddebug & VDB_FOLLOW) 405 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 406#endif 407 sc = device_lookup(&vnd_cd, unit); 408 if (sc == NULL) 409 return ENXIO; 410 411 if ((error = vndlock(sc)) != 0) 412 return (error); 413 414 part = DISKPART(dev); 415 416 /* ...that much closer to allowing unconfiguration... */ 417 switch (mode) { 418 case S_IFCHR: 419 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 420 break; 421 422 case S_IFBLK: 423 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 424 break; 425 } 426 sc->sc_dkdev.dk_openmask = 427 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 428 429 vndunlock(sc); 430 431 if ((sc->sc_flags & VNF_INITED) == 0) { 432 if ((error = vnd_destroy((struct device *)sc)) != 0) { 433 aprint_error("%s: unable to detach instance\n", 434 sc->sc_dev.dv_xname); 435 return error; 436 } 437 } 438 439 return (0); 440} 441 442/* 443 * Queue the request, and wakeup the kernel thread to handle it. 444 */ 445static void 446vndstrategy(struct buf *bp) 447{ 448 int unit = vndunit(bp->b_dev); 449 struct vnd_softc *vnd = 450 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 451 struct disklabel *lp = vnd->sc_dkdev.dk_label; 452 daddr_t blkno; 453 int s = splbio(); 454 455 bp->b_resid = bp->b_bcount; 456 457 if ((vnd->sc_flags & VNF_INITED) == 0) { 458 bp->b_error = ENXIO; 459 bp->b_flags |= B_ERROR; 460 goto done; 461 } 462 463 /* 464 * The transfer must be a whole number of blocks. 465 */ 466 if ((bp->b_bcount % lp->d_secsize) != 0) { 467 bp->b_error = EINVAL; 468 bp->b_flags |= B_ERROR; 469 goto done; 470 } 471 472 /* 473 * check if we're read-only. 474 */ 475 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 476 bp->b_error = EACCES; 477 bp->b_flags |= B_ERROR; 478 goto done; 479 } 480 481 /* 482 * Do bounds checking and adjust transfer. If there's an error, 483 * the bounds check will flag that for us. 484 */ 485 if (DISKPART(bp->b_dev) == RAW_PART) { 486 if (bounds_check_with_mediasize(bp, DEV_BSIZE, 487 vnd->sc_size) <= 0) 488 goto done; 489 } else { 490 if (bounds_check_with_label(&vnd->sc_dkdev, 491 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 492 goto done; 493 } 494 495 /* If it's a nil transfer, wake up the top half now. */ 496 if (bp->b_bcount == 0) 497 goto done; 498 499 /* 500 * Put the block number in terms of the logical blocksize 501 * of the "device". 502 */ 503 504 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 505 506 /* 507 * Translate the partition-relative block number to an absolute. 508 */ 509 if (DISKPART(bp->b_dev) != RAW_PART) { 510 struct partition *pp; 511 512 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 513 DISKPART(bp->b_dev)]; 514 blkno += pp->p_offset; 515 } 516 bp->b_rawblkno = blkno; 517 518#ifdef DEBUG 519 if (vnddebug & VDB_FOLLOW) 520 printf("vndstrategy(%p): unit %d\n", bp, unit); 521#endif 522 BUFQ_PUT(vnd->sc_tab, bp); 523 wakeup(&vnd->sc_tab); 524 splx(s); 525 return; 526done: 527 biodone(bp); 528 splx(s); 529} 530 531static void 532vndthread(void *arg) 533{ 534 struct vnd_softc *vnd = arg; 535 boolean_t usestrategy; 536 int s; 537 538 /* Determine whether we can use VOP_BMAP and VOP_STRATEGY to 539 * directly access the backing vnode. If we can, use these two 540 * operations to avoid messing with the local buffer cache. 541 * Otherwise fall back to regular VOP_READ/VOP_WRITE operations 542 * which are guaranteed to work with any file system. */ 543 usestrategy = vnode_has_op(vnd->sc_vp, VOFFSET(vop_bmap)) && 544 vnode_has_op(vnd->sc_vp, VOFFSET(vop_strategy)); 545 546#ifdef DEBUG 547 if (vnddebug & VDB_INIT) 548 printf("vndthread: vp %p, %s\n", vnd->sc_vp, 549 usestrategy ? 550 "using bmap/strategy operations" : 551 "using read/write operations"); 552#endif 553 554 s = splbio(); 555 vnd->sc_flags |= VNF_KTHREAD; 556 wakeup(&vnd->sc_kthread); 557 558 /* 559 * Dequeue requests and serve them depending on the available 560 * vnode operations. 561 */ 562 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 563 struct vndxfer *vnx; 564 int flags; 565 struct buf *obp; 566 struct buf *bp; 567 568 obp = BUFQ_GET(vnd->sc_tab); 569 if (obp == NULL) { 570 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 571 continue; 572 }; 573 splx(s); 574 flags = obp->b_flags; 575#ifdef DEBUG 576 if (vnddebug & VDB_FOLLOW) 577 printf("vndthread(%p\n", obp); 578#endif 579 580 if (vnd->sc_vp->v_mount == NULL) { 581 obp->b_error = ENXIO; 582 obp->b_flags |= B_ERROR; 583 goto done; 584 } 585#ifdef VND_COMPRESSION 586 /* handle a compressed read */ 587 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 588 off_t bn; 589 590 /* Convert to a byte offset within the file. */ 591 bn = obp->b_rawblkno * 592 vnd->sc_dkdev.dk_label->d_secsize; 593 594 compstrategy(obp, bn); 595 goto done; 596 } 597#endif /* VND_COMPRESSION */ 598 599 /* 600 * Allocate a header for this transfer and link it to the 601 * buffer 602 */ 603 s = splbio(); 604 vnx = VND_GETXFER(vnd); 605 splx(s); 606 vnx->vx_vnd = vnd; 607 608 s = splbio(); 609 while (vnd->sc_active >= vnd->sc_maxactive) { 610 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 611 } 612 vnd->sc_active++; 613 splx(s); 614 615 /* Instrumentation. */ 616 disk_busy(&vnd->sc_dkdev); 617 618 bp = &vnx->vx_buf; 619 BUF_INIT(bp); 620 bp->b_flags = (obp->b_flags & B_READ) | B_CALL; 621 bp->b_iodone = vndiodone; 622 bp->b_private = obp; 623 bp->b_vp = vnd->sc_vp; 624 bp->b_data = obp->b_data; 625 bp->b_bcount = bp->b_resid = obp->b_bcount; 626 BIO_COPYPRIO(bp, obp); 627 628 /* Handle the request using the appropriate operations. */ 629 if (usestrategy) 630 handle_with_strategy(vnd, obp, bp); 631 else 632 handle_with_rdwr(vnd, obp, bp); 633 634 s = splbio(); 635 continue; 636 637done: 638 biodone(obp); 639 s = splbio(); 640 } 641 642 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 643 wakeup(&vnd->sc_kthread); 644 splx(s); 645 kthread_exit(0); 646} 647 648/* 649 * Checks if the given vnode supports the requested operation. 650 * The operation is specified the offset returned by VOFFSET. 651 * 652 * XXX The test below used to determine this is quite fragile 653 * because it relies on the file system to use genfs to specify 654 * unimplemented operations. There might be another way to do 655 * it more cleanly. 656 */ 657static boolean_t 658vnode_has_op(const struct vnode *vp, int opoffset) 659{ 660 int (*defaultp)(void *); 661 int (*opp)(void *); 662 663 defaultp = vp->v_op[VOFFSET(vop_default)]; 664 opp = vp->v_op[opoffset]; 665 666 return opp != defaultp && opp != genfs_eopnotsupp && 667 opp != genfs_badop && opp != genfs_nullop; 668} 669 670/* 671 * Handes the read/write request given in 'bp' using the vnode's VOP_READ 672 * and VOP_WRITE operations. 673 * 674 * 'obp' is a pointer to the original request fed to the vnd device. 675 */ 676static void 677handle_with_rdwr(struct vnd_softc *vnd, const struct buf *obp, struct buf *bp) 678{ 679 boolean_t doread; 680 off_t offset; 681 size_t resid; 682 struct vnode *vp; 683 684 doread = bp->b_flags & B_READ; 685 offset = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 686 vp = vnd->sc_vp; 687 688#if defined(DEBUG) 689 if (vnddebug & VDB_IO) 690 printf("vnd (rdwr): vp %p, %s, rawblkno 0x%" PRIx64 691 ", secsize %d, offset %" PRIu64 692 ", bcount %d, resid %d\n", 693 vp, doread ? "read" : "write", obp->b_rawblkno, 694 vnd->sc_dkdev.dk_label->d_secsize, offset, 695 bp->b_bcount, bp->b_resid); 696#endif 697 698 /* Issue the read or write operation. */ 699 bp->b_error = 700 vn_rdwr(doread ? UIO_READ : UIO_WRITE, 701 vp, bp->b_data, bp->b_bcount, offset, 702 UIO_SYSSPACE, 0, vnd->sc_cred, &resid, NULL); 703 bp->b_resid = resid; 704 if (bp->b_error != 0) 705 bp->b_flags |= B_ERROR; 706 else 707 KASSERT(!(bp->b_flags & B_ERROR)); 708 709 /* Flush the vnode if requested. */ 710 if (obp->b_flags & B_VFLUSH) { 711 if (vn_lock(vp, LK_EXCLUSIVE | LK_RETRY) == 0) { 712 VOP_FSYNC(vp, vnd->sc_cred, 713 FSYNC_WAIT | FSYNC_DATAONLY, 0, 0, NULL); 714 VOP_UNLOCK(vp, 0); 715 } 716 } 717 718 /* We need to increase the number of outputs on the vnode if 719 * there was any write to it (either due to a real write or due 720 * to a flush). */ 721 if (!doread || obp->b_flags & B_VFLUSH) 722 vp->v_numoutput++; 723 724 biodone(bp); 725} 726 727/* 728 * Handes the read/write request given in 'bp' using the vnode's VOP_BMAP 729 * and VOP_STRATEGY operations. 730 * 731 * 'obp' is a pointer to the original request fed to the vnd device. 732 */ 733static void 734handle_with_strategy(struct vnd_softc *vnd, const struct buf *obp, 735 struct buf *bp) 736{ 737 int bsize, error, flags, skipped; 738 size_t resid, sz; 739 off_t bn, offset; 740 struct mount *mp; 741 742 flags = obp->b_flags; 743 744 mp = NULL; 745 if (!(flags & B_READ)) { 746 int s; 747 748 s = splbio(); 749 V_INCR_NUMOUTPUT(bp->b_vp); 750 splx(s); 751 752 vn_start_write(vnd->sc_vp, &mp, V_WAIT); 753 KASSERT(mp != NULL); 754 } 755 756 /* convert to a byte offset within the file. */ 757 bn = obp->b_rawblkno * vnd->sc_dkdev.dk_label->d_secsize; 758 759 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 760 skipped = 0; 761 762 /* 763 * Break the request into bsize pieces and feed them 764 * sequentially using VOP_BMAP/VOP_STRATEGY. 765 * We do it this way to keep from flooding NFS servers if we 766 * are connected to an NFS file. This places the burden on 767 * the client rather than the server. 768 */ 769 error = 0; 770 for (offset = 0, resid = bp->b_resid; resid; 771 resid -= sz, offset += sz) { 772 struct buf *nbp; 773 struct vnode *vp; 774 daddr_t nbn; 775 int off, nra; 776 777 nra = 0; 778 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 779 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 780 VOP_UNLOCK(vnd->sc_vp, 0); 781 782 if (error == 0 && (long)nbn == -1) 783 error = EIO; 784 785 /* 786 * If there was an error or a hole in the file...punt. 787 * Note that we may have to wait for any operations 788 * that we have already fired off before releasing 789 * the buffer. 790 * 791 * XXX we could deal with holes here but it would be 792 * a hassle (in the write case). 793 */ 794 if (error) { 795 skipped += resid; 796 break; 797 } 798 799#ifdef DEBUG 800 if (!dovndcluster) 801 nra = 0; 802#endif 803 804 off = bn % bsize; 805 sz = MIN(((off_t)1 + nra) * bsize - off, resid); 806#ifdef DEBUG 807 if (vnddebug & VDB_IO) 808 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 809 " sz 0x%zx\n", 810 vnd->sc_vp, vp, (long long)bn, nbn, sz); 811#endif 812 813 nbp = getiobuf(); 814 nestiobuf_setup(bp, nbp, offset, sz); 815 nbp->b_blkno = nbn + btodb(off); 816 817#if 0 /* XXX #ifdef DEBUG */ 818 if (vnddebug & VDB_IO) 819 printf("vndstart(%ld): bp %p vp %p blkno " 820 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 821 (long) (vnd-vnd_softc), &nbp->vb_buf, 822 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 823 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 824 nbp->vb_buf.b_bcount); 825#endif 826 VOP_STRATEGY(vp, nbp); 827 bn += sz; 828 } 829 nestiobuf_done(bp, skipped, error); 830 831 if (!(flags & B_READ)) { 832 KASSERT(mp != NULL); 833 vn_finished_write(mp, 0); 834 } 835} 836 837static void 838vndiodone(struct buf *bp) 839{ 840 struct vndxfer *vnx = VND_BUFTOXFER(bp); 841 struct vnd_softc *vnd = vnx->vx_vnd; 842 struct buf *obp = bp->b_private; 843 844 KASSERT(&vnx->vx_buf == bp); 845 KASSERT(vnd->sc_active > 0); 846#ifdef DEBUG 847 if (vnddebug & VDB_IO) { 848 printf("vndiodone1: bp %p iodone: error %d\n", 849 bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0); 850 } 851#endif 852 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 853 (bp->b_flags & B_READ)); 854 vnd->sc_active--; 855 if (vnd->sc_active == 0) { 856 wakeup(&vnd->sc_tab); 857 } 858 obp->b_flags |= bp->b_flags & B_ERROR; 859 obp->b_error = bp->b_error; 860 obp->b_resid = bp->b_resid; 861 VND_PUTXFER(vnd, vnx); 862 biodone(obp); 863} 864 865/* ARGSUSED */ 866static int 867vndread(dev_t dev, struct uio *uio, int flags __unused) 868{ 869 int unit = vndunit(dev); 870 struct vnd_softc *sc; 871 872#ifdef DEBUG 873 if (vnddebug & VDB_FOLLOW) 874 printf("vndread(0x%x, %p)\n", dev, uio); 875#endif 876 877 sc = device_lookup(&vnd_cd, unit); 878 if (sc == NULL) 879 return ENXIO; 880 881 if ((sc->sc_flags & VNF_INITED) == 0) 882 return (ENXIO); 883 884 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 885} 886 887/* ARGSUSED */ 888static int 889vndwrite(dev_t dev, struct uio *uio, int flags __unused) 890{ 891 int unit = vndunit(dev); 892 struct vnd_softc *sc; 893 894#ifdef DEBUG 895 if (vnddebug & VDB_FOLLOW) 896 printf("vndwrite(0x%x, %p)\n", dev, uio); 897#endif 898 899 sc = device_lookup(&vnd_cd, unit); 900 if (sc == NULL) 901 return ENXIO; 902 903 if ((sc->sc_flags & VNF_INITED) == 0) 904 return (ENXIO); 905 906 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 907} 908 909static int 910vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 911{ 912 struct vnd_softc *vnd; 913 914 if (*un == -1) 915 *un = unit; 916 if (*un < 0) 917 return EINVAL; 918 919 vnd = device_lookup(&vnd_cd, *un); 920 if (vnd == NULL) 921 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1; 922 923 if ((vnd->sc_flags & VNF_INITED) == 0) 924 return -1; 925 926 return VOP_GETATTR(vnd->sc_vp, va, l->l_cred, l); 927} 928 929/* ARGSUSED */ 930static int 931vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 932{ 933 int unit = vndunit(dev); 934 struct vnd_softc *vnd; 935 struct vnd_ioctl *vio; 936 struct vattr vattr; 937 struct nameidata nd; 938 int error, part, pmask; 939 size_t geomsize; 940 int fflags; 941#ifdef __HAVE_OLD_DISKLABEL 942 struct disklabel newlabel; 943#endif 944 945#ifdef DEBUG 946 if (vnddebug & VDB_FOLLOW) 947 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 948 dev, cmd, data, flag, l->l_proc, unit); 949#endif 950 vnd = device_lookup(&vnd_cd, unit); 951 if (vnd == NULL && 952#ifdef COMPAT_30 953 cmd != VNDIOOCGET && 954#endif 955 cmd != VNDIOCGET) 956 return ENXIO; 957 vio = (struct vnd_ioctl *)data; 958 959 /* Must be open for writes for these commands... */ 960 switch (cmd) { 961 case VNDIOCSET: 962 case VNDIOCCLR: 963 case DIOCSDINFO: 964 case DIOCWDINFO: 965#ifdef __HAVE_OLD_DISKLABEL 966 case ODIOCSDINFO: 967 case ODIOCWDINFO: 968#endif 969 case DIOCKLABEL: 970 case DIOCWLABEL: 971 if ((flag & FWRITE) == 0) 972 return (EBADF); 973 } 974 975 /* Must be initialized for these... */ 976 switch (cmd) { 977 case VNDIOCCLR: 978 case DIOCGDINFO: 979 case DIOCSDINFO: 980 case DIOCWDINFO: 981 case DIOCGPART: 982 case DIOCKLABEL: 983 case DIOCWLABEL: 984 case DIOCGDEFLABEL: 985#ifdef __HAVE_OLD_DISKLABEL 986 case ODIOCGDINFO: 987 case ODIOCSDINFO: 988 case ODIOCWDINFO: 989 case ODIOCGDEFLABEL: 990#endif 991 if ((vnd->sc_flags & VNF_INITED) == 0) 992 return (ENXIO); 993 } 994 995 switch (cmd) { 996 case VNDIOCSET: 997 if (vnd->sc_flags & VNF_INITED) 998 return (EBUSY); 999 1000 if ((error = vndlock(vnd)) != 0) 1001 return (error); 1002 1003 fflags = FREAD; 1004 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 1005 fflags |= FWRITE; 1006 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l); 1007 if ((error = vn_open(&nd, fflags, 0)) != 0) 1008 goto unlock_and_exit; 1009 KASSERT(l); 1010 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_cred, l); 1011 if (!error && nd.ni_vp->v_type != VREG) 1012 error = EOPNOTSUPP; 1013 if (error) { 1014 VOP_UNLOCK(nd.ni_vp, 0); 1015 goto close_and_exit; 1016 } 1017 1018 /* If using a compressed file, initialize its info */ 1019 /* (or abort with an error if kernel has no compression) */ 1020 if (vio->vnd_flags & VNF_COMP) { 1021#ifdef VND_COMPRESSION 1022 struct vnd_comp_header *ch; 1023 int i; 1024 u_int32_t comp_size; 1025 u_int32_t comp_maxsize; 1026 1027 /* allocate space for compresed file header */ 1028 ch = malloc(sizeof(struct vnd_comp_header), 1029 M_TEMP, M_WAITOK); 1030 1031 /* read compressed file header */ 1032 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch, 1033 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 1034 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1035 if(error) { 1036 free(ch, M_TEMP); 1037 VOP_UNLOCK(nd.ni_vp, 0); 1038 goto close_and_exit; 1039 } 1040 1041 /* save some header info */ 1042 vnd->sc_comp_blksz = ntohl(ch->block_size); 1043 /* note last offset is the file byte size */ 1044 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 1045 free(ch, M_TEMP); 1046 if (vnd->sc_comp_blksz == 0 || 1047 vnd->sc_comp_blksz % DEV_BSIZE !=0) { 1048 VOP_UNLOCK(nd.ni_vp, 0); 1049 error = EINVAL; 1050 goto close_and_exit; 1051 } 1052 if(sizeof(struct vnd_comp_header) + 1053 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 1054 vattr.va_size) { 1055 VOP_UNLOCK(nd.ni_vp, 0); 1056 error = EINVAL; 1057 goto close_and_exit; 1058 } 1059 1060 /* set decompressed file size */ 1061 vattr.va_size = 1062 ((u_quad_t)vnd->sc_comp_numoffs - 1) * 1063 (u_quad_t)vnd->sc_comp_blksz; 1064 1065 /* allocate space for all the compressed offsets */ 1066 vnd->sc_comp_offsets = 1067 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1068 M_DEVBUF, M_WAITOK); 1069 1070 /* read in the offsets */ 1071 error = vn_rdwr(UIO_READ, nd.ni_vp, 1072 (caddr_t)vnd->sc_comp_offsets, 1073 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 1074 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 1075 IO_UNIT|IO_NODELOCKED, l->l_cred, NULL, NULL); 1076 if(error) { 1077 VOP_UNLOCK(nd.ni_vp, 0); 1078 goto close_and_exit; 1079 } 1080 /* 1081 * find largest block size (used for allocation limit). 1082 * Also convert offset to native byte order. 1083 */ 1084 comp_maxsize = 0; 1085 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 1086 vnd->sc_comp_offsets[i] = 1087 be64toh(vnd->sc_comp_offsets[i]); 1088 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 1089 - vnd->sc_comp_offsets[i]; 1090 if (comp_size > comp_maxsize) 1091 comp_maxsize = comp_size; 1092 } 1093 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 1094 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 1095 1096 /* create compressed data buffer */ 1097 vnd->sc_comp_buff = malloc(comp_maxsize, 1098 M_DEVBUF, M_WAITOK); 1099 1100 /* create decompressed buffer */ 1101 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 1102 M_DEVBUF, M_WAITOK); 1103 vnd->sc_comp_buffblk = -1; 1104 1105 /* Initialize decompress stream */ 1106 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 1107 vnd->sc_comp_stream.zalloc = vnd_alloc; 1108 vnd->sc_comp_stream.zfree = vnd_free; 1109 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1110 if(error) { 1111 if(vnd->sc_comp_stream.msg) 1112 printf("vnd%d: compressed file, %s\n", 1113 unit, vnd->sc_comp_stream.msg); 1114 VOP_UNLOCK(nd.ni_vp, 0); 1115 error = EINVAL; 1116 goto close_and_exit; 1117 } 1118 1119 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1120#else /* !VND_COMPRESSION */ 1121 VOP_UNLOCK(nd.ni_vp, 0); 1122 error = EOPNOTSUPP; 1123 goto close_and_exit; 1124#endif /* VND_COMPRESSION */ 1125 } 1126 1127 VOP_UNLOCK(nd.ni_vp, 0); 1128 vnd->sc_vp = nd.ni_vp; 1129 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1130 1131 /* 1132 * Use pseudo-geometry specified. If none was provided, 1133 * use "standard" Adaptec fictitious geometry. 1134 */ 1135 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1136 1137 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1138 sizeof(vio->vnd_geom)); 1139 1140 /* 1141 * Sanity-check the sector size. 1142 * XXX Don't allow secsize < DEV_BSIZE. Should 1143 * XXX we? 1144 */ 1145 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1146 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1147 vnd->sc_geom.vng_ncylinders == 0 || 1148 (vnd->sc_geom.vng_ntracks * 1149 vnd->sc_geom.vng_nsectors) == 0) { 1150 error = EINVAL; 1151 goto close_and_exit; 1152 } 1153 1154 /* 1155 * Compute the size (in DEV_BSIZE blocks) specified 1156 * by the geometry. 1157 */ 1158 geomsize = (vnd->sc_geom.vng_nsectors * 1159 vnd->sc_geom.vng_ntracks * 1160 vnd->sc_geom.vng_ncylinders) * 1161 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1162 1163 /* 1164 * Sanity-check the size against the specified 1165 * geometry. 1166 */ 1167 if (vnd->sc_size < geomsize) { 1168 error = EINVAL; 1169 goto close_and_exit; 1170 } 1171 } else if (vnd->sc_size >= (32 * 64)) { 1172 /* 1173 * Size must be at least 2048 DEV_BSIZE blocks 1174 * (1M) in order to use this geometry. 1175 */ 1176 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1177 vnd->sc_geom.vng_nsectors = 32; 1178 vnd->sc_geom.vng_ntracks = 64; 1179 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1180 } else { 1181 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1182 vnd->sc_geom.vng_nsectors = 1; 1183 vnd->sc_geom.vng_ntracks = 1; 1184 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1185 } 1186 1187 if (vio->vnd_flags & VNDIOF_READONLY) { 1188 vnd->sc_flags |= VNF_READONLY; 1189 } 1190 1191 if ((error = vndsetcred(vnd, l->l_cred)) != 0) 1192 goto close_and_exit; 1193 1194 vndthrottle(vnd, vnd->sc_vp); 1195 vio->vnd_size = dbtob(vnd->sc_size); 1196 vnd->sc_flags |= VNF_INITED; 1197 1198 /* create the kernel thread, wait for it to be up */ 1199 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread, 1200 vnd->sc_dev.dv_xname); 1201 if (error) 1202 goto close_and_exit; 1203 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1204 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1205 } 1206#ifdef DEBUG 1207 if (vnddebug & VDB_INIT) 1208 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1209 vnd->sc_vp, (unsigned long) vnd->sc_size, 1210 vnd->sc_geom.vng_secsize, 1211 vnd->sc_geom.vng_nsectors, 1212 vnd->sc_geom.vng_ntracks, 1213 vnd->sc_geom.vng_ncylinders); 1214#endif 1215 1216 /* Attach the disk. */ 1217 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname; 1218 pseudo_disk_attach(&vnd->sc_dkdev); 1219 1220 /* Initialize the xfer and buffer pools. */ 1221 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1222 0, 0, "vndxpl", NULL); 1223 1224 /* Try and read the disklabel. */ 1225 vndgetdisklabel(dev, vnd); 1226 1227 vndunlock(vnd); 1228 1229 break; 1230 1231close_and_exit: 1232 (void) vn_close(nd.ni_vp, fflags, l->l_cred, l); 1233unlock_and_exit: 1234#ifdef VND_COMPRESSION 1235 /* free any allocated memory (for compressed file) */ 1236 if(vnd->sc_comp_offsets) { 1237 free(vnd->sc_comp_offsets, M_DEVBUF); 1238 vnd->sc_comp_offsets = NULL; 1239 } 1240 if(vnd->sc_comp_buff) { 1241 free(vnd->sc_comp_buff, M_DEVBUF); 1242 vnd->sc_comp_buff = NULL; 1243 } 1244 if(vnd->sc_comp_decombuf) { 1245 free(vnd->sc_comp_decombuf, M_DEVBUF); 1246 vnd->sc_comp_decombuf = NULL; 1247 } 1248#endif /* VND_COMPRESSION */ 1249 vndunlock(vnd); 1250 return (error); 1251 1252 case VNDIOCCLR: 1253 if ((error = vndlock(vnd)) != 0) 1254 return (error); 1255 1256 /* 1257 * Don't unconfigure if any other partitions are open 1258 * or if both the character and block flavors of this 1259 * partition are open. 1260 */ 1261 part = DISKPART(dev); 1262 pmask = (1 << part); 1263 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1264 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1265 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1266 !(vio->vnd_flags & VNDIOF_FORCE)) { 1267 vndunlock(vnd); 1268 return (EBUSY); 1269 } 1270 1271 /* 1272 * XXX vndclear() might call vndclose() implicitely; 1273 * release lock to avoid recursion 1274 */ 1275 vndunlock(vnd); 1276 vndclear(vnd, minor(dev)); 1277#ifdef DEBUG 1278 if (vnddebug & VDB_INIT) 1279 printf("vndioctl: CLRed\n"); 1280#endif 1281 1282 /* Destroy the xfer and buffer pools. */ 1283 pool_destroy(&vnd->sc_vxpool); 1284 1285 /* Detatch the disk. */ 1286 pseudo_disk_detach(&vnd->sc_dkdev); 1287 break; 1288 1289#ifdef COMPAT_30 1290 case VNDIOOCGET: { 1291 struct vnd_ouser *vnu; 1292 struct vattr va; 1293 vnu = (struct vnd_ouser *)data; 1294 KASSERT(l); 1295 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1296 case 0: 1297 vnu->vnu_dev = va.va_fsid; 1298 vnu->vnu_ino = va.va_fileid; 1299 break; 1300 case -1: 1301 /* unused is not an error */ 1302 vnu->vnu_dev = 0; 1303 vnu->vnu_ino = 0; 1304 break; 1305 default: 1306 return error; 1307 } 1308 break; 1309 } 1310#endif 1311 case VNDIOCGET: { 1312 struct vnd_user *vnu; 1313 struct vattr va; 1314 vnu = (struct vnd_user *)data; 1315 KASSERT(l); 1316 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1317 case 0: 1318 vnu->vnu_dev = va.va_fsid; 1319 vnu->vnu_ino = va.va_fileid; 1320 break; 1321 case -1: 1322 /* unused is not an error */ 1323 vnu->vnu_dev = 0; 1324 vnu->vnu_ino = 0; 1325 break; 1326 default: 1327 return error; 1328 } 1329 break; 1330 } 1331 1332 case DIOCGDINFO: 1333 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1334 break; 1335 1336#ifdef __HAVE_OLD_DISKLABEL 1337 case ODIOCGDINFO: 1338 newlabel = *(vnd->sc_dkdev.dk_label); 1339 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1340 return ENOTTY; 1341 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1342 break; 1343#endif 1344 1345 case DIOCGPART: 1346 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1347 ((struct partinfo *)data)->part = 1348 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1349 break; 1350 1351 case DIOCWDINFO: 1352 case DIOCSDINFO: 1353#ifdef __HAVE_OLD_DISKLABEL 1354 case ODIOCWDINFO: 1355 case ODIOCSDINFO: 1356#endif 1357 { 1358 struct disklabel *lp; 1359 1360 if ((error = vndlock(vnd)) != 0) 1361 return (error); 1362 1363 vnd->sc_flags |= VNF_LABELLING; 1364 1365#ifdef __HAVE_OLD_DISKLABEL 1366 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1367 memset(&newlabel, 0, sizeof newlabel); 1368 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1369 lp = &newlabel; 1370 } else 1371#endif 1372 lp = (struct disklabel *)data; 1373 1374 error = setdisklabel(vnd->sc_dkdev.dk_label, 1375 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1376 if (error == 0) { 1377 if (cmd == DIOCWDINFO 1378#ifdef __HAVE_OLD_DISKLABEL 1379 || cmd == ODIOCWDINFO 1380#endif 1381 ) 1382 error = writedisklabel(VNDLABELDEV(dev), 1383 vndstrategy, vnd->sc_dkdev.dk_label, 1384 vnd->sc_dkdev.dk_cpulabel); 1385 } 1386 1387 vnd->sc_flags &= ~VNF_LABELLING; 1388 1389 vndunlock(vnd); 1390 1391 if (error) 1392 return (error); 1393 break; 1394 } 1395 1396 case DIOCKLABEL: 1397 if (*(int *)data != 0) 1398 vnd->sc_flags |= VNF_KLABEL; 1399 else 1400 vnd->sc_flags &= ~VNF_KLABEL; 1401 break; 1402 1403 case DIOCWLABEL: 1404 if (*(int *)data != 0) 1405 vnd->sc_flags |= VNF_WLABEL; 1406 else 1407 vnd->sc_flags &= ~VNF_WLABEL; 1408 break; 1409 1410 case DIOCGDEFLABEL: 1411 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1412 break; 1413 1414#ifdef __HAVE_OLD_DISKLABEL 1415 case ODIOCGDEFLABEL: 1416 vndgetdefaultlabel(vnd, &newlabel); 1417 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1418 return ENOTTY; 1419 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1420 break; 1421#endif 1422 1423 default: 1424 return (ENOTTY); 1425 } 1426 1427 return (0); 1428} 1429 1430/* 1431 * Duplicate the current processes' credentials. Since we are called only 1432 * as the result of a SET ioctl and only root can do that, any future access 1433 * to this "disk" is essentially as root. Note that credentials may change 1434 * if some other uid can write directly to the mapped file (NFS). 1435 */ 1436static int 1437vndsetcred(struct vnd_softc *vnd, kauth_cred_t cred) 1438{ 1439 struct uio auio; 1440 struct iovec aiov; 1441 char *tmpbuf; 1442 int error; 1443 1444 vnd->sc_cred = kauth_cred_dup(cred); 1445 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1446 1447 /* XXX: Horrible kludge to establish credentials for NFS */ 1448 aiov.iov_base = tmpbuf; 1449 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1450 auio.uio_iov = &aiov; 1451 auio.uio_iovcnt = 1; 1452 auio.uio_offset = 0; 1453 auio.uio_rw = UIO_READ; 1454 auio.uio_resid = aiov.iov_len; 1455 UIO_SETUP_SYSSPACE(&auio); 1456 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1457 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1458 if (error == 0) { 1459 /* 1460 * Because vnd does all IO directly through the vnode 1461 * we need to flush (at least) the buffer from the above 1462 * VOP_READ from the buffer cache to prevent cache 1463 * incoherencies. Also, be careful to write dirty 1464 * buffers back to stable storage. 1465 */ 1466 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1467 curlwp, 0, 0); 1468 } 1469 VOP_UNLOCK(vnd->sc_vp, 0); 1470 1471 free(tmpbuf, M_TEMP); 1472 return (error); 1473} 1474 1475/* 1476 * Set maxactive based on FS type 1477 */ 1478static void 1479vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1480{ 1481#ifdef NFS 1482 extern int (**nfsv2_vnodeop_p)(void *); 1483 1484 if (vp->v_op == nfsv2_vnodeop_p) 1485 vnd->sc_maxactive = 2; 1486 else 1487#endif 1488 vnd->sc_maxactive = 8; 1489 1490 if (vnd->sc_maxactive < 1) 1491 vnd->sc_maxactive = 1; 1492} 1493 1494#if 0 1495static void 1496vndshutdown(void) 1497{ 1498 struct vnd_softc *vnd; 1499 1500 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1501 if (vnd->sc_flags & VNF_INITED) 1502 vndclear(vnd); 1503} 1504#endif 1505 1506static void 1507vndclear(struct vnd_softc *vnd, int myminor) 1508{ 1509 struct vnode *vp = vnd->sc_vp; 1510 struct lwp *l = curlwp; 1511 int fflags = FREAD; 1512 int bmaj, cmaj, i, mn; 1513 int s; 1514 1515#ifdef DEBUG 1516 if (vnddebug & VDB_FOLLOW) 1517 printf("vndclear(%p): vp %p\n", vnd, vp); 1518#endif 1519 /* locate the major number */ 1520 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1521 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1522 1523 /* Nuke the vnodes for any open instances */ 1524 for (i = 0; i < MAXPARTITIONS; i++) { 1525 mn = DISKMINOR(device_unit(&vnd->sc_dev), i); 1526 vdevgone(bmaj, mn, mn, VBLK); 1527 if (mn != myminor) /* XXX avoid to kill own vnode */ 1528 vdevgone(cmaj, mn, mn, VCHR); 1529 } 1530 1531 if ((vnd->sc_flags & VNF_READONLY) == 0) 1532 fflags |= FWRITE; 1533 1534 s = splbio(); 1535 bufq_drain(vnd->sc_tab); 1536 splx(s); 1537 1538 vnd->sc_flags |= VNF_VUNCONF; 1539 wakeup(&vnd->sc_tab); 1540 while (vnd->sc_flags & VNF_KTHREAD) 1541 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1542 1543#ifdef VND_COMPRESSION 1544 /* free the compressed file buffers */ 1545 if(vnd->sc_flags & VNF_COMP) { 1546 if(vnd->sc_comp_offsets) { 1547 free(vnd->sc_comp_offsets, M_DEVBUF); 1548 vnd->sc_comp_offsets = NULL; 1549 } 1550 if(vnd->sc_comp_buff) { 1551 free(vnd->sc_comp_buff, M_DEVBUF); 1552 vnd->sc_comp_buff = NULL; 1553 } 1554 if(vnd->sc_comp_decombuf) { 1555 free(vnd->sc_comp_decombuf, M_DEVBUF); 1556 vnd->sc_comp_decombuf = NULL; 1557 } 1558 } 1559#endif /* VND_COMPRESSION */ 1560 vnd->sc_flags &= 1561 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1562 | VNF_VUNCONF | VNF_COMP); 1563 if (vp == (struct vnode *)0) 1564 panic("vndclear: null vp"); 1565 (void) vn_close(vp, fflags, vnd->sc_cred, l); 1566 kauth_cred_free(vnd->sc_cred); 1567 vnd->sc_vp = (struct vnode *)0; 1568 vnd->sc_cred = (kauth_cred_t)0; 1569 vnd->sc_size = 0; 1570} 1571 1572static int 1573vndsize(dev_t dev) 1574{ 1575 struct vnd_softc *sc; 1576 struct disklabel *lp; 1577 int part, unit, omask; 1578 int size; 1579 1580 unit = vndunit(dev); 1581 sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1582 if (sc == NULL) 1583 return -1; 1584 1585 if ((sc->sc_flags & VNF_INITED) == 0) 1586 return (-1); 1587 1588 part = DISKPART(dev); 1589 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1590 lp = sc->sc_dkdev.dk_label; 1591 1592 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1593 return (-1); 1594 1595 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1596 size = -1; 1597 else 1598 size = lp->d_partitions[part].p_size * 1599 (lp->d_secsize / DEV_BSIZE); 1600 1601 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1602 return (-1); 1603 1604 return (size); 1605} 1606 1607static int 1608vnddump(dev_t dev __unused, daddr_t blkno __unused, caddr_t va __unused, 1609 size_t size __unused) 1610{ 1611 1612 /* Not implemented. */ 1613 return ENXIO; 1614} 1615 1616static void 1617vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1618{ 1619 struct vndgeom *vng = &sc->sc_geom; 1620 struct partition *pp; 1621 1622 memset(lp, 0, sizeof(*lp)); 1623 1624 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1625 lp->d_secsize = vng->vng_secsize; 1626 lp->d_nsectors = vng->vng_nsectors; 1627 lp->d_ntracks = vng->vng_ntracks; 1628 lp->d_ncylinders = vng->vng_ncylinders; 1629 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1630 1631 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1632 lp->d_type = DTYPE_VND; 1633 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1634 lp->d_rpm = 3600; 1635 lp->d_interleave = 1; 1636 lp->d_flags = 0; 1637 1638 pp = &lp->d_partitions[RAW_PART]; 1639 pp->p_offset = 0; 1640 pp->p_size = lp->d_secperunit; 1641 pp->p_fstype = FS_UNUSED; 1642 lp->d_npartitions = RAW_PART + 1; 1643 1644 lp->d_magic = DISKMAGIC; 1645 lp->d_magic2 = DISKMAGIC; 1646 lp->d_checksum = dkcksum(lp); 1647} 1648 1649/* 1650 * Read the disklabel from a vnd. If one is not present, create a fake one. 1651 */ 1652static void 1653vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1654{ 1655 const char *errstring; 1656 struct disklabel *lp = sc->sc_dkdev.dk_label; 1657 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1658 int i; 1659 1660 memset(clp, 0, sizeof(*clp)); 1661 1662 vndgetdefaultlabel(sc, lp); 1663 1664 /* 1665 * Call the generic disklabel extraction routine. 1666 */ 1667 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1668 if (errstring) { 1669 /* 1670 * Lack of disklabel is common, but we print the warning 1671 * anyway, since it might contain other useful information. 1672 */ 1673 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring); 1674 1675 /* 1676 * For historical reasons, if there's no disklabel 1677 * present, all partitions must be FS_BSDFFS and 1678 * occupy the entire disk. 1679 */ 1680 for (i = 0; i < MAXPARTITIONS; i++) { 1681 /* 1682 * Don't wipe out port specific hack (such as 1683 * dos partition hack of i386 port). 1684 */ 1685 if (lp->d_partitions[i].p_size != 0) 1686 continue; 1687 1688 lp->d_partitions[i].p_size = lp->d_secperunit; 1689 lp->d_partitions[i].p_offset = 0; 1690 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1691 } 1692 1693 strncpy(lp->d_packname, "default label", 1694 sizeof(lp->d_packname)); 1695 1696 lp->d_npartitions = MAXPARTITIONS; 1697 lp->d_checksum = dkcksum(lp); 1698 } 1699 1700 /* In-core label now valid. */ 1701 sc->sc_flags |= VNF_VLABEL; 1702} 1703 1704/* 1705 * Wait interruptibly for an exclusive lock. 1706 * 1707 * XXX 1708 * Several drivers do this; it should be abstracted and made MP-safe. 1709 */ 1710static int 1711vndlock(struct vnd_softc *sc) 1712{ 1713 int error; 1714 1715 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1716 sc->sc_flags |= VNF_WANTED; 1717 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1718 return (error); 1719 } 1720 sc->sc_flags |= VNF_LOCKED; 1721 return (0); 1722} 1723 1724/* 1725 * Unlock and wake up any waiters. 1726 */ 1727static void 1728vndunlock(struct vnd_softc *sc) 1729{ 1730 1731 sc->sc_flags &= ~VNF_LOCKED; 1732 if ((sc->sc_flags & VNF_WANTED) != 0) { 1733 sc->sc_flags &= ~VNF_WANTED; 1734 wakeup(sc); 1735 } 1736} 1737 1738#ifdef VND_COMPRESSION 1739/* compressed file read */ 1740static void 1741compstrategy(struct buf *bp, off_t bn) 1742{ 1743 int error; 1744 int unit = vndunit(bp->b_dev); 1745 struct vnd_softc *vnd = 1746 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1747 u_int32_t comp_block; 1748 struct uio auio; 1749 caddr_t addr; 1750 int s; 1751 1752 /* set up constants for data move */ 1753 auio.uio_rw = UIO_READ; 1754 UIO_SETUP_SYSSPACE(&auio); 1755 1756 /* read, and transfer the data */ 1757 addr = bp->b_data; 1758 s = splbio(); 1759 while (bp->b_resid > 0) { 1760 unsigned length; 1761 size_t length_in_buffer; 1762 u_int32_t offset_in_buffer; 1763 struct iovec aiov; 1764 1765 /* calculate the compressed block number */ 1766 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1767 1768 /* check for good block number */ 1769 if (comp_block >= vnd->sc_comp_numoffs) { 1770 bp->b_error = EINVAL; 1771 bp->b_flags |= B_ERROR; 1772 splx(s); 1773 return; 1774 } 1775 1776 /* read in the compressed block, if not in buffer */ 1777 if (comp_block != vnd->sc_comp_buffblk) { 1778 length = vnd->sc_comp_offsets[comp_block + 1] - 1779 vnd->sc_comp_offsets[comp_block]; 1780 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1781 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1782 length, vnd->sc_comp_offsets[comp_block], 1783 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1784 if (error) { 1785 bp->b_error = error; 1786 bp->b_flags |= B_ERROR; 1787 VOP_UNLOCK(vnd->sc_vp, 0); 1788 splx(s); 1789 return; 1790 } 1791 /* uncompress the buffer */ 1792 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1793 vnd->sc_comp_stream.avail_in = length; 1794 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1795 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1796 inflateReset(&vnd->sc_comp_stream); 1797 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1798 if (error != Z_STREAM_END) { 1799 if (vnd->sc_comp_stream.msg) 1800 printf("%s: compressed file, %s\n", 1801 vnd->sc_dev.dv_xname, 1802 vnd->sc_comp_stream.msg); 1803 bp->b_error = EBADMSG; 1804 bp->b_flags |= B_ERROR; 1805 VOP_UNLOCK(vnd->sc_vp, 0); 1806 splx(s); 1807 return; 1808 } 1809 vnd->sc_comp_buffblk = comp_block; 1810 VOP_UNLOCK(vnd->sc_vp, 0); 1811 } 1812 1813 /* transfer the usable uncompressed data */ 1814 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1815 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1816 if (length_in_buffer > bp->b_resid) 1817 length_in_buffer = bp->b_resid; 1818 auio.uio_iov = &aiov; 1819 auio.uio_iovcnt = 1; 1820 aiov.iov_base = addr; 1821 aiov.iov_len = length_in_buffer; 1822 auio.uio_resid = aiov.iov_len; 1823 auio.uio_offset = 0; 1824 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1825 length_in_buffer, &auio); 1826 if (error) { 1827 bp->b_error = error; 1828 bp->b_flags |= B_ERROR; 1829 splx(s); 1830 return; 1831 } 1832 1833 bn += length_in_buffer; 1834 addr += length_in_buffer; 1835 bp->b_resid -= length_in_buffer; 1836 } 1837 splx(s); 1838} 1839 1840/* compression memory allocation routines */ 1841static void * 1842vnd_alloc(void *aux __unused, u_int items, u_int siz) 1843{ 1844 return malloc(items * siz, M_TEMP, M_NOWAIT); 1845} 1846 1847static void 1848vnd_free(void *aux __unused, void *ptr) 1849{ 1850 free(ptr, M_TEMP); 1851} 1852#endif /* VND_COMPRESSION */ 1853