vnd.c revision 1.94
1/* $NetBSD: vnd.c,v 1.94 2003/03/27 15:34:36 yamt Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1988 University of Utah. 41 * Copyright (c) 1990, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * This code is derived from software contributed to Berkeley by 45 * the Systems Programming Group of the University of Utah Computer 46 * Science Department. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Berkeley and its contributors. 60 * 4. Neither the name of the University nor the names of its contributors 61 * may be used to endorse or promote products derived from this software 62 * without specific prior written permission. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 * 76 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 77 * 78 * @(#)vn.c 8.9 (Berkeley) 5/14/95 79 */ 80 81/* 82 * Vnode disk driver. 83 * 84 * Block/character interface to a vnode. Allows one to treat a file 85 * as a disk (e.g. build a filesystem in it, mount it, etc.). 86 * 87 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 88 * instead of a simple VOP_RDWR. We do this to avoid distorting the 89 * local buffer cache. 90 * 91 * NOTE 2: There is a security issue involved with this driver. 92 * Once mounted all access to the contents of the "mapped" file via 93 * the special file is controlled by the permissions on the special 94 * file, the protection of the mapped file is ignored (effectively, 95 * by using root credentials in all transactions). 96 * 97 * NOTE 3: Doesn't interact with leases, should it? 98 */ 99 100#include <sys/cdefs.h> 101__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.94 2003/03/27 15:34:36 yamt Exp $"); 102 103#if defined(_KERNEL_OPT) 104#include "fs_nfs.h" 105#endif 106 107#include <sys/param.h> 108#include <sys/systm.h> 109#include <sys/namei.h> 110#include <sys/proc.h> 111#include <sys/errno.h> 112#include <sys/buf.h> 113#include <sys/malloc.h> 114#include <sys/ioctl.h> 115#include <sys/disklabel.h> 116#include <sys/device.h> 117#include <sys/disk.h> 118#include <sys/stat.h> 119#include <sys/mount.h> 120#include <sys/vnode.h> 121#include <sys/file.h> 122#include <sys/uio.h> 123#include <sys/conf.h> 124 125#include <miscfs/specfs/specdev.h> 126 127#include <dev/vndvar.h> 128 129#if defined(VNDDEBUG) && !defined(DEBUG) 130#define DEBUG 131#endif 132 133#ifdef DEBUG 134int dovndcluster = 1; 135#define VDB_FOLLOW 0x01 136#define VDB_INIT 0x02 137#define VDB_IO 0x04 138#define VDB_LABEL 0x08 139int vnddebug = 0x00; 140#endif 141 142#define vndunit(x) DISKUNIT(x) 143 144struct vndxfer { 145 struct buf *vx_bp; /* Pointer to parent buffer */ 146 int vx_error; 147 int vx_pending; /* # of pending aux buffers */ 148 int vx_flags; 149#define VX_BUSY 1 150}; 151 152struct vndbuf { 153 struct buf vb_buf; 154 struct vndxfer *vb_xfer; 155}; 156 157#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_NOWAIT) 158#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 159 160#define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_NOWAIT) 161#define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 162 163struct vnd_softc *vnd_softc; 164int numvnd = 0; 165 166#define VNDLABELDEV(dev) \ 167 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 168 169/* called by main() at boot time (XXX: and the LKM driver) */ 170void vndattach __P((int)); 171int vnddetach __P((void)); 172 173void vndclear __P((struct vnd_softc *)); 174void vndstart __P((struct vnd_softc *)); 175int vndsetcred __P((struct vnd_softc *, struct ucred *)); 176void vndthrottle __P((struct vnd_softc *, struct vnode *)); 177void vndiodone __P((struct buf *)); 178void vndshutdown __P((void)); 179 180void vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *)); 181void vndgetdisklabel __P((dev_t)); 182 183static int vndlock __P((struct vnd_softc *)); 184static void vndunlock __P((struct vnd_softc *)); 185 186dev_type_open(vndopen); 187dev_type_close(vndclose); 188dev_type_read(vndread); 189dev_type_write(vndwrite); 190dev_type_ioctl(vndioctl); 191dev_type_strategy(vndstrategy); 192dev_type_dump(vnddump); 193dev_type_size(vndsize); 194 195const struct bdevsw vnd_bdevsw = { 196 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 197}; 198 199const struct cdevsw vnd_cdevsw = { 200 vndopen, vndclose, vndread, vndwrite, vndioctl, 201 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 202}; 203 204int vndattached = 0; 205 206void 207vndattach(num) 208 int num; 209{ 210 int i; 211 char *mem; 212 213 if (vndattached) 214 return; 215 vndattached = 1; 216 if (num <= 0) 217 return; 218 i = num * sizeof(struct vnd_softc); 219 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 220 if (mem == NULL) { 221 printf("WARNING: no memory for vnode disks\n"); 222 return; 223 } 224 vnd_softc = (struct vnd_softc *)mem; 225 numvnd = num; 226 227 for (i = 0; i < numvnd; i++) 228 bufq_alloc(&vnd_softc[i].sc_tab, 229 BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK); 230} 231 232int 233vnddetach() 234{ 235 int i; 236 237 /* First check we aren't in use. */ 238 for (i = 0; i < numvnd; i++) 239 if (vnd_softc[i].sc_flags & VNF_INITED) 240 return (EBUSY); 241 242 for (i = 0; i < numvnd; i++) 243 bufq_free(&vnd_softc[i].sc_tab); 244 245 free(vnd_softc, M_DEVBUF); 246 vndattached = 0; 247 248 return (0); 249} 250 251int 252vndopen(dev, flags, mode, p) 253 dev_t dev; 254 int flags, mode; 255 struct proc *p; 256{ 257 int unit = vndunit(dev); 258 struct vnd_softc *sc; 259 int error = 0, part, pmask; 260 struct disklabel *lp; 261 262#ifdef DEBUG 263 if (vnddebug & VDB_FOLLOW) 264 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 265#endif 266 if (unit >= numvnd) 267 return (ENXIO); 268 sc = &vnd_softc[unit]; 269 270 if ((error = vndlock(sc)) != 0) 271 return (error); 272 273 lp = sc->sc_dkdev.dk_label; 274 275 part = DISKPART(dev); 276 pmask = (1 << part); 277 278 /* 279 * If we're initialized, check to see if there are any other 280 * open partitions. If not, then it's safe to update the 281 * in-core disklabel. 282 */ 283 if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0)) 284 vndgetdisklabel(dev); 285 286 /* Check that the partitions exists. */ 287 if (part != RAW_PART) { 288 if (((sc->sc_flags & VNF_INITED) == 0) || 289 ((part >= lp->d_npartitions) || 290 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 291 error = ENXIO; 292 goto done; 293 } 294 } 295 296 /* Prevent our unit from being unconfigured while open. */ 297 switch (mode) { 298 case S_IFCHR: 299 sc->sc_dkdev.dk_copenmask |= pmask; 300 break; 301 302 case S_IFBLK: 303 sc->sc_dkdev.dk_bopenmask |= pmask; 304 break; 305 } 306 sc->sc_dkdev.dk_openmask = 307 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 308 309 done: 310 vndunlock(sc); 311 return (error); 312} 313 314int 315vndclose(dev, flags, mode, p) 316 dev_t dev; 317 int flags, mode; 318 struct proc *p; 319{ 320 int unit = vndunit(dev); 321 struct vnd_softc *sc; 322 int error = 0, part; 323 324#ifdef DEBUG 325 if (vnddebug & VDB_FOLLOW) 326 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 327#endif 328 329 if (unit >= numvnd) 330 return (ENXIO); 331 sc = &vnd_softc[unit]; 332 333 if ((error = vndlock(sc)) != 0) 334 return (error); 335 336 part = DISKPART(dev); 337 338 /* ...that much closer to allowing unconfiguration... */ 339 switch (mode) { 340 case S_IFCHR: 341 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 342 break; 343 344 case S_IFBLK: 345 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 346 break; 347 } 348 sc->sc_dkdev.dk_openmask = 349 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 350 351 vndunlock(sc); 352 return (0); 353} 354 355/* 356 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 357 */ 358void 359vndstrategy(bp) 360 struct buf *bp; 361{ 362 int unit = vndunit(bp->b_dev); 363 struct vnd_softc *vnd = &vnd_softc[unit]; 364 struct vndxfer *vnx; 365 int s, bsize, resid; 366 off_t bn; 367 caddr_t addr; 368 int sz, flags, error, wlabel; 369 struct disklabel *lp; 370 struct partition *pp; 371 372#ifdef DEBUG 373 if (vnddebug & VDB_FOLLOW) 374 printf("vndstrategy(%p): unit %d\n", bp, unit); 375#endif 376 if ((vnd->sc_flags & VNF_INITED) == 0) { 377 bp->b_error = ENXIO; 378 bp->b_flags |= B_ERROR; 379 goto done; 380 } 381 382 /* If it's a nil transfer, wake up the top half now. */ 383 if (bp->b_bcount == 0) 384 goto done; 385 386 lp = vnd->sc_dkdev.dk_label; 387 388 /* 389 * The transfer must be a whole number of blocks. 390 */ 391 if ((bp->b_bcount % lp->d_secsize) != 0) { 392 bp->b_error = EINVAL; 393 bp->b_flags |= B_ERROR; 394 goto done; 395 } 396 397 /* 398 * Do bounds checking and adjust transfer. If there's an error, 399 * the bounds check will flag that for us. 400 */ 401 wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING); 402 if (DISKPART(bp->b_dev) != RAW_PART) 403 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 404 goto done; 405 406 /* 407 * check if we're read-only. 408 */ 409 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 410 bp->b_error = EACCES; 411 bp->b_flags |= B_ERROR; 412 goto done; 413 } 414 415 bp->b_resid = bp->b_bcount; 416 417 /* 418 * Put the block number in terms of the logical blocksize 419 * of the "device". 420 */ 421 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 422 423 /* 424 * Translate the partition-relative block number to an absolute. 425 */ 426 if (DISKPART(bp->b_dev) != RAW_PART) { 427 pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 428 bn += pp->p_offset; 429 } 430 431 /* ...and convert to a byte offset within the file. */ 432 bn *= lp->d_secsize; 433 434 if (vnd->sc_vp->v_mount == NULL) { 435 bp->b_error = ENXIO; 436 bp->b_flags |= B_ERROR; 437 goto done; 438 } 439 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 440 addr = bp->b_data; 441 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 442 443 /* Allocate a header for this transfer and link it to the buffer */ 444 s = splbio(); 445 vnx = VND_GETXFER(vnd); 446 splx(s); 447 vnx->vx_flags = VX_BUSY; 448 vnx->vx_error = 0; 449 vnx->vx_pending = 0; 450 vnx->vx_bp = bp; 451 452 for (resid = bp->b_resid; resid; resid -= sz) { 453 struct vndbuf *nbp; 454 struct vnode *vp; 455 daddr_t nbn; 456 int off, nra; 457 458 nra = 0; 459 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 460 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 461 VOP_UNLOCK(vnd->sc_vp, 0); 462 463 if (error == 0 && (long)nbn == -1) 464 error = EIO; 465 466 /* 467 * If there was an error or a hole in the file...punt. 468 * Note that we may have to wait for any operations 469 * that we have already fired off before releasing 470 * the buffer. 471 * 472 * XXX we could deal with holes here but it would be 473 * a hassle (in the write case). 474 */ 475 if (error) { 476 s = splbio(); 477 vnx->vx_error = error; 478 goto out; 479 } 480 481#ifdef DEBUG 482 if (!dovndcluster) 483 nra = 0; 484#endif 485 486 if ((off = bn % bsize) != 0) 487 sz = bsize - off; 488 else 489 sz = (1 + nra) * bsize; 490 if (resid < sz) 491 sz = resid; 492#ifdef DEBUG 493 if (vnddebug & VDB_IO) 494 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 495 " sz 0x%x\n", 496 vnd->sc_vp, vp, (long long)bn, nbn, sz); 497#endif 498 499 s = splbio(); 500 nbp = VND_GETBUF(vnd); 501 splx(s); 502 BUF_INIT(&nbp->vb_buf); 503 nbp->vb_buf.b_flags = flags; 504 nbp->vb_buf.b_bcount = sz; 505 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 506 - trunc_page((ulong) addr); 507 nbp->vb_buf.b_error = 0; 508 nbp->vb_buf.b_data = addr; 509 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 510 nbp->vb_buf.b_proc = bp->b_proc; 511 nbp->vb_buf.b_iodone = vndiodone; 512 nbp->vb_buf.b_vp = NULLVP; 513 514 nbp->vb_xfer = vnx; 515 516 /* 517 * Just sort by block number 518 */ 519 s = splbio(); 520 if (vnx->vx_error != 0) { 521 VND_PUTBUF(vnd, nbp); 522 goto out; 523 } 524 vnx->vx_pending++; 525 bgetvp(vp, &nbp->vb_buf); 526 BUFQ_PUT(&vnd->sc_tab, &nbp->vb_buf); 527 vndstart(vnd); 528 splx(s); 529 bn += sz; 530 addr += sz; 531 } 532 533 s = splbio(); 534 535out: /* Arrive here at splbio */ 536 vnx->vx_flags &= ~VX_BUSY; 537 if (vnx->vx_pending == 0) { 538 if (vnx->vx_error != 0) { 539 bp->b_error = vnx->vx_error; 540 bp->b_flags |= B_ERROR; 541 } 542 VND_PUTXFER(vnd, vnx); 543 biodone(bp); 544 } 545 splx(s); 546 return; 547 548 done: 549 biodone(bp); 550} 551 552/* 553 * Feed requests sequentially. 554 * We do it this way to keep from flooding NFS servers if we are connected 555 * to an NFS file. This places the burden on the client rather than the 556 * server. 557 */ 558void 559vndstart(vnd) 560 struct vnd_softc *vnd; 561{ 562 struct buf *bp; 563 564 /* 565 * Dequeue now since lower level strategy routine might 566 * queue using same links 567 */ 568 569 if ((vnd->sc_flags & VNF_BUSY) != 0) 570 return; 571 572 vnd->sc_flags |= VNF_BUSY; 573 574 while (vnd->sc_active < vnd->sc_maxactive) { 575 bp = BUFQ_GET(&vnd->sc_tab); 576 if (bp == NULL) 577 break; 578 vnd->sc_active++; 579#ifdef DEBUG 580 if (vnddebug & VDB_IO) 581 printf("vndstart(%ld): bp %p vp %p blkno 0x%" PRIx64 582 " flags %lx addr %p cnt 0x%lx\n", 583 (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno, 584 bp->b_flags, bp->b_data, bp->b_bcount); 585#endif 586 587 /* Instrumentation. */ 588 disk_busy(&vnd->sc_dkdev); 589 590 if ((bp->b_flags & B_READ) == 0) 591 bp->b_vp->v_numoutput++; 592 VOP_STRATEGY(bp); 593 } 594 vnd->sc_flags &= ~VNF_BUSY; 595} 596 597void 598vndiodone(bp) 599 struct buf *bp; 600{ 601 struct vndbuf *vbp = (struct vndbuf *) bp; 602 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 603 struct buf *pbp = vnx->vx_bp; 604 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 605 int s, resid; 606 607 s = splbio(); 608#ifdef DEBUG 609 if (vnddebug & VDB_IO) 610 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%" PRIx64 611 " addr %p cnt 0x%lx\n", 612 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 613 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 614 vbp->vb_buf.b_bcount); 615#endif 616 617 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 618 pbp->b_resid -= resid; 619 disk_unbusy(&vnd->sc_dkdev, resid, (pbp->b_flags & B_READ)); 620 vnx->vx_pending--; 621 622 if (vbp->vb_buf.b_error) { 623#ifdef DEBUG 624 if (vnddebug & VDB_IO) 625 printf("vndiodone: vbp %p error %d\n", vbp, 626 vbp->vb_buf.b_error); 627#endif 628 vnx->vx_error = vbp->vb_buf.b_error; 629 } 630 631 if (vbp->vb_buf.b_vp != NULLVP) 632 brelvp(&vbp->vb_buf); 633 634 VND_PUTBUF(vnd, vbp); 635 636 /* 637 * Wrap up this transaction if it has run to completion or, in 638 * case of an error, when all auxiliary buffers have returned. 639 */ 640 if (vnx->vx_error != 0) { 641 pbp->b_flags |= B_ERROR; 642 pbp->b_error = vnx->vx_error; 643 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 644 645#ifdef DEBUG 646 if (vnddebug & VDB_IO) 647 printf("vndiodone: pbp %p iodone: error %d\n", 648 pbp, vnx->vx_error); 649#endif 650 VND_PUTXFER(vnd, vnx); 651 biodone(pbp); 652 } 653 } else if (pbp->b_resid == 0) { 654 655#ifdef DIAGNOSTIC 656 if (vnx->vx_pending != 0) 657 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 658#endif 659 660 if ((vnx->vx_flags & VX_BUSY) == 0) { 661#ifdef DEBUG 662 if (vnddebug & VDB_IO) 663 printf("vndiodone: pbp %p iodone\n", pbp); 664#endif 665 VND_PUTXFER(vnd, vnx); 666 biodone(pbp); 667 } 668 } 669 670 vnd->sc_active--; 671 vndstart(vnd); 672 splx(s); 673} 674 675/* ARGSUSED */ 676int 677vndread(dev, uio, flags) 678 dev_t dev; 679 struct uio *uio; 680 int flags; 681{ 682 int unit = vndunit(dev); 683 struct vnd_softc *sc; 684 685#ifdef DEBUG 686 if (vnddebug & VDB_FOLLOW) 687 printf("vndread(0x%x, %p)\n", dev, uio); 688#endif 689 690 if (unit >= numvnd) 691 return (ENXIO); 692 sc = &vnd_softc[unit]; 693 694 if ((sc->sc_flags & VNF_INITED) == 0) 695 return (ENXIO); 696 697 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 698} 699 700/* ARGSUSED */ 701int 702vndwrite(dev, uio, flags) 703 dev_t dev; 704 struct uio *uio; 705 int flags; 706{ 707 int unit = vndunit(dev); 708 struct vnd_softc *sc; 709 710#ifdef DEBUG 711 if (vnddebug & VDB_FOLLOW) 712 printf("vndwrite(0x%x, %p)\n", dev, uio); 713#endif 714 715 if (unit >= numvnd) 716 return (ENXIO); 717 sc = &vnd_softc[unit]; 718 719 if ((sc->sc_flags & VNF_INITED) == 0) 720 return (ENXIO); 721 722 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 723} 724 725/* ARGSUSED */ 726int 727vndioctl(dev, cmd, data, flag, p) 728 dev_t dev; 729 u_long cmd; 730 caddr_t data; 731 int flag; 732 struct proc *p; 733{ 734 int unit = vndunit(dev); 735 struct vnd_softc *vnd; 736 struct vnd_ioctl *vio; 737 struct vattr vattr; 738 struct nameidata nd; 739 int error, part, pmask; 740 size_t geomsize; 741 int fflags; 742#ifdef __HAVE_OLD_DISKLABEL 743 struct disklabel newlabel; 744#endif 745 746#ifdef DEBUG 747 if (vnddebug & VDB_FOLLOW) 748 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 749 dev, cmd, data, flag, p, unit); 750#endif 751 if (unit >= numvnd) 752 return (ENXIO); 753 754 vnd = &vnd_softc[unit]; 755 vio = (struct vnd_ioctl *)data; 756 757 /* Must be open for writes for these commands... */ 758 switch (cmd) { 759 case VNDIOCSET: 760 case VNDIOCCLR: 761 case DIOCSDINFO: 762 case DIOCWDINFO: 763#ifdef __HAVE_OLD_DISKLABEL 764 case ODIOCSDINFO: 765 case ODIOCWDINFO: 766#endif 767 case DIOCWLABEL: 768 if ((flag & FWRITE) == 0) 769 return (EBADF); 770 } 771 772 /* Must be initialized for these... */ 773 switch (cmd) { 774 case VNDIOCCLR: 775 case DIOCGDINFO: 776 case DIOCSDINFO: 777 case DIOCWDINFO: 778 case DIOCGPART: 779 case DIOCWLABEL: 780 case DIOCGDEFLABEL: 781#ifdef __HAVE_OLD_DISKLABEL 782 case ODIOCGDINFO: 783 case ODIOCSDINFO: 784 case ODIOCWDINFO: 785 case ODIOCGDEFLABEL: 786#endif 787 if ((vnd->sc_flags & VNF_INITED) == 0) 788 return (ENXIO); 789 } 790 791 switch (cmd) { 792 case VNDIOCSET: 793 if (vnd->sc_flags & VNF_INITED) 794 return (EBUSY); 795 796 if ((error = vndlock(vnd)) != 0) 797 return (error); 798 799 fflags = FREAD; 800 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 801 fflags |= FWRITE; 802 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 803 if ((error = vn_open(&nd, fflags, 0)) != 0) 804 goto unlock_and_exit; 805 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 806 VOP_UNLOCK(nd.ni_vp, 0); 807 if (!error && nd.ni_vp->v_type != VREG) 808 error = EOPNOTSUPP; 809 if (error) 810 goto close_and_exit; 811 vnd->sc_vp = nd.ni_vp; 812 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 813 814 /* 815 * Use pseudo-geometry specified. If none was provided, 816 * use "standard" Adaptec fictitious geometry. 817 */ 818 if (vio->vnd_flags & VNDIOF_HASGEOM) { 819 820 memcpy(&vnd->sc_geom, &vio->vnd_geom, 821 sizeof(vio->vnd_geom)); 822 823 /* 824 * Sanity-check the sector size. 825 * XXX Don't allow secsize < DEV_BSIZE. Should 826 * XXX we? 827 */ 828 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 829 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) { 830 error = EINVAL; 831 goto close_and_exit; 832 } 833 834 /* 835 * Compute the size (in DEV_BSIZE blocks) specified 836 * by the geometry. 837 */ 838 geomsize = (vnd->sc_geom.vng_nsectors * 839 vnd->sc_geom.vng_ntracks * 840 vnd->sc_geom.vng_ncylinders) * 841 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 842 843 /* 844 * Sanity-check the size against the specified 845 * geometry. 846 */ 847 if (vnd->sc_size < geomsize) { 848 error = EINVAL; 849 goto close_and_exit; 850 } 851 } else { 852 /* 853 * Size must be at least 2048 DEV_BSIZE blocks 854 * (1M) in order to use this geometry. 855 */ 856 if (vnd->sc_size < (32 * 64)) { 857 error = EINVAL; 858 goto close_and_exit; 859 } 860 861 vnd->sc_geom.vng_secsize = DEV_BSIZE; 862 vnd->sc_geom.vng_nsectors = 32; 863 vnd->sc_geom.vng_ntracks = 64; 864 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 865 } 866 867 if (vio->vnd_flags & VNDIOF_READONLY) { 868 vnd->sc_flags |= VNF_READONLY; 869 } 870 871 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 872 goto close_and_exit; 873 vndthrottle(vnd, vnd->sc_vp); 874 vio->vnd_size = dbtob(vnd->sc_size); 875 vnd->sc_flags |= VNF_INITED; 876#ifdef DEBUG 877 if (vnddebug & VDB_INIT) 878 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 879 vnd->sc_vp, (unsigned long) vnd->sc_size, 880 vnd->sc_geom.vng_secsize, 881 vnd->sc_geom.vng_nsectors, 882 vnd->sc_geom.vng_ntracks, 883 vnd->sc_geom.vng_ncylinders); 884#endif 885 886 /* Attach the disk. */ 887 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 888 sprintf(vnd->sc_xname, "vnd%d", unit); /* XXX */ 889 vnd->sc_dkdev.dk_name = vnd->sc_xname; 890 disk_attach(&vnd->sc_dkdev); 891 892 /* Initialize the xfer and buffer pools. */ 893 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 894 0, 0, "vndxpl", NULL); 895 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 896 0, 0, "vndbpl", NULL); 897 898 /* Try and read the disklabel. */ 899 vndgetdisklabel(dev); 900 901 vndunlock(vnd); 902 903 break; 904 905close_and_exit: 906 (void) vn_close(nd.ni_vp, fflags, p->p_ucred, p); 907unlock_and_exit: 908 vndunlock(vnd); 909 return (error); 910 911 case VNDIOCCLR: 912 if ((error = vndlock(vnd)) != 0) 913 return (error); 914 915 /* 916 * Don't unconfigure if any other partitions are open 917 * or if both the character and block flavors of this 918 * partition are open. 919 */ 920 part = DISKPART(dev); 921 pmask = (1 << part); 922 if ((vnd->sc_dkdev.dk_openmask & ~pmask) || 923 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 924 (vnd->sc_dkdev.dk_copenmask & pmask))) { 925 vndunlock(vnd); 926 return (EBUSY); 927 } 928 929 vndclear(vnd); 930#ifdef DEBUG 931 if (vnddebug & VDB_INIT) 932 printf("vndioctl: CLRed\n"); 933#endif 934 935 /* Destroy the xfer and buffer pools. */ 936 pool_destroy(&vnd->sc_vxpool); 937 pool_destroy(&vnd->sc_vbpool); 938 939 /* Detatch the disk. */ 940 disk_detach(&vnd->sc_dkdev); 941 942 vndunlock(vnd); 943 944 break; 945 946 case VNDIOCGET: { 947 struct vnd_user *vnu; 948 struct vattr va; 949 950 vnu = (struct vnd_user *)data; 951 952 if (vnu->vnu_unit == -1) 953 vnu->vnu_unit = unit; 954 if (vnu->vnu_unit >= numvnd) 955 return (ENXIO); 956 if (vnu->vnu_unit < 0) 957 return (EINVAL); 958 959 vnd = &vnd_softc[vnu->vnu_unit]; 960 961 if (vnd->sc_flags & VNF_INITED) { 962 error = VOP_GETATTR(vnd->sc_vp, &va, p->p_ucred, p); 963 if (error) 964 return (error); 965 vnu->vnu_dev = va.va_fsid; 966 vnu->vnu_ino = va.va_fileid; 967 } 968 else { 969 /* unused is not an error */ 970 vnu->vnu_dev = 0; 971 vnu->vnu_ino = 0; 972 } 973 974 break; 975 } 976 977 case DIOCGDINFO: 978 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 979 break; 980 981#ifdef __HAVE_OLD_DISKLABEL 982 case ODIOCGDINFO: 983 newlabel = *(vnd->sc_dkdev.dk_label); 984 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 985 return ENOTTY; 986 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 987 break; 988#endif 989 990 case DIOCGPART: 991 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 992 ((struct partinfo *)data)->part = 993 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 994 break; 995 996 case DIOCWDINFO: 997 case DIOCSDINFO: 998#ifdef __HAVE_OLD_DISKLABEL 999 case ODIOCWDINFO: 1000 case ODIOCSDINFO: 1001#endif 1002 { 1003 struct disklabel *lp; 1004 1005 if ((error = vndlock(vnd)) != 0) 1006 return (error); 1007 1008 vnd->sc_flags |= VNF_LABELLING; 1009 1010#ifdef __HAVE_OLD_DISKLABEL 1011 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1012 memset(&newlabel, 0, sizeof newlabel); 1013 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1014 lp = &newlabel; 1015 } else 1016#endif 1017 lp = (struct disklabel *)data; 1018 1019 error = setdisklabel(vnd->sc_dkdev.dk_label, 1020 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1021 if (error == 0) { 1022 if (cmd == DIOCWDINFO 1023#ifdef __HAVE_OLD_DISKLABEL 1024 || cmd == ODIOCWDINFO 1025#endif 1026 ) 1027 error = writedisklabel(VNDLABELDEV(dev), 1028 vndstrategy, vnd->sc_dkdev.dk_label, 1029 vnd->sc_dkdev.dk_cpulabel); 1030 } 1031 1032 vnd->sc_flags &= ~VNF_LABELLING; 1033 1034 vndunlock(vnd); 1035 1036 if (error) 1037 return (error); 1038 break; 1039 } 1040 1041 case DIOCWLABEL: 1042 if (*(int *)data != 0) 1043 vnd->sc_flags |= VNF_WLABEL; 1044 else 1045 vnd->sc_flags &= ~VNF_WLABEL; 1046 break; 1047 1048 case DIOCGDEFLABEL: 1049 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1050 break; 1051 1052#ifdef __HAVE_OLD_DISKLABEL 1053 case ODIOCGDEFLABEL: 1054 vndgetdefaultlabel(vnd, &newlabel); 1055 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1056 return ENOTTY; 1057 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1058 break; 1059#endif 1060 1061 default: 1062 return (ENOTTY); 1063 } 1064 1065 return (0); 1066} 1067 1068/* 1069 * Duplicate the current processes' credentials. Since we are called only 1070 * as the result of a SET ioctl and only root can do that, any future access 1071 * to this "disk" is essentially as root. Note that credentials may change 1072 * if some other uid can write directly to the mapped file (NFS). 1073 */ 1074int 1075vndsetcred(vnd, cred) 1076 struct vnd_softc *vnd; 1077 struct ucred *cred; 1078{ 1079 struct uio auio; 1080 struct iovec aiov; 1081 char *tmpbuf; 1082 int error; 1083 1084 vnd->sc_cred = crdup(cred); 1085 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1086 1087 /* XXX: Horrible kludge to establish credentials for NFS */ 1088 aiov.iov_base = tmpbuf; 1089 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1090 auio.uio_iov = &aiov; 1091 auio.uio_iovcnt = 1; 1092 auio.uio_offset = 0; 1093 auio.uio_rw = UIO_READ; 1094 auio.uio_segflg = UIO_SYSSPACE; 1095 auio.uio_resid = aiov.iov_len; 1096 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1097 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1098 if (error == 0) { 1099 /* 1100 * Because vnd does all IO directly through the vnode 1101 * we need to flush (at least) the buffer from the above 1102 * VOP_READ from the buffer cache to prevent cache 1103 * incoherencies. Also, be careful to write dirty 1104 * buffers back to stable storage. 1105 */ 1106 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1107 curproc, 0, 0); 1108 } 1109 VOP_UNLOCK(vnd->sc_vp, 0); 1110 1111 free(tmpbuf, M_TEMP); 1112 return (error); 1113} 1114 1115/* 1116 * Set maxactive based on FS type 1117 */ 1118void 1119vndthrottle(vnd, vp) 1120 struct vnd_softc *vnd; 1121 struct vnode *vp; 1122{ 1123#ifdef NFS 1124 extern int (**nfsv2_vnodeop_p) __P((void *)); 1125 1126 if (vp->v_op == nfsv2_vnodeop_p) 1127 vnd->sc_maxactive = 2; 1128 else 1129#endif 1130 vnd->sc_maxactive = 8; 1131 1132 if (vnd->sc_maxactive < 1) 1133 vnd->sc_maxactive = 1; 1134} 1135 1136void 1137vndshutdown() 1138{ 1139 struct vnd_softc *vnd; 1140 1141 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1142 if (vnd->sc_flags & VNF_INITED) 1143 vndclear(vnd); 1144} 1145 1146void 1147vndclear(vnd) 1148 struct vnd_softc *vnd; 1149{ 1150 struct vnode *vp = vnd->sc_vp; 1151 struct proc *p = curproc; /* XXX */ 1152 int fflags = FREAD; 1153 1154#ifdef DEBUG 1155 if (vnddebug & VDB_FOLLOW) 1156 printf("vndclear(%p): vp %p\n", vnd, vp); 1157#endif 1158 if ((vnd->sc_flags & VNF_READONLY) == 0) 1159 fflags |= FWRITE; 1160 vnd->sc_flags &= ~(VNF_INITED | VNF_READONLY); 1161 if (vp == (struct vnode *)0) 1162 panic("vndioctl: null vp"); 1163 (void) vn_close(vp, fflags, vnd->sc_cred, p); 1164 crfree(vnd->sc_cred); 1165 vnd->sc_vp = (struct vnode *)0; 1166 vnd->sc_cred = (struct ucred *)0; 1167 vnd->sc_size = 0; 1168} 1169 1170int 1171vndsize(dev) 1172 dev_t dev; 1173{ 1174 struct vnd_softc *sc; 1175 struct disklabel *lp; 1176 int part, unit, omask; 1177 int size; 1178 1179 unit = vndunit(dev); 1180 if (unit >= numvnd) 1181 return (-1); 1182 sc = &vnd_softc[unit]; 1183 1184 if ((sc->sc_flags & VNF_INITED) == 0) 1185 return (-1); 1186 1187 part = DISKPART(dev); 1188 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1189 lp = sc->sc_dkdev.dk_label; 1190 1191 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc)) 1192 return (-1); 1193 1194 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1195 size = -1; 1196 else 1197 size = lp->d_partitions[part].p_size * 1198 (lp->d_secsize / DEV_BSIZE); 1199 1200 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc)) 1201 return (-1); 1202 1203 return (size); 1204} 1205 1206int 1207vnddump(dev, blkno, va, size) 1208 dev_t dev; 1209 daddr_t blkno; 1210 caddr_t va; 1211 size_t size; 1212{ 1213 1214 /* Not implemented. */ 1215 return ENXIO; 1216} 1217 1218void 1219vndgetdefaultlabel(sc, lp) 1220 struct vnd_softc *sc; 1221 struct disklabel *lp; 1222{ 1223 struct vndgeom *vng = &sc->sc_geom; 1224 struct partition *pp; 1225 1226 memset(lp, 0, sizeof(*lp)); 1227 1228 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1229 lp->d_secsize = vng->vng_secsize; 1230 lp->d_nsectors = vng->vng_nsectors; 1231 lp->d_ntracks = vng->vng_ntracks; 1232 lp->d_ncylinders = vng->vng_ncylinders; 1233 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1234 1235 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1236 lp->d_type = DTYPE_VND; 1237 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1238 lp->d_rpm = 3600; 1239 lp->d_interleave = 1; 1240 lp->d_flags = 0; 1241 1242 pp = &lp->d_partitions[RAW_PART]; 1243 pp->p_offset = 0; 1244 pp->p_size = lp->d_secperunit; 1245 pp->p_fstype = FS_UNUSED; 1246 lp->d_npartitions = RAW_PART + 1; 1247 1248 lp->d_magic = DISKMAGIC; 1249 lp->d_magic2 = DISKMAGIC; 1250 lp->d_checksum = dkcksum(lp); 1251} 1252 1253/* 1254 * Read the disklabel from a vnd. If one is not present, create a fake one. 1255 */ 1256void 1257vndgetdisklabel(dev) 1258 dev_t dev; 1259{ 1260 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1261 char *errstring; 1262 struct disklabel *lp = sc->sc_dkdev.dk_label; 1263 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1264 int i; 1265 1266 memset(clp, 0, sizeof(*clp)); 1267 1268 vndgetdefaultlabel(sc, lp); 1269 1270 /* 1271 * Call the generic disklabel extraction routine. 1272 */ 1273 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1274 if (errstring) { 1275 /* 1276 * Lack of disklabel is common, but we print the warning 1277 * anyway, since it might contain other useful information. 1278 */ 1279 printf("%s: %s\n", sc->sc_xname, errstring); 1280 1281 /* 1282 * For historical reasons, if there's no disklabel 1283 * present, all partitions must be FS_BSDFFS and 1284 * occupy the entire disk. 1285 */ 1286 for (i = 0; i < MAXPARTITIONS; i++) { 1287 /* 1288 * Don't wipe out port specific hack (such as 1289 * dos partition hack of i386 port). 1290 */ 1291 if (lp->d_partitions[i].p_fstype != FS_UNUSED) 1292 continue; 1293 1294 lp->d_partitions[i].p_size = lp->d_secperunit; 1295 lp->d_partitions[i].p_offset = 0; 1296 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1297 } 1298 1299 strncpy(lp->d_packname, "default label", 1300 sizeof(lp->d_packname)); 1301 1302 lp->d_checksum = dkcksum(lp); 1303 } 1304} 1305 1306/* 1307 * Wait interruptibly for an exclusive lock. 1308 * 1309 * XXX 1310 * Several drivers do this; it should be abstracted and made MP-safe. 1311 */ 1312static int 1313vndlock(sc) 1314 struct vnd_softc *sc; 1315{ 1316 int error; 1317 1318 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1319 sc->sc_flags |= VNF_WANTED; 1320 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1321 return (error); 1322 } 1323 sc->sc_flags |= VNF_LOCKED; 1324 return (0); 1325} 1326 1327/* 1328 * Unlock and wake up any waiters. 1329 */ 1330static void 1331vndunlock(sc) 1332 struct vnd_softc *sc; 1333{ 1334 1335 sc->sc_flags &= ~VNF_LOCKED; 1336 if ((sc->sc_flags & VNF_WANTED) != 0) { 1337 sc->sc_flags &= ~VNF_WANTED; 1338 wakeup(sc); 1339 } 1340} 1341