vnd.c revision 1.86
1/* $NetBSD: vnd.c,v 1.86 2002/09/06 13:18:43 gehenna Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1988 University of Utah. 41 * Copyright (c) 1990, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * This code is derived from software contributed to Berkeley by 45 * the Systems Programming Group of the University of Utah Computer 46 * Science Department. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Berkeley and its contributors. 60 * 4. Neither the name of the University nor the names of its contributors 61 * may be used to endorse or promote products derived from this software 62 * without specific prior written permission. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 * 76 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 77 * 78 * @(#)vn.c 8.9 (Berkeley) 5/14/95 79 */ 80 81/* 82 * Vnode disk driver. 83 * 84 * Block/character interface to a vnode. Allows one to treat a file 85 * as a disk (e.g. build a filesystem in it, mount it, etc.). 86 * 87 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 88 * instead of a simple VOP_RDWR. We do this to avoid distorting the 89 * local buffer cache. 90 * 91 * NOTE 2: There is a security issue involved with this driver. 92 * Once mounted all access to the contents of the "mapped" file via 93 * the special file is controlled by the permissions on the special 94 * file, the protection of the mapped file is ignored (effectively, 95 * by using root credentials in all transactions). 96 * 97 * NOTE 3: Doesn't interact with leases, should it? 98 */ 99 100#include <sys/cdefs.h> 101__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.86 2002/09/06 13:18:43 gehenna Exp $"); 102 103#if defined(_KERNEL_OPT) 104#include "fs_nfs.h" 105#endif 106 107#include <sys/param.h> 108#include <sys/systm.h> 109#include <sys/namei.h> 110#include <sys/proc.h> 111#include <sys/errno.h> 112#include <sys/buf.h> 113#include <sys/malloc.h> 114#include <sys/ioctl.h> 115#include <sys/disklabel.h> 116#include <sys/device.h> 117#include <sys/disk.h> 118#include <sys/stat.h> 119#include <sys/mount.h> 120#include <sys/vnode.h> 121#include <sys/file.h> 122#include <sys/uio.h> 123#include <sys/conf.h> 124 125#include <miscfs/specfs/specdev.h> 126 127#include <dev/vndvar.h> 128 129#if defined(VNDDEBUG) && !defined(DEBUG) 130#define DEBUG 131#endif 132 133#ifdef DEBUG 134int dovndcluster = 1; 135#define VDB_FOLLOW 0x01 136#define VDB_INIT 0x02 137#define VDB_IO 0x04 138#define VDB_LABEL 0x08 139int vnddebug = 0x00; 140#endif 141 142#define vndunit(x) DISKUNIT(x) 143 144struct vndxfer { 145 struct buf *vx_bp; /* Pointer to parent buffer */ 146 int vx_error; 147 int vx_pending; /* # of pending aux buffers */ 148 int vx_flags; 149#define VX_BUSY 1 150}; 151 152struct vndbuf { 153 struct buf vb_buf; 154 struct vndxfer *vb_xfer; 155}; 156 157#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_NOWAIT) 158#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 159 160#define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_NOWAIT) 161#define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 162 163struct vnd_softc *vnd_softc; 164int numvnd = 0; 165 166#define VNDLABELDEV(dev) \ 167 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 168 169/* called by main() at boot time */ 170void vndattach __P((int)); 171void vnddetach __P((void)); 172 173void vndclear __P((struct vnd_softc *)); 174void vndstart __P((struct vnd_softc *)); 175int vndsetcred __P((struct vnd_softc *, struct ucred *)); 176void vndthrottle __P((struct vnd_softc *, struct vnode *)); 177void vndiodone __P((struct buf *)); 178void vndshutdown __P((void)); 179 180void vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *)); 181void vndgetdisklabel __P((dev_t)); 182 183static int vndlock __P((struct vnd_softc *)); 184static void vndunlock __P((struct vnd_softc *)); 185 186dev_type_open(vndopen); 187dev_type_close(vndclose); 188dev_type_read(vndread); 189dev_type_write(vndwrite); 190dev_type_ioctl(vndioctl); 191dev_type_strategy(vndstrategy); 192dev_type_dump(vnddump); 193dev_type_size(vndsize); 194 195const struct bdevsw vnd_bdevsw = { 196 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 197}; 198 199const struct cdevsw vnd_cdevsw = { 200 vndopen, vndclose, vndread, vndwrite, vndioctl, 201 nostop, notty, nopoll, nommap, D_DISK 202}; 203 204void 205vndattach(num) 206 int num; 207{ 208 int i; 209 char *mem; 210 211 if (num <= 0) 212 return; 213 i = num * sizeof(struct vnd_softc); 214 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 215 if (mem == NULL) { 216 printf("WARNING: no memory for vnode disks\n"); 217 return; 218 } 219 vnd_softc = (struct vnd_softc *)mem; 220 numvnd = num; 221 222 for (i = 0; i < numvnd; i++) 223 bufq_alloc(&vnd_softc[i].sc_tab, 224 BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK); 225} 226 227void 228vnddetach() 229{ 230 int i; 231 232 for (i = 0; i < numvnd; i++) 233 bufq_free(&vnd_softc[i].sc_tab); 234 235 free(vnd_softc, M_DEVBUF); 236} 237 238int 239vndopen(dev, flags, mode, p) 240 dev_t dev; 241 int flags, mode; 242 struct proc *p; 243{ 244 int unit = vndunit(dev); 245 struct vnd_softc *sc; 246 int error = 0, part, pmask; 247 struct disklabel *lp; 248 249#ifdef DEBUG 250 if (vnddebug & VDB_FOLLOW) 251 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 252#endif 253 if (unit >= numvnd) 254 return (ENXIO); 255 sc = &vnd_softc[unit]; 256 257 if ((error = vndlock(sc)) != 0) 258 return (error); 259 260 lp = sc->sc_dkdev.dk_label; 261 262 part = DISKPART(dev); 263 pmask = (1 << part); 264 265 /* 266 * If we're initialized, check to see if there are any other 267 * open partitions. If not, then it's safe to update the 268 * in-core disklabel. 269 */ 270 if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0)) 271 vndgetdisklabel(dev); 272 273 /* Check that the partitions exists. */ 274 if (part != RAW_PART) { 275 if (((sc->sc_flags & VNF_INITED) == 0) || 276 ((part >= lp->d_npartitions) || 277 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 278 error = ENXIO; 279 goto done; 280 } 281 } 282 283 /* Prevent our unit from being unconfigured while open. */ 284 switch (mode) { 285 case S_IFCHR: 286 sc->sc_dkdev.dk_copenmask |= pmask; 287 break; 288 289 case S_IFBLK: 290 sc->sc_dkdev.dk_bopenmask |= pmask; 291 break; 292 } 293 sc->sc_dkdev.dk_openmask = 294 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 295 296 done: 297 vndunlock(sc); 298 return (error); 299} 300 301int 302vndclose(dev, flags, mode, p) 303 dev_t dev; 304 int flags, mode; 305 struct proc *p; 306{ 307 int unit = vndunit(dev); 308 struct vnd_softc *sc; 309 int error = 0, part; 310 311#ifdef DEBUG 312 if (vnddebug & VDB_FOLLOW) 313 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 314#endif 315 316 if (unit >= numvnd) 317 return (ENXIO); 318 sc = &vnd_softc[unit]; 319 320 if ((error = vndlock(sc)) != 0) 321 return (error); 322 323 part = DISKPART(dev); 324 325 /* ...that much closer to allowing unconfiguration... */ 326 switch (mode) { 327 case S_IFCHR: 328 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 329 break; 330 331 case S_IFBLK: 332 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 333 break; 334 } 335 sc->sc_dkdev.dk_openmask = 336 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 337 338 vndunlock(sc); 339 return (0); 340} 341 342/* 343 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 344 */ 345void 346vndstrategy(bp) 347 struct buf *bp; 348{ 349 int unit = vndunit(bp->b_dev); 350 struct vnd_softc *vnd = &vnd_softc[unit]; 351 struct vndxfer *vnx; 352 int s, bsize, resid; 353 off_t bn; 354 caddr_t addr; 355 int sz, flags, error, wlabel; 356 struct disklabel *lp; 357 struct partition *pp; 358 359#ifdef DEBUG 360 if (vnddebug & VDB_FOLLOW) 361 printf("vndstrategy(%p): unit %d\n", bp, unit); 362#endif 363 if ((vnd->sc_flags & VNF_INITED) == 0) { 364 bp->b_error = ENXIO; 365 bp->b_flags |= B_ERROR; 366 goto done; 367 } 368 369 /* If it's a nil transfer, wake up the top half now. */ 370 if (bp->b_bcount == 0) 371 goto done; 372 373 lp = vnd->sc_dkdev.dk_label; 374 375 /* 376 * The transfer must be a whole number of blocks. 377 */ 378 if ((bp->b_bcount % lp->d_secsize) != 0) { 379 bp->b_error = EINVAL; 380 bp->b_flags |= B_ERROR; 381 goto done; 382 } 383 384 /* 385 * Do bounds checking and adjust transfer. If there's an error, 386 * the bounds check will flag that for us. 387 */ 388 wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING); 389 if (DISKPART(bp->b_dev) != RAW_PART) 390 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 391 goto done; 392 393 bp->b_resid = bp->b_bcount; 394 395 /* 396 * Put the block number in terms of the logical blocksize 397 * of the "device". 398 */ 399 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 400 401 /* 402 * Translate the partition-relative block number to an absolute. 403 */ 404 if (DISKPART(bp->b_dev) != RAW_PART) { 405 pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 406 bn += pp->p_offset; 407 } 408 409 /* ...and convert to a byte offset within the file. */ 410 bn *= lp->d_secsize; 411 412 if (vnd->sc_vp->v_mount == NULL) { 413 bp->b_error = ENXIO; 414 bp->b_flags |= B_ERROR; 415 goto done; 416 } 417 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 418 addr = bp->b_data; 419 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 420 421 /* Allocate a header for this transfer and link it to the buffer */ 422 s = splbio(); 423 vnx = VND_GETXFER(vnd); 424 splx(s); 425 vnx->vx_flags = VX_BUSY; 426 vnx->vx_error = 0; 427 vnx->vx_pending = 0; 428 vnx->vx_bp = bp; 429 430 for (resid = bp->b_resid; resid; resid -= sz) { 431 struct vndbuf *nbp; 432 struct vnode *vp; 433 daddr_t nbn; 434 int off, nra; 435 436 nra = 0; 437 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 438 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 439 VOP_UNLOCK(vnd->sc_vp, 0); 440 441 if (error == 0 && (long)nbn == -1) 442 error = EIO; 443 444 /* 445 * If there was an error or a hole in the file...punt. 446 * Note that we may have to wait for any operations 447 * that we have already fired off before releasing 448 * the buffer. 449 * 450 * XXX we could deal with holes here but it would be 451 * a hassle (in the write case). 452 */ 453 if (error) { 454 s = splbio(); 455 vnx->vx_error = error; 456 goto out; 457 } 458 459#ifdef DEBUG 460 if (!dovndcluster) 461 nra = 0; 462#endif 463 464 if ((off = bn % bsize) != 0) 465 sz = bsize - off; 466 else 467 sz = (1 + nra) * bsize; 468 if (resid < sz) 469 sz = resid; 470#ifdef DEBUG 471 if (vnddebug & VDB_IO) 472 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n", 473 vnd->sc_vp, vp, (long long)bn, nbn, sz); 474#endif 475 476 s = splbio(); 477 nbp = VND_GETBUF(vnd); 478 splx(s); 479 nbp->vb_buf.b_flags = flags; 480 nbp->vb_buf.b_bcount = sz; 481 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 482 - trunc_page((ulong) addr); 483 nbp->vb_buf.b_error = 0; 484 nbp->vb_buf.b_data = addr; 485 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 486 nbp->vb_buf.b_proc = bp->b_proc; 487 nbp->vb_buf.b_iodone = vndiodone; 488 nbp->vb_buf.b_vp = NULLVP; 489 LIST_INIT(&nbp->vb_buf.b_dep); 490 491 nbp->vb_xfer = vnx; 492 493 /* 494 * Just sort by block number 495 */ 496 s = splbio(); 497 if (vnx->vx_error != 0) { 498 VND_PUTBUF(vnd, nbp); 499 goto out; 500 } 501 vnx->vx_pending++; 502 bgetvp(vp, &nbp->vb_buf); 503 BUFQ_PUT(&vnd->sc_tab, &nbp->vb_buf); 504 vndstart(vnd); 505 splx(s); 506 bn += sz; 507 addr += sz; 508 } 509 510 s = splbio(); 511 512out: /* Arrive here at splbio */ 513 vnx->vx_flags &= ~VX_BUSY; 514 if (vnx->vx_pending == 0) { 515 if (vnx->vx_error != 0) { 516 bp->b_error = vnx->vx_error; 517 bp->b_flags |= B_ERROR; 518 } 519 VND_PUTXFER(vnd, vnx); 520 biodone(bp); 521 } 522 splx(s); 523 return; 524 525 done: 526 biodone(bp); 527} 528 529/* 530 * Feed requests sequentially. 531 * We do it this way to keep from flooding NFS servers if we are connected 532 * to an NFS file. This places the burden on the client rather than the 533 * server. 534 */ 535void 536vndstart(vnd) 537 struct vnd_softc *vnd; 538{ 539 struct buf *bp; 540 541 /* 542 * Dequeue now since lower level strategy routine might 543 * queue using same links 544 */ 545 546 if ((vnd->sc_flags & VNF_BUSY) != 0) 547 return; 548 549 vnd->sc_flags |= VNF_BUSY; 550 551 while (vnd->sc_active < vnd->sc_maxactive) { 552 bp = BUFQ_GET(&vnd->sc_tab); 553 if (bp == NULL) 554 break; 555 vnd->sc_active++; 556#ifdef DEBUG 557 if (vnddebug & VDB_IO) 558 printf("vndstart(%ld): bp %p vp %p blkno 0x%x" 559 " flags %lx addr %p cnt 0x%lx\n", 560 (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno, 561 bp->b_flags, bp->b_data, bp->b_bcount); 562#endif 563 564 /* Instrumentation. */ 565 disk_busy(&vnd->sc_dkdev); 566 567 if ((bp->b_flags & B_READ) == 0) 568 bp->b_vp->v_numoutput++; 569 VOP_STRATEGY(bp); 570 } 571 vnd->sc_flags &= ~VNF_BUSY; 572} 573 574void 575vndiodone(bp) 576 struct buf *bp; 577{ 578 struct vndbuf *vbp = (struct vndbuf *) bp; 579 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 580 struct buf *pbp = vnx->vx_bp; 581 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 582 int s, resid; 583 584 s = splbio(); 585#ifdef DEBUG 586 if (vnddebug & VDB_IO) 587 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n", 588 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 589 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 590 vbp->vb_buf.b_bcount); 591#endif 592 593 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 594 pbp->b_resid -= resid; 595 disk_unbusy(&vnd->sc_dkdev, resid); 596 vnx->vx_pending--; 597 598 if (vbp->vb_buf.b_error) { 599#ifdef DEBUG 600 if (vnddebug & VDB_IO) 601 printf("vndiodone: vbp %p error %d\n", vbp, 602 vbp->vb_buf.b_error); 603#endif 604 vnx->vx_error = vbp->vb_buf.b_error; 605 } 606 607 if (vbp->vb_buf.b_vp != NULLVP) 608 brelvp(&vbp->vb_buf); 609 610 VND_PUTBUF(vnd, vbp); 611 612 /* 613 * Wrap up this transaction if it has run to completion or, in 614 * case of an error, when all auxiliary buffers have returned. 615 */ 616 if (vnx->vx_error != 0) { 617 pbp->b_flags |= B_ERROR; 618 pbp->b_error = vnx->vx_error; 619 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 620 621#ifdef DEBUG 622 if (vnddebug & VDB_IO) 623 printf("vndiodone: pbp %p iodone: error %d\n", 624 pbp, vnx->vx_error); 625#endif 626 VND_PUTXFER(vnd, vnx); 627 biodone(pbp); 628 } 629 } else if (pbp->b_resid == 0) { 630 631#ifdef DIAGNOSTIC 632 if (vnx->vx_pending != 0) 633 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 634#endif 635 636 if ((vnx->vx_flags & VX_BUSY) == 0) { 637#ifdef DEBUG 638 if (vnddebug & VDB_IO) 639 printf("vndiodone: pbp %p iodone\n", pbp); 640#endif 641 VND_PUTXFER(vnd, vnx); 642 biodone(pbp); 643 } 644 } 645 646 vnd->sc_active--; 647 vndstart(vnd); 648 splx(s); 649} 650 651/* ARGSUSED */ 652int 653vndread(dev, uio, flags) 654 dev_t dev; 655 struct uio *uio; 656 int flags; 657{ 658 int unit = vndunit(dev); 659 struct vnd_softc *sc; 660 661#ifdef DEBUG 662 if (vnddebug & VDB_FOLLOW) 663 printf("vndread(0x%x, %p)\n", dev, uio); 664#endif 665 666 if (unit >= numvnd) 667 return (ENXIO); 668 sc = &vnd_softc[unit]; 669 670 if ((sc->sc_flags & VNF_INITED) == 0) 671 return (ENXIO); 672 673 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 674} 675 676/* ARGSUSED */ 677int 678vndwrite(dev, uio, flags) 679 dev_t dev; 680 struct uio *uio; 681 int flags; 682{ 683 int unit = vndunit(dev); 684 struct vnd_softc *sc; 685 686#ifdef DEBUG 687 if (vnddebug & VDB_FOLLOW) 688 printf("vndwrite(0x%x, %p)\n", dev, uio); 689#endif 690 691 if (unit >= numvnd) 692 return (ENXIO); 693 sc = &vnd_softc[unit]; 694 695 if ((sc->sc_flags & VNF_INITED) == 0) 696 return (ENXIO); 697 698 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 699} 700 701/* ARGSUSED */ 702int 703vndioctl(dev, cmd, data, flag, p) 704 dev_t dev; 705 u_long cmd; 706 caddr_t data; 707 int flag; 708 struct proc *p; 709{ 710 int unit = vndunit(dev); 711 struct vnd_softc *vnd; 712 struct vnd_ioctl *vio; 713 struct vattr vattr; 714 struct nameidata nd; 715 int error, part, pmask; 716 size_t geomsize; 717#ifdef __HAVE_OLD_DISKLABEL 718 struct disklabel newlabel; 719#endif 720 721#ifdef DEBUG 722 if (vnddebug & VDB_FOLLOW) 723 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 724 dev, cmd, data, flag, p, unit); 725#endif 726 error = suser(p->p_ucred, &p->p_acflag); 727 if (error) 728 return (error); 729 if (unit >= numvnd) 730 return (ENXIO); 731 732 vnd = &vnd_softc[unit]; 733 vio = (struct vnd_ioctl *)data; 734 735 /* Must be open for writes for these commands... */ 736 switch (cmd) { 737 case VNDIOCSET: 738 case VNDIOCCLR: 739 case DIOCSDINFO: 740 case DIOCWDINFO: 741#ifdef __HAVE_OLD_DISKLABEL 742 case ODIOCSDINFO: 743 case ODIOCWDINFO: 744#endif 745 case DIOCWLABEL: 746 if ((flag & FWRITE) == 0) 747 return (EBADF); 748 } 749 750 /* Must be initialized for these... */ 751 switch (cmd) { 752 case VNDIOCCLR: 753 case DIOCGDINFO: 754 case DIOCSDINFO: 755 case DIOCWDINFO: 756 case DIOCGPART: 757 case DIOCWLABEL: 758 case DIOCGDEFLABEL: 759#ifdef __HAVE_OLD_DISKLABEL 760 case ODIOCGDINFO: 761 case ODIOCSDINFO: 762 case ODIOCWDINFO: 763 case ODIOCGDEFLABEL: 764#endif 765 if ((vnd->sc_flags & VNF_INITED) == 0) 766 return (ENXIO); 767 } 768 769 switch (cmd) { 770 case VNDIOCSET: 771 if (vnd->sc_flags & VNF_INITED) 772 return (EBUSY); 773 774 if ((error = vndlock(vnd)) != 0) 775 return (error); 776 777 /* 778 * Always open for read and write. 779 * This is probably bogus, but it lets vn_open() 780 * weed out directories, sockets, etc. so we don't 781 * have to worry about them. 782 */ 783 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 784 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 785 goto unlock_and_exit; 786 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 787 VOP_UNLOCK(nd.ni_vp, 0); 788 if (error) 789 goto close_and_exit; 790 vnd->sc_vp = nd.ni_vp; 791 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 792 793 /* 794 * Use pseudo-geometry specified. If none was provided, 795 * use "standard" Adaptec fictitious geometry. 796 */ 797 if (vio->vnd_flags & VNDIOF_HASGEOM) { 798 799 memcpy(&vnd->sc_geom, &vio->vnd_geom, 800 sizeof(vio->vnd_geom)); 801 802 /* 803 * Sanity-check the sector size. 804 * XXX Don't allow secsize < DEV_BSIZE. Should 805 * XXX we? 806 */ 807 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 808 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) { 809 error = EINVAL; 810 goto close_and_exit; 811 } 812 813 /* 814 * Compute the size (in DEV_BSIZE blocks) specified 815 * by the geometry. 816 */ 817 geomsize = (vnd->sc_geom.vng_nsectors * 818 vnd->sc_geom.vng_ntracks * 819 vnd->sc_geom.vng_ncylinders) * 820 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 821 822 /* 823 * Sanity-check the size against the specified 824 * geometry. 825 */ 826 if (vnd->sc_size < geomsize) { 827 error = EINVAL; 828 goto close_and_exit; 829 } 830 } else { 831 /* 832 * Size must be at least 2048 DEV_BSIZE blocks 833 * (1M) in order to use this geometry. 834 */ 835 if (vnd->sc_size < (32 * 64)) { 836 error = EINVAL; 837 goto close_and_exit; 838 } 839 840 vnd->sc_geom.vng_secsize = DEV_BSIZE; 841 vnd->sc_geom.vng_nsectors = 32; 842 vnd->sc_geom.vng_ntracks = 64; 843 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 844 } 845 846 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 847 goto close_and_exit; 848 vndthrottle(vnd, vnd->sc_vp); 849 vio->vnd_size = dbtob(vnd->sc_size); 850 vnd->sc_flags |= VNF_INITED; 851#ifdef DEBUG 852 if (vnddebug & VDB_INIT) 853 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 854 vnd->sc_vp, (unsigned long) vnd->sc_size, 855 vnd->sc_geom.vng_secsize, 856 vnd->sc_geom.vng_nsectors, 857 vnd->sc_geom.vng_ntracks, 858 vnd->sc_geom.vng_ncylinders); 859#endif 860 861 /* Attach the disk. */ 862 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 863 sprintf(vnd->sc_xname, "vnd%d", unit); /* XXX */ 864 vnd->sc_dkdev.dk_name = vnd->sc_xname; 865 disk_attach(&vnd->sc_dkdev); 866 867 /* Initialize the xfer and buffer pools. */ 868 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 869 0, 0, "vndxpl", NULL); 870 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 871 0, 0, "vndbpl", NULL); 872 873 /* Try and read the disklabel. */ 874 vndgetdisklabel(dev); 875 876 vndunlock(vnd); 877 878 break; 879 880close_and_exit: 881 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 882unlock_and_exit: 883 vndunlock(vnd); 884 return (error); 885 886 case VNDIOCCLR: 887 if ((error = vndlock(vnd)) != 0) 888 return (error); 889 890 /* 891 * Don't unconfigure if any other partitions are open 892 * or if both the character and block flavors of this 893 * partition are open. 894 */ 895 part = DISKPART(dev); 896 pmask = (1 << part); 897 if ((vnd->sc_dkdev.dk_openmask & ~pmask) || 898 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 899 (vnd->sc_dkdev.dk_copenmask & pmask))) { 900 vndunlock(vnd); 901 return (EBUSY); 902 } 903 904 vndclear(vnd); 905#ifdef DEBUG 906 if (vnddebug & VDB_INIT) 907 printf("vndioctl: CLRed\n"); 908#endif 909 910 /* Destroy the xfer and buffer pools. */ 911 pool_destroy(&vnd->sc_vxpool); 912 pool_destroy(&vnd->sc_vbpool); 913 914 /* Detatch the disk. */ 915 disk_detach(&vnd->sc_dkdev); 916 917 vndunlock(vnd); 918 919 break; 920 921 case VNDIOCGET: { 922 struct vnd_user *vnu; 923 struct vattr va; 924 925 vnu = (struct vnd_user *)data; 926 927 if (vnu->vnu_unit == -1) 928 vnu->vnu_unit = unit; 929 if (vnu->vnu_unit >= numvnd) 930 return (ENXIO); 931 if (vnu->vnu_unit < 0) 932 return (EINVAL); 933 934 vnd = &vnd_softc[vnu->vnu_unit]; 935 936 if (vnd->sc_flags & VNF_INITED) { 937 error = VOP_GETATTR(vnd->sc_vp, &va, p->p_ucred, p); 938 if (error) 939 return (error); 940 vnu->vnu_dev = va.va_fsid; 941 vnu->vnu_ino = va.va_fileid; 942 } 943 else { 944 /* unused is not an error */ 945 vnu->vnu_dev = 0; 946 vnu->vnu_ino = 0; 947 } 948 949 break; 950 } 951 952 case DIOCGDINFO: 953 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 954 break; 955 956#ifdef __HAVE_OLD_DISKLABEL 957 case ODIOCGDINFO: 958 newlabel = *(vnd->sc_dkdev.dk_label); 959 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 960 return ENOTTY; 961 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 962 break; 963#endif 964 965 case DIOCGPART: 966 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 967 ((struct partinfo *)data)->part = 968 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 969 break; 970 971 case DIOCWDINFO: 972 case DIOCSDINFO: 973#ifdef __HAVE_OLD_DISKLABEL 974 case ODIOCWDINFO: 975 case ODIOCSDINFO: 976#endif 977 { 978 struct disklabel *lp; 979 980 if ((error = vndlock(vnd)) != 0) 981 return (error); 982 983 vnd->sc_flags |= VNF_LABELLING; 984 985#ifdef __HAVE_OLD_DISKLABEL 986 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 987 memset(&newlabel, 0, sizeof newlabel); 988 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 989 lp = &newlabel; 990 } else 991#endif 992 lp = (struct disklabel *)data; 993 994 error = setdisklabel(vnd->sc_dkdev.dk_label, 995 lp, 0, vnd->sc_dkdev.dk_cpulabel); 996 if (error == 0) { 997 if (cmd == DIOCWDINFO 998#ifdef __HAVE_OLD_DISKLABEL 999 || cmd == ODIOCWDINFO 1000#endif 1001 ) 1002 error = writedisklabel(VNDLABELDEV(dev), 1003 vndstrategy, vnd->sc_dkdev.dk_label, 1004 vnd->sc_dkdev.dk_cpulabel); 1005 } 1006 1007 vnd->sc_flags &= ~VNF_LABELLING; 1008 1009 vndunlock(vnd); 1010 1011 if (error) 1012 return (error); 1013 break; 1014 } 1015 1016 case DIOCWLABEL: 1017 if (*(int *)data != 0) 1018 vnd->sc_flags |= VNF_WLABEL; 1019 else 1020 vnd->sc_flags &= ~VNF_WLABEL; 1021 break; 1022 1023 case DIOCGDEFLABEL: 1024 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1025 break; 1026 1027#ifdef __HAVE_OLD_DISKLABEL 1028 case ODIOCGDEFLABEL: 1029 vndgetdefaultlabel(vnd, &newlabel); 1030 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1031 return ENOTTY; 1032 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1033 break; 1034#endif 1035 1036 default: 1037 return (ENOTTY); 1038 } 1039 1040 return (0); 1041} 1042 1043/* 1044 * Duplicate the current processes' credentials. Since we are called only 1045 * as the result of a SET ioctl and only root can do that, any future access 1046 * to this "disk" is essentially as root. Note that credentials may change 1047 * if some other uid can write directly to the mapped file (NFS). 1048 */ 1049int 1050vndsetcred(vnd, cred) 1051 struct vnd_softc *vnd; 1052 struct ucred *cred; 1053{ 1054 struct uio auio; 1055 struct iovec aiov; 1056 char *tmpbuf; 1057 int error; 1058 1059 vnd->sc_cred = crdup(cred); 1060 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1061 1062 /* XXX: Horrible kludge to establish credentials for NFS */ 1063 aiov.iov_base = tmpbuf; 1064 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1065 auio.uio_iov = &aiov; 1066 auio.uio_iovcnt = 1; 1067 auio.uio_offset = 0; 1068 auio.uio_rw = UIO_READ; 1069 auio.uio_segflg = UIO_SYSSPACE; 1070 auio.uio_resid = aiov.iov_len; 1071 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1072 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1073 if (error == 0) { 1074 /* 1075 * Because vnd does all IO directly through the vnode 1076 * we need to flush (at least) the buffer from the above 1077 * VOP_READ from the buffer cache to prevent cache 1078 * incoherencies. Also, be careful to write dirty 1079 * buffers back to stable storage. 1080 */ 1081 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1082 curproc, 0, 0); 1083 } 1084 VOP_UNLOCK(vnd->sc_vp, 0); 1085 1086 free(tmpbuf, M_TEMP); 1087 return (error); 1088} 1089 1090/* 1091 * Set maxactive based on FS type 1092 */ 1093void 1094vndthrottle(vnd, vp) 1095 struct vnd_softc *vnd; 1096 struct vnode *vp; 1097{ 1098#ifdef NFS 1099 extern int (**nfsv2_vnodeop_p) __P((void *)); 1100 1101 if (vp->v_op == nfsv2_vnodeop_p) 1102 vnd->sc_maxactive = 2; 1103 else 1104#endif 1105 vnd->sc_maxactive = 8; 1106 1107 if (vnd->sc_maxactive < 1) 1108 vnd->sc_maxactive = 1; 1109} 1110 1111void 1112vndshutdown() 1113{ 1114 struct vnd_softc *vnd; 1115 1116 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1117 if (vnd->sc_flags & VNF_INITED) 1118 vndclear(vnd); 1119} 1120 1121void 1122vndclear(vnd) 1123 struct vnd_softc *vnd; 1124{ 1125 struct vnode *vp = vnd->sc_vp; 1126 struct proc *p = curproc; /* XXX */ 1127 1128#ifdef DEBUG 1129 if (vnddebug & VDB_FOLLOW) 1130 printf("vndclear(%p): vp %p\n", vnd, vp); 1131#endif 1132 vnd->sc_flags &= ~VNF_INITED; 1133 if (vp == (struct vnode *)0) 1134 panic("vndioctl: null vp"); 1135 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1136 crfree(vnd->sc_cred); 1137 vnd->sc_vp = (struct vnode *)0; 1138 vnd->sc_cred = (struct ucred *)0; 1139 vnd->sc_size = 0; 1140} 1141 1142int 1143vndsize(dev) 1144 dev_t dev; 1145{ 1146 struct vnd_softc *sc; 1147 struct disklabel *lp; 1148 int part, unit, omask; 1149 int size; 1150 1151 unit = vndunit(dev); 1152 if (unit >= numvnd) 1153 return (-1); 1154 sc = &vnd_softc[unit]; 1155 1156 if ((sc->sc_flags & VNF_INITED) == 0) 1157 return (-1); 1158 1159 part = DISKPART(dev); 1160 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1161 lp = sc->sc_dkdev.dk_label; 1162 1163 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc)) 1164 return (-1); 1165 1166 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1167 size = -1; 1168 else 1169 size = lp->d_partitions[part].p_size * 1170 (lp->d_secsize / DEV_BSIZE); 1171 1172 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc)) 1173 return (-1); 1174 1175 return (size); 1176} 1177 1178int 1179vnddump(dev, blkno, va, size) 1180 dev_t dev; 1181 daddr_t blkno; 1182 caddr_t va; 1183 size_t size; 1184{ 1185 1186 /* Not implemented. */ 1187 return ENXIO; 1188} 1189 1190void 1191vndgetdefaultlabel(sc, lp) 1192 struct vnd_softc *sc; 1193 struct disklabel *lp; 1194{ 1195 struct vndgeom *vng = &sc->sc_geom; 1196 struct partition *pp; 1197 1198 memset(lp, 0, sizeof(*lp)); 1199 1200 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1201 lp->d_secsize = vng->vng_secsize; 1202 lp->d_nsectors = vng->vng_nsectors; 1203 lp->d_ntracks = vng->vng_ntracks; 1204 lp->d_ncylinders = vng->vng_ncylinders; 1205 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1206 1207 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1208 lp->d_type = DTYPE_VND; 1209 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1210 lp->d_rpm = 3600; 1211 lp->d_interleave = 1; 1212 lp->d_flags = 0; 1213 1214 pp = &lp->d_partitions[RAW_PART]; 1215 pp->p_offset = 0; 1216 pp->p_size = lp->d_secperunit; 1217 pp->p_fstype = FS_UNUSED; 1218 lp->d_npartitions = RAW_PART + 1; 1219 1220 lp->d_magic = DISKMAGIC; 1221 lp->d_magic2 = DISKMAGIC; 1222 lp->d_checksum = dkcksum(lp); 1223} 1224 1225/* 1226 * Read the disklabel from a vnd. If one is not present, create a fake one. 1227 */ 1228void 1229vndgetdisklabel(dev) 1230 dev_t dev; 1231{ 1232 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1233 char *errstring; 1234 struct disklabel *lp = sc->sc_dkdev.dk_label; 1235 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1236 int i; 1237 1238 memset(clp, 0, sizeof(*clp)); 1239 1240 vndgetdefaultlabel(sc, lp); 1241 1242 /* 1243 * Call the generic disklabel extraction routine. 1244 */ 1245 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1246 if (errstring) { 1247 /* 1248 * Lack of disklabel is common, but we print the warning 1249 * anyway, since it might contain other useful information. 1250 */ 1251 printf("%s: %s\n", sc->sc_xname, errstring); 1252 1253 /* 1254 * For historical reasons, if there's no disklabel 1255 * present, all partitions must be FS_BSDFFS and 1256 * occupy the entire disk. 1257 */ 1258 for (i = 0; i < MAXPARTITIONS; i++) { 1259 /* 1260 * Don't wipe out port specific hack (such as 1261 * dos partition hack of i386 port). 1262 */ 1263 if (lp->d_partitions[i].p_fstype != FS_UNUSED) 1264 continue; 1265 1266 lp->d_partitions[i].p_size = lp->d_secperunit; 1267 lp->d_partitions[i].p_offset = 0; 1268 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1269 } 1270 1271 strncpy(lp->d_packname, "default label", 1272 sizeof(lp->d_packname)); 1273 1274 lp->d_checksum = dkcksum(lp); 1275 } 1276} 1277 1278/* 1279 * Wait interruptibly for an exclusive lock. 1280 * 1281 * XXX 1282 * Several drivers do this; it should be abstracted and made MP-safe. 1283 */ 1284static int 1285vndlock(sc) 1286 struct vnd_softc *sc; 1287{ 1288 int error; 1289 1290 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1291 sc->sc_flags |= VNF_WANTED; 1292 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1293 return (error); 1294 } 1295 sc->sc_flags |= VNF_LOCKED; 1296 return (0); 1297} 1298 1299/* 1300 * Unlock and wake up any waiters. 1301 */ 1302static void 1303vndunlock(sc) 1304 struct vnd_softc *sc; 1305{ 1306 1307 sc->sc_flags &= ~VNF_LOCKED; 1308 if ((sc->sc_flags & VNF_WANTED) != 0) { 1309 sc->sc_flags &= ~VNF_WANTED; 1310 wakeup(sc); 1311 } 1312} 1313