vnd.c revision 1.83
1/* $NetBSD: vnd.c,v 1.83 2002/07/26 06:16:32 enami Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1988 University of Utah. 41 * Copyright (c) 1990, 1993 42 * The Regents of the University of California. All rights reserved. 43 * 44 * This code is derived from software contributed to Berkeley by 45 * the Systems Programming Group of the University of Utah Computer 46 * Science Department. 47 * 48 * Redistribution and use in source and binary forms, with or without 49 * modification, are permitted provided that the following conditions 50 * are met: 51 * 1. Redistributions of source code must retain the above copyright 52 * notice, this list of conditions and the following disclaimer. 53 * 2. Redistributions in binary form must reproduce the above copyright 54 * notice, this list of conditions and the following disclaimer in the 55 * documentation and/or other materials provided with the distribution. 56 * 3. All advertising materials mentioning features or use of this software 57 * must display the following acknowledgement: 58 * This product includes software developed by the University of 59 * California, Berkeley and its contributors. 60 * 4. Neither the name of the University nor the names of its contributors 61 * may be used to endorse or promote products derived from this software 62 * without specific prior written permission. 63 * 64 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 65 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 66 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 67 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 68 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 69 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 70 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 71 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 72 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 73 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 74 * SUCH DAMAGE. 75 * 76 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 77 * 78 * @(#)vn.c 8.9 (Berkeley) 5/14/95 79 */ 80 81/* 82 * Vnode disk driver. 83 * 84 * Block/character interface to a vnode. Allows one to treat a file 85 * as a disk (e.g. build a filesystem in it, mount it, etc.). 86 * 87 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 88 * instead of a simple VOP_RDWR. We do this to avoid distorting the 89 * local buffer cache. 90 * 91 * NOTE 2: There is a security issue involved with this driver. 92 * Once mounted all access to the contents of the "mapped" file via 93 * the special file is controlled by the permissions on the special 94 * file, the protection of the mapped file is ignored (effectively, 95 * by using root credentials in all transactions). 96 * 97 * NOTE 3: Doesn't interact with leases, should it? 98 */ 99 100#include <sys/cdefs.h> 101__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.83 2002/07/26 06:16:32 enami Exp $"); 102 103#if defined(_KERNEL_OPT) 104#include "fs_nfs.h" 105#endif 106 107#include <sys/param.h> 108#include <sys/systm.h> 109#include <sys/namei.h> 110#include <sys/proc.h> 111#include <sys/errno.h> 112#include <sys/buf.h> 113#include <sys/malloc.h> 114#include <sys/ioctl.h> 115#include <sys/disklabel.h> 116#include <sys/device.h> 117#include <sys/disk.h> 118#include <sys/stat.h> 119#include <sys/mount.h> 120#include <sys/vnode.h> 121#include <sys/file.h> 122#include <sys/uio.h> 123#include <sys/conf.h> 124 125#include <miscfs/specfs/specdev.h> 126 127#include <dev/vndvar.h> 128 129#if defined(VNDDEBUG) && !defined(DEBUG) 130#define DEBUG 131#endif 132 133#ifdef DEBUG 134int dovndcluster = 1; 135#define VDB_FOLLOW 0x01 136#define VDB_INIT 0x02 137#define VDB_IO 0x04 138#define VDB_LABEL 0x08 139int vnddebug = 0x00; 140#endif 141 142#define vndunit(x) DISKUNIT(x) 143 144struct vndxfer { 145 struct buf *vx_bp; /* Pointer to parent buffer */ 146 int vx_error; 147 int vx_pending; /* # of pending aux buffers */ 148 int vx_flags; 149#define VX_BUSY 1 150}; 151 152struct vndbuf { 153 struct buf vb_buf; 154 struct vndxfer *vb_xfer; 155}; 156 157#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_NOWAIT) 158#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 159 160#define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_NOWAIT) 161#define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 162 163struct vnd_softc *vnd_softc; 164int numvnd = 0; 165 166#define VNDLABELDEV(dev) \ 167 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 168 169/* called by main() at boot time */ 170void vndattach __P((int)); 171void vnddetach __P((void)); 172 173void vndclear __P((struct vnd_softc *)); 174void vndstart __P((struct vnd_softc *)); 175int vndsetcred __P((struct vnd_softc *, struct ucred *)); 176void vndthrottle __P((struct vnd_softc *, struct vnode *)); 177void vndiodone __P((struct buf *)); 178void vndshutdown __P((void)); 179 180void vndgetdefaultlabel __P((struct vnd_softc *, struct disklabel *)); 181void vndgetdisklabel __P((dev_t)); 182 183static int vndlock __P((struct vnd_softc *)); 184static void vndunlock __P((struct vnd_softc *)); 185 186void 187vndattach(num) 188 int num; 189{ 190 int i; 191 char *mem; 192 193 if (num <= 0) 194 return; 195 i = num * sizeof(struct vnd_softc); 196 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 197 if (mem == NULL) { 198 printf("WARNING: no memory for vnode disks\n"); 199 return; 200 } 201 vnd_softc = (struct vnd_softc *)mem; 202 numvnd = num; 203 204 for (i = 0; i < numvnd; i++) 205 bufq_alloc(&vnd_softc[i].sc_tab, 206 BUFQ_DISKSORT|BUFQ_SORT_RAWBLOCK); 207} 208 209void 210vnddetach() 211{ 212 int i; 213 214 for (i = 0; i < numvnd; i++) 215 bufq_free(&vnd_softc[i].sc_tab); 216 217 free(vnd_softc, M_DEVBUF); 218} 219 220int 221vndopen(dev, flags, mode, p) 222 dev_t dev; 223 int flags, mode; 224 struct proc *p; 225{ 226 int unit = vndunit(dev); 227 struct vnd_softc *sc; 228 int error = 0, part, pmask; 229 struct disklabel *lp; 230 231#ifdef DEBUG 232 if (vnddebug & VDB_FOLLOW) 233 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 234#endif 235 if (unit >= numvnd) 236 return (ENXIO); 237 sc = &vnd_softc[unit]; 238 239 if ((error = vndlock(sc)) != 0) 240 return (error); 241 242 lp = sc->sc_dkdev.dk_label; 243 244 part = DISKPART(dev); 245 pmask = (1 << part); 246 247 /* 248 * If we're initialized, check to see if there are any other 249 * open partitions. If not, then it's safe to update the 250 * in-core disklabel. 251 */ 252 if ((sc->sc_flags & VNF_INITED) && (sc->sc_dkdev.dk_openmask == 0)) 253 vndgetdisklabel(dev); 254 255 /* Check that the partitions exists. */ 256 if (part != RAW_PART) { 257 if (((sc->sc_flags & VNF_INITED) == 0) || 258 ((part >= lp->d_npartitions) || 259 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 260 error = ENXIO; 261 goto done; 262 } 263 } 264 265 /* Prevent our unit from being unconfigured while open. */ 266 switch (mode) { 267 case S_IFCHR: 268 sc->sc_dkdev.dk_copenmask |= pmask; 269 break; 270 271 case S_IFBLK: 272 sc->sc_dkdev.dk_bopenmask |= pmask; 273 break; 274 } 275 sc->sc_dkdev.dk_openmask = 276 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 277 278 done: 279 vndunlock(sc); 280 return (error); 281} 282 283int 284vndclose(dev, flags, mode, p) 285 dev_t dev; 286 int flags, mode; 287 struct proc *p; 288{ 289 int unit = vndunit(dev); 290 struct vnd_softc *sc; 291 int error = 0, part; 292 293#ifdef DEBUG 294 if (vnddebug & VDB_FOLLOW) 295 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, p); 296#endif 297 298 if (unit >= numvnd) 299 return (ENXIO); 300 sc = &vnd_softc[unit]; 301 302 if ((error = vndlock(sc)) != 0) 303 return (error); 304 305 part = DISKPART(dev); 306 307 /* ...that much closer to allowing unconfiguration... */ 308 switch (mode) { 309 case S_IFCHR: 310 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 311 break; 312 313 case S_IFBLK: 314 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 315 break; 316 } 317 sc->sc_dkdev.dk_openmask = 318 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 319 320 vndunlock(sc); 321 return (0); 322} 323 324/* 325 * Break the request into bsize pieces and submit using VOP_BMAP/VOP_STRATEGY. 326 */ 327void 328vndstrategy(bp) 329 struct buf *bp; 330{ 331 int unit = vndunit(bp->b_dev); 332 struct vnd_softc *vnd = &vnd_softc[unit]; 333 struct vndxfer *vnx; 334 int s, bsize, resid; 335 off_t bn; 336 caddr_t addr; 337 int sz, flags, error, wlabel; 338 struct disklabel *lp; 339 struct partition *pp; 340 341#ifdef DEBUG 342 if (vnddebug & VDB_FOLLOW) 343 printf("vndstrategy(%p): unit %d\n", bp, unit); 344#endif 345 if ((vnd->sc_flags & VNF_INITED) == 0) { 346 bp->b_error = ENXIO; 347 bp->b_flags |= B_ERROR; 348 goto done; 349 } 350 351 /* If it's a nil transfer, wake up the top half now. */ 352 if (bp->b_bcount == 0) 353 goto done; 354 355 lp = vnd->sc_dkdev.dk_label; 356 357 /* 358 * The transfer must be a whole number of blocks. 359 */ 360 if ((bp->b_bcount % lp->d_secsize) != 0) { 361 bp->b_error = EINVAL; 362 bp->b_flags |= B_ERROR; 363 goto done; 364 } 365 366 /* 367 * Do bounds checking and adjust transfer. If there's an error, 368 * the bounds check will flag that for us. 369 */ 370 wlabel = vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING); 371 if (DISKPART(bp->b_dev) != RAW_PART) 372 if (bounds_check_with_label(bp, lp, wlabel) <= 0) 373 goto done; 374 375 bp->b_resid = bp->b_bcount; 376 377 /* 378 * Put the block number in terms of the logical blocksize 379 * of the "device". 380 */ 381 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 382 383 /* 384 * Translate the partition-relative block number to an absolute. 385 */ 386 if (DISKPART(bp->b_dev) != RAW_PART) { 387 pp = &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(bp->b_dev)]; 388 bn += pp->p_offset; 389 } 390 391 /* ...and convert to a byte offset within the file. */ 392 bn *= lp->d_secsize; 393 394 if (vnd->sc_vp->v_mount == NULL) { 395 bp->b_error = ENXIO; 396 bp->b_flags |= B_ERROR; 397 goto done; 398 } 399 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 400 addr = bp->b_data; 401 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 402 403 /* Allocate a header for this transfer and link it to the buffer */ 404 s = splbio(); 405 vnx = VND_GETXFER(vnd); 406 splx(s); 407 vnx->vx_flags = VX_BUSY; 408 vnx->vx_error = 0; 409 vnx->vx_pending = 0; 410 vnx->vx_bp = bp; 411 412 for (resid = bp->b_resid; resid; resid -= sz) { 413 struct vndbuf *nbp; 414 struct vnode *vp; 415 daddr_t nbn; 416 int off, nra; 417 418 nra = 0; 419 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 420 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 421 VOP_UNLOCK(vnd->sc_vp, 0); 422 423 if (error == 0 && (long)nbn == -1) 424 error = EIO; 425 426 /* 427 * If there was an error or a hole in the file...punt. 428 * Note that we may have to wait for any operations 429 * that we have already fired off before releasing 430 * the buffer. 431 * 432 * XXX we could deal with holes here but it would be 433 * a hassle (in the write case). 434 */ 435 if (error) { 436 s = splbio(); 437 vnx->vx_error = error; 438 goto out; 439 } 440 441#ifdef DEBUG 442 if (!dovndcluster) 443 nra = 0; 444#endif 445 446 if ((off = bn % bsize) != 0) 447 sz = bsize - off; 448 else 449 sz = (1 + nra) * bsize; 450 if (resid < sz) 451 sz = resid; 452#ifdef DEBUG 453 if (vnddebug & VDB_IO) 454 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%x sz 0x%x\n", 455 vnd->sc_vp, vp, (long long)bn, nbn, sz); 456#endif 457 458 s = splbio(); 459 nbp = VND_GETBUF(vnd); 460 splx(s); 461 nbp->vb_buf.b_flags = flags; 462 nbp->vb_buf.b_bcount = sz; 463 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 464 - trunc_page((ulong) addr); 465 nbp->vb_buf.b_error = 0; 466 nbp->vb_buf.b_data = addr; 467 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 468 nbp->vb_buf.b_proc = bp->b_proc; 469 nbp->vb_buf.b_iodone = vndiodone; 470 nbp->vb_buf.b_vp = NULLVP; 471 LIST_INIT(&nbp->vb_buf.b_dep); 472 473 nbp->vb_xfer = vnx; 474 475 /* 476 * Just sort by block number 477 */ 478 s = splbio(); 479 if (vnx->vx_error != 0) { 480 VND_PUTBUF(vnd, nbp); 481 goto out; 482 } 483 vnx->vx_pending++; 484 bgetvp(vp, &nbp->vb_buf); 485 BUFQ_PUT(&vnd->sc_tab, &nbp->vb_buf); 486 vndstart(vnd); 487 splx(s); 488 bn += sz; 489 addr += sz; 490 } 491 492 s = splbio(); 493 494out: /* Arrive here at splbio */ 495 vnx->vx_flags &= ~VX_BUSY; 496 if (vnx->vx_pending == 0) { 497 if (vnx->vx_error != 0) { 498 bp->b_error = vnx->vx_error; 499 bp->b_flags |= B_ERROR; 500 } 501 VND_PUTXFER(vnd, vnx); 502 biodone(bp); 503 } 504 splx(s); 505 return; 506 507 done: 508 biodone(bp); 509} 510 511/* 512 * Feed requests sequentially. 513 * We do it this way to keep from flooding NFS servers if we are connected 514 * to an NFS file. This places the burden on the client rather than the 515 * server. 516 */ 517void 518vndstart(vnd) 519 struct vnd_softc *vnd; 520{ 521 struct buf *bp; 522 523 /* 524 * Dequeue now since lower level strategy routine might 525 * queue using same links 526 */ 527 528 if ((vnd->sc_flags & VNF_BUSY) != 0) 529 return; 530 531 vnd->sc_flags |= VNF_BUSY; 532 533 while (vnd->sc_active < vnd->sc_maxactive) { 534 bp = BUFQ_GET(&vnd->sc_tab); 535 if (bp == NULL) 536 break; 537 vnd->sc_active++; 538#ifdef DEBUG 539 if (vnddebug & VDB_IO) 540 printf("vndstart(%ld): bp %p vp %p blkno 0x%x" 541 " flags %lx addr %p cnt 0x%lx\n", 542 (long) (vnd-vnd_softc), bp, bp->b_vp, bp->b_blkno, 543 bp->b_flags, bp->b_data, bp->b_bcount); 544#endif 545 546 /* Instrumentation. */ 547 disk_busy(&vnd->sc_dkdev); 548 549 if ((bp->b_flags & B_READ) == 0) 550 bp->b_vp->v_numoutput++; 551 VOP_STRATEGY(bp); 552 } 553 vnd->sc_flags &= ~VNF_BUSY; 554} 555 556void 557vndiodone(bp) 558 struct buf *bp; 559{ 560 struct vndbuf *vbp = (struct vndbuf *) bp; 561 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 562 struct buf *pbp = vnx->vx_bp; 563 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 564 int s, resid; 565 566 s = splbio(); 567#ifdef DEBUG 568 if (vnddebug & VDB_IO) 569 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%x addr %p cnt 0x%lx\n", 570 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 571 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 572 vbp->vb_buf.b_bcount); 573#endif 574 575 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 576 pbp->b_resid -= resid; 577 disk_unbusy(&vnd->sc_dkdev, resid); 578 vnx->vx_pending--; 579 580 if (vbp->vb_buf.b_error) { 581#ifdef DEBUG 582 if (vnddebug & VDB_IO) 583 printf("vndiodone: vbp %p error %d\n", vbp, 584 vbp->vb_buf.b_error); 585#endif 586 vnx->vx_error = vbp->vb_buf.b_error; 587 } 588 589 if (vbp->vb_buf.b_vp != NULLVP) 590 brelvp(&vbp->vb_buf); 591 592 VND_PUTBUF(vnd, vbp); 593 594 /* 595 * Wrap up this transaction if it has run to completion or, in 596 * case of an error, when all auxiliary buffers have returned. 597 */ 598 if (vnx->vx_error != 0) { 599 pbp->b_flags |= B_ERROR; 600 pbp->b_error = vnx->vx_error; 601 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 602 603#ifdef DEBUG 604 if (vnddebug & VDB_IO) 605 printf("vndiodone: pbp %p iodone: error %d\n", 606 pbp, vnx->vx_error); 607#endif 608 VND_PUTXFER(vnd, vnx); 609 biodone(pbp); 610 } 611 } else if (pbp->b_resid == 0) { 612 613#ifdef DIAGNOSTIC 614 if (vnx->vx_pending != 0) 615 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 616#endif 617 618 if ((vnx->vx_flags & VX_BUSY) == 0) { 619#ifdef DEBUG 620 if (vnddebug & VDB_IO) 621 printf("vndiodone: pbp %p iodone\n", pbp); 622#endif 623 VND_PUTXFER(vnd, vnx); 624 biodone(pbp); 625 } 626 } 627 628 vnd->sc_active--; 629 vndstart(vnd); 630 splx(s); 631} 632 633/* ARGSUSED */ 634int 635vndread(dev, uio, flags) 636 dev_t dev; 637 struct uio *uio; 638 int flags; 639{ 640 int unit = vndunit(dev); 641 struct vnd_softc *sc; 642 643#ifdef DEBUG 644 if (vnddebug & VDB_FOLLOW) 645 printf("vndread(0x%x, %p)\n", dev, uio); 646#endif 647 648 if (unit >= numvnd) 649 return (ENXIO); 650 sc = &vnd_softc[unit]; 651 652 if ((sc->sc_flags & VNF_INITED) == 0) 653 return (ENXIO); 654 655 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 656} 657 658/* ARGSUSED */ 659int 660vndwrite(dev, uio, flags) 661 dev_t dev; 662 struct uio *uio; 663 int flags; 664{ 665 int unit = vndunit(dev); 666 struct vnd_softc *sc; 667 668#ifdef DEBUG 669 if (vnddebug & VDB_FOLLOW) 670 printf("vndwrite(0x%x, %p)\n", dev, uio); 671#endif 672 673 if (unit >= numvnd) 674 return (ENXIO); 675 sc = &vnd_softc[unit]; 676 677 if ((sc->sc_flags & VNF_INITED) == 0) 678 return (ENXIO); 679 680 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 681} 682 683/* ARGSUSED */ 684int 685vndioctl(dev, cmd, data, flag, p) 686 dev_t dev; 687 u_long cmd; 688 caddr_t data; 689 int flag; 690 struct proc *p; 691{ 692 int unit = vndunit(dev); 693 struct vnd_softc *vnd; 694 struct vnd_ioctl *vio; 695 struct vattr vattr; 696 struct nameidata nd; 697 int error, part, pmask; 698 size_t geomsize; 699#ifdef __HAVE_OLD_DISKLABEL 700 struct disklabel newlabel; 701#endif 702 703#ifdef DEBUG 704 if (vnddebug & VDB_FOLLOW) 705 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 706 dev, cmd, data, flag, p, unit); 707#endif 708 error = suser(p->p_ucred, &p->p_acflag); 709 if (error) 710 return (error); 711 if (unit >= numvnd) 712 return (ENXIO); 713 714 vnd = &vnd_softc[unit]; 715 vio = (struct vnd_ioctl *)data; 716 717 /* Must be open for writes for these commands... */ 718 switch (cmd) { 719 case VNDIOCSET: 720 case VNDIOCCLR: 721 case DIOCSDINFO: 722 case DIOCWDINFO: 723#ifdef __HAVE_OLD_DISKLABEL 724 case ODIOCSDINFO: 725 case ODIOCWDINFO: 726#endif 727 case DIOCWLABEL: 728 if ((flag & FWRITE) == 0) 729 return (EBADF); 730 } 731 732 /* Must be initialized for these... */ 733 switch (cmd) { 734 case VNDIOCCLR: 735 case DIOCGDINFO: 736 case DIOCSDINFO: 737 case DIOCWDINFO: 738 case DIOCGPART: 739 case DIOCWLABEL: 740 case DIOCGDEFLABEL: 741#ifdef __HAVE_OLD_DISKLABEL 742 case ODIOCGDINFO: 743 case ODIOCSDINFO: 744 case ODIOCWDINFO: 745 case ODIOCGDEFLABEL: 746#endif 747 if ((vnd->sc_flags & VNF_INITED) == 0) 748 return (ENXIO); 749 } 750 751 switch (cmd) { 752 case VNDIOCSET: 753 if (vnd->sc_flags & VNF_INITED) 754 return (EBUSY); 755 756 if ((error = vndlock(vnd)) != 0) 757 return (error); 758 759 /* 760 * Always open for read and write. 761 * This is probably bogus, but it lets vn_open() 762 * weed out directories, sockets, etc. so we don't 763 * have to worry about them. 764 */ 765 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, p); 766 if ((error = vn_open(&nd, FREAD|FWRITE, 0)) != 0) 767 goto unlock_and_exit; 768 error = VOP_GETATTR(nd.ni_vp, &vattr, p->p_ucred, p); 769 VOP_UNLOCK(nd.ni_vp, 0); 770 if (error) 771 goto close_and_exit; 772 vnd->sc_vp = nd.ni_vp; 773 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 774 775 /* 776 * Use pseudo-geometry specified. If none was provided, 777 * use "standard" Adaptec fictitious geometry. 778 */ 779 if (vio->vnd_flags & VNDIOF_HASGEOM) { 780 781 memcpy(&vnd->sc_geom, &vio->vnd_geom, 782 sizeof(vio->vnd_geom)); 783 784 /* 785 * Sanity-check the sector size. 786 * XXX Don't allow secsize < DEV_BSIZE. Should 787 * XXX we? 788 */ 789 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 790 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0) { 791 error = EINVAL; 792 goto close_and_exit; 793 } 794 795 /* 796 * Compute the size (in DEV_BSIZE blocks) specified 797 * by the geometry. 798 */ 799 geomsize = (vnd->sc_geom.vng_nsectors * 800 vnd->sc_geom.vng_ntracks * 801 vnd->sc_geom.vng_ncylinders) * 802 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 803 804 /* 805 * Sanity-check the size against the specified 806 * geometry. 807 */ 808 if (vnd->sc_size < geomsize) { 809 error = EINVAL; 810 goto close_and_exit; 811 } 812 } else { 813 /* 814 * Size must be at least 2048 DEV_BSIZE blocks 815 * (1M) in order to use this geometry. 816 */ 817 if (vnd->sc_size < (32 * 64)) { 818 error = EINVAL; 819 goto close_and_exit; 820 } 821 822 vnd->sc_geom.vng_secsize = DEV_BSIZE; 823 vnd->sc_geom.vng_nsectors = 32; 824 vnd->sc_geom.vng_ntracks = 64; 825 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 826 827 /* 828 * Compute the actual size allowed by this geometry. 829 */ 830 geomsize = 32 * 64 * vnd->sc_geom.vng_ncylinders; 831 } 832 833 /* 834 * Truncate the size to that specified by 835 * the geometry. 836 * XXX Should we even bother with this? 837 */ 838 vnd->sc_size = geomsize; 839 840 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 841 goto close_and_exit; 842 vndthrottle(vnd, vnd->sc_vp); 843 vio->vnd_size = dbtob(vnd->sc_size); 844 vnd->sc_flags |= VNF_INITED; 845#ifdef DEBUG 846 if (vnddebug & VDB_INIT) 847 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 848 vnd->sc_vp, (unsigned long) vnd->sc_size, 849 vnd->sc_geom.vng_secsize, 850 vnd->sc_geom.vng_nsectors, 851 vnd->sc_geom.vng_ntracks, 852 vnd->sc_geom.vng_ncylinders); 853#endif 854 855 /* Attach the disk. */ 856 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 857 sprintf(vnd->sc_xname, "vnd%d", unit); /* XXX */ 858 vnd->sc_dkdev.dk_name = vnd->sc_xname; 859 disk_attach(&vnd->sc_dkdev); 860 861 /* Initialize the xfer and buffer pools. */ 862 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 863 0, 0, "vndxpl", NULL); 864 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 865 0, 0, "vndbpl", NULL); 866 867 /* Try and read the disklabel. */ 868 vndgetdisklabel(dev); 869 870 vndunlock(vnd); 871 872 break; 873 874close_and_exit: 875 (void) vn_close(nd.ni_vp, FREAD|FWRITE, p->p_ucred, p); 876unlock_and_exit: 877 vndunlock(vnd); 878 return (error); 879 880 case VNDIOCCLR: 881 if ((error = vndlock(vnd)) != 0) 882 return (error); 883 884 /* 885 * Don't unconfigure if any other partitions are open 886 * or if both the character and block flavors of this 887 * partition are open. 888 */ 889 part = DISKPART(dev); 890 pmask = (1 << part); 891 if ((vnd->sc_dkdev.dk_openmask & ~pmask) || 892 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 893 (vnd->sc_dkdev.dk_copenmask & pmask))) { 894 vndunlock(vnd); 895 return (EBUSY); 896 } 897 898 vndclear(vnd); 899#ifdef DEBUG 900 if (vnddebug & VDB_INIT) 901 printf("vndioctl: CLRed\n"); 902#endif 903 904 /* Destroy the xfer and buffer pools. */ 905 pool_destroy(&vnd->sc_vxpool); 906 pool_destroy(&vnd->sc_vbpool); 907 908 /* Detatch the disk. */ 909 disk_detach(&vnd->sc_dkdev); 910 911 vndunlock(vnd); 912 913 break; 914 915 case VNDIOCGET: { 916 struct vnd_user *vnu; 917 struct vattr va; 918 919 vnu = (struct vnd_user *)data; 920 921 if (vnu->vnu_unit == -1) 922 vnu->vnu_unit = unit; 923 if (vnu->vnu_unit >= numvnd) 924 return (ENXIO); 925 if (vnu->vnu_unit < 0) 926 return (EINVAL); 927 928 vnd = &vnd_softc[vnu->vnu_unit]; 929 930 if (vnd->sc_flags & VNF_INITED) { 931 error = VOP_GETATTR(vnd->sc_vp, &va, p->p_ucred, p); 932 if (error) 933 return (error); 934 vnu->vnu_dev = va.va_fsid; 935 vnu->vnu_ino = va.va_fileid; 936 } 937 else { 938 /* unused is not an error */ 939 vnu->vnu_dev = 0; 940 vnu->vnu_ino = 0; 941 } 942 943 break; 944 } 945 946 case DIOCGDINFO: 947 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 948 break; 949 950#ifdef __HAVE_OLD_DISKLABEL 951 case ODIOCGDINFO: 952 newlabel = *(vnd->sc_dkdev.dk_label); 953 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 954 return ENOTTY; 955 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 956 break; 957#endif 958 959 case DIOCGPART: 960 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 961 ((struct partinfo *)data)->part = 962 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 963 break; 964 965 case DIOCWDINFO: 966 case DIOCSDINFO: 967#ifdef __HAVE_OLD_DISKLABEL 968 case ODIOCWDINFO: 969 case ODIOCSDINFO: 970#endif 971 { 972 struct disklabel *lp; 973 974 if ((error = vndlock(vnd)) != 0) 975 return (error); 976 977 vnd->sc_flags |= VNF_LABELLING; 978 979#ifdef __HAVE_OLD_DISKLABEL 980 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 981 memset(&newlabel, 0, sizeof newlabel); 982 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 983 lp = &newlabel; 984 } else 985#endif 986 lp = (struct disklabel *)data; 987 988 error = setdisklabel(vnd->sc_dkdev.dk_label, 989 lp, 0, vnd->sc_dkdev.dk_cpulabel); 990 if (error == 0) { 991 if (cmd == DIOCWDINFO 992#ifdef __HAVE_OLD_DISKLABEL 993 || cmd == ODIOCWDINFO 994#endif 995 ) 996 error = writedisklabel(VNDLABELDEV(dev), 997 vndstrategy, vnd->sc_dkdev.dk_label, 998 vnd->sc_dkdev.dk_cpulabel); 999 } 1000 1001 vnd->sc_flags &= ~VNF_LABELLING; 1002 1003 vndunlock(vnd); 1004 1005 if (error) 1006 return (error); 1007 break; 1008 } 1009 1010 case DIOCWLABEL: 1011 if (*(int *)data != 0) 1012 vnd->sc_flags |= VNF_WLABEL; 1013 else 1014 vnd->sc_flags &= ~VNF_WLABEL; 1015 break; 1016 1017 case DIOCGDEFLABEL: 1018 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1019 break; 1020 1021#ifdef __HAVE_OLD_DISKLABEL 1022 case ODIOCGDEFLABEL: 1023 vndgetdefaultlabel(vnd, &newlabel); 1024 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1025 return ENOTTY; 1026 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1027 break; 1028#endif 1029 1030 default: 1031 return (ENOTTY); 1032 } 1033 1034 return (0); 1035} 1036 1037/* 1038 * Duplicate the current processes' credentials. Since we are called only 1039 * as the result of a SET ioctl and only root can do that, any future access 1040 * to this "disk" is essentially as root. Note that credentials may change 1041 * if some other uid can write directly to the mapped file (NFS). 1042 */ 1043int 1044vndsetcred(vnd, cred) 1045 struct vnd_softc *vnd; 1046 struct ucred *cred; 1047{ 1048 struct uio auio; 1049 struct iovec aiov; 1050 char *tmpbuf; 1051 int error; 1052 1053 vnd->sc_cred = crdup(cred); 1054 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1055 1056 /* XXX: Horrible kludge to establish credentials for NFS */ 1057 aiov.iov_base = tmpbuf; 1058 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1059 auio.uio_iov = &aiov; 1060 auio.uio_iovcnt = 1; 1061 auio.uio_offset = 0; 1062 auio.uio_rw = UIO_READ; 1063 auio.uio_segflg = UIO_SYSSPACE; 1064 auio.uio_resid = aiov.iov_len; 1065 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1066 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1067 if (error == 0) { 1068 /* 1069 * Because vnd does all IO directly through the vnode 1070 * we need to flush (at least) the buffer from the above 1071 * VOP_READ from the buffer cache to prevent cache 1072 * incoherencies. Also, be careful to write dirty 1073 * buffers back to stable storage. 1074 */ 1075 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1076 curproc, 0, 0); 1077 } 1078 VOP_UNLOCK(vnd->sc_vp, 0); 1079 1080 free(tmpbuf, M_TEMP); 1081 return (error); 1082} 1083 1084/* 1085 * Set maxactive based on FS type 1086 */ 1087void 1088vndthrottle(vnd, vp) 1089 struct vnd_softc *vnd; 1090 struct vnode *vp; 1091{ 1092#ifdef NFS 1093 extern int (**nfsv2_vnodeop_p) __P((void *)); 1094 1095 if (vp->v_op == nfsv2_vnodeop_p) 1096 vnd->sc_maxactive = 2; 1097 else 1098#endif 1099 vnd->sc_maxactive = 8; 1100 1101 if (vnd->sc_maxactive < 1) 1102 vnd->sc_maxactive = 1; 1103} 1104 1105void 1106vndshutdown() 1107{ 1108 struct vnd_softc *vnd; 1109 1110 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1111 if (vnd->sc_flags & VNF_INITED) 1112 vndclear(vnd); 1113} 1114 1115void 1116vndclear(vnd) 1117 struct vnd_softc *vnd; 1118{ 1119 struct vnode *vp = vnd->sc_vp; 1120 struct proc *p = curproc; /* XXX */ 1121 1122#ifdef DEBUG 1123 if (vnddebug & VDB_FOLLOW) 1124 printf("vndclear(%p): vp %p\n", vnd, vp); 1125#endif 1126 vnd->sc_flags &= ~VNF_INITED; 1127 if (vp == (struct vnode *)0) 1128 panic("vndioctl: null vp"); 1129 (void) vn_close(vp, FREAD|FWRITE, vnd->sc_cred, p); 1130 crfree(vnd->sc_cred); 1131 vnd->sc_vp = (struct vnode *)0; 1132 vnd->sc_cred = (struct ucred *)0; 1133 vnd->sc_size = 0; 1134} 1135 1136int 1137vndsize(dev) 1138 dev_t dev; 1139{ 1140 struct vnd_softc *sc; 1141 struct disklabel *lp; 1142 int part, unit, omask; 1143 int size; 1144 1145 unit = vndunit(dev); 1146 if (unit >= numvnd) 1147 return (-1); 1148 sc = &vnd_softc[unit]; 1149 1150 if ((sc->sc_flags & VNF_INITED) == 0) 1151 return (-1); 1152 1153 part = DISKPART(dev); 1154 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1155 lp = sc->sc_dkdev.dk_label; 1156 1157 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curproc)) 1158 return (-1); 1159 1160 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1161 size = -1; 1162 else 1163 size = lp->d_partitions[part].p_size * 1164 (lp->d_secsize / DEV_BSIZE); 1165 1166 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curproc)) 1167 return (-1); 1168 1169 return (size); 1170} 1171 1172int 1173vnddump(dev, blkno, va, size) 1174 dev_t dev; 1175 daddr_t blkno; 1176 caddr_t va; 1177 size_t size; 1178{ 1179 1180 /* Not implemented. */ 1181 return ENXIO; 1182} 1183 1184void 1185vndgetdefaultlabel(sc, lp) 1186 struct vnd_softc *sc; 1187 struct disklabel *lp; 1188{ 1189 struct vndgeom *vng = &sc->sc_geom; 1190 struct partition *pp; 1191 1192 memset(lp, 0, sizeof(*lp)); 1193 1194 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1195 lp->d_secsize = vng->vng_secsize; 1196 lp->d_nsectors = vng->vng_nsectors; 1197 lp->d_ntracks = vng->vng_ntracks; 1198 lp->d_ncylinders = vng->vng_ncylinders; 1199 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1200 1201 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1202 lp->d_type = DTYPE_VND; 1203 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1204 lp->d_rpm = 3600; 1205 lp->d_interleave = 1; 1206 lp->d_flags = 0; 1207 1208 pp = &lp->d_partitions[RAW_PART]; 1209 pp->p_offset = 0; 1210 pp->p_size = lp->d_secperunit; 1211 pp->p_fstype = FS_UNUSED; 1212 lp->d_npartitions = RAW_PART + 1; 1213 1214 lp->d_magic = DISKMAGIC; 1215 lp->d_magic2 = DISKMAGIC; 1216 lp->d_checksum = dkcksum(lp); 1217} 1218 1219/* 1220 * Read the disklabel from a vnd. If one is not present, create a fake one. 1221 */ 1222void 1223vndgetdisklabel(dev) 1224 dev_t dev; 1225{ 1226 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1227 char *errstring; 1228 struct disklabel *lp = sc->sc_dkdev.dk_label; 1229 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1230 int i; 1231 1232 memset(clp, 0, sizeof(*clp)); 1233 1234 vndgetdefaultlabel(sc, lp); 1235 1236 /* 1237 * Call the generic disklabel extraction routine. 1238 */ 1239 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1240 if (errstring) { 1241 /* 1242 * Lack of disklabel is common, but we print the warning 1243 * anyway, since it might contain other useful information. 1244 */ 1245 printf("%s: %s\n", sc->sc_xname, errstring); 1246 1247 /* 1248 * For historical reasons, if there's no disklabel 1249 * present, all partitions must be FS_BSDFFS and 1250 * occupy the entire disk. 1251 */ 1252 for (i = 0; i < MAXPARTITIONS; i++) { 1253 /* 1254 * Don't wipe out port specific hack (such as 1255 * dos partition hack of i386 port). 1256 */ 1257 if (lp->d_partitions[i].p_fstype != FS_UNUSED) 1258 continue; 1259 1260 lp->d_partitions[i].p_size = lp->d_secperunit; 1261 lp->d_partitions[i].p_offset = 0; 1262 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1263 } 1264 1265 strncpy(lp->d_packname, "default label", 1266 sizeof(lp->d_packname)); 1267 1268 lp->d_checksum = dkcksum(lp); 1269 } 1270} 1271 1272/* 1273 * Wait interruptibly for an exclusive lock. 1274 * 1275 * XXX 1276 * Several drivers do this; it should be abstracted and made MP-safe. 1277 */ 1278static int 1279vndlock(sc) 1280 struct vnd_softc *sc; 1281{ 1282 int error; 1283 1284 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1285 sc->sc_flags |= VNF_WANTED; 1286 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1287 return (error); 1288 } 1289 sc->sc_flags |= VNF_LOCKED; 1290 return (0); 1291} 1292 1293/* 1294 * Unlock and wake up any waiters. 1295 */ 1296static void 1297vndunlock(sc) 1298 struct vnd_softc *sc; 1299{ 1300 1301 sc->sc_flags &= ~VNF_LOCKED; 1302 if ((sc->sc_flags & VNF_WANTED) != 0) { 1303 sc->sc_flags &= ~VNF_WANTED; 1304 wakeup(sc); 1305 } 1306} 1307