/* vnd.c revision 1.124 */
1/* $NetBSD: vnd.c,v 1.124 2005/12/11 12:20:53 christos Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76/* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 
98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116/* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 123 * instead of a simple VOP_RDWR. We do this to avoid distorting the 124 * local buffer cache. 125 * 126 * NOTE 2: There is a security issue involved with this driver. 127 * Once mounted all access to the contents of the "mapped" file via 128 * the special file is controlled by the permissions on the special 129 * file, the protection of the mapped file is ignored (effectively, 130 * by using root credentials in all transactions). 131 * 132 * NOTE 3: Doesn't interact with leases, should it? 
133 */ 134 135#include <sys/cdefs.h> 136__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.124 2005/12/11 12:20:53 christos Exp $"); 137 138#if defined(_KERNEL_OPT) 139#include "fs_nfs.h" 140#include "opt_vnd.h" 141#endif 142 143#include <sys/param.h> 144#include <sys/systm.h> 145#include <sys/namei.h> 146#include <sys/proc.h> 147#include <sys/kthread.h> 148#include <sys/errno.h> 149#include <sys/buf.h> 150#include <sys/bufq.h> 151#include <sys/malloc.h> 152#include <sys/ioctl.h> 153#include <sys/disklabel.h> 154#include <sys/device.h> 155#include <sys/disk.h> 156#include <sys/stat.h> 157#include <sys/mount.h> 158#include <sys/vnode.h> 159#include <sys/file.h> 160#include <sys/uio.h> 161#include <sys/conf.h> 162#include <net/zlib.h> 163 164#include <miscfs/specfs/specdev.h> 165 166#include <dev/vndvar.h> 167 168#if defined(VNDDEBUG) && !defined(DEBUG) 169#define DEBUG 170#endif 171 172#ifdef DEBUG 173int dovndcluster = 1; 174#define VDB_FOLLOW 0x01 175#define VDB_INIT 0x02 176#define VDB_IO 0x04 177#define VDB_LABEL 0x08 178int vnddebug = 0x00; 179#endif 180 181#define vndunit(x) DISKUNIT(x) 182 183struct vndxfer { 184 struct buf *vx_bp; /* Pointer to parent buffer */ 185 int vx_error; 186 int vx_pending; /* # of pending aux buffers */ 187 int vx_flags; 188#define VX_BUSY 1 189}; 190 191struct vndbuf { 192 struct buf vb_buf; 193 struct vndxfer *vb_xfer; 194}; 195 196#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 197#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 198 199#define VND_GETBUF(vnd) pool_get(&(vnd)->sc_vbpool, PR_WAITOK) 200#define VND_PUTBUF(vnd, vb) pool_put(&(vnd)->sc_vbpool, (vb)) 201 202struct vnd_softc *vnd_softc; 203int numvnd = 0; 204 205#define VNDLABELDEV(dev) \ 206 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 207 208/* called by main() at boot time (XXX: and the LKM driver) */ 209void vndattach(int); 210int vnddetach(void); 211 212static void vndclear(struct vnd_softc *, int); 213static int vndsetcred(struct 
vnd_softc *, struct ucred *); 214static void vndthrottle(struct vnd_softc *, struct vnode *); 215static void vndiodone(struct buf *); 216#if 0 217static void vndshutdown(void); 218#endif 219 220static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 221static void vndgetdisklabel(dev_t); 222 223static int vndlock(struct vnd_softc *); 224static void vndunlock(struct vnd_softc *); 225#ifdef VND_COMPRESSION 226static void compstrategy(struct buf *, off_t); 227static void *vnd_alloc(void *, u_int, u_int); 228static void vnd_free(void *, void *); 229#endif /* VND_COMPRESSION */ 230 231void vndthread(void *); 232 233static dev_type_open(vndopen); 234static dev_type_close(vndclose); 235static dev_type_read(vndread); 236static dev_type_write(vndwrite); 237static dev_type_ioctl(vndioctl); 238static dev_type_strategy(vndstrategy); 239static dev_type_dump(vnddump); 240static dev_type_size(vndsize); 241 242const struct bdevsw vnd_bdevsw = { 243 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 244}; 245 246const struct cdevsw vnd_cdevsw = { 247 vndopen, vndclose, vndread, vndwrite, vndioctl, 248 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 249}; 250 251static int vndattached; 252 253void 254vndattach(int num) 255{ 256 int i; 257 char *mem; 258 259 if (vndattached) 260 return; 261 vndattached = 1; 262 if (num <= 0) 263 return; 264 i = num * sizeof(struct vnd_softc); 265 mem = malloc(i, M_DEVBUF, M_NOWAIT|M_ZERO); 266 if (mem == NULL) { 267 printf("WARNING: no memory for vnode disks\n"); 268 return; 269 } 270 vnd_softc = (struct vnd_softc *)mem; 271 numvnd = num; 272 273 for (i = 0; i < numvnd; i++) { 274 vnd_softc[i].sc_unit = i; 275 vnd_softc[i].sc_comp_offsets = NULL; 276 vnd_softc[i].sc_comp_buff = NULL; 277 vnd_softc[i].sc_comp_decombuf = NULL; 278 bufq_alloc(&vnd_softc[i].sc_tab, 279 "disksort", BUFQ_SORT_RAWBLOCK); 280 pseudo_disk_init(&vnd_softc[i].sc_dkdev); 281 } 282} 283 284int 285vnddetach(void) 286{ 287 int i; 288 289 /* First 
check we aren't in use. */ 290 for (i = 0; i < numvnd; i++) 291 if (vnd_softc[i].sc_flags & VNF_INITED) 292 return (EBUSY); 293 294 for (i = 0; i < numvnd; i++) 295 bufq_free(vnd_softc[i].sc_tab); 296 297 free(vnd_softc, M_DEVBUF); 298 vndattached = 0; 299 300 return (0); 301} 302 303static int 304vndopen(dev_t dev, int flags, int mode, struct lwp *l) 305{ 306 int unit = vndunit(dev); 307 struct vnd_softc *sc; 308 int error = 0, part, pmask; 309 struct disklabel *lp; 310 311#ifdef DEBUG 312 if (vnddebug & VDB_FOLLOW) 313 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 314#endif 315 if (unit >= numvnd) 316 return (ENXIO); 317 sc = &vnd_softc[unit]; 318 319 if ((error = vndlock(sc)) != 0) 320 return (error); 321 322 lp = sc->sc_dkdev.dk_label; 323 324 part = DISKPART(dev); 325 pmask = (1 << part); 326 327 /* 328 * If we're initialized, check to see if there are any other 329 * open partitions. If not, then it's safe to update the 330 * in-core disklabel. Only read the disklabel if it is 331 * not already valid. 332 */ 333 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 334 sc->sc_dkdev.dk_openmask == 0) 335 vndgetdisklabel(dev); 336 337 /* Check that the partitions exists. */ 338 if (part != RAW_PART) { 339 if (((sc->sc_flags & VNF_INITED) == 0) || 340 ((part >= lp->d_npartitions) || 341 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 342 error = ENXIO; 343 goto done; 344 } 345 } 346 347 /* Prevent our unit from being unconfigured while open. 
*/ 348 switch (mode) { 349 case S_IFCHR: 350 sc->sc_dkdev.dk_copenmask |= pmask; 351 break; 352 353 case S_IFBLK: 354 sc->sc_dkdev.dk_bopenmask |= pmask; 355 break; 356 } 357 sc->sc_dkdev.dk_openmask = 358 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 359 360 done: 361 vndunlock(sc); 362 return (error); 363} 364 365static int 366vndclose(dev_t dev, int flags, int mode, struct lwp *l) 367{ 368 int unit = vndunit(dev); 369 struct vnd_softc *sc; 370 int error = 0, part; 371 372#ifdef DEBUG 373 if (vnddebug & VDB_FOLLOW) 374 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 375#endif 376 377 if (unit >= numvnd) 378 return (ENXIO); 379 sc = &vnd_softc[unit]; 380 381 if ((error = vndlock(sc)) != 0) 382 return (error); 383 384 part = DISKPART(dev); 385 386 /* ...that much closer to allowing unconfiguration... */ 387 switch (mode) { 388 case S_IFCHR: 389 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 390 break; 391 392 case S_IFBLK: 393 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 394 break; 395 } 396 sc->sc_dkdev.dk_openmask = 397 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 398 399 vndunlock(sc); 400 return (0); 401} 402 403/* 404 * Qeue the request, and wakeup the kernel thread to handle it. 405 */ 406static void 407vndstrategy(struct buf *bp) 408{ 409 int unit = vndunit(bp->b_dev); 410 struct vnd_softc *vnd = &vnd_softc[unit]; 411 struct disklabel *lp = vnd->sc_dkdev.dk_label; 412 int s = splbio(); 413 414 bp->b_resid = bp->b_bcount; 415 416 if ((vnd->sc_flags & VNF_INITED) == 0) { 417 bp->b_error = ENXIO; 418 bp->b_flags |= B_ERROR; 419 goto done; 420 } 421 422 /* 423 * The transfer must be a whole number of blocks. 424 */ 425 if ((bp->b_bcount % lp->d_secsize) != 0) { 426 bp->b_error = EINVAL; 427 bp->b_flags |= B_ERROR; 428 goto done; 429 } 430 431 /* 432 * check if we're read-only. 
433 */ 434 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 435 bp->b_error = EACCES; 436 bp->b_flags |= B_ERROR; 437 goto done; 438 } 439 440 /* 441 * Do bounds checking and adjust transfer. If there's an error, 442 * the bounds check will flag that for us. 443 */ 444 if (DISKPART(bp->b_dev) != RAW_PART) { 445 if (bounds_check_with_label(&vnd->sc_dkdev, 446 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 447 goto done; 448 } 449 450 /* If it's a nil transfer, wake up the top half now. */ 451 if (bp->b_bcount == 0) 452 goto done; 453#ifdef DEBUG 454 if (vnddebug & VDB_FOLLOW) 455 printf("vndstrategy(%p): unit %d\n", bp, unit); 456#endif 457 BUFQ_PUT(vnd->sc_tab, bp); 458 wakeup(&vnd->sc_tab); 459 splx(s); 460 return; 461done: 462 biodone(bp); 463 splx(s); 464} 465 466void 467vndthread(void *arg) 468{ 469 struct vnd_softc *vnd = arg; 470 struct buf *bp; 471 struct vndxfer *vnx; 472 struct mount *mp; 473 int s, bsize, resid; 474 off_t bn; 475 caddr_t addr; 476 int sz, flags, error; 477 struct disklabel *lp; 478 struct partition *pp; 479 480 s = splbio(); 481 vnd->sc_flags |= VNF_KTHREAD; 482 wakeup(&vnd->sc_kthread); 483 484 /* 485 * Dequeue requests, break them into bsize pieces and submit using 486 * VOP_BMAP/VOP_STRATEGY. 487 */ 488 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 489 bp = BUFQ_GET(vnd->sc_tab); 490 if (bp == NULL) { 491 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 492 continue; 493 }; 494 splx(s); 495 496#ifdef DEBUG 497 if (vnddebug & VDB_FOLLOW) 498 printf("vndthread(%p\n", bp); 499#endif 500 lp = vnd->sc_dkdev.dk_label; 501 bp->b_resid = bp->b_bcount; 502 503 /* 504 * Put the block number in terms of the logical blocksize 505 * of the "device". 506 */ 507 bn = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 508 509 /* 510 * Translate the partition-relative block number to an absolute. 
511 */ 512 if (DISKPART(bp->b_dev) != RAW_PART) { 513 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 514 DISKPART(bp->b_dev)]; 515 bn += pp->p_offset; 516 } 517 518 /* ...and convert to a byte offset within the file. */ 519 bn *= lp->d_secsize; 520 521 if (vnd->sc_vp->v_mount == NULL) { 522 bp->b_error = ENXIO; 523 bp->b_flags |= B_ERROR; 524 goto done; 525 } 526#ifdef VND_COMPRESSION 527 /* handle a compressed read */ 528 if ((bp->b_flags & B_READ) && (vnd->sc_flags & VNF_COMP)) { 529 compstrategy(bp, bn); 530 goto done; 531 } 532#endif /* VND_COMPRESSION */ 533 534 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 535 addr = bp->b_data; 536 flags = (bp->b_flags & (B_READ|B_ASYNC)) | B_CALL; 537 538 /* 539 * Allocate a header for this transfer and link it to the 540 * buffer 541 */ 542 s = splbio(); 543 vnx = VND_GETXFER(vnd); 544 splx(s); 545 vnx->vx_flags = VX_BUSY; 546 vnx->vx_error = 0; 547 vnx->vx_pending = 0; 548 vnx->vx_bp = bp; 549 550 if ((flags & B_READ) == 0) 551 vn_start_write(vnd->sc_vp, &mp, V_WAIT); 552 553 /* 554 * Feed requests sequentially. 555 * We do it this way to keep from flooding NFS servers if we 556 * are connected to an NFS file. This places the burden on 557 * the client rather than the server. 558 */ 559 for (resid = bp->b_resid; resid; resid -= sz) { 560 struct vndbuf *nbp; 561 struct vnode *vp; 562 daddr_t nbn; 563 int off, nra; 564 565 nra = 0; 566 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 567 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 568 VOP_UNLOCK(vnd->sc_vp, 0); 569 570 if (error == 0 && (long)nbn == -1) 571 error = EIO; 572 573 /* 574 * If there was an error or a hole in the file...punt. 575 * Note that we may have to wait for any operations 576 * that we have already fired off before releasing 577 * the buffer. 578 * 579 * XXX we could deal with holes here but it would be 580 * a hassle (in the write case). 
581 */ 582 if (error) { 583 s = splbio(); 584 vnx->vx_error = error; 585 goto out; 586 } 587 588#ifdef DEBUG 589 if (!dovndcluster) 590 nra = 0; 591#endif 592 593 if ((off = bn % bsize) != 0) 594 sz = bsize - off; 595 else 596 sz = (1 + nra) * bsize; 597 if (resid < sz) 598 sz = resid; 599#ifdef DEBUG 600 if (vnddebug & VDB_IO) 601 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 602 " sz 0x%x\n", 603 vnd->sc_vp, vp, (long long)bn, nbn, sz); 604#endif 605 606 s = splbio(); 607 while (vnd->sc_active >= vnd->sc_maxactive) { 608 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 609 } 610 vnd->sc_active++; 611 nbp = VND_GETBUF(vnd); 612 splx(s); 613 BUF_INIT(&nbp->vb_buf); 614 nbp->vb_buf.b_flags = flags; 615 nbp->vb_buf.b_bcount = sz; 616 nbp->vb_buf.b_bufsize = round_page((ulong)addr + sz) 617 - trunc_page((ulong) addr); 618 nbp->vb_buf.b_error = 0; 619 nbp->vb_buf.b_data = addr; 620 nbp->vb_buf.b_blkno = nbp->vb_buf.b_rawblkno = nbn + btodb(off); 621 nbp->vb_buf.b_proc = bp->b_proc; 622 nbp->vb_buf.b_iodone = vndiodone; 623 nbp->vb_buf.b_vp = vp; 624 625 nbp->vb_xfer = vnx; 626 627 BIO_COPYPRIO(&nbp->vb_buf, bp); 628 629 /* 630 * Just sort by block number 631 */ 632 s = splbio(); 633 if (vnx->vx_error != 0) { 634 VND_PUTBUF(vnd, nbp); 635 goto out; 636 } 637 vnx->vx_pending++; 638#ifdef DEBUG 639 if (vnddebug & VDB_IO) 640 printf("vndstart(%ld): bp %p vp %p blkno " 641 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 642 (long) (vnd-vnd_softc), &nbp->vb_buf, 643 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 644 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 645 nbp->vb_buf.b_bcount); 646#endif 647 648 /* Instrumentation. 
*/ 649 disk_busy(&vnd->sc_dkdev); 650 651 if ((nbp->vb_buf.b_flags & B_READ) == 0) 652 vp->v_numoutput++; 653 VOP_STRATEGY(vp, &nbp->vb_buf); 654 655 splx(s); 656 bn += sz; 657 addr += sz; 658 } 659 660 s = splbio(); 661 662out: /* Arrive here at splbio */ 663 if ((flags & B_READ) == 0) 664 vn_finished_write(mp, 0); 665 vnx->vx_flags &= ~VX_BUSY; 666 if (vnx->vx_pending == 0) { 667 if (vnx->vx_error != 0) { 668 bp->b_error = vnx->vx_error; 669 bp->b_flags |= B_ERROR; 670 } 671 VND_PUTXFER(vnd, vnx); 672 biodone(bp); 673 } 674 continue; 675done: 676 biodone(bp); 677 s = splbio(); 678 } 679 680 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 681 wakeup(&vnd->sc_kthread); 682 splx(s); 683 kthread_exit(0); 684} 685 686 687static void 688vndiodone(struct buf *bp) 689{ 690 struct vndbuf *vbp = (struct vndbuf *) bp; 691 struct vndxfer *vnx = (struct vndxfer *)vbp->vb_xfer; 692 struct buf *pbp = vnx->vx_bp; 693 struct vnd_softc *vnd = &vnd_softc[vndunit(pbp->b_dev)]; 694 int s, resid; 695 696 s = splbio(); 697#ifdef DEBUG 698 if (vnddebug & VDB_IO) 699 printf("vndiodone(%ld): vbp %p vp %p blkno 0x%" PRIx64 700 " addr %p cnt 0x%x\n", 701 (long) (vnd-vnd_softc), vbp, vbp->vb_buf.b_vp, 702 vbp->vb_buf.b_blkno, vbp->vb_buf.b_data, 703 vbp->vb_buf.b_bcount); 704#endif 705 706 resid = vbp->vb_buf.b_bcount - vbp->vb_buf.b_resid; 707 pbp->b_resid -= resid; 708 disk_unbusy(&vnd->sc_dkdev, resid, (pbp->b_flags & B_READ)); 709 vnx->vx_pending--; 710 711 if (vbp->vb_buf.b_error) { 712#ifdef DEBUG 713 if (vnddebug & VDB_IO) 714 printf("vndiodone: vbp %p error %d\n", vbp, 715 vbp->vb_buf.b_error); 716#endif 717 vnx->vx_error = vbp->vb_buf.b_error; 718 } 719 720 VND_PUTBUF(vnd, vbp); 721 722 /* 723 * Wrap up this transaction if it has run to completion or, in 724 * case of an error, when all auxiliary buffers have returned. 
725 */ 726 if (vnx->vx_error != 0) { 727 pbp->b_flags |= B_ERROR; 728 pbp->b_error = vnx->vx_error; 729 if ((vnx->vx_flags & VX_BUSY) == 0 && vnx->vx_pending == 0) { 730 731#ifdef DEBUG 732 if (vnddebug & VDB_IO) 733 printf("vndiodone: pbp %p iodone: error %d\n", 734 pbp, vnx->vx_error); 735#endif 736 VND_PUTXFER(vnd, vnx); 737 biodone(pbp); 738 } 739 } else if (pbp->b_resid == 0) { 740 741#ifdef DIAGNOSTIC 742 if (vnx->vx_pending != 0) 743 panic("vndiodone: vnx pending: %d", vnx->vx_pending); 744#endif 745 746 if ((vnx->vx_flags & VX_BUSY) == 0) { 747#ifdef DEBUG 748 if (vnddebug & VDB_IO) 749 printf("vndiodone: pbp %p iodone\n", pbp); 750#endif 751 VND_PUTXFER(vnd, vnx); 752 biodone(pbp); 753 } 754 } 755 756 vnd->sc_active--; 757 wakeup(&vnd->sc_tab); 758 splx(s); 759} 760 761/* ARGSUSED */ 762static int 763vndread(dev_t dev, struct uio *uio, int flags) 764{ 765 int unit = vndunit(dev); 766 struct vnd_softc *sc; 767 768#ifdef DEBUG 769 if (vnddebug & VDB_FOLLOW) 770 printf("vndread(0x%x, %p)\n", dev, uio); 771#endif 772 773 if (unit >= numvnd) 774 return (ENXIO); 775 sc = &vnd_softc[unit]; 776 777 if ((sc->sc_flags & VNF_INITED) == 0) 778 return (ENXIO); 779 780 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 781} 782 783/* ARGSUSED */ 784static int 785vndwrite(dev_t dev, struct uio *uio, int flags) 786{ 787 int unit = vndunit(dev); 788 struct vnd_softc *sc; 789 790#ifdef DEBUG 791 if (vnddebug & VDB_FOLLOW) 792 printf("vndwrite(0x%x, %p)\n", dev, uio); 793#endif 794 795 if (unit >= numvnd) 796 return (ENXIO); 797 sc = &vnd_softc[unit]; 798 799 if ((sc->sc_flags & VNF_INITED) == 0) 800 return (ENXIO); 801 802 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 803} 804 805static int 806vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 807{ 808 struct vnd_softc *vnd; 809 810 if (*un == -1) 811 *un = unit; 812 if (*un >= numvnd) 813 return ENXIO; 814 if (*un < 0) 815 return EINVAL; 816 817 vnd = &vnd_softc[*un]; 818 819 if 
((vnd->sc_flags & VNF_INITED) == 0) 820 return -1; 821 822 return VOP_GETATTR(vnd->sc_vp, va, l->l_proc->p_ucred, l); 823} 824 825/* ARGSUSED */ 826static int 827vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 828{ 829 int unit = vndunit(dev); 830 struct vnd_softc *vnd; 831 struct vnd_ioctl *vio; 832 struct vattr vattr; 833 struct nameidata nd; 834 int error, part, pmask; 835 size_t geomsize; 836 struct proc *p = l->l_proc; 837 int fflags; 838#ifdef __HAVE_OLD_DISKLABEL 839 struct disklabel newlabel; 840#endif 841 842#ifdef DEBUG 843 if (vnddebug & VDB_FOLLOW) 844 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 845 dev, cmd, data, flag, p, unit); 846#endif 847 if (unit >= numvnd) 848 return (ENXIO); 849 850 vnd = &vnd_softc[unit]; 851 vio = (struct vnd_ioctl *)data; 852 853 /* Must be open for writes for these commands... */ 854 switch (cmd) { 855 case VNDIOCSET: 856 case VNDIOCCLR: 857 case DIOCSDINFO: 858 case DIOCWDINFO: 859#ifdef __HAVE_OLD_DISKLABEL 860 case ODIOCSDINFO: 861 case ODIOCWDINFO: 862#endif 863 case DIOCKLABEL: 864 case DIOCWLABEL: 865 if ((flag & FWRITE) == 0) 866 return (EBADF); 867 } 868 869 /* Must be initialized for these... 
*/ 870 switch (cmd) { 871 case VNDIOCCLR: 872 case DIOCGDINFO: 873 case DIOCSDINFO: 874 case DIOCWDINFO: 875 case DIOCGPART: 876 case DIOCKLABEL: 877 case DIOCWLABEL: 878 case DIOCGDEFLABEL: 879#ifdef __HAVE_OLD_DISKLABEL 880 case ODIOCGDINFO: 881 case ODIOCSDINFO: 882 case ODIOCWDINFO: 883 case ODIOCGDEFLABEL: 884#endif 885 if ((vnd->sc_flags & VNF_INITED) == 0) 886 return (ENXIO); 887 } 888 889 switch (cmd) { 890 case VNDIOCSET: 891 if (vnd->sc_flags & VNF_INITED) 892 return (EBUSY); 893 894 if ((error = vndlock(vnd)) != 0) 895 return (error); 896 897 fflags = FREAD; 898 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 899 fflags |= FWRITE; 900 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l); 901 if ((error = vn_open(&nd, fflags, 0)) != 0) 902 goto unlock_and_exit; 903 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_proc->p_ucred, l); 904 if (!error && nd.ni_vp->v_type != VREG) 905 error = EOPNOTSUPP; 906 if (error) { 907 VOP_UNLOCK(nd.ni_vp, 0); 908 goto close_and_exit; 909 } 910 911 /* If using a compressed file, initialize its info */ 912 /* (or abort with an error if kernel has no compression) */ 913 if (vio->vnd_flags & VNF_COMP) { 914#ifdef VND_COMPRESSION 915 struct vnd_comp_header *ch; 916 int i; 917 u_int32_t comp_size; 918 u_int32_t comp_maxsize; 919 920 /* allocate space for compresed file header */ 921 ch = malloc(sizeof(struct vnd_comp_header), 922 M_TEMP, M_WAITOK); 923 924 /* read compressed file header */ 925 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch, 926 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 927 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 928 if(error) { 929 free(ch, M_TEMP); 930 VOP_UNLOCK(nd.ni_vp, 0); 931 goto close_and_exit; 932 } 933 934 /* save some header info */ 935 vnd->sc_comp_blksz = ntohl(ch->block_size); 936 /* note last offset is the file byte size */ 937 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 938 free(ch, M_TEMP); 939 if(vnd->sc_comp_blksz % DEV_BSIZE !=0) { 940 VOP_UNLOCK(nd.ni_vp, 0); 941 error = 
EINVAL; 942 goto close_and_exit; 943 } 944 if(sizeof(struct vnd_comp_header) + 945 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 946 vattr.va_size) { 947 VOP_UNLOCK(nd.ni_vp, 0); 948 error = EINVAL; 949 goto close_and_exit; 950 } 951 952 /* set decompressed file size */ 953 vattr.va_size = 954 (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz; 955 956 /* allocate space for all the compressed offsets */ 957 vnd->sc_comp_offsets = 958 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 959 M_DEVBUF, M_WAITOK); 960 961 /* read in the offsets */ 962 error = vn_rdwr(UIO_READ, nd.ni_vp, 963 (caddr_t)vnd->sc_comp_offsets, 964 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 965 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 966 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 967 if(error) { 968 VOP_UNLOCK(nd.ni_vp, 0); 969 goto close_and_exit; 970 } 971 /* 972 * find largest block size (used for allocation limit). 973 * Also convert offset to native byte order. 974 */ 975 comp_maxsize = 0; 976 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 977 vnd->sc_comp_offsets[i] = 978 be64toh(vnd->sc_comp_offsets[i]); 979 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 980 - vnd->sc_comp_offsets[i]; 981 if (comp_size > comp_maxsize) 982 comp_maxsize = comp_size; 983 } 984 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 985 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 986 987 /* create compressed data buffer */ 988 vnd->sc_comp_buff = malloc(comp_maxsize, 989 M_DEVBUF, M_WAITOK); 990 991 /* create decompressed buffer */ 992 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 993 M_DEVBUF, M_WAITOK); 994 vnd->sc_comp_buffblk = -1; 995 996 /* Initialize decompress stream */ 997 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 998 vnd->sc_comp_stream.zalloc = vnd_alloc; 999 vnd->sc_comp_stream.zfree = vnd_free; 1000 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 1001 if(error) { 1002 if(vnd->sc_comp_stream.msg) 1003 printf("vnd%d: compressed file, %s\n", 1004 unit, 
vnd->sc_comp_stream.msg); 1005 VOP_UNLOCK(nd.ni_vp, 0); 1006 error = EINVAL; 1007 goto close_and_exit; 1008 } 1009 1010 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 1011#else /* !VND_COMPRESSION */ 1012 VOP_UNLOCK(nd.ni_vp, 0); 1013 error = EOPNOTSUPP; 1014 goto close_and_exit; 1015#endif /* VND_COMPRESSION */ 1016 } 1017 1018 VOP_UNLOCK(nd.ni_vp, 0); 1019 vnd->sc_vp = nd.ni_vp; 1020 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 1021 1022 /* 1023 * Use pseudo-geometry specified. If none was provided, 1024 * use "standard" Adaptec fictitious geometry. 1025 */ 1026 if (vio->vnd_flags & VNDIOF_HASGEOM) { 1027 1028 memcpy(&vnd->sc_geom, &vio->vnd_geom, 1029 sizeof(vio->vnd_geom)); 1030 1031 /* 1032 * Sanity-check the sector size. 1033 * XXX Don't allow secsize < DEV_BSIZE. Should 1034 * XXX we? 1035 */ 1036 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1037 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1038 vnd->sc_geom.vng_ncylinders == 0 || 1039 (vnd->sc_geom.vng_ntracks * 1040 vnd->sc_geom.vng_nsectors) == 0) { 1041 error = EINVAL; 1042 goto close_and_exit; 1043 } 1044 1045 /* 1046 * Compute the size (in DEV_BSIZE blocks) specified 1047 * by the geometry. 1048 */ 1049 geomsize = (vnd->sc_geom.vng_nsectors * 1050 vnd->sc_geom.vng_ntracks * 1051 vnd->sc_geom.vng_ncylinders) * 1052 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1053 1054 /* 1055 * Sanity-check the size against the specified 1056 * geometry. 1057 */ 1058 if (vnd->sc_size < geomsize) { 1059 error = EINVAL; 1060 goto close_and_exit; 1061 } 1062 } else if (vnd->sc_size >= (32 * 64)) { 1063 /* 1064 * Size must be at least 2048 DEV_BSIZE blocks 1065 * (1M) in order to use this geometry. 
1066 */ 1067 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1068 vnd->sc_geom.vng_nsectors = 32; 1069 vnd->sc_geom.vng_ntracks = 64; 1070 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1071 } else { 1072 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1073 vnd->sc_geom.vng_nsectors = 1; 1074 vnd->sc_geom.vng_ntracks = 1; 1075 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1076 } 1077 1078 if (vio->vnd_flags & VNDIOF_READONLY) { 1079 vnd->sc_flags |= VNF_READONLY; 1080 } 1081 1082 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 1083 goto close_and_exit; 1084 1085 memset(vnd->sc_xname, 0, sizeof(vnd->sc_xname)); /* XXX */ 1086 snprintf(vnd->sc_xname, sizeof(vnd->sc_xname), "vnd%d", unit); 1087 1088 1089 vndthrottle(vnd, vnd->sc_vp); 1090 vio->vnd_size = dbtob(vnd->sc_size); 1091 vnd->sc_flags |= VNF_INITED; 1092 1093 /* create the kernel thread, wait for it to be up */ 1094 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread, 1095 vnd->sc_xname); 1096 if (error) 1097 goto close_and_exit; 1098 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1099 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1100 } 1101#ifdef DEBUG 1102 if (vnddebug & VDB_INIT) 1103 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1104 vnd->sc_vp, (unsigned long) vnd->sc_size, 1105 vnd->sc_geom.vng_secsize, 1106 vnd->sc_geom.vng_nsectors, 1107 vnd->sc_geom.vng_ntracks, 1108 vnd->sc_geom.vng_ncylinders); 1109#endif 1110 1111 /* Attach the disk. */ 1112 vnd->sc_dkdev.dk_name = vnd->sc_xname; 1113 pseudo_disk_attach(&vnd->sc_dkdev); 1114 1115 /* Initialize the xfer and buffer pools. */ 1116 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1117 0, 0, "vndxpl", NULL); 1118 pool_init(&vnd->sc_vbpool, sizeof(struct vndbuf), 0, 1119 0, 0, "vndbpl", NULL); 1120 1121 /* Try and read the disklabel. 
*/ 1122 vndgetdisklabel(dev); 1123 1124 vndunlock(vnd); 1125 1126 break; 1127 1128close_and_exit: 1129 (void) vn_close(nd.ni_vp, fflags, p->p_ucred, l); 1130unlock_and_exit: 1131#ifdef VND_COMPRESSION 1132 /* free any allocated memory (for compressed file) */ 1133 if(vnd->sc_comp_offsets) { 1134 free(vnd->sc_comp_offsets, M_DEVBUF); 1135 vnd->sc_comp_offsets = NULL; 1136 } 1137 if(vnd->sc_comp_buff) { 1138 free(vnd->sc_comp_buff, M_DEVBUF); 1139 vnd->sc_comp_buff = NULL; 1140 } 1141 if(vnd->sc_comp_decombuf) { 1142 free(vnd->sc_comp_decombuf, M_DEVBUF); 1143 vnd->sc_comp_decombuf = NULL; 1144 } 1145#endif /* VND_COMPRESSION */ 1146 vndunlock(vnd); 1147 return (error); 1148 1149 case VNDIOCCLR: 1150 if ((error = vndlock(vnd)) != 0) 1151 return (error); 1152 1153 /* 1154 * Don't unconfigure if any other partitions are open 1155 * or if both the character and block flavors of this 1156 * partition are open. 1157 */ 1158 part = DISKPART(dev); 1159 pmask = (1 << part); 1160 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1161 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1162 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1163 !(vio->vnd_flags & VNDIOF_FORCE)) { 1164 vndunlock(vnd); 1165 return (EBUSY); 1166 } 1167 1168 /* 1169 * XXX vndclear() might call vndclose() implicitely; 1170 * release lock to avoid recursion 1171 */ 1172 vndunlock(vnd); 1173 vndclear(vnd, minor(dev)); 1174#ifdef DEBUG 1175 if (vnddebug & VDB_INIT) 1176 printf("vndioctl: CLRed\n"); 1177#endif 1178 1179 /* Destroy the xfer and buffer pools. */ 1180 pool_destroy(&vnd->sc_vxpool); 1181 pool_destroy(&vnd->sc_vbpool); 1182 1183 /* Detatch the disk. 
*/ 1184 pseudo_disk_detach(&vnd->sc_dkdev); 1185 1186 break; 1187 1188#ifdef COMPAT_30 1189 case VNDIOOCGET: { 1190 struct vnd_ouser *vnu; 1191 struct vattr va; 1192 vnu = (struct vnd_ouser *)data; 1193 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1194 case 0: 1195 vnu->vnu_dev = va.va_fsid; 1196 vnu->vnu_ino = va.va_fileid; 1197 break; 1198 case -1: 1199 /* unused is not an error */ 1200 vnu->vnu_dev = 0; 1201 vnu->vnu_ino = 0; 1202 break; 1203 default: 1204 return error; 1205 } 1206 break; 1207 } 1208#endif 1209 case VNDIOCGET: { 1210 struct vnd_user *vnu; 1211 struct vattr va; 1212 vnu = (struct vnd_user *)data; 1213 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1214 case 0: 1215 vnu->vnu_dev = va.va_fsid; 1216 vnu->vnu_ino = va.va_fileid; 1217 break; 1218 case -1: 1219 /* unused is not an error */ 1220 vnu->vnu_dev = 0; 1221 vnu->vnu_ino = 0; 1222 break; 1223 default: 1224 return error; 1225 } 1226 break; 1227 } 1228 1229 case DIOCGDINFO: 1230 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1231 break; 1232 1233#ifdef __HAVE_OLD_DISKLABEL 1234 case ODIOCGDINFO: 1235 newlabel = *(vnd->sc_dkdev.dk_label); 1236 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1237 return ENOTTY; 1238 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1239 break; 1240#endif 1241 1242 case DIOCGPART: 1243 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1244 ((struct partinfo *)data)->part = 1245 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1246 break; 1247 1248 case DIOCWDINFO: 1249 case DIOCSDINFO: 1250#ifdef __HAVE_OLD_DISKLABEL 1251 case ODIOCWDINFO: 1252 case ODIOCSDINFO: 1253#endif 1254 { 1255 struct disklabel *lp; 1256 1257 if ((error = vndlock(vnd)) != 0) 1258 return (error); 1259 1260 vnd->sc_flags |= VNF_LABELLING; 1261 1262#ifdef __HAVE_OLD_DISKLABEL 1263 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1264 memset(&newlabel, 0, sizeof newlabel); 1265 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1266 lp = 
&newlabel; 1267 } else 1268#endif 1269 lp = (struct disklabel *)data; 1270 1271 error = setdisklabel(vnd->sc_dkdev.dk_label, 1272 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1273 if (error == 0) { 1274 if (cmd == DIOCWDINFO 1275#ifdef __HAVE_OLD_DISKLABEL 1276 || cmd == ODIOCWDINFO 1277#endif 1278 ) 1279 error = writedisklabel(VNDLABELDEV(dev), 1280 vndstrategy, vnd->sc_dkdev.dk_label, 1281 vnd->sc_dkdev.dk_cpulabel); 1282 } 1283 1284 vnd->sc_flags &= ~VNF_LABELLING; 1285 1286 vndunlock(vnd); 1287 1288 if (error) 1289 return (error); 1290 break; 1291 } 1292 1293 case DIOCKLABEL: 1294 if (*(int *)data != 0) 1295 vnd->sc_flags |= VNF_KLABEL; 1296 else 1297 vnd->sc_flags &= ~VNF_KLABEL; 1298 break; 1299 1300 case DIOCWLABEL: 1301 if (*(int *)data != 0) 1302 vnd->sc_flags |= VNF_WLABEL; 1303 else 1304 vnd->sc_flags &= ~VNF_WLABEL; 1305 break; 1306 1307 case DIOCGDEFLABEL: 1308 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1309 break; 1310 1311#ifdef __HAVE_OLD_DISKLABEL 1312 case ODIOCGDEFLABEL: 1313 vndgetdefaultlabel(vnd, &newlabel); 1314 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1315 return ENOTTY; 1316 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1317 break; 1318#endif 1319 1320 default: 1321 return (ENOTTY); 1322 } 1323 1324 return (0); 1325} 1326 1327/* 1328 * Duplicate the current processes' credentials. Since we are called only 1329 * as the result of a SET ioctl and only root can do that, any future access 1330 * to this "disk" is essentially as root. Note that credentials may change 1331 * if some other uid can write directly to the mapped file (NFS). 
1332 */ 1333static int 1334vndsetcred(struct vnd_softc *vnd, struct ucred *cred) 1335{ 1336 struct uio auio; 1337 struct iovec aiov; 1338 char *tmpbuf; 1339 int error; 1340 1341 vnd->sc_cred = crdup(cred); 1342 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1343 1344 /* XXX: Horrible kludge to establish credentials for NFS */ 1345 aiov.iov_base = tmpbuf; 1346 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1347 auio.uio_iov = &aiov; 1348 auio.uio_iovcnt = 1; 1349 auio.uio_offset = 0; 1350 auio.uio_rw = UIO_READ; 1351 auio.uio_segflg = UIO_SYSSPACE; 1352 auio.uio_resid = aiov.iov_len; 1353 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1354 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1355 if (error == 0) { 1356 /* 1357 * Because vnd does all IO directly through the vnode 1358 * we need to flush (at least) the buffer from the above 1359 * VOP_READ from the buffer cache to prevent cache 1360 * incoherencies. Also, be careful to write dirty 1361 * buffers back to stable storage. 
1362 */ 1363 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1364 curlwp, 0, 0); 1365 } 1366 VOP_UNLOCK(vnd->sc_vp, 0); 1367 1368 free(tmpbuf, M_TEMP); 1369 return (error); 1370} 1371 1372/* 1373 * Set maxactive based on FS type 1374 */ 1375static void 1376vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1377{ 1378#ifdef NFS 1379 extern int (**nfsv2_vnodeop_p)(void *); 1380 1381 if (vp->v_op == nfsv2_vnodeop_p) 1382 vnd->sc_maxactive = 2; 1383 else 1384#endif 1385 vnd->sc_maxactive = 8; 1386 1387 if (vnd->sc_maxactive < 1) 1388 vnd->sc_maxactive = 1; 1389} 1390 1391#if 0 1392static void 1393vndshutdown(void) 1394{ 1395 struct vnd_softc *vnd; 1396 1397 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1398 if (vnd->sc_flags & VNF_INITED) 1399 vndclear(vnd); 1400} 1401#endif 1402 1403static void 1404vndclear(struct vnd_softc *vnd, int myminor) 1405{ 1406 struct vnode *vp = vnd->sc_vp; 1407 struct lwp *l = curlwp; 1408 int fflags = FREAD; 1409 int bmaj, cmaj, i, mn; 1410 int s; 1411 1412#ifdef DEBUG 1413 if (vnddebug & VDB_FOLLOW) 1414 printf("vndclear(%p): vp %p\n", vnd, vp); 1415#endif 1416 /* locate the major number */ 1417 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1418 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1419 1420 /* Nuke the vnodes for any open instances */ 1421 for (i = 0; i < MAXPARTITIONS; i++) { 1422 mn = DISKMINOR(vnd->sc_unit, i); 1423 vdevgone(bmaj, mn, mn, VBLK); 1424 if (mn != myminor) /* XXX avoid to kill own vnode */ 1425 vdevgone(cmaj, mn, mn, VCHR); 1426 } 1427 1428 if ((vnd->sc_flags & VNF_READONLY) == 0) 1429 fflags |= FWRITE; 1430 1431 s = splbio(); 1432 bufq_drain(vnd->sc_tab); 1433 splx(s); 1434 1435 vnd->sc_flags |= VNF_VUNCONF; 1436 wakeup(&vnd->sc_tab); 1437 while (vnd->sc_flags & VNF_KTHREAD) 1438 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1439 1440#ifdef VND_COMPRESSION 1441 /* free the compressed file buffers */ 1442 if(vnd->sc_flags & VNF_COMP) { 1443 if(vnd->sc_comp_offsets) { 1444 free(vnd->sc_comp_offsets, 
M_DEVBUF); 1445 vnd->sc_comp_offsets = NULL; 1446 } 1447 if(vnd->sc_comp_buff) { 1448 free(vnd->sc_comp_buff, M_DEVBUF); 1449 vnd->sc_comp_buff = NULL; 1450 } 1451 if(vnd->sc_comp_decombuf) { 1452 free(vnd->sc_comp_decombuf, M_DEVBUF); 1453 vnd->sc_comp_decombuf = NULL; 1454 } 1455 } 1456#endif /* VND_COMPRESSION */ 1457 vnd->sc_flags &= 1458 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1459 | VNF_VUNCONF | VNF_COMP); 1460 if (vp == (struct vnode *)0) 1461 panic("vndclear: null vp"); 1462 (void) vn_close(vp, fflags, vnd->sc_cred, l); 1463 crfree(vnd->sc_cred); 1464 vnd->sc_vp = (struct vnode *)0; 1465 vnd->sc_cred = (struct ucred *)0; 1466 vnd->sc_size = 0; 1467} 1468 1469static int 1470vndsize(dev_t dev) 1471{ 1472 struct vnd_softc *sc; 1473 struct disklabel *lp; 1474 int part, unit, omask; 1475 int size; 1476 1477 unit = vndunit(dev); 1478 if (unit >= numvnd) 1479 return (-1); 1480 sc = &vnd_softc[unit]; 1481 1482 if ((sc->sc_flags & VNF_INITED) == 0) 1483 return (-1); 1484 1485 part = DISKPART(dev); 1486 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1487 lp = sc->sc_dkdev.dk_label; 1488 1489 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1490 return (-1); 1491 1492 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1493 size = -1; 1494 else 1495 size = lp->d_partitions[part].p_size * 1496 (lp->d_secsize / DEV_BSIZE); 1497 1498 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1499 return (-1); 1500 1501 return (size); 1502} 1503 1504static int 1505vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 1506{ 1507 1508 /* Not implemented. 
*/ 1509 return ENXIO; 1510} 1511 1512static void 1513vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1514{ 1515 struct vndgeom *vng = &sc->sc_geom; 1516 struct partition *pp; 1517 1518 memset(lp, 0, sizeof(*lp)); 1519 1520 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1521 lp->d_secsize = vng->vng_secsize; 1522 lp->d_nsectors = vng->vng_nsectors; 1523 lp->d_ntracks = vng->vng_ntracks; 1524 lp->d_ncylinders = vng->vng_ncylinders; 1525 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1526 1527 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1528 lp->d_type = DTYPE_VND; 1529 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1530 lp->d_rpm = 3600; 1531 lp->d_interleave = 1; 1532 lp->d_flags = 0; 1533 1534 pp = &lp->d_partitions[RAW_PART]; 1535 pp->p_offset = 0; 1536 pp->p_size = lp->d_secperunit; 1537 pp->p_fstype = FS_UNUSED; 1538 lp->d_npartitions = RAW_PART + 1; 1539 1540 lp->d_magic = DISKMAGIC; 1541 lp->d_magic2 = DISKMAGIC; 1542 lp->d_checksum = dkcksum(lp); 1543} 1544 1545/* 1546 * Read the disklabel from a vnd. If one is not present, create a fake one. 1547 */ 1548static void 1549vndgetdisklabel(dev_t dev) 1550{ 1551 struct vnd_softc *sc = &vnd_softc[vndunit(dev)]; 1552 const char *errstring; 1553 struct disklabel *lp = sc->sc_dkdev.dk_label; 1554 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1555 int i; 1556 1557 memset(clp, 0, sizeof(*clp)); 1558 1559 vndgetdefaultlabel(sc, lp); 1560 1561 /* 1562 * Call the generic disklabel extraction routine. 1563 */ 1564 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1565 if (errstring) { 1566 /* 1567 * Lack of disklabel is common, but we print the warning 1568 * anyway, since it might contain other useful information. 1569 */ 1570 printf("%s: %s\n", sc->sc_xname, errstring); 1571 1572 /* 1573 * For historical reasons, if there's no disklabel 1574 * present, all partitions must be FS_BSDFFS and 1575 * occupy the entire disk. 
1576 */ 1577 for (i = 0; i < MAXPARTITIONS; i++) { 1578 /* 1579 * Don't wipe out port specific hack (such as 1580 * dos partition hack of i386 port). 1581 */ 1582 if (lp->d_partitions[i].p_size != 0) 1583 continue; 1584 1585 lp->d_partitions[i].p_size = lp->d_secperunit; 1586 lp->d_partitions[i].p_offset = 0; 1587 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1588 } 1589 1590 strncpy(lp->d_packname, "default label", 1591 sizeof(lp->d_packname)); 1592 1593 lp->d_npartitions = MAXPARTITIONS; 1594 lp->d_checksum = dkcksum(lp); 1595 } 1596 1597 /* In-core label now valid. */ 1598 sc->sc_flags |= VNF_VLABEL; 1599} 1600 1601/* 1602 * Wait interruptibly for an exclusive lock. 1603 * 1604 * XXX 1605 * Several drivers do this; it should be abstracted and made MP-safe. 1606 */ 1607static int 1608vndlock(struct vnd_softc *sc) 1609{ 1610 int error; 1611 1612 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1613 sc->sc_flags |= VNF_WANTED; 1614 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1615 return (error); 1616 } 1617 sc->sc_flags |= VNF_LOCKED; 1618 return (0); 1619} 1620 1621/* 1622 * Unlock and wake up any waiters. 1623 */ 1624static void 1625vndunlock(struct vnd_softc *sc) 1626{ 1627 1628 sc->sc_flags &= ~VNF_LOCKED; 1629 if ((sc->sc_flags & VNF_WANTED) != 0) { 1630 sc->sc_flags &= ~VNF_WANTED; 1631 wakeup(sc); 1632 } 1633} 1634 1635#ifdef VND_COMPRESSION 1636/* compressed file read */ 1637static void 1638compstrategy(struct buf *bp, off_t bn) 1639{ 1640 int error; 1641 int unit = vndunit(bp->b_dev); 1642 struct vnd_softc *vnd = &vnd_softc[unit]; 1643 u_int32_t comp_block; 1644 struct uio auio; 1645 caddr_t addr; 1646 int s; 1647 1648 /* set up constants for data move */ 1649 auio.uio_rw = UIO_READ; 1650 auio.uio_segflg = bp->b_flags & B_PHYS ? 
UIO_USERSPACE : UIO_SYSSPACE; 1651 auio.uio_lwp = LIST_FIRST(&bp->b_proc->p_lwps); 1652 1653 /* read, and transfer the data */ 1654 addr = bp->b_data; 1655 s = splbio(); 1656 while (bp->b_resid > 0) { 1657 unsigned length; 1658 size_t length_in_buffer; 1659 u_int32_t offset_in_buffer; 1660 struct iovec aiov; 1661 1662 /* calculate the compressed block number */ 1663 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1664 1665 /* check for good block number */ 1666 if (comp_block >= vnd->sc_comp_numoffs) { 1667 bp->b_error = EINVAL; 1668 bp->b_flags |= B_ERROR; 1669 splx(s); 1670 return; 1671 } 1672 1673 /* read in the compressed block, if not in buffer */ 1674 if (comp_block != vnd->sc_comp_buffblk) { 1675 length = vnd->sc_comp_offsets[comp_block + 1] - 1676 vnd->sc_comp_offsets[comp_block]; 1677 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1678 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1679 length, vnd->sc_comp_offsets[comp_block], 1680 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1681 if (error) { 1682 bp->b_error = error; 1683 bp->b_flags |= B_ERROR; 1684 VOP_UNLOCK(vnd->sc_vp, 0); 1685 splx(s); 1686 return; 1687 } 1688 /* uncompress the buffer */ 1689 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1690 vnd->sc_comp_stream.avail_in = length; 1691 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1692 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1693 inflateReset(&vnd->sc_comp_stream); 1694 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1695 if (error != Z_STREAM_END) { 1696 if (vnd->sc_comp_stream.msg) 1697 printf("%s: compressed file, %s\n", 1698 vnd->sc_xname, 1699 vnd->sc_comp_stream.msg); 1700 bp->b_error = EBADMSG; 1701 bp->b_flags |= B_ERROR; 1702 VOP_UNLOCK(vnd->sc_vp, 0); 1703 splx(s); 1704 return; 1705 } 1706 vnd->sc_comp_buffblk = comp_block; 1707 VOP_UNLOCK(vnd->sc_vp, 0); 1708 } 1709 1710 /* transfer the usable uncompressed data */ 1711 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1712 length_in_buffer = 
vnd->sc_comp_blksz - offset_in_buffer; 1713 if (length_in_buffer > bp->b_resid) 1714 length_in_buffer = bp->b_resid; 1715 auio.uio_iov = &aiov; 1716 auio.uio_iovcnt = 1; 1717 aiov.iov_base = addr; 1718 aiov.iov_len = length_in_buffer; 1719 auio.uio_resid = aiov.iov_len; 1720 auio.uio_offset = 0; 1721 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1722 length_in_buffer, &auio); 1723 if (error) { 1724 bp->b_error = error; 1725 bp->b_flags |= B_ERROR; 1726 splx(s); 1727 return; 1728 } 1729 1730 bn += length_in_buffer; 1731 addr += length_in_buffer; 1732 bp->b_resid -= length_in_buffer; 1733 } 1734 splx(s); 1735} 1736 1737/* compression memory allocation routines */ 1738static void * 1739vnd_alloc(void *aux, u_int items, u_int siz) 1740{ 1741 return malloc(items * siz, M_TEMP, M_NOWAIT); 1742} 1743 1744static void 1745vnd_free(void *aux, void *ptr) 1746{ 1747 free(ptr, M_TEMP); 1748} 1749#endif /* VND_COMPRESSION */ 1750