vnd.c revision 1.135
1/* $NetBSD: vnd.c,v 1.135 2006/02/02 06:57:35 cube Exp $ */ 2 3/*- 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the NetBSD 21 * Foundation, Inc. and its contributors. 22 * 4. Neither the name of The NetBSD Foundation nor the names of its 23 * contributors may be used to endorse or promote products derived 24 * from this software without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 36 * POSSIBILITY OF SUCH DAMAGE. 37 */ 38 39/* 40 * Copyright (c) 1990, 1993 41 * The Regents of the University of California. All rights reserved. 42 * 43 * This code is derived from software contributed to Berkeley by 44 * the Systems Programming Group of the University of Utah Computer 45 * Science Department. 46 * 47 * Redistribution and use in source and binary forms, with or without 48 * modification, are permitted provided that the following conditions 49 * are met: 50 * 1. Redistributions of source code must retain the above copyright 51 * notice, this list of conditions and the following disclaimer. 52 * 2. Redistributions in binary form must reproduce the above copyright 53 * notice, this list of conditions and the following disclaimer in the 54 * documentation and/or other materials provided with the distribution. 55 * 3. Neither the name of the University nor the names of its contributors 56 * may be used to endorse or promote products derived from this software 57 * without specific prior written permission. 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 * SUCH DAMAGE. 70 * 71 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 72 * 73 * @(#)vn.c 8.9 (Berkeley) 5/14/95 74 */ 75 76/* 77 * Copyright (c) 1988 University of Utah. 78 * 79 * This code is derived from software contributed to Berkeley by 80 * the Systems Programming Group of the University of Utah Computer 81 * Science Department. 82 * 83 * Redistribution and use in source and binary forms, with or without 84 * modification, are permitted provided that the following conditions 85 * are met: 86 * 1. Redistributions of source code must retain the above copyright 87 * notice, this list of conditions and the following disclaimer. 88 * 2. Redistributions in binary form must reproduce the above copyright 89 * notice, this list of conditions and the following disclaimer in the 90 * documentation and/or other materials provided with the distribution. 91 * 3. All advertising materials mentioning features or use of this software 92 * must display the following acknowledgement: 93 * This product includes software developed by the University of 94 * California, Berkeley and its contributors. 95 * 4. Neither the name of the University nor the names of its contributors 96 * may be used to endorse or promote products derived from this software 97 * without specific prior written permission. 98 * 99 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 100 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 101 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 102 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 103 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 104 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 105 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 106 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 107 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 108 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 109 * SUCH DAMAGE. 110 * 111 * from: Utah $Hdr: vn.c 1.13 94/04/02$ 112 * 113 * @(#)vn.c 8.9 (Berkeley) 5/14/95 114 */ 115 116/* 117 * Vnode disk driver. 118 * 119 * Block/character interface to a vnode. Allows one to treat a file 120 * as a disk (e.g. build a filesystem in it, mount it, etc.). 121 * 122 * NOTE 1: This uses the VOP_BMAP/VOP_STRATEGY interface to the vnode 123 * instead of a simple VOP_RDWR. We do this to avoid distorting the 124 * local buffer cache. 125 * 126 * NOTE 2: There is a security issue involved with this driver. 127 * Once mounted all access to the contents of the "mapped" file via 128 * the special file is controlled by the permissions on the special 129 * file, the protection of the mapped file is ignored (effectively, 130 * by using root credentials in all transactions). 131 * 132 * NOTE 3: Doesn't interact with leases, should it? 133 */ 134 135#include <sys/cdefs.h> 136__KERNEL_RCSID(0, "$NetBSD: vnd.c,v 1.135 2006/02/02 06:57:35 cube Exp $"); 137 138#if defined(_KERNEL_OPT) 139#include "fs_nfs.h" 140#include "opt_vnd.h" 141#endif 142 143#include <sys/param.h> 144#include <sys/systm.h> 145#include <sys/namei.h> 146#include <sys/proc.h> 147#include <sys/kthread.h> 148#include <sys/errno.h> 149#include <sys/buf.h> 150#include <sys/bufq.h> 151#include <sys/malloc.h> 152#include <sys/ioctl.h> 153#include <sys/disklabel.h> 154#include <sys/device.h> 155#include <sys/disk.h> 156#include <sys/stat.h> 157#include <sys/mount.h> 158#include <sys/vnode.h> 159#include <sys/file.h> 160#include <sys/uio.h> 161#include <sys/conf.h> 162#include <net/zlib.h> 163 164#include <miscfs/specfs/specdev.h> 165 166#include <dev/vndvar.h> 167 168#if defined(VNDDEBUG) && !defined(DEBUG) 169#define DEBUG 170#endif 171 172#ifdef DEBUG 173int dovndcluster = 1; 174#define VDB_FOLLOW 0x01 175#define VDB_INIT 0x02 176#define VDB_IO 0x04 177#define VDB_LABEL 0x08 178int vnddebug = 0x00; 179#endif 180 181#define vndunit(x) DISKUNIT(x) 182 183struct vndxfer { 184 struct buf vx_buf; 185 struct vnd_softc *vx_vnd; 186}; 187#define VND_BUFTOXFER(bp) ((struct vndxfer *)(void *)bp) 188 189#define VND_GETXFER(vnd) pool_get(&(vnd)->sc_vxpool, PR_WAITOK) 190#define VND_PUTXFER(vnd, vx) pool_put(&(vnd)->sc_vxpool, (vx)) 191 192#define VNDLABELDEV(dev) \ 193 (MAKEDISKDEV(major((dev)), vndunit((dev)), RAW_PART)) 194 195/* called by main() at boot time (XXX: and the LKM driver) */ 196void vndattach(int); 197 198static void vndclear(struct vnd_softc *, int); 199static int vndsetcred(struct vnd_softc *, struct ucred *); 200static void vndthrottle(struct vnd_softc *, struct vnode *); 201static void vndiodone(struct buf *); 202#if 0 203static void vndshutdown(void); 204#endif 205 206static void vndgetdefaultlabel(struct vnd_softc *, struct disklabel *); 207static void vndgetdisklabel(dev_t, struct vnd_softc *); 208 209static int vndlock(struct vnd_softc *); 210static void vndunlock(struct vnd_softc *); 211#ifdef VND_COMPRESSION 212static void compstrategy(struct buf *, off_t); 213static void *vnd_alloc(void *, u_int, u_int); 214static void vnd_free(void *, void *); 215#endif /* VND_COMPRESSION */ 216 217static void vndthread(void *); 218 219static dev_type_open(vndopen); 220static dev_type_close(vndclose); 221static dev_type_read(vndread); 222static dev_type_write(vndwrite); 223static dev_type_ioctl(vndioctl); 224static dev_type_strategy(vndstrategy); 225static dev_type_dump(vnddump); 226static dev_type_size(vndsize); 227 228const struct bdevsw vnd_bdevsw = { 229 vndopen, vndclose, vndstrategy, vndioctl, vnddump, vndsize, D_DISK 230}; 231 232const struct cdevsw vnd_cdevsw = { 233 vndopen, vndclose, vndread, vndwrite, vndioctl, 234 nostop, notty, nopoll, nommap, nokqfilter, D_DISK 235}; 236 237static int vnd_match(struct device *, struct cfdata *, void *); 238static void vnd_attach(struct device *, struct device *, void *); 239static int vnd_detach(struct device *, int); 240 241CFATTACH_DECL(vnd, sizeof(struct vnd_softc), 242 vnd_match, vnd_attach, vnd_detach, NULL); 243extern struct cfdriver vnd_cd; 244 245static struct vnd_softc *vnd_spawn(int); 246int vnd_destroy(struct device *); 247 248void 249vndattach(int num) 250{ 251 int error; 252 253 error = config_cfattach_attach(vnd_cd.cd_name, &vnd_ca); 254 if (error) 255 aprint_error("%s: unable to register cfattach\n", 256 vnd_cd.cd_name); 257} 258 259static int 260vnd_match(struct device *self, struct cfdata *cfdata, void *aux) 261{ 262 return 1; 263} 264 265static void 266vnd_attach(struct device *parent, struct device *self, void *aux) 267{ 268 struct vnd_softc *sc = (struct vnd_softc *)self; 269 270 sc->sc_comp_offsets = NULL; 271 sc->sc_comp_buff = NULL; 272 sc->sc_comp_decombuf = NULL; 273 bufq_alloc(&sc->sc_tab, "disksort", BUFQ_SORT_RAWBLOCK); 274 pseudo_disk_init(&sc->sc_dkdev); 275 276 aprint_normal("%s: vnode disk driver\n", self->dv_xname); 277} 278 279static int 280vnd_detach(struct device *self, int flags) 281{ 282 struct vnd_softc *sc = (struct vnd_softc *)self; 283 if (sc->sc_flags & VNF_INITED) 284 return EBUSY; 285 286 bufq_free(sc->sc_tab); 287 288 return 0; 289} 290 291static struct vnd_softc * 292vnd_spawn(int unit) 293{ 294 struct cfdata *cf; 295 296 cf = malloc(sizeof(*cf), M_DEVBUF, M_WAITOK); 297 cf->cf_name = vnd_cd.cd_name; 298 cf->cf_atname = vnd_cd.cd_name; 299 cf->cf_unit = unit; 300 cf->cf_fstate = FSTATE_STAR; 301 302 return (struct vnd_softc *)config_attach_pseudo(cf); 303} 304 305int 306vnd_destroy(struct device *dev) 307{ 308 int error; 309 310 error = config_detach(dev, 0); 311 if (error) 312 return error; 313 free(dev->dv_cfdata, M_DEVBUF); 314 return 0; 315} 316 317static int 318vndopen(dev_t dev, int flags, int mode, struct lwp *l) 319{ 320 int unit = vndunit(dev); 321 struct vnd_softc *sc; 322 int error = 0, part, pmask; 323 struct disklabel *lp; 324 325#ifdef DEBUG 326 if (vnddebug & VDB_FOLLOW) 327 printf("vndopen(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 328#endif 329 sc = device_lookup(&vnd_cd, unit); 330 if (sc == NULL) { 331 sc = vnd_spawn(unit); 332 if (sc == NULL) 333 return ENOMEM; 334 } 335 336 if ((error = vndlock(sc)) != 0) 337 return (error); 338 339 lp = sc->sc_dkdev.dk_label; 340 341 part = DISKPART(dev); 342 pmask = (1 << part); 343 344 /* 345 * If we're initialized, check to see if there are any other 346 * open partitions. If not, then it's safe to update the 347 * in-core disklabel. Only read the disklabel if it is 348 * not already valid. 349 */ 350 if ((sc->sc_flags & (VNF_INITED|VNF_VLABEL)) == VNF_INITED && 351 sc->sc_dkdev.dk_openmask == 0) 352 vndgetdisklabel(dev, sc); 353 354 /* Check that the partitions exists. */ 355 if (part != RAW_PART) { 356 if (((sc->sc_flags & VNF_INITED) == 0) || 357 ((part >= lp->d_npartitions) || 358 (lp->d_partitions[part].p_fstype == FS_UNUSED))) { 359 error = ENXIO; 360 goto done; 361 } 362 } 363 364 /* Prevent our unit from being unconfigured while open. */ 365 switch (mode) { 366 case S_IFCHR: 367 sc->sc_dkdev.dk_copenmask |= pmask; 368 break; 369 370 case S_IFBLK: 371 sc->sc_dkdev.dk_bopenmask |= pmask; 372 break; 373 } 374 sc->sc_dkdev.dk_openmask = 375 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 376 377 done: 378 vndunlock(sc); 379 return (error); 380} 381 382static int 383vndclose(dev_t dev, int flags, int mode, struct lwp *l) 384{ 385 int unit = vndunit(dev); 386 struct vnd_softc *sc; 387 int error = 0, part; 388 389#ifdef DEBUG 390 if (vnddebug & VDB_FOLLOW) 391 printf("vndclose(0x%x, 0x%x, 0x%x, %p)\n", dev, flags, mode, l); 392#endif 393 sc = device_lookup(&vnd_cd, unit); 394 if (sc == NULL) 395 return ENXIO; 396 397 if ((error = vndlock(sc)) != 0) 398 return (error); 399 400 part = DISKPART(dev); 401 402 /* ...that much closer to allowing unconfiguration... */ 403 switch (mode) { 404 case S_IFCHR: 405 sc->sc_dkdev.dk_copenmask &= ~(1 << part); 406 break; 407 408 case S_IFBLK: 409 sc->sc_dkdev.dk_bopenmask &= ~(1 << part); 410 break; 411 } 412 sc->sc_dkdev.dk_openmask = 413 sc->sc_dkdev.dk_copenmask | sc->sc_dkdev.dk_bopenmask; 414 415 vndunlock(sc); 416 417 if ((sc->sc_flags & VNF_INITED) == 0) { 418 if ((error = vnd_destroy((struct device *)sc)) != 0) { 419 aprint_error("%s: unable to detach instance\n", 420 sc->sc_dev.dv_xname); 421 return error; 422 } 423 } 424 425 return (0); 426} 427 428/* 429 * Queue the request, and wakeup the kernel thread to handle it. 430 */ 431static void 432vndstrategy(struct buf *bp) 433{ 434 int unit = vndunit(bp->b_dev); 435 struct vnd_softc *vnd = 436 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 437 struct disklabel *lp = vnd->sc_dkdev.dk_label; 438 daddr_t blkno; 439 int s = splbio(); 440 441 bp->b_resid = bp->b_bcount; 442 443 if ((vnd->sc_flags & VNF_INITED) == 0) { 444 bp->b_error = ENXIO; 445 bp->b_flags |= B_ERROR; 446 goto done; 447 } 448 449 /* 450 * The transfer must be a whole number of blocks. 451 */ 452 if ((bp->b_bcount % lp->d_secsize) != 0) { 453 bp->b_error = EINVAL; 454 bp->b_flags |= B_ERROR; 455 goto done; 456 } 457 458 /* 459 * check if we're read-only. 460 */ 461 if ((vnd->sc_flags & VNF_READONLY) && !(bp->b_flags & B_READ)) { 462 bp->b_error = EACCES; 463 bp->b_flags |= B_ERROR; 464 goto done; 465 } 466 467 /* 468 * Do bounds checking and adjust transfer. If there's an error, 469 * the bounds check will flag that for us. 470 */ 471 if (DISKPART(bp->b_dev) != RAW_PART) { 472 if (bounds_check_with_label(&vnd->sc_dkdev, 473 bp, vnd->sc_flags & (VNF_WLABEL|VNF_LABELLING)) <= 0) 474 goto done; 475 } 476 477 /* If it's a nil transfer, wake up the top half now. */ 478 if (bp->b_bcount == 0) 479 goto done; 480 481 /* 482 * Put the block number in terms of the logical blocksize 483 * of the "device". 484 */ 485 486 blkno = bp->b_blkno / (lp->d_secsize / DEV_BSIZE); 487 488 /* 489 * Translate the partition-relative block number to an absolute. 490 */ 491 if (DISKPART(bp->b_dev) != RAW_PART) { 492 struct partition *pp; 493 494 pp = &vnd->sc_dkdev.dk_label->d_partitions[ 495 DISKPART(bp->b_dev)]; 496 blkno += pp->p_offset; 497 } 498 bp->b_rawblkno = blkno; 499 500#ifdef DEBUG 501 if (vnddebug & VDB_FOLLOW) 502 printf("vndstrategy(%p): unit %d\n", bp, unit); 503#endif 504 BUFQ_PUT(vnd->sc_tab, bp); 505 wakeup(&vnd->sc_tab); 506 splx(s); 507 return; 508done: 509 biodone(bp); 510 splx(s); 511} 512 513static void 514vndthread(void *arg) 515{ 516 struct vnd_softc *vnd = arg; 517 struct mount *mp; 518 int s, bsize; 519 int sz, error; 520 struct disklabel *lp; 521 522 s = splbio(); 523 vnd->sc_flags |= VNF_KTHREAD; 524 wakeup(&vnd->sc_kthread); 525 526 /* 527 * Dequeue requests, break them into bsize pieces and submit using 528 * VOP_BMAP/VOP_STRATEGY. 529 */ 530 while ((vnd->sc_flags & VNF_VUNCONF) == 0) { 531 struct vndxfer *vnx; 532 off_t offset; 533 int resid; 534 int skipped = 0; 535 off_t bn; 536 int flags; 537 struct buf *obp; 538 struct buf *bp; 539 540 obp = BUFQ_GET(vnd->sc_tab); 541 if (obp == NULL) { 542 tsleep(&vnd->sc_tab, PRIBIO, "vndbp", 0); 543 continue; 544 }; 545 splx(s); 546 flags = obp->b_flags; 547#ifdef DEBUG 548 if (vnddebug & VDB_FOLLOW) 549 printf("vndthread(%p\n", obp); 550#endif 551 lp = vnd->sc_dkdev.dk_label; 552 553 /* convert to a byte offset within the file. */ 554 bn = obp->b_rawblkno * lp->d_secsize; 555 556 if (vnd->sc_vp->v_mount == NULL) { 557 obp->b_error = ENXIO; 558 obp->b_flags |= B_ERROR; 559 goto done; 560 } 561#ifdef VND_COMPRESSION 562 /* handle a compressed read */ 563 if ((flags & B_READ) != 0 && (vnd->sc_flags & VNF_COMP)) { 564 compstrategy(obp, bn); 565 goto done; 566 } 567#endif /* VND_COMPRESSION */ 568 569 bsize = vnd->sc_vp->v_mount->mnt_stat.f_iosize; 570 571 /* 572 * Allocate a header for this transfer and link it to the 573 * buffer 574 */ 575 s = splbio(); 576 vnx = VND_GETXFER(vnd); 577 splx(s); 578 vnx->vx_vnd = vnd; 579 580 bp = &vnx->vx_buf; 581 BUF_INIT(bp); 582 bp->b_flags = (obp->b_flags & B_READ) | B_CALL; 583 bp->b_iodone = vndiodone; 584 bp->b_private = obp; 585 bp->b_vp = NULL; 586 bp->b_data = obp->b_data; 587 bp->b_bcount = bp->b_resid = obp->b_bcount; 588 BIO_COPYPRIO(bp, obp); 589 590 s = splbio(); 591 while (vnd->sc_active >= vnd->sc_maxactive) { 592 tsleep(&vnd->sc_tab, PRIBIO, "vndac", 0); 593 } 594 vnd->sc_active++; 595 splx(s); 596 597 if ((flags & B_READ) == 0) 598 vn_start_write(vnd->sc_vp, &mp, V_WAIT); 599 600 /* Instrumentation. */ 601 disk_busy(&vnd->sc_dkdev); 602 603 /* 604 * Feed requests sequentially. 605 * We do it this way to keep from flooding NFS servers if we 606 * are connected to an NFS file. This places the burden on 607 * the client rather than the server. 608 */ 609 error = 0; 610 for (offset = 0, resid = bp->b_resid; resid; 611 resid -= sz, offset += sz) { 612 struct buf *nbp; 613 struct vnode *vp; 614 daddr_t nbn; 615 int off, nra; 616 617 nra = 0; 618 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY | LK_CANRECURSE); 619 error = VOP_BMAP(vnd->sc_vp, bn / bsize, &vp, &nbn, &nra); 620 VOP_UNLOCK(vnd->sc_vp, 0); 621 622 if (error == 0 && (long)nbn == -1) 623 error = EIO; 624 625 /* 626 * If there was an error or a hole in the file...punt. 627 * Note that we may have to wait for any operations 628 * that we have already fired off before releasing 629 * the buffer. 630 * 631 * XXX we could deal with holes here but it would be 632 * a hassle (in the write case). 633 */ 634 if (error) { 635 skipped += resid; 636 break; 637 } 638 639#ifdef DEBUG 640 if (!dovndcluster) 641 nra = 0; 642#endif 643 644 if ((off = bn % bsize) != 0) 645 sz = bsize - off; 646 else 647 sz = (1 + nra) * bsize; 648 if (resid < sz) 649 sz = resid; 650#ifdef DEBUG 651 if (vnddebug & VDB_IO) 652 printf("vndstrategy: vp %p/%p bn 0x%qx/0x%" PRIx64 653 " sz 0x%x\n", 654 vnd->sc_vp, vp, (long long)bn, nbn, sz); 655#endif 656 657 nbp = getiobuf(); 658 nestiobuf_setup(bp, nbp, offset, sz); 659 nbp->b_blkno = nbn + btodb(off); 660 661#if 0 /* XXX #ifdef DEBUG */ 662 if (vnddebug & VDB_IO) 663 printf("vndstart(%ld): bp %p vp %p blkno " 664 "0x%" PRIx64 " flags %x addr %p cnt 0x%x\n", 665 (long) (vnd-vnd_softc), &nbp->vb_buf, 666 nbp->vb_buf.b_vp, nbp->vb_buf.b_blkno, 667 nbp->vb_buf.b_flags, nbp->vb_buf.b_data, 668 nbp->vb_buf.b_bcount); 669#endif 670 VOP_STRATEGY(vp, nbp); 671 bn += sz; 672 } 673 nestiobuf_done(bp, skipped, error); 674 675 if ((flags & B_READ) == 0) 676 vn_finished_write(mp, 0); 677 678 s = splbio(); 679 continue; 680done: 681 biodone(obp); 682 s = splbio(); 683 } 684 685 vnd->sc_flags &= (~VNF_KTHREAD | VNF_VUNCONF); 686 wakeup(&vnd->sc_kthread); 687 splx(s); 688 kthread_exit(0); 689} 690 691static void 692vndiodone(struct buf *bp) 693{ 694 struct vndxfer *vnx = VND_BUFTOXFER(bp); 695 struct vnd_softc *vnd = vnx->vx_vnd; 696 struct buf *obp = bp->b_private; 697 698 KASSERT(&vnx->vx_buf == bp); 699 KASSERT(vnd->sc_active > 0); 700#ifdef DEBUG 701 if (vnddebug & VDB_IO) { 702 printf("vndiodone1: bp %p iodone: error %d\n", 703 bp, (bp->b_flags & B_ERROR) != 0 ? bp->b_error : 0); 704 } 705#endif 706 disk_unbusy(&vnd->sc_dkdev, bp->b_bcount - bp->b_resid, 707 (bp->b_flags & B_READ)); 708 vnd->sc_active--; 709 if (vnd->sc_active == 0) { 710 wakeup(&vnd->sc_tab); 711 } 712 obp->b_flags |= bp->b_flags & B_ERROR; 713 obp->b_error = bp->b_error; 714 obp->b_resid = bp->b_resid; 715 VND_PUTXFER(vnd, vnx); 716 biodone(obp); 717} 718 719/* ARGSUSED */ 720static int 721vndread(dev_t dev, struct uio *uio, int flags) 722{ 723 int unit = vndunit(dev); 724 struct vnd_softc *sc; 725 726#ifdef DEBUG 727 if (vnddebug & VDB_FOLLOW) 728 printf("vndread(0x%x, %p)\n", dev, uio); 729#endif 730 731 sc = device_lookup(&vnd_cd, unit); 732 if (sc == NULL) 733 return ENXIO; 734 735 if ((sc->sc_flags & VNF_INITED) == 0) 736 return (ENXIO); 737 738 return (physio(vndstrategy, NULL, dev, B_READ, minphys, uio)); 739} 740 741/* ARGSUSED */ 742static int 743vndwrite(dev_t dev, struct uio *uio, int flags) 744{ 745 int unit = vndunit(dev); 746 struct vnd_softc *sc; 747 748#ifdef DEBUG 749 if (vnddebug & VDB_FOLLOW) 750 printf("vndwrite(0x%x, %p)\n", dev, uio); 751#endif 752 753 sc = device_lookup(&vnd_cd, unit); 754 if (sc == NULL) 755 return ENXIO; 756 757 if ((sc->sc_flags & VNF_INITED) == 0) 758 return (ENXIO); 759 760 return (physio(vndstrategy, NULL, dev, B_WRITE, minphys, uio)); 761} 762 763static int 764vnd_cget(struct lwp *l, int unit, int *un, struct vattr *va) 765{ 766 struct vnd_softc *vnd; 767 768 if (*un == -1) 769 *un = unit; 770 if (*un < 0) 771 return EINVAL; 772 773 vnd = device_lookup(&vnd_cd, *un); 774 if (vnd == NULL) 775 /* 776 * vnconfig(8) has weird expectations to list the 777 * devices. 778 * It will stop as soon as it gets ENXIO, but 779 * will continue if it gets something else... 780 */ 781 return (*un >= vnd_cd.cd_ndevs) ? ENXIO : -1; 782 783 if ((vnd->sc_flags & VNF_INITED) == 0) 784 return -1; 785 786 return VOP_GETATTR(vnd->sc_vp, va, l->l_proc->p_ucred, l); 787} 788 789/* ARGSUSED */ 790static int 791vndioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct lwp *l) 792{ 793 int unit = vndunit(dev); 794 struct vnd_softc *vnd; 795 struct vnd_ioctl *vio; 796 struct vattr vattr; 797 struct nameidata nd; 798 int error, part, pmask; 799 size_t geomsize; 800 struct proc *p = (l != NULL) ? l->l_proc : NULL; 801 int fflags; 802#ifdef __HAVE_OLD_DISKLABEL 803 struct disklabel newlabel; 804#endif 805 806#ifdef DEBUG 807 if (vnddebug & VDB_FOLLOW) 808 printf("vndioctl(0x%x, 0x%lx, %p, 0x%x, %p): unit %d\n", 809 dev, cmd, data, flag, p, unit); 810#endif 811 vnd = device_lookup(&vnd_cd, unit); 812 if (vnd == NULL && 813#ifdef COMPAT_30 814 cmd != VNDIOOCGET && 815#endif 816 cmd != VNDIOCGET) 817 return ENXIO; 818 vio = (struct vnd_ioctl *)data; 819 820 /* Must be open for writes for these commands... */ 821 switch (cmd) { 822 case VNDIOCSET: 823 case VNDIOCCLR: 824 case DIOCSDINFO: 825 case DIOCWDINFO: 826#ifdef __HAVE_OLD_DISKLABEL 827 case ODIOCSDINFO: 828 case ODIOCWDINFO: 829#endif 830 case DIOCKLABEL: 831 case DIOCWLABEL: 832 if ((flag & FWRITE) == 0) 833 return (EBADF); 834 } 835 836 /* Must be initialized for these... */ 837 switch (cmd) { 838 case VNDIOCCLR: 839 case DIOCGDINFO: 840 case DIOCSDINFO: 841 case DIOCWDINFO: 842 case DIOCGPART: 843 case DIOCKLABEL: 844 case DIOCWLABEL: 845 case DIOCGDEFLABEL: 846#ifdef __HAVE_OLD_DISKLABEL 847 case ODIOCGDINFO: 848 case ODIOCSDINFO: 849 case ODIOCWDINFO: 850 case ODIOCGDEFLABEL: 851#endif 852 if ((vnd->sc_flags & VNF_INITED) == 0) 853 return (ENXIO); 854 } 855 856 switch (cmd) { 857 case VNDIOCSET: 858 if (vnd->sc_flags & VNF_INITED) 859 return (EBUSY); 860 861 if ((error = vndlock(vnd)) != 0) 862 return (error); 863 864 fflags = FREAD; 865 if ((vio->vnd_flags & VNDIOF_READONLY) == 0) 866 fflags |= FWRITE; 867 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, vio->vnd_file, l); 868 if ((error = vn_open(&nd, fflags, 0)) != 0) 869 goto unlock_and_exit; 870 error = VOP_GETATTR(nd.ni_vp, &vattr, l->l_proc->p_ucred, l); 871 if (!error && nd.ni_vp->v_type != VREG) 872 error = EOPNOTSUPP; 873 if (error) { 874 VOP_UNLOCK(nd.ni_vp, 0); 875 goto close_and_exit; 876 } 877 878 /* If using a compressed file, initialize its info */ 879 /* (or abort with an error if kernel has no compression) */ 880 if (vio->vnd_flags & VNF_COMP) { 881#ifdef VND_COMPRESSION 882 struct vnd_comp_header *ch; 883 int i; 884 u_int32_t comp_size; 885 u_int32_t comp_maxsize; 886 887 /* allocate space for compresed file header */ 888 ch = malloc(sizeof(struct vnd_comp_header), 889 M_TEMP, M_WAITOK); 890 891 /* read compressed file header */ 892 error = vn_rdwr(UIO_READ, nd.ni_vp, (caddr_t)ch, 893 sizeof(struct vnd_comp_header), 0, UIO_SYSSPACE, 894 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 895 if(error) { 896 free(ch, M_TEMP); 897 VOP_UNLOCK(nd.ni_vp, 0); 898 goto close_and_exit; 899 } 900 901 /* save some header info */ 902 vnd->sc_comp_blksz = ntohl(ch->block_size); 903 /* note last offset is the file byte size */ 904 vnd->sc_comp_numoffs = ntohl(ch->num_blocks)+1; 905 free(ch, M_TEMP); 906 if(vnd->sc_comp_blksz % DEV_BSIZE !=0) { 907 VOP_UNLOCK(nd.ni_vp, 0); 908 error = EINVAL; 909 goto close_and_exit; 910 } 911 if(sizeof(struct vnd_comp_header) + 912 sizeof(u_int64_t) * vnd->sc_comp_numoffs > 913 vattr.va_size) { 914 VOP_UNLOCK(nd.ni_vp, 0); 915 error = EINVAL; 916 goto close_and_exit; 917 } 918 919 /* set decompressed file size */ 920 vattr.va_size = 921 (vnd->sc_comp_numoffs - 1) * vnd->sc_comp_blksz; 922 923 /* allocate space for all the compressed offsets */ 924 vnd->sc_comp_offsets = 925 malloc(sizeof(u_int64_t) * vnd->sc_comp_numoffs, 926 M_DEVBUF, M_WAITOK); 927 928 /* read in the offsets */ 929 error = vn_rdwr(UIO_READ, nd.ni_vp, 930 (caddr_t)vnd->sc_comp_offsets, 931 sizeof(u_int64_t) * vnd->sc_comp_numoffs, 932 sizeof(struct vnd_comp_header), UIO_SYSSPACE, 933 IO_UNIT|IO_NODELOCKED, p->p_ucred, NULL, NULL); 934 if(error) { 935 VOP_UNLOCK(nd.ni_vp, 0); 936 goto close_and_exit; 937 } 938 /* 939 * find largest block size (used for allocation limit). 940 * Also convert offset to native byte order. 941 */ 942 comp_maxsize = 0; 943 for (i = 0; i < vnd->sc_comp_numoffs - 1; i++) { 944 vnd->sc_comp_offsets[i] = 945 be64toh(vnd->sc_comp_offsets[i]); 946 comp_size = be64toh(vnd->sc_comp_offsets[i + 1]) 947 - vnd->sc_comp_offsets[i]; 948 if (comp_size > comp_maxsize) 949 comp_maxsize = comp_size; 950 } 951 vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1] = 952 be64toh(vnd->sc_comp_offsets[vnd->sc_comp_numoffs - 1]); 953 954 /* create compressed data buffer */ 955 vnd->sc_comp_buff = malloc(comp_maxsize, 956 M_DEVBUF, M_WAITOK); 957 958 /* create decompressed buffer */ 959 vnd->sc_comp_decombuf = malloc(vnd->sc_comp_blksz, 960 M_DEVBUF, M_WAITOK); 961 vnd->sc_comp_buffblk = -1; 962 963 /* Initialize decompress stream */ 964 bzero(&vnd->sc_comp_stream, sizeof(z_stream)); 965 vnd->sc_comp_stream.zalloc = vnd_alloc; 966 vnd->sc_comp_stream.zfree = vnd_free; 967 error = inflateInit2(&vnd->sc_comp_stream, MAX_WBITS); 968 if(error) { 969 if(vnd->sc_comp_stream.msg) 970 printf("vnd%d: compressed file, %s\n", 971 unit, vnd->sc_comp_stream.msg); 972 VOP_UNLOCK(nd.ni_vp, 0); 973 error = EINVAL; 974 goto close_and_exit; 975 } 976 977 vnd->sc_flags |= VNF_COMP | VNF_READONLY; 978#else /* !VND_COMPRESSION */ 979 VOP_UNLOCK(nd.ni_vp, 0); 980 error = EOPNOTSUPP; 981 goto close_and_exit; 982#endif /* VND_COMPRESSION */ 983 } 984 985 VOP_UNLOCK(nd.ni_vp, 0); 986 vnd->sc_vp = nd.ni_vp; 987 vnd->sc_size = btodb(vattr.va_size); /* note truncation */ 988 989 /* 990 * Use pseudo-geometry specified. If none was provided, 991 * use "standard" Adaptec fictitious geometry. 992 */ 993 if (vio->vnd_flags & VNDIOF_HASGEOM) { 994 995 memcpy(&vnd->sc_geom, &vio->vnd_geom, 996 sizeof(vio->vnd_geom)); 997 998 /* 999 * Sanity-check the sector size. 1000 * XXX Don't allow secsize < DEV_BSIZE. Should 1001 * XXX we? 1002 */ 1003 if (vnd->sc_geom.vng_secsize < DEV_BSIZE || 1004 (vnd->sc_geom.vng_secsize % DEV_BSIZE) != 0 || 1005 vnd->sc_geom.vng_ncylinders == 0 || 1006 (vnd->sc_geom.vng_ntracks * 1007 vnd->sc_geom.vng_nsectors) == 0) { 1008 error = EINVAL; 1009 goto close_and_exit; 1010 } 1011 1012 /* 1013 * Compute the size (in DEV_BSIZE blocks) specified 1014 * by the geometry. 1015 */ 1016 geomsize = (vnd->sc_geom.vng_nsectors * 1017 vnd->sc_geom.vng_ntracks * 1018 vnd->sc_geom.vng_ncylinders) * 1019 (vnd->sc_geom.vng_secsize / DEV_BSIZE); 1020 1021 /* 1022 * Sanity-check the size against the specified 1023 * geometry. 1024 */ 1025 if (vnd->sc_size < geomsize) { 1026 error = EINVAL; 1027 goto close_and_exit; 1028 } 1029 } else if (vnd->sc_size >= (32 * 64)) { 1030 /* 1031 * Size must be at least 2048 DEV_BSIZE blocks 1032 * (1M) in order to use this geometry. 1033 */ 1034 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1035 vnd->sc_geom.vng_nsectors = 32; 1036 vnd->sc_geom.vng_ntracks = 64; 1037 vnd->sc_geom.vng_ncylinders = vnd->sc_size / (64 * 32); 1038 } else { 1039 vnd->sc_geom.vng_secsize = DEV_BSIZE; 1040 vnd->sc_geom.vng_nsectors = 1; 1041 vnd->sc_geom.vng_ntracks = 1; 1042 vnd->sc_geom.vng_ncylinders = vnd->sc_size; 1043 } 1044 1045 if (vio->vnd_flags & VNDIOF_READONLY) { 1046 vnd->sc_flags |= VNF_READONLY; 1047 } 1048 1049 if ((error = vndsetcred(vnd, p->p_ucred)) != 0) 1050 goto close_and_exit; 1051 1052 vndthrottle(vnd, vnd->sc_vp); 1053 vio->vnd_size = dbtob(vnd->sc_size); 1054 vnd->sc_flags |= VNF_INITED; 1055 1056 /* create the kernel thread, wait for it to be up */ 1057 error = kthread_create1(vndthread, vnd, &vnd->sc_kthread, 1058 vnd->sc_dev.dv_xname); 1059 if (error) 1060 goto close_and_exit; 1061 while ((vnd->sc_flags & VNF_KTHREAD) == 0) { 1062 tsleep(&vnd->sc_kthread, PRIBIO, "vndthr", 0); 1063 } 1064#ifdef DEBUG 1065 if (vnddebug & VDB_INIT) 1066 printf("vndioctl: SET vp %p size 0x%lx %d/%d/%d/%d\n", 1067 vnd->sc_vp, (unsigned long) vnd->sc_size, 1068 vnd->sc_geom.vng_secsize, 1069 vnd->sc_geom.vng_nsectors, 1070 vnd->sc_geom.vng_ntracks, 1071 vnd->sc_geom.vng_ncylinders); 1072#endif 1073 1074 /* Attach the disk. */ 1075 vnd->sc_dkdev.dk_name = vnd->sc_dev.dv_xname; 1076 pseudo_disk_attach(&vnd->sc_dkdev); 1077 1078 /* Initialize the xfer and buffer pools. */ 1079 pool_init(&vnd->sc_vxpool, sizeof(struct vndxfer), 0, 1080 0, 0, "vndxpl", NULL); 1081 1082 /* Try and read the disklabel. */ 1083 vndgetdisklabel(dev, vnd); 1084 1085 vndunlock(vnd); 1086 1087 break; 1088 1089close_and_exit: 1090 (void) vn_close(nd.ni_vp, fflags, p->p_ucred, l); 1091unlock_and_exit: 1092#ifdef VND_COMPRESSION 1093 /* free any allocated memory (for compressed file) */ 1094 if(vnd->sc_comp_offsets) { 1095 free(vnd->sc_comp_offsets, M_DEVBUF); 1096 vnd->sc_comp_offsets = NULL; 1097 } 1098 if(vnd->sc_comp_buff) { 1099 free(vnd->sc_comp_buff, M_DEVBUF); 1100 vnd->sc_comp_buff = NULL; 1101 } 1102 if(vnd->sc_comp_decombuf) { 1103 free(vnd->sc_comp_decombuf, M_DEVBUF); 1104 vnd->sc_comp_decombuf = NULL; 1105 } 1106#endif /* VND_COMPRESSION */ 1107 vndunlock(vnd); 1108 return (error); 1109 1110 case VNDIOCCLR: 1111 if ((error = vndlock(vnd)) != 0) 1112 return (error); 1113 1114 /* 1115 * Don't unconfigure if any other partitions are open 1116 * or if both the character and block flavors of this 1117 * partition are open. 1118 */ 1119 part = DISKPART(dev); 1120 pmask = (1 << part); 1121 if (((vnd->sc_dkdev.dk_openmask & ~pmask) || 1122 ((vnd->sc_dkdev.dk_bopenmask & pmask) && 1123 (vnd->sc_dkdev.dk_copenmask & pmask))) && 1124 !(vio->vnd_flags & VNDIOF_FORCE)) { 1125 vndunlock(vnd); 1126 return (EBUSY); 1127 } 1128 1129 /* 1130 * XXX vndclear() might call vndclose() implicitely; 1131 * release lock to avoid recursion 1132 */ 1133 vndunlock(vnd); 1134 vndclear(vnd, minor(dev)); 1135#ifdef DEBUG 1136 if (vnddebug & VDB_INIT) 1137 printf("vndioctl: CLRed\n"); 1138#endif 1139 1140 /* Destroy the xfer and buffer pools. */ 1141 pool_destroy(&vnd->sc_vxpool); 1142 1143 /* Detatch the disk. */ 1144 pseudo_disk_detach(&vnd->sc_dkdev); 1145 break; 1146 1147#ifdef COMPAT_30 1148 case VNDIOOCGET: { 1149 struct vnd_ouser *vnu; 1150 struct vattr va; 1151 vnu = (struct vnd_ouser *)data; 1152 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1153 case 0: 1154 vnu->vnu_dev = va.va_fsid; 1155 vnu->vnu_ino = va.va_fileid; 1156 break; 1157 case -1: 1158 /* unused is not an error */ 1159 vnu->vnu_dev = 0; 1160 vnu->vnu_ino = 0; 1161 break; 1162 default: 1163 return error; 1164 } 1165 break; 1166 } 1167#endif 1168 case VNDIOCGET: { 1169 struct vnd_user *vnu; 1170 struct vattr va; 1171 vnu = (struct vnd_user *)data; 1172 switch (error = vnd_cget(l, unit, &vnu->vnu_unit, &va)) { 1173 case 0: 1174 vnu->vnu_dev = va.va_fsid; 1175 vnu->vnu_ino = va.va_fileid; 1176 break; 1177 case -1: 1178 /* unused is not an error */ 1179 vnu->vnu_dev = 0; 1180 vnu->vnu_ino = 0; 1181 break; 1182 default: 1183 return error; 1184 } 1185 break; 1186 } 1187 1188 case DIOCGDINFO: 1189 *(struct disklabel *)data = *(vnd->sc_dkdev.dk_label); 1190 break; 1191 1192#ifdef __HAVE_OLD_DISKLABEL 1193 case ODIOCGDINFO: 1194 newlabel = *(vnd->sc_dkdev.dk_label); 1195 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1196 return ENOTTY; 1197 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1198 break; 1199#endif 1200 1201 case DIOCGPART: 1202 ((struct partinfo *)data)->disklab = vnd->sc_dkdev.dk_label; 1203 ((struct partinfo *)data)->part = 1204 &vnd->sc_dkdev.dk_label->d_partitions[DISKPART(dev)]; 1205 break; 1206 1207 case DIOCWDINFO: 1208 case DIOCSDINFO: 1209#ifdef __HAVE_OLD_DISKLABEL 1210 case ODIOCWDINFO: 1211 case ODIOCSDINFO: 1212#endif 1213 { 1214 struct disklabel *lp; 1215 1216 if ((error = vndlock(vnd)) != 0) 1217 return (error); 1218 1219 vnd->sc_flags |= VNF_LABELLING; 1220 1221#ifdef __HAVE_OLD_DISKLABEL 1222 if (cmd == ODIOCSDINFO || cmd == ODIOCWDINFO) { 1223 memset(&newlabel, 0, sizeof newlabel); 1224 memcpy(&newlabel, data, sizeof (struct olddisklabel)); 1225 lp = &newlabel; 1226 } else 1227#endif 1228 lp = (struct disklabel *)data; 1229 1230 error = setdisklabel(vnd->sc_dkdev.dk_label, 1231 lp, 0, vnd->sc_dkdev.dk_cpulabel); 1232 if (error == 0) { 1233 if (cmd == DIOCWDINFO 1234#ifdef __HAVE_OLD_DISKLABEL 1235 || cmd == ODIOCWDINFO 1236#endif 1237 ) 1238 error = writedisklabel(VNDLABELDEV(dev), 1239 vndstrategy, vnd->sc_dkdev.dk_label, 1240 vnd->sc_dkdev.dk_cpulabel); 1241 } 1242 1243 vnd->sc_flags &= ~VNF_LABELLING; 1244 1245 vndunlock(vnd); 1246 1247 if (error) 1248 return (error); 1249 break; 1250 } 1251 1252 case DIOCKLABEL: 1253 if (*(int *)data != 0) 1254 vnd->sc_flags |= VNF_KLABEL; 1255 else 1256 vnd->sc_flags &= ~VNF_KLABEL; 1257 break; 1258 1259 case DIOCWLABEL: 1260 if (*(int *)data != 0) 1261 vnd->sc_flags |= VNF_WLABEL; 1262 else 1263 vnd->sc_flags &= ~VNF_WLABEL; 1264 break; 1265 1266 case DIOCGDEFLABEL: 1267 vndgetdefaultlabel(vnd, (struct disklabel *)data); 1268 break; 1269 1270#ifdef __HAVE_OLD_DISKLABEL 1271 case ODIOCGDEFLABEL: 1272 vndgetdefaultlabel(vnd, &newlabel); 1273 if (newlabel.d_npartitions > OLDMAXPARTITIONS) 1274 return ENOTTY; 1275 memcpy(data, &newlabel, sizeof (struct olddisklabel)); 1276 break; 1277#endif 1278 1279 default: 1280 return (ENOTTY); 1281 } 1282 1283 return (0); 1284} 1285 1286/* 1287 * Duplicate the current processes' credentials. Since we are called only 1288 * as the result of a SET ioctl and only root can do that, any future access 1289 * to this "disk" is essentially as root. Note that credentials may change 1290 * if some other uid can write directly to the mapped file (NFS). 1291 */ 1292static int 1293vndsetcred(struct vnd_softc *vnd, struct ucred *cred) 1294{ 1295 struct uio auio; 1296 struct iovec aiov; 1297 char *tmpbuf; 1298 int error; 1299 1300 vnd->sc_cred = crdup(cred); 1301 tmpbuf = malloc(DEV_BSIZE, M_TEMP, M_WAITOK); 1302 1303 /* XXX: Horrible kludge to establish credentials for NFS */ 1304 aiov.iov_base = tmpbuf; 1305 aiov.iov_len = min(DEV_BSIZE, dbtob(vnd->sc_size)); 1306 auio.uio_iov = &aiov; 1307 auio.uio_iovcnt = 1; 1308 auio.uio_offset = 0; 1309 auio.uio_rw = UIO_READ; 1310 auio.uio_segflg = UIO_SYSSPACE; 1311 auio.uio_resid = aiov.iov_len; 1312 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1313 error = VOP_READ(vnd->sc_vp, &auio, 0, vnd->sc_cred); 1314 if (error == 0) { 1315 /* 1316 * Because vnd does all IO directly through the vnode 1317 * we need to flush (at least) the buffer from the above 1318 * VOP_READ from the buffer cache to prevent cache 1319 * incoherencies. Also, be careful to write dirty 1320 * buffers back to stable storage. 1321 */ 1322 error = vinvalbuf(vnd->sc_vp, V_SAVE, vnd->sc_cred, 1323 curlwp, 0, 0); 1324 } 1325 VOP_UNLOCK(vnd->sc_vp, 0); 1326 1327 free(tmpbuf, M_TEMP); 1328 return (error); 1329} 1330 1331/* 1332 * Set maxactive based on FS type 1333 */ 1334static void 1335vndthrottle(struct vnd_softc *vnd, struct vnode *vp) 1336{ 1337#ifdef NFS 1338 extern int (**nfsv2_vnodeop_p)(void *); 1339 1340 if (vp->v_op == nfsv2_vnodeop_p) 1341 vnd->sc_maxactive = 2; 1342 else 1343#endif 1344 vnd->sc_maxactive = 8; 1345 1346 if (vnd->sc_maxactive < 1) 1347 vnd->sc_maxactive = 1; 1348} 1349 1350#if 0 1351static void 1352vndshutdown(void) 1353{ 1354 struct vnd_softc *vnd; 1355 1356 for (vnd = &vnd_softc[0]; vnd < &vnd_softc[numvnd]; vnd++) 1357 if (vnd->sc_flags & VNF_INITED) 1358 vndclear(vnd); 1359} 1360#endif 1361 1362static void 1363vndclear(struct vnd_softc *vnd, int myminor) 1364{ 1365 struct vnode *vp = vnd->sc_vp; 1366 struct lwp *l = curlwp; 1367 int fflags = FREAD; 1368 int bmaj, cmaj, i, mn; 1369 int s; 1370 1371#ifdef DEBUG 1372 if (vnddebug & VDB_FOLLOW) 1373 printf("vndclear(%p): vp %p\n", vnd, vp); 1374#endif 1375 /* locate the major number */ 1376 bmaj = bdevsw_lookup_major(&vnd_bdevsw); 1377 cmaj = cdevsw_lookup_major(&vnd_cdevsw); 1378 1379 /* Nuke the vnodes for any open instances */ 1380 for (i = 0; i < MAXPARTITIONS; i++) { 1381 mn = DISKMINOR(vnd->sc_dev.dv_unit, i); 1382 vdevgone(bmaj, mn, mn, VBLK); 1383 if (mn != myminor) /* XXX avoid to kill own vnode */ 1384 vdevgone(cmaj, mn, mn, VCHR); 1385 } 1386 1387 if ((vnd->sc_flags & VNF_READONLY) == 0) 1388 fflags |= FWRITE; 1389 1390 s = splbio(); 1391 bufq_drain(vnd->sc_tab); 1392 splx(s); 1393 1394 vnd->sc_flags |= VNF_VUNCONF; 1395 wakeup(&vnd->sc_tab); 1396 while (vnd->sc_flags & VNF_KTHREAD) 1397 tsleep(&vnd->sc_kthread, PRIBIO, "vnthr", 0); 1398 1399#ifdef VND_COMPRESSION 1400 /* free the compressed file buffers */ 1401 if(vnd->sc_flags & VNF_COMP) { 1402 if(vnd->sc_comp_offsets) { 1403 free(vnd->sc_comp_offsets, M_DEVBUF); 1404 vnd->sc_comp_offsets = NULL; 1405 } 1406 if(vnd->sc_comp_buff) { 1407 free(vnd->sc_comp_buff, M_DEVBUF); 1408 vnd->sc_comp_buff = NULL; 1409 } 1410 if(vnd->sc_comp_decombuf) { 1411 free(vnd->sc_comp_decombuf, M_DEVBUF); 1412 vnd->sc_comp_decombuf = NULL; 1413 } 1414 } 1415#endif /* VND_COMPRESSION */ 1416 vnd->sc_flags &= 1417 ~(VNF_INITED | VNF_READONLY | VNF_VLABEL 1418 | VNF_VUNCONF | VNF_COMP); 1419 if (vp == (struct vnode *)0) 1420 panic("vndclear: null vp"); 1421 (void) vn_close(vp, fflags, vnd->sc_cred, l); 1422 crfree(vnd->sc_cred); 1423 vnd->sc_vp = (struct vnode *)0; 1424 vnd->sc_cred = (struct ucred *)0; 1425 vnd->sc_size = 0; 1426} 1427 1428static int 1429vndsize(dev_t dev) 1430{ 1431 struct vnd_softc *sc; 1432 struct disklabel *lp; 1433 int part, unit, omask; 1434 int size; 1435 1436 unit = vndunit(dev); 1437 sc = (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1438 if (sc == NULL) 1439 return -1; 1440 1441 if ((sc->sc_flags & VNF_INITED) == 0) 1442 return (-1); 1443 1444 part = DISKPART(dev); 1445 omask = sc->sc_dkdev.dk_openmask & (1 << part); 1446 lp = sc->sc_dkdev.dk_label; 1447 1448 if (omask == 0 && vndopen(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1449 return (-1); 1450 1451 if (lp->d_partitions[part].p_fstype != FS_SWAP) 1452 size = -1; 1453 else 1454 size = lp->d_partitions[part].p_size * 1455 (lp->d_secsize / DEV_BSIZE); 1456 1457 if (omask == 0 && vndclose(dev, 0, S_IFBLK, curlwp)) /* XXX */ 1458 return (-1); 1459 1460 return (size); 1461} 1462 1463static int 1464vnddump(dev_t dev, daddr_t blkno, caddr_t va, size_t size) 1465{ 1466 1467 /* Not implemented. */ 1468 return ENXIO; 1469} 1470 1471static void 1472vndgetdefaultlabel(struct vnd_softc *sc, struct disklabel *lp) 1473{ 1474 struct vndgeom *vng = &sc->sc_geom; 1475 struct partition *pp; 1476 1477 memset(lp, 0, sizeof(*lp)); 1478 1479 lp->d_secperunit = sc->sc_size / (vng->vng_secsize / DEV_BSIZE); 1480 lp->d_secsize = vng->vng_secsize; 1481 lp->d_nsectors = vng->vng_nsectors; 1482 lp->d_ntracks = vng->vng_ntracks; 1483 lp->d_ncylinders = vng->vng_ncylinders; 1484 lp->d_secpercyl = lp->d_ntracks * lp->d_nsectors; 1485 1486 strncpy(lp->d_typename, "vnd", sizeof(lp->d_typename)); 1487 lp->d_type = DTYPE_VND; 1488 strncpy(lp->d_packname, "fictitious", sizeof(lp->d_packname)); 1489 lp->d_rpm = 3600; 1490 lp->d_interleave = 1; 1491 lp->d_flags = 0; 1492 1493 pp = &lp->d_partitions[RAW_PART]; 1494 pp->p_offset = 0; 1495 pp->p_size = lp->d_secperunit; 1496 pp->p_fstype = FS_UNUSED; 1497 lp->d_npartitions = RAW_PART + 1; 1498 1499 lp->d_magic = DISKMAGIC; 1500 lp->d_magic2 = DISKMAGIC; 1501 lp->d_checksum = dkcksum(lp); 1502} 1503 1504/* 1505 * Read the disklabel from a vnd. If one is not present, create a fake one. 1506 */ 1507static void 1508vndgetdisklabel(dev_t dev, struct vnd_softc *sc) 1509{ 1510 const char *errstring; 1511 struct disklabel *lp = sc->sc_dkdev.dk_label; 1512 struct cpu_disklabel *clp = sc->sc_dkdev.dk_cpulabel; 1513 int i; 1514 1515 memset(clp, 0, sizeof(*clp)); 1516 1517 vndgetdefaultlabel(sc, lp); 1518 1519 /* 1520 * Call the generic disklabel extraction routine. 1521 */ 1522 errstring = readdisklabel(VNDLABELDEV(dev), vndstrategy, lp, clp); 1523 if (errstring) { 1524 /* 1525 * Lack of disklabel is common, but we print the warning 1526 * anyway, since it might contain other useful information. 1527 */ 1528 printf("%s: %s\n", sc->sc_dev.dv_xname, errstring); 1529 1530 /* 1531 * For historical reasons, if there's no disklabel 1532 * present, all partitions must be FS_BSDFFS and 1533 * occupy the entire disk. 1534 */ 1535 for (i = 0; i < MAXPARTITIONS; i++) { 1536 /* 1537 * Don't wipe out port specific hack (such as 1538 * dos partition hack of i386 port). 1539 */ 1540 if (lp->d_partitions[i].p_size != 0) 1541 continue; 1542 1543 lp->d_partitions[i].p_size = lp->d_secperunit; 1544 lp->d_partitions[i].p_offset = 0; 1545 lp->d_partitions[i].p_fstype = FS_BSDFFS; 1546 } 1547 1548 strncpy(lp->d_packname, "default label", 1549 sizeof(lp->d_packname)); 1550 1551 lp->d_npartitions = MAXPARTITIONS; 1552 lp->d_checksum = dkcksum(lp); 1553 } 1554 1555 /* In-core label now valid. */ 1556 sc->sc_flags |= VNF_VLABEL; 1557} 1558 1559/* 1560 * Wait interruptibly for an exclusive lock. 1561 * 1562 * XXX 1563 * Several drivers do this; it should be abstracted and made MP-safe. 1564 */ 1565static int 1566vndlock(struct vnd_softc *sc) 1567{ 1568 int error; 1569 1570 while ((sc->sc_flags & VNF_LOCKED) != 0) { 1571 sc->sc_flags |= VNF_WANTED; 1572 if ((error = tsleep(sc, PRIBIO | PCATCH, "vndlck", 0)) != 0) 1573 return (error); 1574 } 1575 sc->sc_flags |= VNF_LOCKED; 1576 return (0); 1577} 1578 1579/* 1580 * Unlock and wake up any waiters. 1581 */ 1582static void 1583vndunlock(struct vnd_softc *sc) 1584{ 1585 1586 sc->sc_flags &= ~VNF_LOCKED; 1587 if ((sc->sc_flags & VNF_WANTED) != 0) { 1588 sc->sc_flags &= ~VNF_WANTED; 1589 wakeup(sc); 1590 } 1591} 1592 1593#ifdef VND_COMPRESSION 1594/* compressed file read */ 1595static void 1596compstrategy(struct buf *bp, off_t bn) 1597{ 1598 int error; 1599 int unit = vndunit(bp->b_dev); 1600 struct vnd_softc *vnd = 1601 (struct vnd_softc *)device_lookup(&vnd_cd, unit); 1602 u_int32_t comp_block; 1603 struct uio auio; 1604 caddr_t addr; 1605 int s; 1606 1607 /* set up constants for data move */ 1608 auio.uio_rw = UIO_READ; 1609 auio.uio_segflg = UIO_SYSSPACE; 1610 1611 /* read, and transfer the data */ 1612 addr = bp->b_data; 1613 s = splbio(); 1614 while (bp->b_resid > 0) { 1615 unsigned length; 1616 size_t length_in_buffer; 1617 u_int32_t offset_in_buffer; 1618 struct iovec aiov; 1619 1620 /* calculate the compressed block number */ 1621 comp_block = bn / (off_t)vnd->sc_comp_blksz; 1622 1623 /* check for good block number */ 1624 if (comp_block >= vnd->sc_comp_numoffs) { 1625 bp->b_error = EINVAL; 1626 bp->b_flags |= B_ERROR; 1627 splx(s); 1628 return; 1629 } 1630 1631 /* read in the compressed block, if not in buffer */ 1632 if (comp_block != vnd->sc_comp_buffblk) { 1633 length = vnd->sc_comp_offsets[comp_block + 1] - 1634 vnd->sc_comp_offsets[comp_block]; 1635 vn_lock(vnd->sc_vp, LK_EXCLUSIVE | LK_RETRY); 1636 error = vn_rdwr(UIO_READ, vnd->sc_vp, vnd->sc_comp_buff, 1637 length, vnd->sc_comp_offsets[comp_block], 1638 UIO_SYSSPACE, IO_UNIT, vnd->sc_cred, NULL, NULL); 1639 if (error) { 1640 bp->b_error = error; 1641 bp->b_flags |= B_ERROR; 1642 VOP_UNLOCK(vnd->sc_vp, 0); 1643 splx(s); 1644 return; 1645 } 1646 /* uncompress the buffer */ 1647 vnd->sc_comp_stream.next_in = vnd->sc_comp_buff; 1648 vnd->sc_comp_stream.avail_in = length; 1649 vnd->sc_comp_stream.next_out = vnd->sc_comp_decombuf; 1650 vnd->sc_comp_stream.avail_out = vnd->sc_comp_blksz; 1651 inflateReset(&vnd->sc_comp_stream); 1652 error = inflate(&vnd->sc_comp_stream, Z_FINISH); 1653 if (error != Z_STREAM_END) { 1654 if (vnd->sc_comp_stream.msg) 1655 printf("%s: compressed file, %s\n", 1656 vnd->sc_dev.dv_xname, 1657 vnd->sc_comp_stream.msg); 1658 bp->b_error = EBADMSG; 1659 bp->b_flags |= B_ERROR; 1660 VOP_UNLOCK(vnd->sc_vp, 0); 1661 splx(s); 1662 return; 1663 } 1664 vnd->sc_comp_buffblk = comp_block; 1665 VOP_UNLOCK(vnd->sc_vp, 0); 1666 } 1667 1668 /* transfer the usable uncompressed data */ 1669 offset_in_buffer = bn % (off_t)vnd->sc_comp_blksz; 1670 length_in_buffer = vnd->sc_comp_blksz - offset_in_buffer; 1671 if (length_in_buffer > bp->b_resid) 1672 length_in_buffer = bp->b_resid; 1673 auio.uio_iov = &aiov; 1674 auio.uio_iovcnt = 1; 1675 aiov.iov_base = addr; 1676 aiov.iov_len = length_in_buffer; 1677 auio.uio_resid = aiov.iov_len; 1678 auio.uio_offset = 0; 1679 error = uiomove(vnd->sc_comp_decombuf + offset_in_buffer, 1680 length_in_buffer, &auio); 1681 if (error) { 1682 bp->b_error = error; 1683 bp->b_flags |= B_ERROR; 1684 splx(s); 1685 return; 1686 } 1687 1688 bn += length_in_buffer; 1689 addr += length_in_buffer; 1690 bp->b_resid -= length_in_buffer; 1691 } 1692 splx(s); 1693} 1694 1695/* compression memory allocation routines */ 1696static void * 1697vnd_alloc(void *aux, u_int items, u_int siz) 1698{ 1699 return malloc(items * siz, M_TEMP, M_NOWAIT); 1700} 1701 1702static void 1703vnd_free(void *aux, void *ptr) 1704{ 1705 free(ptr, M_TEMP); 1706} 1707#endif /* VND_COMPRESSION */ 1708