1/* 2 * Copyright (c) 2000-2010 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28 29/* 30 * Copyright (c) 1988 University of Utah. 31 * Copyright (c) 1990, 1993 32 * The Regents of the University of California. All rights reserved. 33 * 34 * This code is derived from software contributed to Berkeley by 35 * the Systems Programming Group of the University of Utah Computer 36 * Science Department. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. All advertising materials mentioning features or use of this software 47 * must display the following acknowledgement: 48 * This product includes software developed by the University of 49 * California, Berkeley and its contributors. 50 * 4. Neither the name of the University nor the names of its contributors 51 * may be used to endorse or promote products derived from this software 52 * without specific prior written permission. 53 * 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 64 * SUCH DAMAGE. 65 * 66 * from: Utah Hdr: vn.c 1.13 94/04/02 67 * 68 * from: @(#)vn.c 8.6 (Berkeley) 4/1/94 69 * $FreeBSD: src/sys/dev/vn/vn.c,v 1.105.2.4 2001/11/18 07:11:00 dillon Exp $ 70 */ 71 72/* 73 * Vnode disk driver. 74 * 75 * Block/character interface to a vnode. Allows one to treat a file 76 * as a disk (e.g. build a filesystem in it, mount it, etc.). 77 * 78 * NOTE 1: This uses the vnop_blockmap/vnop_strategy interface to the vnode 79 * instead of a simple VOP_RDWR. We do this to avoid distorting the 80 * local buffer cache. 81 * 82 * NOTE 2: There is a security issue involved with this driver. 83 * Once mounted all access to the contents of the "mapped" file via 84 * the special file is controlled by the permissions on the special 85 * file, the protection of the mapped file is ignored (effectively, 86 * by using root credentials in all transactions). 87 * 88 * NOTE 3: Doesn't interact with leases, should it? 89 */ 90 91#include "vndevice.h" 92 93#if NVNDEVICE > 0 94 95#include <sys/param.h> 96#include <sys/systm.h> 97#include <sys/kernel.h> 98#include <sys/mount.h> 99#include <sys/namei.h> 100#include <sys/proc.h> 101#include <sys/kauth.h> 102#include <sys/buf.h> 103#include <sys/malloc.h> 104#include <sys/vnode_internal.h> 105#include <sys/fcntl.h> 106#include <sys/conf.h> 107#include <sys/disk.h> 108#include <sys/stat.h> 109#include <sys/conf.h> 110#include <sys/uio_internal.h> 111 112#include <sys/vnioctl.h> 113 114#include <sys/vm.h> 115 116#include <vm/vm_pager.h> 117#include <mach/memory_object_types.h> 118 119#include <miscfs/devfs/devfs.h> 120 121 122#include "shadow.h" 123static void 124vndevice_do_init(void); 125 126static ioctl_fcn_t vnioctl_chr; 127static ioctl_fcn_t vnioctl_blk; 128static open_close_fcn_t vnopen; 129static open_close_fcn_t vnclose; 130static psize_fcn_t vnsize; 131static strategy_fcn_t vnstrategy; 132static read_write_fcn_t vnread; 133static read_write_fcn_t vnwrite; 134 135static int vndevice_bdev_major; 136static int vndevice_cdev_major; 137 138/* 139 * cdevsw 140 * D_DISK we want to look like a disk 141 * D_CANFREE We support B_FREEBUF 142 */ 143 144static struct bdevsw vn_bdevsw = { 145 /* open */ vnopen, 146 /* close */ vnclose, 147 /* strategy */ vnstrategy, 148 /* ioctl */ vnioctl_blk, 149 /* dump */ eno_dump, 150 /* psize */ vnsize, 151 /* flags */ D_DISK, 152}; 153 154static struct cdevsw vn_cdevsw = { 155 /* open */ vnopen, 156 /* close */ vnclose, 157 /* read */ vnread, 158 /* write */ vnwrite, 159 /* ioctl */ vnioctl_chr, 160 /* stop */ eno_stop, 161 /* reset */ eno_reset, 162 /* ttys */ NULL, 163 /* select */ eno_select, 164 /* mmap */ eno_mmap, 165 /* strategy */ eno_strat, 166 /* getc */ eno_getc, 167 /* putc */ eno_putc, 168 /* flags */ D_DISK, 169}; 170 171struct vn_softc { 172 u_int64_t sc_fsize; /* file size in bytes */ 173 u_int64_t sc_size; /* size of vn, sc_secsize scale */ 174 int sc_flags; /* flags */ 175 u_int32_t sc_secsize; /* sector size */ 176 struct vnode *sc_vp; /* vnode if not NULL */ 177 uint32_t sc_vid; 178 int sc_open_flags; 179 struct vnode *sc_shadow_vp; /* shadow vnode if not NULL */ 180 uint32_t sc_shadow_vid; 181 shadow_map_t * sc_shadow_map; /* shadow map if not NULL */ 182 kauth_cred_t sc_cred; /* credentials */ 183 u_int32_t sc_options; /* options */ 184 void * sc_bdev; 185 void * sc_cdev; 186} vn_table[NVNDEVICE]; 187 188#define ROOT_IMAGE_UNIT 0 189 190/* sc_flags */ 191#define VNF_INITED 0x01 192#define VNF_READONLY 0x02 193 194static u_int32_t vn_options; 195 196#define IFOPT(vn,opt) if (((vn)->sc_options|vn_options) & (opt)) 197#define TESTOPT(vn,opt) (((vn)->sc_options|vn_options) & (opt)) 198 199static int setcred(struct vnode * vp, kauth_cred_t cred); 200static void vnclear (struct vn_softc *vn, vfs_context_t ctx); 201static void vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to); 202void vndevice_init(void); 203int vndevice_root_image(char * path, char devname[], dev_t * dev_p); 204 205static int 206vniocattach_file(struct vn_softc *vn, 207 struct vn_ioctl_64 *vniop, 208 dev_t dev, 209 int in_kernel, 210 proc_t p); 211static int 212vniocattach_shadow(struct vn_softc * vn, 213 struct vn_ioctl_64 *vniop, 214 dev_t dev, 215 int in_kernel, 216 proc_t p); 217static __inline__ int 218vnunit(dev_t dev) 219{ 220 return (minor(dev)); 221} 222 223static int 224vnclose(__unused dev_t dev, __unused int flags, 225 __unused int devtype, __unused proc_t p) 226{ 227 return (0); 228} 229 230static int 231vnopen(dev_t dev, int flags, __unused int devtype, __unused proc_t p) 232{ 233 struct vn_softc *vn; 234 int unit; 235 236 unit = vnunit(dev); 237 if (vnunit(dev) >= NVNDEVICE) { 238 return (ENXIO); 239 } 240 vn = vn_table + unit; 241 if ((flags & FWRITE) && (vn->sc_flags & VNF_READONLY)) 242 return (EACCES); 243 244 return(0); 245} 246 247static int 248file_io(struct vnode * vp, vfs_context_t ctx, 249 enum uio_rw op, char * base, off_t offset, user_ssize_t count, 250 user_ssize_t * resid) 251{ 252 uio_t auio; 253 int error; 254 char uio_buf[UIO_SIZEOF(1)]; 255 256 auio = uio_createwithbuffer(1, offset, UIO_SYSSPACE, op, 257 &uio_buf[0], sizeof(uio_buf)); 258 uio_addiov(auio, CAST_USER_ADDR_T(base), count); 259 if (op == UIO_READ) 260 error = VNOP_READ(vp, auio, IO_SYNC, ctx); 261 else 262 error = VNOP_WRITE(vp, auio, IO_SYNC, ctx); 263 264 if (resid != NULL) { 265 *resid = uio_resid(auio); 266 } 267 return (error); 268} 269 270static __inline__ off_t 271block_round(off_t o, int blocksize) 272{ 273 return ((o + blocksize - 1) / blocksize); 274} 275 276static __inline__ off_t 277block_truncate(off_t o, int blocksize) 278{ 279 return (o / blocksize); 280} 281 282static __inline__ int 283block_remainder(off_t o, int blocksize) 284{ 285 return (o % blocksize); 286} 287 288static int 289vnread_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, 290 vfs_context_t ctx) 291{ 292 u_int32_t blocksize = vn->sc_secsize; 293 int error = 0; 294 off_t offset; 295 user_ssize_t resid; 296 off_t orig_offset; 297 user_ssize_t orig_resid; 298 299 orig_resid = resid = uio_resid(uio); 300 orig_offset = offset = uio_offset(uio); 301 302 while (resid > 0) { 303 u_int32_t remainder; 304 u_int32_t this_block_number; 305 u_int32_t this_block_count; 306 off_t this_offset; 307 user_ssize_t this_resid; 308 struct vnode * vp; 309 310 /* figure out which blocks to read */ 311 remainder = block_remainder(offset, blocksize); 312 if (shadow_map_read(vn->sc_shadow_map, 313 block_truncate(offset, blocksize), 314 block_round(resid + remainder, blocksize), 315 &this_block_number, &this_block_count)) { 316 vp = vn->sc_shadow_vp; 317 } 318 else { 319 vp = vn->sc_vp; 320 } 321 322 /* read the blocks (or parts thereof) */ 323 this_offset = (off_t)this_block_number * blocksize + remainder; 324 uio_setoffset(uio, this_offset); 325 this_resid = this_block_count * blocksize - remainder; 326 if (this_resid > resid) { 327 this_resid = resid; 328 } 329 uio_setresid(uio, this_resid); 330 error = VNOP_READ(vp, uio, ioflag, ctx); 331 if (error) { 332 break; 333 } 334 335 /* figure out how much we actually read */ 336 this_resid -= uio_resid(uio); 337 if (this_resid == 0) { 338 printf("vn device: vnread_shadow zero length read\n"); 339 break; 340 } 341 resid -= this_resid; 342 offset += this_resid; 343 } 344 uio_setresid(uio, resid); 345 uio_setoffset(uio, offset); 346 return (error); 347} 348 349static int 350vncopy_block_to_shadow(struct vn_softc * vn, vfs_context_t ctx, 351 u_int32_t file_block, u_int32_t shadow_block) 352{ 353 int error; 354 char * tmpbuf; 355 356 tmpbuf = _MALLOC(vn->sc_secsize, M_TEMP, M_WAITOK); 357 if (tmpbuf == NULL) { 358 return (ENOMEM); 359 } 360 /* read one block from file at file_block offset */ 361 error = file_io(vn->sc_vp, ctx, UIO_READ, 362 tmpbuf, (off_t)file_block * vn->sc_secsize, 363 vn->sc_secsize, NULL); 364 if (error) { 365 goto done; 366 } 367 /* write one block to shadow file at shadow_block offset */ 368 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE, 369 tmpbuf, (off_t)shadow_block * vn->sc_secsize, 370 vn->sc_secsize, NULL); 371 done: 372 FREE(tmpbuf, M_TEMP); 373 return (error); 374} 375 376enum { 377 FLAGS_FIRST_BLOCK_PARTIAL = 0x1, 378 FLAGS_LAST_BLOCK_PARTIAL = 0x2 379}; 380 381static int 382vnwrite_shadow(struct vn_softc * vn, struct uio *uio, int ioflag, 383 vfs_context_t ctx) 384{ 385 u_int32_t blocksize = vn->sc_secsize; 386 int error = 0; 387 user_ssize_t resid; 388 off_t offset; 389 390 resid = uio_resid(uio); 391 offset = uio_offset(uio); 392 393 while (resid > 0) { 394 int flags = 0; 395 u_int32_t offset_block_number; 396 u_int32_t remainder; 397 u_int32_t resid_block_count; 398 u_int32_t shadow_block_count; 399 u_int32_t shadow_block_number; 400 user_ssize_t this_resid; 401 402 /* figure out which blocks to write */ 403 offset_block_number = block_truncate(offset, blocksize); 404 remainder = block_remainder(offset, blocksize); 405 resid_block_count = block_round(resid + remainder, blocksize); 406 /* figure out if the first or last blocks are partial writes */ 407 if (remainder > 0 408 && !shadow_map_is_written(vn->sc_shadow_map, 409 offset_block_number)) { 410 /* the first block is a partial write */ 411 flags |= FLAGS_FIRST_BLOCK_PARTIAL; 412 } 413 if (resid_block_count > 1 414 && !shadow_map_is_written(vn->sc_shadow_map, 415 offset_block_number 416 + resid_block_count - 1) 417 && block_remainder(offset + resid, blocksize) > 0) { 418 /* the last block is a partial write */ 419 flags |= FLAGS_LAST_BLOCK_PARTIAL; 420 } 421 if (shadow_map_write(vn->sc_shadow_map, 422 offset_block_number, resid_block_count, 423 &shadow_block_number, 424 &shadow_block_count)) { 425 /* shadow file is growing */ 426#if 0 427 /* truncate the file to its new length before write */ 428 off_t size; 429 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) 430 * vn->sc_secsize; 431 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx); 432#endif 433 } 434 /* write the blocks (or parts thereof) */ 435 uio_setoffset(uio, (off_t) 436 shadow_block_number * blocksize + remainder); 437 this_resid = (off_t)shadow_block_count * blocksize - remainder; 438 if (this_resid >= resid) { 439 this_resid = resid; 440 if ((flags & FLAGS_LAST_BLOCK_PARTIAL) != 0) { 441 /* copy the last block to the shadow */ 442 u_int32_t d; 443 u_int32_t s; 444 445 s = offset_block_number 446 + resid_block_count - 1; 447 d = shadow_block_number 448 + shadow_block_count - 1; 449 error = vncopy_block_to_shadow(vn, ctx, s, d); 450 if (error) { 451 printf("vnwrite_shadow: failed to copy" 452 " block %u to shadow block %u\n", 453 s, d); 454 break; 455 } 456 } 457 } 458 uio_setresid(uio, this_resid); 459 if ((flags & FLAGS_FIRST_BLOCK_PARTIAL) != 0) { 460 /* copy the first block to the shadow */ 461 error = vncopy_block_to_shadow(vn, ctx, 462 offset_block_number, 463 shadow_block_number); 464 if (error) { 465 printf("vnwrite_shadow: failed to" 466 " copy block %u to shadow block %u\n", 467 offset_block_number, 468 shadow_block_number); 469 break; 470 } 471 } 472 error = VNOP_WRITE(vn->sc_shadow_vp, uio, ioflag, ctx); 473 if (error) { 474 break; 475 } 476 /* figure out how much we actually wrote */ 477 this_resid -= uio_resid(uio); 478 if (this_resid == 0) { 479 printf("vn device: vnwrite_shadow zero length write\n"); 480 break; 481 } 482 resid -= this_resid; 483 offset += this_resid; 484 } 485 uio_setresid(uio, resid); 486 uio_setoffset(uio, offset); 487 return (error); 488} 489 490static int 491vnread(dev_t dev, struct uio *uio, int ioflag) 492{ 493 struct vfs_context context; 494 int error = 0; 495 off_t offset; 496 proc_t p; 497 user_ssize_t resid; 498 struct vn_softc * vn; 499 int unit; 500 501 unit = vnunit(dev); 502 if (vnunit(dev) >= NVNDEVICE) { 503 return (ENXIO); 504 } 505 p = current_proc(); 506 vn = vn_table + unit; 507 if ((vn->sc_flags & VNF_INITED) == 0) { 508 error = ENXIO; 509 goto done; 510 } 511 512 context.vc_thread = current_thread(); 513 context.vc_ucred = vn->sc_cred; 514 515 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid); 516 if (error != 0) { 517 /* the vnode is no longer available, abort */ 518 error = ENXIO; 519 vnclear(vn, &context); 520 goto done; 521 } 522 523 resid = uio_resid(uio); 524 offset = uio_offset(uio); 525 526 /* 527 * If out of bounds return an error. If at the EOF point, 528 * simply read less. 529 */ 530 if (offset >= (off_t)vn->sc_fsize) { 531 if (offset > (off_t)vn->sc_fsize) { 532 error = EINVAL; 533 } 534 goto done; 535 } 536 /* 537 * If the request crosses EOF, truncate the request. 538 */ 539 if ((offset + resid) > (off_t)vn->sc_fsize) { 540 resid = vn->sc_fsize - offset; 541 uio_setresid(uio, resid); 542 } 543 544 if (vn->sc_shadow_vp != NULL) { 545 error = vnode_getwithvid(vn->sc_shadow_vp, 546 vn->sc_shadow_vid); 547 if (error != 0) { 548 /* the vnode is no longer available, abort */ 549 error = ENXIO; 550 vnode_put(vn->sc_vp); 551 vnclear(vn, &context); 552 goto done; 553 } 554 error = vnread_shadow(vn, uio, ioflag, &context); 555 vnode_put(vn->sc_shadow_vp); 556 } else { 557 error = VNOP_READ(vn->sc_vp, uio, ioflag, &context); 558 } 559 vnode_put(vn->sc_vp); 560 done: 561 return (error); 562} 563 564static int 565vnwrite(dev_t dev, struct uio *uio, int ioflag) 566{ 567 struct vfs_context context; 568 int error; 569 off_t offset; 570 proc_t p; 571 user_ssize_t resid; 572 struct vn_softc * vn; 573 int unit; 574 575 unit = vnunit(dev); 576 if (vnunit(dev) >= NVNDEVICE) { 577 return (ENXIO); 578 } 579 p = current_proc(); 580 vn = vn_table + unit; 581 if ((vn->sc_flags & VNF_INITED) == 0) { 582 error = ENXIO; 583 goto done; 584 } 585 if (vn->sc_flags & VNF_READONLY) { 586 error = EROFS; 587 goto done; 588 } 589 590 context.vc_thread = current_thread(); 591 context.vc_ucred = vn->sc_cred; 592 593 error = vnode_getwithvid(vn->sc_vp, vn->sc_vid); 594 if (error != 0) { 595 /* the vnode is no longer available, abort */ 596 error = ENXIO; 597 vnclear(vn, &context); 598 goto done; 599 } 600 resid = uio_resid(uio); 601 offset = uio_offset(uio); 602 603 /* 604 * If out of bounds return an error. If at the EOF point, 605 * simply write less. 606 */ 607 if (offset >= (off_t)vn->sc_fsize) { 608 if (offset > (off_t)vn->sc_fsize) { 609 error = EINVAL; 610 } 611 goto done; 612 } 613 /* 614 * If the request crosses EOF, truncate the request. 615 */ 616 if ((offset + resid) > (off_t)vn->sc_fsize) { 617 resid = (off_t)vn->sc_fsize - offset; 618 uio_setresid(uio, resid); 619 } 620 621 if (vn->sc_shadow_vp != NULL) { 622 error = vnode_getwithvid(vn->sc_shadow_vp, 623 vn->sc_shadow_vid); 624 if (error != 0) { 625 /* the vnode is no longer available, abort */ 626 error = ENXIO; 627 vnode_put(vn->sc_vp); 628 vnclear(vn, &context); 629 goto done; 630 } 631 error = vnwrite_shadow(vn, uio, ioflag, &context); 632 vnode_put(vn->sc_shadow_vp); 633 } else { 634 error = VNOP_WRITE(vn->sc_vp, uio, ioflag, &context); 635 } 636 vnode_put(vn->sc_vp); 637 done: 638 return (error); 639} 640 641static int 642shadow_read(struct vn_softc * vn, struct buf * bp, char * base, 643 vfs_context_t ctx) 644{ 645 u_int32_t blocksize = vn->sc_secsize; 646 int error = 0; 647 u_int32_t offset; 648 boolean_t read_shadow; 649 u_int32_t resid; 650 u_int32_t start = 0; 651 652 offset = buf_blkno(bp); 653 resid = buf_resid(bp) / blocksize; 654 while (resid > 0) { 655 user_ssize_t temp_resid; 656 u_int32_t this_offset; 657 u_int32_t this_resid; 658 struct vnode * vp; 659 660 read_shadow = shadow_map_read(vn->sc_shadow_map, 661 offset, resid, 662 &this_offset, &this_resid); 663 if (read_shadow) { 664 vp = vn->sc_shadow_vp; 665 } 666 else { 667 vp = vn->sc_vp; 668 } 669 error = file_io(vp, ctx, UIO_READ, base + start, 670 (off_t)this_offset * blocksize, 671 (user_ssize_t)this_resid * blocksize, 672 &temp_resid); 673 if (error) { 674 break; 675 } 676 this_resid -= (temp_resid / blocksize); 677 if (this_resid == 0) { 678 printf("vn device: shadow_read zero length read\n"); 679 break; 680 } 681 resid -= this_resid; 682 offset += this_resid; 683 start += this_resid * blocksize; 684 } 685 buf_setresid(bp, resid * blocksize); 686 return (error); 687} 688 689static int 690shadow_write(struct vn_softc * vn, struct buf * bp, char * base, 691 vfs_context_t ctx) 692{ 693 u_int32_t blocksize = vn->sc_secsize; 694 int error = 0; 695 u_int32_t offset; 696 boolean_t shadow_grew; 697 u_int32_t resid; 698 u_int32_t start = 0; 699 700 offset = buf_blkno(bp); 701 resid = buf_resid(bp) / blocksize; 702 while (resid > 0) { 703 user_ssize_t temp_resid; 704 u_int32_t this_offset; 705 u_int32_t this_resid; 706 707 shadow_grew = shadow_map_write(vn->sc_shadow_map, 708 offset, resid, 709 &this_offset, &this_resid); 710 if (shadow_grew) { 711#if 0 712 off_t size; 713 /* truncate the file to its new length before write */ 714 size = (off_t)shadow_map_shadow_size(vn->sc_shadow_map) 715 * blocksize; 716 vnode_setsize(vn->sc_shadow_vp, size, IO_SYNC, ctx); 717#endif 718 } 719 error = file_io(vn->sc_shadow_vp, ctx, UIO_WRITE, 720 base + start, 721 (off_t)this_offset * blocksize, 722 (user_ssize_t)this_resid * blocksize, 723 &temp_resid); 724 if (error) { 725 break; 726 } 727 this_resid -= (temp_resid / blocksize); 728 if (this_resid == 0) { 729 printf("vn device: shadow_write zero length write\n"); 730 break; 731 } 732 resid -= this_resid; 733 offset += this_resid; 734 start += this_resid * blocksize; 735 } 736 buf_setresid(bp, resid * blocksize); 737 return (error); 738} 739 740static int 741vn_readwrite_io(struct vn_softc * vn, struct buf * bp, vfs_context_t ctx) 742{ 743 int error = 0; 744 char * iov_base; 745 caddr_t vaddr; 746 747 if (buf_map(bp, &vaddr)) 748 panic("vn device: buf_map failed"); 749 iov_base = (char *)vaddr; 750 751 if (vn->sc_shadow_vp == NULL) { 752 user_ssize_t temp_resid; 753 754 error = file_io(vn->sc_vp, ctx, 755 buf_flags(bp) & B_READ ? UIO_READ : UIO_WRITE, 756 iov_base, 757 (off_t)buf_blkno(bp) * vn->sc_secsize, 758 buf_resid(bp), &temp_resid); 759 buf_setresid(bp, temp_resid); 760 } 761 else { 762 if (buf_flags(bp) & B_READ) 763 error = shadow_read(vn, bp, iov_base, ctx); 764 else 765 error = shadow_write(vn, bp, iov_base, ctx); 766 } 767 buf_unmap(bp); 768 769 return (error); 770} 771 772static void 773vnstrategy(struct buf *bp) 774{ 775 struct vn_softc *vn; 776 int error = 0; 777 long sz; /* in sc_secsize chunks */ 778 daddr64_t blk_num; 779 struct vnode * shadow_vp = NULL; 780 struct vnode * vp = NULL; 781 struct vfs_context context; 782 783 vn = vn_table + vnunit(buf_device(bp)); 784 if ((vn->sc_flags & VNF_INITED) == 0) { 785 error = ENXIO; 786 goto done; 787 } 788 789 context.vc_thread = current_thread(); 790 context.vc_ucred = vn->sc_cred; 791 792 buf_setresid(bp, buf_count(bp)); 793 /* 794 * Check for required alignment. Transfers must be a valid 795 * multiple of the sector size. 796 */ 797 blk_num = buf_blkno(bp); 798 if (buf_count(bp) % vn->sc_secsize != 0) { 799 error = EINVAL; 800 goto done; 801 } 802 sz = howmany(buf_count(bp), vn->sc_secsize); 803 804 /* 805 * If out of bounds return an error. If at the EOF point, 806 * simply read or write less. 807 */ 808 if (blk_num >= 0 && (u_int64_t)blk_num >= vn->sc_size) { 809 if (blk_num > 0 && (u_int64_t)blk_num > vn->sc_size) { 810 error = EINVAL; 811 } 812 goto done; 813 } 814 /* 815 * If the request crosses EOF, truncate the request. 816 */ 817 if ((blk_num + sz) > 0 && ((u_int64_t)(blk_num + sz)) > vn->sc_size) { 818 buf_setcount(bp, (vn->sc_size - blk_num) * vn->sc_secsize); 819 buf_setresid(bp, buf_count(bp)); 820 } 821 vp = vn->sc_vp; 822 if (vp == NULL) { 823 error = ENXIO; 824 goto done; 825 } 826 827 error = vnode_getwithvid(vp, vn->sc_vid); 828 if (error != 0) { 829 /* the vnode is no longer available, abort */ 830 error = ENXIO; 831 vnclear(vn, &context); 832 goto done; 833 } 834 shadow_vp = vn->sc_shadow_vp; 835 if (shadow_vp != NULL) { 836 error = vnode_getwithvid(shadow_vp, 837 vn->sc_shadow_vid); 838 if (error != 0) { 839 /* the vnode is no longer available, abort */ 840 error = ENXIO; 841 vnode_put(vn->sc_vp); 842 vnclear(vn, &context); 843 goto done; 844 } 845 } 846 847 error = vn_readwrite_io(vn, bp, &context); 848 vnode_put(vp); 849 if (shadow_vp != NULL) { 850 vnode_put(shadow_vp); 851 } 852 853 done: 854 if (error) { 855 buf_seterror(bp, error); 856 } 857 buf_biodone(bp); 858 return; 859} 860 861/* ARGSUSED */ 862static int 863vnioctl(dev_t dev, u_long cmd, caddr_t data, 864 __unused int flag, proc_t p, 865 int is_char) 866{ 867 struct vn_softc *vn; 868 struct vn_ioctl_64 *viop; 869 int error; 870 u_int32_t *f; 871 u_int64_t * o; 872 int unit; 873 struct vfsioattr ioattr; 874 struct vn_ioctl_64 user_vnio; 875 struct vfs_context context; 876 877 unit = vnunit(dev); 878 if (vnunit(dev) >= NVNDEVICE) { 879 return (ENXIO); 880 } 881 882 vn = vn_table + unit; 883 error = proc_suser(p); 884 if (error) { 885 goto done; 886 } 887 888 context.vc_thread = current_thread(); 889 context.vc_ucred = vn->sc_cred; 890 891 viop = (struct vn_ioctl_64 *)data; 892 f = (u_int32_t *)data; 893 o = (u_int64_t *)data; 894 switch (cmd) { 895#ifdef __LP64__ 896 case VNIOCDETACH32: 897 case VNIOCDETACH: 898#else 899 case VNIOCDETACH: 900 case VNIOCDETACH64: 901#endif 902 case DKIOCGETBLOCKSIZE: 903 case DKIOCSETBLOCKSIZE: 904 case DKIOCGETMAXBLOCKCOUNTREAD: 905 case DKIOCGETMAXBLOCKCOUNTWRITE: 906 case DKIOCGETMAXSEGMENTCOUNTREAD: 907 case DKIOCGETMAXSEGMENTCOUNTWRITE: 908 case DKIOCGETMAXSEGMENTBYTECOUNTREAD: 909 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE: 910 case DKIOCGETBLOCKCOUNT: 911 case DKIOCGETBLOCKCOUNT32: 912 if ((vn->sc_flags & VNF_INITED) == 0) { 913 error = ENXIO; 914 goto done; 915 } 916 break; 917 default: 918 break; 919 } 920 921 if (vn->sc_vp != NULL) 922 vfs_ioattr(vnode_mount(vn->sc_vp), &ioattr); 923 else 924 bzero(&ioattr, sizeof(ioattr)); 925 926 switch (cmd) { 927 case DKIOCISVIRTUAL: 928 *f = 1; 929 break; 930 case DKIOCGETMAXBLOCKCOUNTREAD: 931 *o = ioattr.io_maxreadcnt / vn->sc_secsize; 932 break; 933 case DKIOCGETMAXBLOCKCOUNTWRITE: 934 *o = ioattr.io_maxwritecnt / vn->sc_secsize; 935 break; 936 case DKIOCGETMAXBYTECOUNTREAD: 937 *o = ioattr.io_maxreadcnt; 938 break; 939 case DKIOCGETMAXBYTECOUNTWRITE: 940 *o = ioattr.io_maxwritecnt; 941 break; 942 case DKIOCGETMAXSEGMENTCOUNTREAD: 943 *o = ioattr.io_segreadcnt; 944 break; 945 case DKIOCGETMAXSEGMENTCOUNTWRITE: 946 *o = ioattr.io_segwritecnt; 947 break; 948 case DKIOCGETMAXSEGMENTBYTECOUNTREAD: 949 *o = ioattr.io_maxsegreadsize; 950 break; 951 case DKIOCGETMAXSEGMENTBYTECOUNTWRITE: 952 *o = ioattr.io_maxsegwritesize; 953 break; 954 case DKIOCGETBLOCKSIZE: 955 *f = vn->sc_secsize; 956 break; 957 case DKIOCSETBLOCKSIZE: 958 if (is_char) { 959 /* can only set block size on block device */ 960 error = ENODEV; 961 break; 962 } 963 if (*f < DEV_BSIZE) { 964 error = EINVAL; 965 break; 966 } 967 if (vn->sc_shadow_vp != NULL) { 968 if (*f == (unsigned)vn->sc_secsize) { 969 break; 970 } 971 /* can't change the block size if already shadowing */ 972 error = EBUSY; 973 break; 974 } 975 vn->sc_secsize = *f; 976 /* recompute the size in terms of the new blocksize */ 977 vn->sc_size = vn->sc_fsize / vn->sc_secsize; 978 break; 979 case DKIOCISWRITABLE: 980 *f = 1; 981 break; 982 case DKIOCGETBLOCKCOUNT32: 983 *f = vn->sc_size; 984 break; 985 case DKIOCGETBLOCKCOUNT: 986 *o = vn->sc_size; 987 break; 988#ifdef __LP64__ 989 case VNIOCSHADOW32: 990 case VNIOCSHADOW: 991#else 992 case VNIOCSHADOW: 993 case VNIOCSHADOW64: 994#endif 995 if (vn->sc_shadow_vp != NULL) { 996 error = EBUSY; 997 break; 998 } 999 if (vn->sc_vp == NULL) { 1000 /* much be attached before we can shadow */ 1001 error = EINVAL; 1002 break; 1003 } 1004 if (!proc_is64bit(p)) { 1005 /* downstream code expects LP64 version of vn_ioctl structure */ 1006 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio); 1007 viop = &user_vnio; 1008 } 1009 if (viop->vn_file == USER_ADDR_NULL) { 1010 error = EINVAL; 1011 break; 1012 } 1013 error = vniocattach_shadow(vn, viop, dev, 0, p); 1014 break; 1015 1016#ifdef __LP64__ 1017 case VNIOCATTACH32: 1018 case VNIOCATTACH: 1019#else 1020 case VNIOCATTACH: 1021 case VNIOCATTACH64: 1022#endif 1023 if (is_char) { 1024 /* attach only on block device */ 1025 error = ENODEV; 1026 break; 1027 } 1028 if (vn->sc_flags & VNF_INITED) { 1029 error = EBUSY; 1030 break; 1031 } 1032 if (!proc_is64bit(p)) { 1033 /* downstream code expects LP64 version of vn_ioctl structure */ 1034 vn_ioctl_to_64((struct vn_ioctl_32 *)viop, &user_vnio); 1035 viop = &user_vnio; 1036 } 1037 if (viop->vn_file == USER_ADDR_NULL) { 1038 error = EINVAL; 1039 break; 1040 } 1041 error = vniocattach_file(vn, viop, dev, 0, p); 1042 break; 1043 1044#ifdef __LP64__ 1045 case VNIOCDETACH32: 1046 case VNIOCDETACH: 1047#else 1048 case VNIOCDETACH: 1049 case VNIOCDETACH64: 1050#endif 1051 if (is_char) { 1052 /* detach only on block device */ 1053 error = ENODEV; 1054 break; 1055 } 1056 /* Note: spec_open won't open a mounted block device */ 1057 1058 /* 1059 * XXX handle i/o in progress. Return EBUSY, or wait, or 1060 * flush the i/o. 1061 * XXX handle multiple opens of the device. Return EBUSY, 1062 * or revoke the fd's. 1063 * How are these problems handled for removable and failing 1064 * hardware devices? (Hint: They are not) 1065 */ 1066 vnclear(vn, &context); 1067 break; 1068 1069 case VNIOCGSET: 1070 vn_options |= *f; 1071 *f = vn_options; 1072 break; 1073 1074 case VNIOCGCLEAR: 1075 vn_options &= ~(*f); 1076 *f = vn_options; 1077 break; 1078 1079 case VNIOCUSET: 1080 vn->sc_options |= *f; 1081 *f = vn->sc_options; 1082 break; 1083 1084 case VNIOCUCLEAR: 1085 vn->sc_options &= ~(*f); 1086 *f = vn->sc_options; 1087 break; 1088 1089 default: 1090 error = ENOTTY; 1091 break; 1092 } 1093 done: 1094 return(error); 1095} 1096 1097static int 1098vnioctl_chr(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) 1099{ 1100 return (vnioctl(dev, cmd, data, flag, p, TRUE)); 1101} 1102 1103static int 1104vnioctl_blk(dev_t dev, u_long cmd, caddr_t data, int flag, proc_t p) 1105{ 1106 return (vnioctl(dev, cmd, data, flag, p, FALSE)); 1107} 1108 1109/* 1110 * vniocattach_file: 1111 * 1112 * Attach a file to a VN partition. Return the size in the vn_size 1113 * field. 1114 */ 1115 1116static int 1117vniocattach_file(struct vn_softc *vn, 1118 struct vn_ioctl_64 *vniop, 1119 dev_t dev, 1120 int in_kernel, 1121 proc_t p) 1122{ 1123 dev_t cdev; 1124 vfs_context_t ctx = vfs_context_current(); 1125 kauth_cred_t cred; 1126 struct nameidata nd; 1127 off_t file_size; 1128 int error, flags; 1129 1130 flags = FREAD|FWRITE; 1131 if (in_kernel) { 1132 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx); 1133 } 1134 else { 1135 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 1136 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1137 vniop->vn_file, ctx); 1138 } 1139 /* vn_open gives both long- and short-term references */ 1140 error = vn_open(&nd, flags, 0); 1141 if (error) { 1142 if (error != EACCES && error != EPERM && error != EROFS) { 1143 return (error); 1144 } 1145 flags &= ~FWRITE; 1146 if (in_kernel) { 1147 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, 1148 vniop->vn_file, ctx); 1149 } 1150 else { 1151 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 1152 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1153 vniop->vn_file, ctx); 1154 } 1155 error = vn_open(&nd, flags, 0); 1156 if (error) { 1157 return (error); 1158 } 1159 } 1160 if (nd.ni_vp->v_type != VREG) { 1161 error = EINVAL; 1162 } 1163 else { 1164 error = vnode_size(nd.ni_vp, &file_size, ctx); 1165 } 1166 if (error != 0) { 1167 (void) vn_close(nd.ni_vp, flags, ctx); 1168 vnode_put(nd.ni_vp); 1169 return (error); 1170 } 1171 cred = kauth_cred_proc_ref(p); 1172 nd.ni_vp->v_flag |= VNOCACHE_DATA; 1173 error = setcred(nd.ni_vp, cred); 1174 if (error) { 1175 (void)vn_close(nd.ni_vp, flags, ctx); 1176 vnode_put(nd.ni_vp); 1177 kauth_cred_unref(&cred); 1178 return(error); 1179 } 1180 vn->sc_secsize = DEV_BSIZE; 1181 vn->sc_fsize = file_size; 1182 vn->sc_size = file_size / vn->sc_secsize; 1183 vn->sc_vp = nd.ni_vp; 1184 vn->sc_vid = vnode_vid(nd.ni_vp); 1185 vn->sc_open_flags = flags; 1186 vn->sc_cred = cred; 1187 cdev = makedev(vndevice_cdev_major, minor(dev)); 1188 vn->sc_cdev = devfs_make_node(cdev, DEVFS_CHAR, 1189 UID_ROOT, GID_OPERATOR, 1190 0600, "rvn%d", 1191 minor(dev)); 1192 vn->sc_flags |= VNF_INITED; 1193 if (flags == FREAD) 1194 vn->sc_flags |= VNF_READONLY; 1195 /* lose the short-term reference */ 1196 vnode_put(nd.ni_vp); 1197 return(0); 1198} 1199 1200static int 1201vniocattach_shadow(struct vn_softc *vn, struct vn_ioctl_64 *vniop, 1202 __unused dev_t dev, int in_kernel, proc_t p) 1203{ 1204 vfs_context_t ctx = vfs_context_current(); 1205 struct nameidata nd; 1206 int error, flags; 1207 shadow_map_t * map; 1208 off_t file_size; 1209 1210 flags = FREAD|FWRITE; 1211 if (in_kernel) { 1212 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, UIO_SYSSPACE, vniop->vn_file, ctx); 1213 } 1214 else { 1215 NDINIT(&nd, LOOKUP, OP_OPEN, FOLLOW, 1216 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 1217 vniop->vn_file, ctx); 1218 } 1219 /* vn_open gives both long- and short-term references */ 1220 error = vn_open(&nd, flags, 0); 1221 if (error) { 1222 /* shadow MUST be writable! */ 1223 return (error); 1224 } 1225 if (nd.ni_vp->v_type != VREG 1226 || (error = vnode_size(nd.ni_vp, &file_size, ctx))) { 1227 (void)vn_close(nd.ni_vp, flags, ctx); 1228 vnode_put(nd.ni_vp); 1229 return (error ? error : EINVAL); 1230 } 1231 map = shadow_map_create(vn->sc_fsize, file_size, 1232 0, vn->sc_secsize); 1233 if (map == NULL) { 1234 (void)vn_close(nd.ni_vp, flags, ctx); 1235 vnode_put(nd.ni_vp); 1236 vn->sc_shadow_vp = NULL; 1237 return (ENOMEM); 1238 } 1239 vn->sc_shadow_vp = nd.ni_vp; 1240 vn->sc_shadow_vid = vnode_vid(nd.ni_vp); 1241 vn->sc_shadow_vp->v_flag |= VNOCACHE_DATA; 1242 vn->sc_shadow_map = map; 1243 vn->sc_flags &= ~VNF_READONLY; /* we're now read/write */ 1244 1245 /* lose the short-term reference */ 1246 vnode_put(nd.ni_vp); 1247 return(0); 1248} 1249 1250int 1251vndevice_root_image(char * path, char devname[], dev_t * dev_p) 1252{ 1253 int error = 0; 1254 struct vn_softc * vn; 1255 struct vn_ioctl_64 vnio; 1256 1257 vnio.vn_file = CAST_USER_ADDR_T(path); 1258 vnio.vn_size = 0; 1259 1260 vn = vn_table + ROOT_IMAGE_UNIT; 1261 *dev_p = makedev(vndevice_bdev_major, 1262 ROOT_IMAGE_UNIT); 1263 snprintf(devname, 16, "vn%d", ROOT_IMAGE_UNIT); 1264 error = vniocattach_file(vn, &vnio, *dev_p, 1, current_proc()); 1265 return (error); 1266} 1267 1268/* 1269 * Duplicate the current processes' credentials. Since we are called only 1270 * as the result of a SET ioctl and only root can do that, any future access 1271 * to this "disk" is essentially as root. Note that credentials may change 1272 * if some other uid can write directly to the mapped file (NFS). 1273 */ 1274static int 1275setcred(struct vnode * vp, kauth_cred_t cred) 1276{ 1277 char *tmpbuf; 1278 int error = 0; 1279 struct vfs_context context; 1280 1281 /* 1282 * Horrible kludge to establish credentials for NFS XXX. 1283 */ 1284 context.vc_thread = current_thread(); 1285 context.vc_ucred = cred; 1286 tmpbuf = _MALLOC(DEV_BSIZE, M_TEMP, M_WAITOK); 1287 error = file_io(vp, &context, UIO_READ, tmpbuf, 0, DEV_BSIZE, NULL); 1288 FREE(tmpbuf, M_TEMP); 1289 return (error); 1290} 1291 1292void 1293vnclear(struct vn_softc *vn, vfs_context_t ctx) 1294{ 1295 if (vn->sc_vp != NULL) { 1296 /* release long-term reference */ 1297 (void)vn_close(vn->sc_vp, vn->sc_open_flags, ctx); 1298 vn->sc_vp = NULL; 1299 } 1300 if (vn->sc_shadow_vp != NULL) { 1301 /* release long-term reference */ 1302 (void)vn_close(vn->sc_shadow_vp, FREAD | FWRITE, ctx); 1303 vn->sc_shadow_vp = NULL; 1304 } 1305 if (vn->sc_shadow_map != NULL) { 1306 shadow_map_free(vn->sc_shadow_map); 1307 vn->sc_shadow_map = NULL; 1308 } 1309 vn->sc_flags &= ~(VNF_INITED | VNF_READONLY); 1310 if (vn->sc_cred) { 1311 kauth_cred_unref(&vn->sc_cred); 1312 } 1313 vn->sc_size = 0; 1314 vn->sc_fsize = 0; 1315 if (vn->sc_cdev) { 1316 devfs_remove(vn->sc_cdev); 1317 vn->sc_cdev = NULL; 1318 } 1319} 1320 1321static int 1322vnsize(dev_t dev) 1323{ 1324 int secsize; 1325 struct vn_softc *vn; 1326 int unit; 1327 1328 unit = vnunit(dev); 1329 if (vnunit(dev) >= NVNDEVICE) { 1330 return (-1); 1331 } 1332 1333 vn = vn_table + unit; 1334 if ((vn->sc_flags & VNF_INITED) == 0) 1335 secsize = -1; 1336 else 1337 secsize = vn->sc_secsize; 1338 1339 return (secsize); 1340} 1341 1342#define CDEV_MAJOR -1 1343#define BDEV_MAJOR -1 1344static int vndevice_inited = 0; 1345 1346void 1347vndevice_init(void) 1348{ 1349 if (vndevice_inited) 1350 return; 1351 1352 vndevice_do_init(); 1353} 1354 1355static void 1356vndevice_do_init( void ) 1357{ 1358 int i; 1359 1360 vndevice_bdev_major = bdevsw_add(BDEV_MAJOR, &vn_bdevsw); 1361 1362 if (vndevice_bdev_major < 0) { 1363 printf("vndevice_init: bdevsw_add() returned %d\n", 1364 vndevice_bdev_major); 1365 return; 1366 } 1367 vndevice_cdev_major = cdevsw_add_with_bdev(CDEV_MAJOR, &vn_cdevsw, 1368 vndevice_bdev_major); 1369 if (vndevice_cdev_major < 0) { 1370 printf("vndevice_init: cdevsw_add() returned %d\n", 1371 vndevice_cdev_major); 1372 return; 1373 } 1374 for (i = 0; i < NVNDEVICE; i++) { 1375 dev_t dev = makedev(vndevice_bdev_major, i); 1376 vn_table[i].sc_bdev = devfs_make_node(dev, DEVFS_BLOCK, 1377 UID_ROOT, GID_OPERATOR, 1378 0600, "vn%d", 1379 i); 1380 if (vn_table[i].sc_bdev == NULL) 1381 printf("vninit: devfs_make_node failed!\n"); 1382 } 1383} 1384 1385static void 1386vn_ioctl_to_64(struct vn_ioctl_32 *from, struct vn_ioctl_64 *to) 1387{ 1388 to->vn_file = CAST_USER_ADDR_T(from->vn_file); 1389 to->vn_size = from->vn_size; 1390 to->vn_control = from->vn_control; 1391} 1392 1393#endif /* NVNDEVICE */ 1394