1/* 2 * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved. 3 * 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ 5 * 6 * This file contains Original Code and/or Modifications of Original Code 7 * as defined in and that are subject to the Apple Public Source License 8 * Version 2.0 (the 'License'). You may not use this file except in 9 * compliance with the License. The rights granted to you under the License 10 * may not be used to create, or enable the creation or redistribution of, 11 * unlawful or unlicensed copies of an Apple operating system, or to 12 * circumvent, violate, or enable the circumvention or violation of, any 13 * terms of an Apple operating system software license agreement. 14 * 15 * Please obtain a copy of the License at 16 * http://www.opensource.apple.com/apsl/ and read it before using this file. 17 * 18 * The Original Code and all software distributed under the License are 19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER 20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES, 21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY, 22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT. 23 * Please see the License for the specific language governing rights and 24 * limitations under the License. 25 * 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@ 27 */ 28/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */ 29/* 30 * Copyright (c) 1994, 1995 The Regents of the University of California. 31 * Copyright (c) 1994, 1995 Jan-Simon Pendry. 32 * All rights reserved. 33 * 34 * This code is derived from software donated to Berkeley by 35 * Jan-Simon Pendry. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 66 */ 67 68/* 69 * Union Layer 70 */ 71 72#include <sys/param.h> 73#include <sys/systm.h> 74#include <sys/time.h> 75#include <sys/types.h> 76#include <sys/proc_internal.h> 77#include <sys/kauth.h> 78#include <sys/vnode_internal.h> 79#include <sys/mount_internal.h> 80#include <sys/namei.h> 81#include <sys/malloc.h> 82#include <sys/filedesc.h> 83#include <sys/queue.h> 84#include <miscfs/union/union.h> 85 86static int union_itercallback(vnode_t, void *); 87static int union_root(mount_t, vnode_t *, vfs_context_t); 88 89/* 90 * Mount union filesystem 91 */ 92static int 93union_mount(mount_t mp, __unused vnode_t devvp, user_addr_t data, vfs_context_t context) 94{ 95 proc_t p = vfs_context_proc(context); 96 int error = 0; 97 struct user_union_args args; 98 struct vnode *lowerrootvp = NULLVP; 99 struct vnode *upperrootvp = NULLVP; 100 struct union_mount *um = NULL; 101 kauth_cred_t cred = NOCRED; 102 const char *cp = NULL; 103 char *vcp; 104 int len; 105 u_int size; 106 struct nameidata nd; 107 108#ifdef UNION_DIAGNOSTIC 109 printf("union_mount(mp = %x)\n", mp); 110#endif 111 112 /* 113 * Update is a no-op 114 */ 115 if (mp->mnt_flag & MNT_UPDATE) { 116 /* 117 * Need to provide. 118 * 1. a way to convert between rdonly and rdwr mounts. 119 * 2. support for nfs exports. 120 */ 121 error = ENOTSUP; 122 goto bad; 123 } 124 125 /* 126 * Get argument 127 */ 128 if (vfs_context_is64bit(context)) { 129 error = copyin(data, (caddr_t)&args, sizeof(args)); 130 } 131 else { 132 struct union_args temp; 133 error = copyin(data, (caddr_t)&temp, sizeof (temp)); 134 args.target = CAST_USER_ADDR_T(temp.target); 135 args.mntflags = temp.mntflags; 136 } 137 if (error) 138 goto bad; 139 140 lowerrootvp = mp->mnt_vnodecovered; 141 vnode_get(lowerrootvp); 142 143 /* 144 * Find upper node. 145 */ 146 NDINIT(&nd, LOOKUP, FOLLOW|WANTPARENT, 147 (IS_64BIT_PROCESS(p) ? UIO_USERSPACE64 : UIO_USERSPACE32), 148 args.target, context); 149 150 if ((error = namei(&nd))) 151 goto bad; 152 153 nameidone(&nd); 154 upperrootvp = nd.ni_vp; 155 vnode_put(nd.ni_dvp); 156 nd.ni_dvp = NULL; 157 158 if (upperrootvp->v_type != VDIR) { 159 error = EINVAL; 160 goto bad; 161 } 162 163 MALLOC(um, struct union_mount *, sizeof(struct union_mount), 164 M_UFSMNT, M_WAITOK); 165 166 /* 167 * Keep a held reference to the target vnodes. 168 * They are vnode_put'd in union_unmount. 169 * 170 * Depending on the _BELOW flag, the filesystems are 171 * viewed in a different order. In effect, this is the 172 * same as providing a mount under option to the mount syscall. 173 */ 174 175 um->um_op = args.mntflags & UNMNT_OPMASK; 176 switch (um->um_op) { 177 case UNMNT_ABOVE: 178 um->um_lowervp = lowerrootvp; 179 um->um_uppervp = upperrootvp; 180 break; 181 182 case UNMNT_BELOW: 183 um->um_lowervp = upperrootvp; 184 um->um_uppervp = lowerrootvp; 185 break; 186 187 case UNMNT_REPLACE: 188 vnode_put(lowerrootvp); 189 lowerrootvp = NULLVP; 190 um->um_uppervp = upperrootvp; 191 um->um_lowervp = lowerrootvp; 192 break; 193 194#ifdef FAULTFS 195 case UNMNT_FAULTIN: 196 um->um_lowervp = upperrootvp; 197 um->um_uppervp = lowerrootvp; 198 break; 199#endif 200 201 default: 202 error = EINVAL; 203 goto bad; 204 } 205 206 if (um->um_lowervp != NULLVP) 207 um->um_lowervid = vnode_vid(um->um_lowervp); 208 if (um->um_uppervp != NULLVP) 209 um->um_uppervid = vnode_vid(um->um_uppervp); 210 /* 211 * Unless the mount is readonly, ensure that the top layer 212 * supports whiteout operations 213 */ 214#ifdef FAULTFS 215 if ((um->um_op != UNMNT_FAULTIN) && (mp->mnt_flag & MNT_RDONLY) == 0) 216#else 217 if ((mp->mnt_flag & MNT_RDONLY) == 0) 218#endif 219 { 220 error = VNOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, 221 LOOKUP, context); 222 if (error) 223 goto bad; 224 } 225 226 um->um_cred = kauth_cred_get_with_ref(); 227 um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask; 228 229 /* 230 * Depending on what you think the MNT_LOCAL flag might mean, 231 * you may want the && to be || on the conditional below. 232 * At the moment it has been defined that the filesystem is 233 * only local if it is all local, ie the MNT_LOCAL flag implies 234 * that the entire namespace is local. If you think the MNT_LOCAL 235 * flag implies that some of the files might be stored locally 236 * then you will want to change the conditional. 237 */ 238 if (um->um_op == UNMNT_ABOVE) { 239 if (((um->um_lowervp == NULLVP) || 240 (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && 241 (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) 242 mp->mnt_flag |= MNT_LOCAL; 243 } 244 245 /* 246 * Copy in the upper layer's RDONLY flag. This is for the benefit 247 * of lookup() which explicitly checks the flag, rather than asking 248 * the filesystem for it's own opinion. This means, that an update 249 * mount of the underlying filesystem to go from rdonly to rdwr 250 * will leave the unioned view as read-only. 251 */ 252 mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); 253 254 mp->mnt_data = (qaddr_t) um; 255 vfs_getnewfsid(mp); 256 257 258 switch (um->um_op) { 259 case UNMNT_ABOVE: 260 cp = "<above>:"; 261 break; 262 case UNMNT_BELOW: 263 cp = "<below>:"; 264 break; 265 case UNMNT_REPLACE: 266 cp = ""; 267 break; 268#ifdef FAULTFS 269 case UNMNT_FAULTIN: 270 cp = "/FaultingFS/"; 271 break; 272#endif 273 } 274 len = strlen(cp); 275 bcopy(cp, mp->mnt_vfsstat.f_mntfromname, len); 276 277 vcp = mp->mnt_vfsstat.f_mntfromname + len; 278 len = MNAMELEN - len; 279 280 (void) copyinstr(args.target, vcp, len - 1, (size_t *)&size); 281 bzero(vcp + size, len - size); 282 283 /* mark the filesystem thred safe */ 284 mp->mnt_vtable->vfc_threadsafe = TRUE; 285 286#ifdef UNION_DIAGNOSTIC 287 printf("union_mount: from %s, on %s\n", 288 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname); 289#endif 290 return (0); 291 292bad: 293 if (um) 294 _FREE(um, M_UFSMNT); 295 if (IS_VALID_CRED(cred)) 296 kauth_cred_unref(&cred); 297 if (upperrootvp) 298 vnode_put(upperrootvp); 299 if (lowerrootvp) 300 vnode_put(lowerrootvp); 301 return (error); 302} 303 304/* 305 * VFS start. Nothing needed here - the start routine 306 * on the underlying filesystem(s) will have been called 307 * when that filesystem was mounted. 308 */ 309static int 310union_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context) 311{ 312 313 return (0); 314} 315 316static int 317union_itercallback(__unused vnode_t vp, void *args) 318{ 319 int num = *(int *)args; 320 321 *(int *)args = num + 1; 322 return(VNODE_RETURNED); 323} 324 325 326 327/* 328 * Free reference to union layer 329 */ 330static int 331union_unmount(mount_t mp, int mntflags, vfs_context_t context) 332{ 333 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 334 struct vnode *um_rootvp; 335 int error; 336 int freeing; 337 int flags = 0; 338 339#ifdef UNION_DIAGNOSTIC 340 printf("union_unmount(mp = %x)\n", mp); 341#endif 342 343 if (mntflags & MNT_FORCE) 344 flags |= FORCECLOSE; 345 346 if ((error = union_root(mp, &um_rootvp, context))) 347 return (error); 348 349 /* 350 * Keep flushing vnodes from the mount list. 351 * This is needed because of the un_pvp held 352 * reference to the parent vnode. 353 * If more vnodes have been freed on a given pass, 354 * the try again. The loop will iterate at most 355 * (d) times, where (d) is the maximum tree depth 356 * in the filesystem. 357 */ 358 for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) { 359 int n = 0; 360 361 vnode_iterate(mp, VNODE_NOLOCK_INTERNAL, union_itercallback, &n); 362 363 /* if this is unchanged then stop */ 364 if (n == freeing) 365 break; 366 367 /* otherwise try once more time */ 368 freeing = n; 369 } 370 371 /* At this point the root vnode should have a single reference */ 372 if (vnode_isinuse(um_rootvp, 0)) { 373 vnode_put(um_rootvp); 374 return (EBUSY); 375 } 376 377#ifdef UNION_DIAGNOSTIC 378 vprint("union root", um_rootvp); 379#endif 380 /* 381 * Discard references to upper and lower target vnodes. 382 */ 383 if (um->um_lowervp) 384 vnode_put(um->um_lowervp); 385 vnode_put(um->um_uppervp); 386 if (IS_VALID_CRED(um->um_cred)) { 387 kauth_cred_unref(&um->um_cred); 388 } 389 /* 390 * Release reference on underlying root vnode 391 */ 392 vnode_put(um_rootvp); 393 /* 394 * And blow it away for future re-use 395 */ 396 vnode_reclaim(um_rootvp); 397 /* 398 * Finally, throw away the union_mount structure 399 */ 400 _FREE(mp->mnt_data, M_UFSMNT); /* XXX */ 401 mp->mnt_data = NULL; 402 return (0); 403} 404 405static int 406union_root(mount_t mp, vnode_t *vpp, __unused vfs_context_t context) 407{ 408 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 409 int error; 410 411 /* 412 * Return locked reference to root. 413 */ 414 vnode_get(um->um_uppervp); 415 if (um->um_lowervp) 416 vnode_get(um->um_lowervp); 417 418 union_lock(); 419 error = union_allocvp(vpp, mp, 420 (struct vnode *) 0, 421 (struct vnode *) 0, 422 (struct componentname *) 0, 423 um->um_uppervp, 424 um->um_lowervp, 425 1); 426 union_unlock(); 427 428 if (error) { 429 vnode_put(um->um_uppervp); 430 if (um->um_lowervp) 431 vnode_put(um->um_lowervp); 432 } 433 434 return (error); 435} 436 437static int 438union_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t context) 439{ 440 int error; 441 struct union_mount *um = MOUNTTOUNIONMOUNT(mp); 442 struct vfs_attr attr; 443 uint32_t lbsize = 0; 444 445#ifdef UNION_DIAGNOSTIC 446 printf("union_vfs_getattr(mp = %x, lvp = %x, uvp = %x)\n", mp, 447 um->um_lowervp, 448 um->um_uppervp); 449#endif 450 451 /* Get values from lower file system (if any) */ 452 if (um->um_lowervp) { 453 VFSATTR_INIT(&attr); 454 VFSATTR_WANTED(&attr, f_bsize); 455 VFSATTR_WANTED(&attr, f_blocks); 456 VFSATTR_WANTED(&attr, f_bused); 457 VFSATTR_WANTED(&attr, f_files); 458 error = vfs_getattr(um->um_lowervp->v_mount, &attr, context); 459 if (error) 460 return (error); 461 462 /* now copy across the "interesting" information and fake the rest */ 463 if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) 464 lbsize = attr.f_bsize; 465 else 466 lbsize = um->um_lowervp->v_mount->mnt_devblocksize; 467 fsap->f_blocks = VFSATTR_IS_SUPPORTED(&attr, f_blocks) ? attr.f_blocks : 0; 468 fsap->f_bused = VFSATTR_IS_SUPPORTED(&attr, f_bused) ? attr.f_bused : 0; 469 fsap->f_files = VFSATTR_IS_SUPPORTED(&attr, f_files) ? attr.f_files : 0; 470 } else { 471 fsap->f_blocks = 0; 472 fsap->f_bused = 0; 473 fsap->f_files = 0; 474 } 475 476 VFSATTR_INIT(&attr); 477 VFSATTR_WANTED(&attr, f_bsize); 478 VFSATTR_WANTED(&attr, f_blocks); 479 VFSATTR_WANTED(&attr, f_bfree); 480 VFSATTR_WANTED(&attr, f_bavail); 481 VFSATTR_WANTED(&attr, f_files); 482 VFSATTR_WANTED(&attr, f_ffree); 483 error = vfs_getattr(um->um_uppervp->v_mount, &attr, context); 484 if (error) 485 return (error); 486 487 if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) { 488 fsap->f_bsize = attr.f_bsize; 489 VFSATTR_SET_SUPPORTED(fsap, f_bsize); 490 } 491 if (VFSATTR_IS_SUPPORTED(&attr, f_iosize)) { 492 fsap->f_iosize = attr.f_iosize; 493 VFSATTR_SET_SUPPORTED(fsap, f_iosize); 494 } 495 496 /* 497 * if the lower and upper blocksizes differ, then frig the 498 * block counts so that the sizes reported by df make some 499 * kind of sense. none of this makes sense though. 500 */ 501 if (VFSATTR_IS_SUPPORTED(&attr, f_bsize)) 502 fsap->f_bsize = attr.f_bsize; 503 else 504 fsap->f_bsize = um->um_uppervp->v_mount->mnt_devblocksize; 505 VFSATTR_RETURN(fsap, f_bsize, attr.f_bsize); 506 if (fsap->f_bsize != lbsize) 507 fsap->f_blocks = fsap->f_blocks * lbsize / attr.f_bsize; 508 509 /* 510 * The "total" fields count total resources in all layers, 511 * the "free" fields count only those resources which are 512 * free in the upper layer (since only the upper layer 513 * is writeable). 514 */ 515 if (VFSATTR_IS_SUPPORTED(&attr, f_blocks)) 516 fsap->f_blocks += attr.f_blocks; 517 if (VFSATTR_IS_SUPPORTED(&attr, f_bfree)) 518 fsap->f_bfree = attr.f_bfree; 519 if (VFSATTR_IS_SUPPORTED(&attr, f_bavail)) 520 fsap->f_bavail = attr.f_bavail; 521 if (VFSATTR_IS_SUPPORTED(&attr, f_bused)) 522 fsap->f_bused += attr.f_bused; 523 if (VFSATTR_IS_SUPPORTED(&attr, f_files)) 524 fsap->f_files += attr.f_files; 525 if (VFSATTR_IS_SUPPORTED(&attr, f_ffree)) 526 fsap->f_ffree = attr.f_ffree; 527 528 VFSATTR_SET_SUPPORTED(fsap, f_bsize); 529 VFSATTR_SET_SUPPORTED(fsap, f_blocks); 530 VFSATTR_SET_SUPPORTED(fsap, f_bfree); 531 VFSATTR_SET_SUPPORTED(fsap, f_bavail); 532 VFSATTR_SET_SUPPORTED(fsap, f_bused); 533 VFSATTR_SET_SUPPORTED(fsap, f_files); 534 VFSATTR_SET_SUPPORTED(fsap, f_ffree); 535 536 return (0); 537} 538 539/* 540 * XXX - Assumes no data cached at union layer. 541 */ 542#define union_sync (int (*) (mount_t, int, vfs_context_t))nullop 543 544#define union_fhtovp (int (*) (mount_t, int, unsigned char *, vnode_t *, vfs_context_t))eopnotsupp 545#define union_sysctl (int (*) (int *, u_int, user_addr_t, size_t *, user_addr_t, size_t, vfs_context_t))eopnotsupp 546#define union_vget (int (*) (mount_t, ino64_t, vnode_t *, vfs_context_t))eopnotsupp 547#define union_vptofh (int (*) (vnode_t, int *, unsigned char *, vfs_context_t))eopnotsupp 548 549struct vfsops union_vfsops = { 550 union_mount, 551 union_start, 552 union_unmount, 553 union_root, 554 NULL, /* quotactl */ 555 union_vfs_getattr, 556 union_sync, 557 union_vget, 558 union_fhtovp, 559 union_vptofh, 560 union_init, 561 union_sysctl, 562 NULL, 563 {NULL} 564}; 565 566 567